From 999a913041a1434a324dd052bab1fa5e6f15bbf5 Mon Sep 17 00:00:00 2001 From: Ethan Rose Date: Fri, 22 Nov 2024 18:11:21 -0500 Subject: [PATCH 01/62] Add code to build and write the tree from the data scanners --- .../checksum/ContainerMerkleTree.java | 19 +++++++++------ .../keyvalue/KeyValueContainerCheck.java | 24 +++++++++++++------ .../container/keyvalue/KeyValueHandler.java | 5 ++-- .../BackgroundContainerDataScanner.java | 7 +++--- .../OnDemandContainerDataScanner.java | 23 ++++++++++-------- .../container/ozoneimpl/OzoneContainer.java | 2 +- .../ContainerMerkleTreeTestUtils.java | 5 ++-- .../checksum/TestContainerMerkleTree.java | 18 +++++++------- 8 files changed, 60 insertions(+), 43 deletions(-) diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/checksum/ContainerMerkleTree.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/checksum/ContainerMerkleTree.java index 7dba5b4309ce..56578dc9f1ef 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/checksum/ContainerMerkleTree.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/checksum/ContainerMerkleTree.java @@ -56,10 +56,12 @@ public ContainerMerkleTree() { * If the block entry already exists, the chunks will be added to the existing chunks for that block. * * @param blockID The ID of the block that these chunks belong to. + * @param healthy True if there were no errors detected with these chunks. False indicates that all the chunks + * being added had errors. * @param chunks A list of chunks to add to this block. The chunks will be sorted internally by their offset. */ - public void addChunks(long blockID, Collection chunks) { - id2Block.computeIfAbsent(blockID, BlockMerkleTree::new).addChunks(chunks); + public void addChunks(long blockID, boolean healthy, ContainerProtos.ChunkInfo... chunks) { + id2Block.computeIfAbsent(blockID, BlockMerkleTree::new).addChunks(healthy, chunks); } /** @@ -106,11 +108,13 @@ private static class BlockMerkleTree { * Adds the specified chunks to this block. The offset value of the chunk must be unique within the block, * otherwise it will overwrite the previous value at that offset. * + * @param healthy True if there were no errors detected with these chunks. False indicates that all the chunks + * being added had errors. * @param chunks A list of chunks to add to this block. */ - public void addChunks(Collection chunks) { + public void addChunks(boolean healthy, ContainerProtos.ChunkInfo... chunks) { for (ContainerProtos.ChunkInfo chunk: chunks) { - offset2Chunk.put(chunk.getOffset(), new ChunkMerkleTree(chunk)); + offset2Chunk.put(chunk.getOffset(), new ChunkMerkleTree(chunk, healthy)); } } @@ -151,11 +155,12 @@ public ContainerProtos.BlockMerkleTree toProto() { * This class computes one checksum for the whole chunk by aggregating these. 
*/ private static class ChunkMerkleTree { - private ContainerProtos.ChunkInfo chunk; - private boolean isHealthy = true; + private final ContainerProtos.ChunkInfo chunk; + private final boolean isHealthy; - ChunkMerkleTree(ContainerProtos.ChunkInfo chunk) { + ChunkMerkleTree(ContainerProtos.ChunkInfo chunk, boolean healthy) { this.chunk = chunk; + this.isHealthy = healthy; } /** diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/KeyValueContainerCheck.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/KeyValueContainerCheck.java index 080e4611dd94..5a9f1123adaa 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/KeyValueContainerCheck.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/KeyValueContainerCheck.java @@ -192,7 +192,6 @@ public DataScanResult fullCheck(DataTransferThrottler throttler, Canceler cancel LOG.debug("Running data checks for container {}", containerID); try { - // TODO HDDS-10374 this tree will get updated with the container's contents as it is scanned. ContainerMerkleTree dataTree = new ContainerMerkleTree(); List dataErrors = scanData(dataTree, throttler, canceler); if (containerIsDeleted()) { @@ -422,6 +421,11 @@ private static List verifyChecksum(BlockData block, List scanErrors = new ArrayList<>(); + // Information used to populate the merkle tree. + ContainerProtos.ChunkInfo.Builder observedChunkBuilder = chunk.toBuilder(); + ContainerProtos.ChecksumData.Builder observedChecksums = chunk.getChecksumData().toBuilder(); + boolean chunkHealthy = true; + ChecksumData checksumData = ChecksumData.getFromProtoBuf(chunk.getChecksumData()); int checksumCount = checksumData.getChecksums().size(); @@ -434,10 +438,7 @@ private static List verifyChecksum(BlockData block, if (layout == ContainerLayoutVersion.FILE_PER_BLOCK) { channel.position(chunk.getOffset()); } - // Only report one error per chunk. Reporting corruption at every "bytes per checksum" interval will lead to a - // large amount of errors when a full chunk is corrupted. - boolean chunkHealthy = true; - for (int i = 0; i < checksumCount && chunkHealthy; i++) { + for (int i = 0; i < checksumCount; i++) { // limit last read for FILE_PER_BLOCK, to avoid reading next chunk if (layout == ContainerLayoutVersion.FILE_PER_BLOCK && i == checksumCount - 1 && @@ -457,7 +458,11 @@ private static List verifyChecksum(BlockData block, ByteString expected = checksumData.getChecksums().get(i); ByteString actual = cal.computeChecksum(buffer) .getChecksums().get(0); - if (!expected.equals(actual)) { + observedChecksums.addChecksums(actual); + // Only report one error per chunk. Reporting corruption at every "bytes per checksum" interval will lead to a + // large amount of errors when a full chunk is corrupted. + // Continue scanning the chunk even after the first error so the full merkle tree can be built. 
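For reference, the reworked verification loop reduces to the following shape: every "bytes per checksum" segment is re-read and its recomputed checksum is recorded for the merkle tree, while at most one corruption error is reported per chunk. This is a distilled sketch rather than the verbatim hunk; readNextSegment and corruptChunkError are hypothetical stand-ins for the buffered read and the error construction shown in the surrounding code.

    boolean chunkHealthy = true;
    for (int i = 0; i < checksumCount; i++) {
      ByteBuffer buffer = readNextSegment();             // hypothetical: one "bytes per checksum" read
      ByteString expected = checksumData.getChecksums().get(i);
      ByteString actual = cal.computeChecksum(buffer).getChecksums().get(0);
      observedChecksums.addChecksums(actual);            // always recorded, healthy or not
      if (chunkHealthy && !expected.equals(actual)) {
        scanErrors.add(corruptChunkError(chunkFile, i)); // hypothetical: report only the first mismatch
        chunkHealthy = false;                            // stop reporting, keep scanning
      }
    }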
+ if (chunkHealthy && !expected.equals(actual)) { String message = String .format("Inconsistent read for chunk=%s" + " checksum item %d" + @@ -469,26 +474,31 @@ private static List verifyChecksum(BlockData block, StringUtils.bytes2Hex(expected.asReadOnlyByteBuffer()), StringUtils.bytes2Hex(actual.asReadOnlyByteBuffer()), block.getBlockID()); + chunkHealthy = false; scanErrors.add(new ContainerScanError(FailureType.CORRUPT_CHUNK, chunkFile, new OzoneChecksumException(message))); - chunkHealthy = false; } } // If all the checksums match, also check that the length stored in the metadata matches the number of bytes // seen on the disk. + observedChunkBuilder.setLen(bytesRead); if (chunkHealthy && bytesRead != chunk.getLen()) { String message = String .format("Inconsistent read for chunk=%s expected length=%d" + " actual length=%d for block %s", chunk.getChunkName(), chunk.getLen(), bytesRead, block.getBlockID()); + chunkHealthy = false; scanErrors.add(new ContainerScanError(FailureType.INCONSISTENT_CHUNK_LENGTH, chunkFile, new IOException(message))); } } catch (IOException ex) { + chunkHealthy = false; scanErrors.add(new ContainerScanError(FailureType.MISSING_CHUNK_FILE, chunkFile, ex)); } + observedChunkBuilder.setChecksumData(observedChecksums); + currentTree.addChunks(block.getBlockID().getLocalID(), chunkHealthy, observedChunkBuilder.build()); return scanErrors; } diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/KeyValueHandler.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/KeyValueHandler.java index d587748e6f80..4babdd144f8c 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/KeyValueHandler.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/KeyValueHandler.java @@ -560,8 +560,9 @@ private void createContainerMerkleTree(Container container) { getBlockIterator(containerData.getContainerID())) { while (blockIterator.hasNext()) { BlockData blockData = blockIterator.nextBlock(); - List chunkInfos = blockData.getChunks(); - merkleTree.addChunks(blockData.getLocalID(), chunkInfos); + // All chunks are assumed to be healthy until the scanner inspects them to determine otherwise. + merkleTree.addChunks(blockData.getLocalID(), true, + blockData.getChunks().toArray(new ContainerProtos.ChunkInfo[0])); } } checksumManager.writeContainerDataTree(containerData, merkleTree); diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ozoneimpl/BackgroundContainerDataScanner.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ozoneimpl/BackgroundContainerDataScanner.java index 1a4f0bf64608..bf4d88626d8c 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ozoneimpl/BackgroundContainerDataScanner.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ozoneimpl/BackgroundContainerDataScanner.java @@ -103,17 +103,16 @@ public void scanContainer(Container c) metrics.incNumUnHealthyContainers(); } } - checksumManager.writeContainerDataTree(containerData, result.getDataTree()); metrics.incNumContainersScanned(); + checksumManager.writeContainerDataTree(containerData, result.getDataTree()); } - // Even if the container was deleted, mark the scan as completed since we already logged it as starting. 
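Patch 01's KeyValueHandler change above seeds a complete tree from block metadata when a container is closed, before any scanner has inspected the data. A minimal sketch of that flow, assuming a KeyValueContainerData containerData, an open DBHandle db, and a ContainerChecksumTreeManager checksumManager as in the surrounding code:

    ContainerMerkleTree merkleTree = new ContainerMerkleTree();
    try (BlockIterator<BlockData> blockIterator =
             db.getStore().getBlockIterator(containerData.getContainerID())) {
      while (blockIterator.hasNext()) {
        BlockData blockData = blockIterator.nextBlock();
        // Every chunk is presumed healthy until a scanner determines otherwise.
        merkleTree.addChunks(blockData.getLocalID(), true,
            blockData.getChunks().toArray(new ContainerProtos.ChunkInfo[0]));
      }
    }
    checksumManager.writeContainerDataTree(containerData, merkleTree);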
Instant now = Instant.now(); - logScanCompleted(containerData, now); - if (!result.isDeleted()) { controller.updateDataScanTimestamp(containerId, now); } + // Even if the container was deleted, mark the scan as completed since we already logged it as starting. + logScanCompleted(containerData, now); } @Override diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ozoneimpl/OnDemandContainerDataScanner.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ozoneimpl/OnDemandContainerDataScanner.java index eb0f3eedb037..427104d9a730 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ozoneimpl/OnDemandContainerDataScanner.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ozoneimpl/OnDemandContainerDataScanner.java @@ -20,10 +20,10 @@ import com.google.common.annotations.VisibleForTesting; import org.apache.hadoop.hdfs.util.Canceler; import org.apache.hadoop.hdfs.util.DataTransferThrottler; +import org.apache.hadoop.ozone.container.checksum.ContainerChecksumTreeManager; import org.apache.hadoop.ozone.container.common.helpers.ContainerUtils; import org.apache.hadoop.ozone.container.common.impl.ContainerData; import org.apache.hadoop.ozone.container.common.interfaces.Container; -import org.apache.hadoop.ozone.container.common.interfaces.ScanResult; import org.apache.hadoop.ozone.container.common.volume.HddsVolume; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -56,9 +56,11 @@ public final class OnDemandContainerDataScanner { .KeySetView containerRescheduleCheckSet; private final OnDemandScannerMetrics metrics; private final long minScanGap; + private final ContainerChecksumTreeManager checksumManager; private OnDemandContainerDataScanner( - ContainerScannerConfiguration conf, ContainerController controller) { + ContainerScannerConfiguration conf, ContainerController controller, + ContainerChecksumTreeManager checksumManager) { containerController = controller; throttler = new DataTransferThrottler( conf.getOnDemandBandwidthPerVolume()); @@ -67,16 +69,17 @@ private OnDemandContainerDataScanner( scanExecutor = Executors.newSingleThreadExecutor(); containerRescheduleCheckSet = ConcurrentHashMap.newKeySet(); minScanGap = conf.getContainerScanMinGap(); + this.checksumManager = checksumManager; } - public static synchronized void init( - ContainerScannerConfiguration conf, ContainerController controller) { + public static synchronized void init(ContainerScannerConfiguration conf, ContainerController controller, + ContainerChecksumTreeManager checksumManager) { if (instance != null) { LOG.warn("Trying to initialize on demand scanner" + " a second time on a datanode."); return; } - instance = new OnDemandContainerDataScanner(conf, controller); + instance = new OnDemandContainerDataScanner(conf, controller, checksumManager); } private static boolean shouldScan(Container container) { @@ -133,7 +136,7 @@ private static void performOnDemandScan(Container container) { ContainerData containerData = container.getContainerData(); logScanStart(containerData); - ScanResult result = container.scanData(instance.throttler, instance.canceler); + DataScanResult result = container.scanData(instance.throttler, instance.canceler); // Metrics for skipped containers should not be updated. 
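Both scanners now follow the same post-scan sequence, established by the reordered hunks above and below: persist the scan result only for live containers, stamp the data-scan timestamp only for live containers, and always pair the completion log with the earlier start log. Distilled, with field names as they appear in either scanner:

    DataScanResult result = container.scanData(throttler, canceler);
    if (!result.isDeleted()) {
      if (!result.isHealthy()) {
        // mark the replica unhealthy and bump the unhealthy-container metric
      }
      metrics.incNumContainersScanned();
      checksumManager.writeContainerDataTree(containerData, result.getDataTree());
    }
    Instant now = Instant.now();
    if (!result.isDeleted()) {
      controller.updateDataScanTimestamp(containerId, now);
    }
    // Logged even for deleted containers, since the scan start was already logged.
    logScanCompleted(containerData, now);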
if (result.isDeleted()) { LOG.debug("Container [{}] has been deleted during the data scan.", containerId); @@ -146,17 +149,17 @@ private static void performOnDemandScan(Container container) { instance.metrics.incNumUnHealthyContainers(); } } - // TODO HDDS-10374 will need to update the merkle tree here as well. instance.metrics.incNumContainersScanned(); + instance.checksumManager.writeContainerDataTree(containerData, result.getDataTree()); } - // Even if the container was deleted, mark the scan as completed since we already logged it as starting. Instant now = Instant.now(); - logScanCompleted(containerData, now); - if (!result.isDeleted()) { instance.containerController.updateDataScanTimestamp(containerId, now); } + + // Even if the container was deleted, mark the scan as completed since we already logged it as starting. + logScanCompleted(containerData, now); } catch (IOException e) { LOG.warn("Unexpected exception while scanning container " + containerId, e); diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ozoneimpl/OzoneContainer.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ozoneimpl/OzoneContainer.java index 8ae838a7e536..66f3c5f39cab 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ozoneimpl/OzoneContainer.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ozoneimpl/OzoneContainer.java @@ -407,7 +407,7 @@ private void initOnDemandContainerScanner(ContainerScannerConfiguration c) { "so the on-demand container data scanner will not start."); return; } - OnDemandContainerDataScanner.init(c, controller); + OnDemandContainerDataScanner.init(c, controller, checksumTreeManager); } /** diff --git a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/checksum/ContainerMerkleTreeTestUtils.java b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/checksum/ContainerMerkleTreeTestUtils.java index db2a8c319b67..95aff8ab1e04 100644 --- a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/checksum/ContainerMerkleTreeTestUtils.java +++ b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/checksum/ContainerMerkleTreeTestUtils.java @@ -149,11 +149,10 @@ public static ContainerMerkleTree buildTestTree(ConfigurationSource conf, int nu ContainerMerkleTree tree = new ContainerMerkleTree(); byte byteValue = 1; for (int blockIndex = 1; blockIndex <= numBlocks; blockIndex++) { - List chunks = new ArrayList<>(); for (int chunkIndex = 0; chunkIndex < 4; chunkIndex++) { - chunks.add(buildChunk(conf, chunkIndex, ByteBuffer.wrap(new byte[]{byteValue++, byteValue++, byteValue++}))); + tree.addChunks(blockIndex, true, + buildChunk(conf, chunkIndex, ByteBuffer.wrap(new byte[]{byteValue++, byteValue++, byteValue++}))); } - tree.addChunks(blockIndex, chunks); } return tree; } diff --git a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/checksum/TestContainerMerkleTree.java b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/checksum/TestContainerMerkleTree.java index 2f370c15b9f8..b449a5003f9d 100644 --- a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/checksum/TestContainerMerkleTree.java +++ b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/checksum/TestContainerMerkleTree.java @@ -71,7 +71,7 @@ public void testBuildOneChunkTree() { 
// Use the ContainerMerkleTree to build the same tree. ContainerMerkleTree actualTree = new ContainerMerkleTree(); - actualTree.addChunks(blockID, Collections.singletonList(chunk)); + actualTree.addChunks(blockID, true, chunk); // Ensure the trees match. ContainerProtos.ContainerMerkleTree actualTreeProto = actualTree.toProto(); @@ -107,7 +107,7 @@ public void testBuildTreeWithMissingChunks() { // Use the ContainerMerkleTree to build the same tree. ContainerMerkleTree actualTree = new ContainerMerkleTree(); - actualTree.addChunks(blockID, Arrays.asList(chunk1, chunk3)); + actualTree.addChunks(blockID, true, chunk1, chunk3); // Ensure the trees match. ContainerProtos.ContainerMerkleTree actualTreeProto = actualTree.toProto(); @@ -138,8 +138,8 @@ public void testBuildTreeWithNonContiguousBlockIDs() { // Use the ContainerMerkleTree to build the same tree. // Add blocks and chunks out of order to test sorting. ContainerMerkleTree actualTree = new ContainerMerkleTree(); - actualTree.addChunks(blockID3, Arrays.asList(b3c2, b3c1)); - actualTree.addChunks(blockID1, Arrays.asList(b1c1, b1c2)); + actualTree.addChunks(blockID3, true, b3c2, b3c1); + actualTree.addChunks(blockID1, true, b1c1, b1c2); // Ensure the trees match. ContainerProtos.ContainerMerkleTree actualTreeProto = actualTree.toProto(); @@ -174,13 +174,13 @@ public void testAppendToBlocksWhileBuilding() throws Exception { // Test building by adding chunks to the blocks individually and out of order. ContainerMerkleTree actualTree = new ContainerMerkleTree(); // Add all of block 2 first. - actualTree.addChunks(blockID2, Arrays.asList(b2c1, b2c2)); + actualTree.addChunks(blockID2, true, b2c1, b2c2); // Then add block 1 in multiple steps wth chunks out of order. - actualTree.addChunks(blockID1, Collections.singletonList(b1c2)); - actualTree.addChunks(blockID1, Arrays.asList(b1c3, b1c1)); + actualTree.addChunks(blockID1, true, b1c2); + actualTree.addChunks(blockID1, true, b1c3, b1c1); // Add a duplicate chunk to block 3. It should overwrite the existing one. - actualTree.addChunks(blockID3, Arrays.asList(b3c1, b3c2)); - actualTree.addChunks(blockID3, Collections.singletonList(b3c2)); + actualTree.addChunks(blockID3, true, b3c1, b3c2); + actualTree.addChunks(blockID3, true, b3c2); // Ensure the trees match. ContainerProtos.ContainerMerkleTree actualTreeProto = actualTree.toProto(); From b0d1ba9b111d35c3612f2882d8c04d01bc630efa Mon Sep 17 00:00:00 2001 From: Ethan Rose Date: Mon, 25 Nov 2024 15:33:12 -0500 Subject: [PATCH 02/62] Update todo in acceptance test --- .../dist/src/main/smoketest/admincli/container.robot | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/hadoop-ozone/dist/src/main/smoketest/admincli/container.robot b/hadoop-ozone/dist/src/main/smoketest/admincli/container.robot index fae08991781f..ec77b1a8566b 100644 --- a/hadoop-ozone/dist/src/main/smoketest/admincli/container.robot +++ b/hadoop-ozone/dist/src/main/smoketest/admincli/container.robot @@ -121,8 +121,7 @@ Cannot reconcile open container # At this point we should have an open Ratis Three container. ${container} = Execute ozone admin container list --state OPEN | jq -r 'select(.replicationConfig.replicationFactor == "THREE") | .containerID' | head -n1 Execute and check rc ozone admin container reconcile "${container}" 255 - # The container should not yet have any replica checksums. - # TODO When the scanner is computing checksums automatically, this test may need to be updated. 
+ # The container should not yet have any replica checksums since it is still open. ${data_checksum} = Execute ozone admin container info "${container}" --json | jq -r '.replicas[].dataChecksum' | head -n1 # 0 is the hex value of an empty checksum. Should Be Equal As Strings 0 ${data_checksum} @@ -137,9 +136,8 @@ Close container Wait until keyword succeeds 1min 10sec Container is closed ${container} Reconcile closed container - # Check that info does not show replica checksums, since manual reconciliation has not yet been triggered. - # TODO When the scanner is computing checksums automatically, this test may need to be updated. ${container} = Execute ozone admin container list --state CLOSED | jq -r 'select(.replicationConfig.replicationFactor == "THREE") | .containerID' | head -1 + # TODO wait for container close to populate the checksum. ${data_checksum} = Execute ozone admin container info "${container}" --json | jq -r '.replicas[].dataChecksum' | head -n1 # 0 is the hex value of an empty checksum. Should Be Equal As Strings 0 ${data_checksum} From 382bce29afb32ddcdf8b938e8d2ae5bd49bca22e Mon Sep 17 00:00:00 2001 From: Ethan Rose Date: Mon, 25 Nov 2024 15:34:10 -0500 Subject: [PATCH 03/62] Add unit tests for tree generation by scanners based on container state --- .../TestBackgroundContainerDataScanner.java | 26 +++++++++-- .../TestOnDemandContainerDataScanner.java | 43 +++++++++++++++---- 2 files changed, 57 insertions(+), 12 deletions(-) diff --git a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/ozoneimpl/TestBackgroundContainerDataScanner.java b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/ozoneimpl/TestBackgroundContainerDataScanner.java index 681c5efc1af7..576a0477be4d 100644 --- a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/ozoneimpl/TestBackgroundContainerDataScanner.java +++ b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/ozoneimpl/TestBackgroundContainerDataScanner.java @@ -24,6 +24,7 @@ import org.apache.hadoop.hdfs.util.DataTransferThrottler; import org.apache.hadoop.metrics2.lib.DefaultMetricsSystem; import org.apache.hadoop.ozone.container.checksum.ContainerChecksumTreeManager; +import org.apache.hadoop.ozone.container.common.impl.ContainerData; import org.apache.hadoop.ozone.container.common.interfaces.Container; import org.apache.ozone.test.GenericTestUtils; import org.junit.jupiter.api.Test; @@ -32,6 +33,7 @@ import org.mockito.quality.Strictness; import java.time.Duration; +import java.util.Arrays; import java.util.Optional; import java.util.concurrent.CountDownLatch; import java.util.concurrent.TimeUnit; @@ -48,7 +50,9 @@ import static org.mockito.Mockito.eq; import static org.mockito.Mockito.atLeastOnce; import static org.mockito.Mockito.atMostOnce; +import static org.mockito.Mockito.mock; import static org.mockito.Mockito.never; +import static org.mockito.Mockito.times; import static org.mockito.Mockito.verify; import static org.mockito.Mockito.when; @@ -60,12 +64,13 @@ public class TestBackgroundContainerDataScanner extends TestContainerScannersAbstract { private BackgroundContainerDataScanner scanner; + private ContainerChecksumTreeManager mockChecksumManager; @BeforeEach public void setup() { super.setup(); - scanner = new BackgroundContainerDataScanner(conf, controller, vol, - new ContainerChecksumTreeManager(new OzoneConfiguration())); + mockChecksumManager = mock(ContainerChecksumTreeManager.class); + scanner = new 
BackgroundContainerDataScanner(conf, controller, vol, mockChecksumManager); } @Test @@ -224,7 +229,6 @@ public void testWithVolumeFailure() throws Exception { verify(openCorruptMetadata, never()).scanData(any(), any()); } - @Test @Override public void testShutdownDuringScan() throws Exception { @@ -244,6 +248,22 @@ public void testShutdownDuringScan() throws Exception { scanner.shutdown(); // The container should remain healthy. verifyContainerMarkedUnhealthy(healthy, never()); + } + + @Test + public void testMerkleTreeWritten() throws Exception { + scanner.runIteration(); + + // Merkle trees should not be written for open or deleted containers + for (Container container : Arrays.asList(openContainer, openCorruptMetadata, deletedContainer)) { + verify(mockChecksumManager, times(0)) + .writeContainerDataTree(eq(container.getContainerData()), any()); + } + // Merkle trees should be written for all other containers. + for (Container container : Arrays.asList(healthy, corruptData)) { + verify(mockChecksumManager, times(1)) + .writeContainerDataTree(eq(container.getContainerData()), any()); + } } } diff --git a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/ozoneimpl/TestOnDemandContainerDataScanner.java b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/ozoneimpl/TestOnDemandContainerDataScanner.java index 6f4b2efc137c..502465c372d3 100644 --- a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/ozoneimpl/TestOnDemandContainerDataScanner.java +++ b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/ozoneimpl/TestOnDemandContainerDataScanner.java @@ -23,6 +23,8 @@ import org.apache.hadoop.hdfs.util.Canceler; import org.apache.hadoop.hdfs.util.DataTransferThrottler; import org.apache.hadoop.metrics2.lib.DefaultMetricsSystem; +import org.apache.hadoop.ozone.container.checksum.ContainerChecksumTreeManager; +import org.apache.hadoop.ozone.container.common.impl.ContainerData; import org.apache.hadoop.ozone.container.common.interfaces.Container; import org.apache.hadoop.ozone.container.common.interfaces.ScanResult; import org.junit.jupiter.api.AfterEach; @@ -34,6 +36,7 @@ import java.time.Duration; import java.util.ArrayList; +import java.util.Arrays; import java.util.Optional; import java.util.concurrent.CountDownLatch; import java.util.concurrent.ExecutionException; @@ -52,7 +55,9 @@ import static org.mockito.Mockito.eq; import static org.mockito.Mockito.atLeastOnce; import static org.mockito.Mockito.atMostOnce; +import static org.mockito.Mockito.mock; import static org.mockito.Mockito.never; +import static org.mockito.Mockito.times; import static org.mockito.Mockito.verify; import static org.mockito.Mockito.verifyNoInteractions; import static org.mockito.Mockito.when; @@ -64,9 +69,12 @@ public class TestOnDemandContainerDataScanner extends TestContainerScannersAbstract { + private ContainerChecksumTreeManager mockChecksumManager; + @BeforeEach public void setup() { super.setup(); + mockChecksumManager = mock(ContainerChecksumTreeManager .class); } @Test @@ -104,7 +112,7 @@ public void tearDown() { @Test public void testScanTimestampUpdated() throws Exception { - OnDemandContainerDataScanner.init(conf, controller); + OnDemandContainerDataScanner.init(conf, controller, mockChecksumManager); Optional> scanFuture = OnDemandContainerDataScanner.scanContainer(healthy); assertTrue(scanFuture.isPresent()); @@ -125,8 +133,8 @@ public void testScanTimestampUpdated() throws Exception { @Test 
public void testContainerScannerMultipleInitsAndShutdowns() throws Exception { - OnDemandContainerDataScanner.init(conf, controller); - OnDemandContainerDataScanner.init(conf, controller); + OnDemandContainerDataScanner.init(conf, controller, mockChecksumManager); + OnDemandContainerDataScanner.init(conf, controller, mockChecksumManager); OnDemandContainerDataScanner.shutdown(); OnDemandContainerDataScanner.shutdown(); //There shouldn't be an interaction after shutdown: @@ -136,7 +144,7 @@ public void testContainerScannerMultipleInitsAndShutdowns() throws Exception { @Test public void testSameContainerQueuedMultipleTimes() throws Exception { - OnDemandContainerDataScanner.init(conf, controller); + OnDemandContainerDataScanner.init(conf, controller, mockChecksumManager); //Given a container that has not finished scanning CountDownLatch latch = new CountDownLatch(1); when(corruptData.scanData( @@ -164,7 +172,7 @@ public void testSameContainerQueuedMultipleTimes() throws Exception { @Test @Override public void testScannerMetrics() throws Exception { - OnDemandContainerDataScanner.init(conf, controller); + OnDemandContainerDataScanner.init(conf, controller, mockChecksumManager); ArrayList>> resultFutureList = Lists.newArrayList(); resultFutureList.add(OnDemandContainerDataScanner.scanContainer( corruptData)); @@ -186,7 +194,7 @@ public void testScannerMetrics() throws Exception { @Test @Override public void testScannerMetricsUnregisters() { - OnDemandContainerDataScanner.init(conf, controller); + OnDemandContainerDataScanner.init(conf, controller, mockChecksumManager); String metricsName = OnDemandContainerDataScanner.getMetrics().getName(); assertNotNull(DefaultMetricsSystem.instance().getSource(metricsName)); OnDemandContainerDataScanner.shutdown(); @@ -226,7 +234,7 @@ public void testUnhealthyContainersDetected() throws Exception { public void testWithVolumeFailure() throws Exception { when(vol.isFailed()).thenReturn(true); - OnDemandContainerDataScanner.init(conf, controller); + OnDemandContainerDataScanner.init(conf, controller, mockChecksumManager); OnDemandScannerMetrics metrics = OnDemandContainerDataScanner.getMetrics(); scanContainer(healthy); @@ -251,7 +259,7 @@ public void testShutdownDuringScan() throws Exception { }); // Start the blocking scan. - OnDemandContainerDataScanner.init(conf, controller); + OnDemandContainerDataScanner.init(conf, controller, mockChecksumManager); OnDemandContainerDataScanner.scanContainer(healthy); // Shut down the on demand scanner. This will interrupt the blocked scan // on the healthy container. @@ -298,8 +306,25 @@ public void testUnhealthyContainerRescanned() throws Exception { assertEquals(1, metrics.getNumUnHealthyContainers()); } + @Test + public void testMerkleTreeWritten() throws Exception { + // Merkle trees should not be written for open or deleted containers + for (Container container : Arrays.asList(openContainer, openCorruptMetadata, deletedContainer)) { + scanContainer(container); + verify(mockChecksumManager, times(0)) + .writeContainerDataTree(eq(container.getContainerData()), any()); + } + + // Merkle trees should be written for all other containers. 
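These tests replace the real ContainerChecksumTreeManager with a Mockito mock so that tree persistence can be asserted without touching disk. The pattern, reduced to its essentials (healthy and openContainer come from the shared test fixture):

    ContainerChecksumTreeManager mockChecksumManager = mock(ContainerChecksumTreeManager.class);
    OnDemandContainerDataScanner.init(conf, controller, mockChecksumManager);
    // After a scan completes, exactly one tree write is expected per scanned container...
    verify(mockChecksumManager, times(1))
        .writeContainerDataTree(eq(healthy.getContainerData()), any());
    // ...and none for containers the scanner skips, such as open or deleted ones.
    verify(mockChecksumManager, never())
        .writeContainerDataTree(eq(openContainer.getContainerData()), any());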
+ for (Container container : Arrays.asList(healthy, corruptData)) { + scanContainer(container); + verify(mockChecksumManager, times(1)) + .writeContainerDataTree(eq(container.getContainerData()), any()); + } + } + private void scanContainer(Container container) throws Exception { - OnDemandContainerDataScanner.init(conf, controller); + OnDemandContainerDataScanner.init(conf, controller, mockChecksumManager); Optional> scanFuture = OnDemandContainerDataScanner.scanContainer(container); if (scanFuture.isPresent()) { From 28b18896665a3193f8d32cac9c7774d562fd5481 Mon Sep 17 00:00:00 2001 From: Ethan Rose Date: Tue, 26 Nov 2024 11:46:20 -0500 Subject: [PATCH 04/62] Add initial (failing) unit test for KeyValueContaienrCheck Missing chunks are incorrectly reported as corrupted --- .../keyvalue/KeyValueContainerCheck.java | 10 +++-- .../keyvalue/TestKeyValueContainerCheck.java | 45 ++++++++++++++++--- 2 files changed, 46 insertions(+), 9 deletions(-) diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/KeyValueContainerCheck.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/KeyValueContainerCheck.java index 5a9f1123adaa..6e8225eecf96 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/KeyValueContainerCheck.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/KeyValueContainerCheck.java @@ -421,9 +421,11 @@ private static List verifyChecksum(BlockData block, List scanErrors = new ArrayList<>(); - // Information used to populate the merkle tree. + // Information used to populate the merkle tree. Chunk metadata will be the same, but we must fill in the + // checksums with what we actually observe. ContainerProtos.ChunkInfo.Builder observedChunkBuilder = chunk.toBuilder(); - ContainerProtos.ChecksumData.Builder observedChecksums = chunk.getChecksumData().toBuilder(); + ContainerProtos.ChecksumData.Builder observedChecksumData = chunk.getChecksumData().toBuilder(); + observedChecksumData.clearChecksums(); boolean chunkHealthy = true; ChecksumData checksumData = @@ -458,7 +460,7 @@ private static List verifyChecksum(BlockData block, ByteString expected = checksumData.getChecksums().get(i); ByteString actual = cal.computeChecksum(buffer) .getChecksums().get(0); - observedChecksums.addChecksums(actual); + observedChecksumData.addChecksums(actual); // Only report one error per chunk. Reporting corruption at every "bytes per checksum" interval will lead to a // large amount of errors when a full chunk is corrupted. // Continue scanning the chunk even after the first error so the full merkle tree can be built. 
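The clearChecksums() call added in this patch is the crux of the fix: the observed builder is seeded from the stored chunk metadata so the checksum type and bytesPerChecksum carry over, but without clearing it the stored expected checksums would remain in place and the recomputed ones would be appended after them, producing a tree that masks corruption. A reduced view of the intended builder usage, with all names taken from the hunk:

    ContainerProtos.ChunkInfo.Builder observedChunkBuilder = chunk.toBuilder();
    ContainerProtos.ChecksumData.Builder observedChecksumData =
        chunk.getChecksumData().toBuilder();
    observedChecksumData.clearChecksums();   // keep type and bytesPerChecksum, drop stored sums
    // Inside the scan loop, each recomputed segment checksum is appended:
    observedChecksumData.addChecksums(actual);
    // After the loop, the observed chunk is added to the tree:
    observedChunkBuilder.setChecksumData(observedChecksumData);
    currentTree.addChunks(block.getBlockID().getLocalID(), chunkHealthy,
        observedChunkBuilder.build());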
@@ -497,7 +499,7 @@ private static List verifyChecksum(BlockData block, scanErrors.add(new ContainerScanError(FailureType.MISSING_CHUNK_FILE, chunkFile, ex)); } - observedChunkBuilder.setChecksumData(observedChecksums); + observedChunkBuilder.setChecksumData(observedChecksumData); currentTree.addChunks(block.getBlockID().getLocalID(), chunkHealthy, observedChunkBuilder.build()); return scanErrors; } diff --git a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/keyvalue/TestKeyValueContainerCheck.java b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/keyvalue/TestKeyValueContainerCheck.java index b6da73b7ea62..c51994fe30c9 100644 --- a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/keyvalue/TestKeyValueContainerCheck.java +++ b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/keyvalue/TestKeyValueContainerCheck.java @@ -24,6 +24,9 @@ import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos; import org.apache.hadoop.hdfs.util.Canceler; import org.apache.hadoop.hdfs.util.DataTransferThrottler; +import org.apache.hadoop.ozone.container.checksum.ContainerChecksumTreeManager; +import org.apache.hadoop.ozone.container.checksum.ContainerDiffReport; +import org.apache.hadoop.ozone.container.checksum.ContainerMerkleTree; import org.apache.hadoop.ozone.container.common.helpers.BlockData; import org.apache.hadoop.ozone.container.common.impl.ContainerLayoutVersion; import org.apache.hadoop.ozone.container.common.interfaces.BlockIterator; @@ -46,6 +49,7 @@ import java.util.ArrayList; import java.util.Arrays; import java.util.List; +import java.util.Map; import java.util.stream.Collectors; import java.util.stream.Stream; @@ -156,13 +160,20 @@ public void testAllDataErrorsCollected(ContainerTestVersionInfo versionInfo) thr DataScanResult result = kvCheck.fullCheck(throttler, null); assertTrue(result.isHealthy()); + ContainerProtos.ContainerChecksumInfo healthyChecksumInfo = ContainerProtos.ContainerChecksumInfo.newBuilder() + .setContainerID(containerID) + .setContainerMerkleTree(result.getDataTree().toProto()) + .build(); // Put different types of block failures in the middle of the container. - CORRUPT_BLOCK.applyTo(container, 1); - MISSING_BLOCK.applyTo(container, 2); - TRUNCATED_BLOCK.applyTo(container, 4); + long corruptBlockID = 1; + long missingBlockID = 2; + long truncatedBlockID = 4; + CORRUPT_BLOCK.applyTo(container, corruptBlockID); + MISSING_BLOCK.applyTo(container, missingBlockID); + TRUNCATED_BLOCK.applyTo(container, truncatedBlockID); List expectedErrors = new ArrayList<>(); - // Corruption is applied to two different chunks within the block. + // Corruption is applied to two different chunks within the block, so the error will be raised twice. expectedErrors.add(CORRUPT_BLOCK.getExpectedResult()); expectedErrors.add(CORRUPT_BLOCK.getExpectedResult()); expectedErrors.add(MISSING_BLOCK.getExpectedResult()); @@ -177,12 +188,36 @@ public void testAllDataErrorsCollected(ContainerTestVersionInfo versionInfo) thr assertFalse(result.isHealthy()); // Check that all data errors were detected in order. - // TODO HDDS-10374 Use merkle tree to check the actual content affected by the errors. 
assertEquals(expectedErrors.size(), result.getErrors().size()); List actualErrors = result.getErrors().stream() .map(ContainerScanError::getFailureType) .collect(Collectors.toList()); assertEquals(expectedErrors, actualErrors); + + // Write the new tree into the container, as the scanner would do. + ContainerChecksumTreeManager checksumManager = new ContainerChecksumTreeManager(conf); + checksumManager.writeContainerDataTree(container.getContainerData(), result.getDataTree()); + // This will read the corrupted tree from the disk, which represents the current state of the container, and + // compare it against the original healthy tree. The diff we get back should match the failures we injected. + ContainerDiffReport diffReport = checksumManager.diff(container.getContainerData(), healthyChecksumInfo); + + // Check that the new tree identified all the expected errors by checking the diff. + Map> corruptChunks = diffReport.getCorruptChunks(); + // One block had corrupted chunks. + assertEquals(1, corruptChunks.size()); + List corruptChunksInBlock = corruptChunks.get(corruptBlockID); + assertEquals(2, corruptChunksInBlock.size()); + + // Check missing block was correctly identified in the tree diff. + List missingBlocks = diffReport.getMissingBlocks(); + assertEquals(1, missingBlocks.size()); + assertEquals(missingBlockID, missingBlocks.get(0).getBlockID()); + + // One block was truncated which resulted in all of its chunks being reported as missing. + Map> missingChunks = diffReport.getMissingChunks(); + assertEquals(1, missingChunks.size()); + List missingChunksInBlock = missingChunks.get(truncatedBlockID); + assertEquals(CHUNKS_PER_BLOCK, missingChunksInBlock.size()); } /** From dc182e85ce7bdf90186ca635ecd1f20ce872cd60 Mon Sep 17 00:00:00 2001 From: Ethan Rose Date: Tue, 26 Nov 2024 12:45:20 -0500 Subject: [PATCH 05/62] Update container data checksum when building the tree --- .../checksum/ContainerChecksumTreeManager.java | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/checksum/ContainerChecksumTreeManager.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/checksum/ContainerChecksumTreeManager.java index 2c10313c2fc9..9a739668eaa5 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/checksum/ContainerChecksumTreeManager.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/checksum/ContainerChecksumTreeManager.java @@ -82,6 +82,8 @@ public void stop() { * The data merkle tree within the file is replaced with the {@code tree} parameter, but all other content of the * file remains unchanged. * Concurrent writes to the same file are coordinated internally. + * This method also updates the container's data checksum in the {@code data} parameter, which will be seen by SCM + * on container reports. 
*/ public void writeContainerDataTree(ContainerData data, ContainerMerkleTree tree) throws IOException { long containerID = data.getContainerID(); @@ -99,11 +101,15 @@ public void writeContainerDataTree(ContainerData data, ContainerMerkleTree tree) checksumInfoBuilder = ContainerProtos.ContainerChecksumInfo.newBuilder(); } + ContainerProtos.ContainerMerkleTree treeProto = captureLatencyNs(metrics.getCreateMerkleTreeLatencyNS(), + tree::toProto); + long dataChecksum = treeProto.getDataChecksum(); + data.setDataChecksum(dataChecksum); checksumInfoBuilder .setContainerID(containerID) - .setContainerMerkleTree(captureLatencyNs(metrics.getCreateMerkleTreeLatencyNS(), tree::toProto)); + .setContainerMerkleTree(treeProto); write(data, checksumInfoBuilder.build()); - LOG.debug("Data merkle tree for container {} updated", containerID); + LOG.debug("Data merkle tree for container {} updated with container checksum {}", containerID, dataChecksum); } finally { writeLock.unlock(); } From a3401a93835e59193feedf0f3528dd9cd398c1a5 Mon Sep 17 00:00:00 2001 From: Ethan Rose Date: Mon, 6 Jan 2025 19:41:29 -0500 Subject: [PATCH 06/62] Fix handling of fully truncated block of 0 size --- .../checksum/ContainerMerkleTree.java | 10 +++++ .../keyvalue/KeyValueContainerCheck.java | 45 +++++++++++++------ .../ozoneimpl/ContainerScanError.java | 3 +- .../keyvalue/TestContainerCorruptions.java | 4 +- .../keyvalue/TestKeyValueContainerCheck.java | 15 ++++--- 5 files changed, 55 insertions(+), 22 deletions(-) diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/checksum/ContainerMerkleTree.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/checksum/ContainerMerkleTree.java index 56578dc9f1ef..651f6fbb0a74 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/checksum/ContainerMerkleTree.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/checksum/ContainerMerkleTree.java @@ -64,6 +64,16 @@ public void addChunks(long blockID, boolean healthy, ContainerProtos.ChunkInfo.. id2Block.computeIfAbsent(blockID, BlockMerkleTree::new).addChunks(healthy, chunks); } + /** + * Adds an empty block to the tree. This method is not a pre-requisite to {@code addChunks}. + * If the block entry already exists, it will not be modified. + * + * @param blockID The ID of the empty block to add to the tree + */ + public void addBlock(long blockID) { + addChunks(blockID, true); + } + /** * Uses chunk hashes to compute all remaining hashes in the tree, and returns it as a protobuf object. No checksum * computation for the tree happens outside of this method. diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/KeyValueContainerCheck.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/KeyValueContainerCheck.java index 6e8225eecf96..3b4e507e1984 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/KeyValueContainerCheck.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/KeyValueContainerCheck.java @@ -378,12 +378,13 @@ private List scanBlock(DBHandle db, File dbFile, BlockData b // So, we need to make sure, chunk length > 0, before declaring // the missing chunk file. 
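With patch 05 applied, writeContainerDataTree above becomes the single point where a container's aggregate checksum is made visible to SCM: serializing the tree computes every hash, and the resulting root hash is stored on the ContainerData so the next container report carries it. The essential flow, names as in the hunk:

    ContainerProtos.ContainerMerkleTree treeProto = tree.toProto(); // computes all hashes in the tree
    long dataChecksum = treeProto.getDataChecksum();                // root hash of the merkle tree
    data.setDataChecksum(dataChecksum);                             // surfaced in container reports to SCM
    checksumInfoBuilder
        .setContainerID(containerID)
        .setContainerMerkleTree(treeProto);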
if (!block.getChunks().isEmpty() && block.getChunks().get(0).getLen() > 0) { - ContainerScanError error = new ContainerScanError(FailureType.MISSING_CHUNK_FILE, + ContainerScanError error = new ContainerScanError(FailureType.MISSING_DATA_FILE, new File(containerDataFromDisk.getChunksPath()), new IOException("Missing chunk file " + chunkFile.getAbsolutePath())); blockErrors.add(error); } } else if (chunk.getChecksumData().getType() != ContainerProtos.ChecksumType.NONE) { + currentTree.addBlock(block.getBlockID().getLocalID()); int bytesPerChecksum = chunk.getChecksumData().getBytesPerChecksum(); ByteBuffer buffer = BUFFER_POOL.getBuffer(bytesPerChecksum); // Keep scanning the block even if there are errors with individual chunks. @@ -427,6 +428,7 @@ private static List verifyChecksum(BlockData block, ContainerProtos.ChecksumData.Builder observedChecksumData = chunk.getChecksumData().toBuilder(); observedChecksumData.clearChecksums(); boolean chunkHealthy = true; + boolean chunkMissing = false; ChecksumData checksumData = ChecksumData.getFromProtoBuf(chunk.getChecksumData()); @@ -481,26 +483,41 @@ private static List verifyChecksum(BlockData block, new OzoneChecksumException(message))); } } - // If all the checksums match, also check that the length stored in the metadata matches the number of bytes - // seen on the disk. + observedChunkBuilder.setLen(bytesRead); + // If we haven't seen any errors after scanning the whole chunk, verify that the length stored in the metadata + // matches the number of bytes seen on the disk. if (chunkHealthy && bytesRead != chunk.getLen()) { - String message = String - .format("Inconsistent read for chunk=%s expected length=%d" - + " actual length=%d for block %s", - chunk.getChunkName(), - chunk.getLen(), bytesRead, block.getBlockID()); - chunkHealthy = false; - scanErrors.add(new ContainerScanError(FailureType.INCONSISTENT_CHUNK_LENGTH, chunkFile, - new IOException(message))); + if (bytesRead == 0) { + // If we could not find any data for the chunk, report it as missing. + chunkMissing = true; + chunkHealthy = false; + String message = String.format("Missing chunk=%s with expected length=%d for block %s", + chunk.getChunkName(), chunk.getLen(), block.getBlockID()); + scanErrors.add(new ContainerScanError(FailureType.MISSING_CHUNK, chunkFile, new IOException(message))); + } else { + // We found data for the chunk, but it was shorter than expected. + String message = String + .format("Inconsistent read for chunk=%s expected length=%d" + + " actual length=%d for block %s", + chunk.getChunkName(), + chunk.getLen(), bytesRead, block.getBlockID()); + chunkHealthy = false; + scanErrors.add(new ContainerScanError(FailureType.INCONSISTENT_CHUNK_LENGTH, chunkFile, + new IOException(message))); + } } } catch (IOException ex) { + // An unknown error occurred trying to access the chunk. Report it as corrupted. chunkHealthy = false; - scanErrors.add(new ContainerScanError(FailureType.MISSING_CHUNK_FILE, chunkFile, ex)); + scanErrors.add(new ContainerScanError(FailureType.CORRUPT_CHUNK, chunkFile, ex)); } - observedChunkBuilder.setChecksumData(observedChecksumData); - currentTree.addChunks(block.getBlockID().getLocalID(), chunkHealthy, observedChunkBuilder.build()); + // Missing chunks should not be added to the merkle tree. 
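Patch 06 separates two cases that were previously conflated: a chunk with zero bytes on disk is reported as MISSING_CHUNK and kept out of the tree, while a short read is reported as INCONSISTENT_CHUNK_LENGTH and still recorded. It also adds addBlock so a fully truncated block of zero size still appears in the tree. The addBlock semantics, as pinned down by the patch 07 tests further below (blockID is a long, chunk a ContainerProtos.ChunkInfo):

    ContainerMerkleTree tree = new ContainerMerkleTree();
    tree.addBlock(blockID);               // equivalent to addChunks(blockID, true) with no chunks
    tree.addChunks(blockID, true, chunk);
    tree.addBlock(blockID);               // idempotent: previously added chunks are not cleared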
+ if (!chunkMissing) { + observedChunkBuilder.setChecksumData(observedChecksumData); + currentTree.addChunks(block.getBlockID().getLocalID(), chunkHealthy, observedChunkBuilder.build()); + } return scanErrors; } diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ozoneimpl/ContainerScanError.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ozoneimpl/ContainerScanError.java index 8fbd8f6887e1..2c4648e22fdf 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ozoneimpl/ContainerScanError.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ozoneimpl/ContainerScanError.java @@ -32,9 +32,10 @@ public enum FailureType { MISSING_METADATA_DIR, MISSING_CONTAINER_FILE, MISSING_CHUNKS_DIR, - MISSING_CHUNK_FILE, + MISSING_DATA_FILE, CORRUPT_CONTAINER_FILE, CORRUPT_CHUNK, + MISSING_CHUNK, INCONSISTENT_CHUNK_LENGTH, INACCESSIBLE_DB, WRITE_FAILURE, diff --git a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/keyvalue/TestContainerCorruptions.java b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/keyvalue/TestContainerCorruptions.java index 470197e1f824..c61f8906b921 100644 --- a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/keyvalue/TestContainerCorruptions.java +++ b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/keyvalue/TestContainerCorruptions.java @@ -93,7 +93,7 @@ public enum TestContainerCorruptions { MISSING_BLOCK((container, blockID) -> { File blockFile = getBlock(container, blockID); assertTrue(blockFile.delete()); - }, ContainerScanError.FailureType.MISSING_CHUNK_FILE), + }, ContainerScanError.FailureType.MISSING_DATA_FILE), CORRUPT_CONTAINER_FILE((container, blockID) -> { File containerFile = container.getContainerFile(); @@ -113,7 +113,7 @@ public enum TestContainerCorruptions { TRUNCATED_BLOCK((container, blockID) -> { File blockFile = getBlock(container, blockID); truncateFile(blockFile); - }, ContainerScanError.FailureType.INCONSISTENT_CHUNK_LENGTH); + }, ContainerScanError.FailureType.MISSING_CHUNK); private final BiConsumer, Long> corruption; private final ContainerScanError.FailureType expectedResult; diff --git a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/keyvalue/TestKeyValueContainerCheck.java b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/keyvalue/TestKeyValueContainerCheck.java index c51994fe30c9..e4d0206bd5d3 100644 --- a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/keyvalue/TestKeyValueContainerCheck.java +++ b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/keyvalue/TestKeyValueContainerCheck.java @@ -160,6 +160,8 @@ public void testAllDataErrorsCollected(ContainerTestVersionInfo versionInfo) thr DataScanResult result = kvCheck.fullCheck(throttler, null); assertTrue(result.isHealthy()); + // The scanner would write the checksum file to disk. `KeyValueContainerCheck` does not, so we will create the + // result here. ContainerProtos.ContainerChecksumInfo healthyChecksumInfo = ContainerProtos.ContainerChecksumInfo.newBuilder() .setContainerID(containerID) .setContainerMerkleTree(result.getDataTree().toProto()) @@ -201,6 +203,8 @@ public void testAllDataErrorsCollected(ContainerTestVersionInfo versionInfo) thr // compare it against the original healthy tree. 
The diff we get back should match the failures we injected. ContainerDiffReport diffReport = checksumManager.diff(container.getContainerData(), healthyChecksumInfo); + LOG.info("Diff of healthy container with actual container {}", diffReport); + // Check that the new tree identified all the expected errors by checking the diff. Map> corruptChunks = diffReport.getCorruptChunks(); // One block had corrupted chunks. @@ -208,16 +212,17 @@ public void testAllDataErrorsCollected(ContainerTestVersionInfo versionInfo) thr List corruptChunksInBlock = corruptChunks.get(corruptBlockID); assertEquals(2, corruptChunksInBlock.size()); - // Check missing block was correctly identified in the tree diff. - List missingBlocks = diffReport.getMissingBlocks(); - assertEquals(1, missingBlocks.size()); - assertEquals(missingBlockID, missingBlocks.get(0).getBlockID()); - // One block was truncated which resulted in all of its chunks being reported as missing. Map> missingChunks = diffReport.getMissingChunks(); assertEquals(1, missingChunks.size()); List missingChunksInBlock = missingChunks.get(truncatedBlockID); assertEquals(CHUNKS_PER_BLOCK, missingChunksInBlock.size()); + + // Check missing block was correctly identified in the tree diff. + List missingBlocks = diffReport.getMissingBlocks(); + assertEquals(1, missingBlocks.size()); + assertEquals(missingBlockID, missingBlocks.get(0).getBlockID()); + } /** From a25d44d221781b7e3cee30fa31f464a1e9c4d793 Mon Sep 17 00:00:00 2001 From: Ethan Rose Date: Mon, 6 Jan 2025 20:03:25 -0500 Subject: [PATCH 07/62] Add unit tests for new addBlock method in tree --- .../checksum/TestContainerMerkleTree.java | 36 +++++++++++++++++++ 1 file changed, 36 insertions(+) diff --git a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/checksum/TestContainerMerkleTree.java b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/checksum/TestContainerMerkleTree.java index b449a5003f9d..2905ea7d6483 100644 --- a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/checksum/TestContainerMerkleTree.java +++ b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/checksum/TestContainerMerkleTree.java @@ -114,6 +114,42 @@ public void testBuildTreeWithMissingChunks() { assertTreesSortedAndMatch(expectedTree, actualTreeProto); } + @Test + public void testBuildTreeWithEmptyBlock() { + final long blockID = 1; + ContainerProtos.BlockMerkleTree blockTree = buildExpectedBlockTree(blockID, Collections.emptyList()); + ContainerProtos.ContainerMerkleTree expectedTree = buildExpectedContainerTree(Collections.singletonList(blockTree)); + + // Use the ContainerMerkleTree to build the same tree. + ContainerMerkleTree actualTree = new ContainerMerkleTree(); + actualTree.addBlock(blockID); + + // Ensure the trees match. + ContainerProtos.ContainerMerkleTree actualTreeProto = actualTree.toProto(); + assertTreesSortedAndMatch(expectedTree, actualTreeProto); + } + + @Test + public void testAddBlockIdempotent() { + final long blockID = 1; + // Build the expected proto. 
+ ContainerProtos.ChunkInfo chunk1 = buildChunk(config, 0, ByteBuffer.wrap(new byte[]{1, 2, 3})); + ContainerProtos.BlockMerkleTree blockTree = buildExpectedBlockTree(blockID, + Collections.singletonList(buildExpectedChunkTree(chunk1))); + ContainerProtos.ContainerMerkleTree expectedTree = buildExpectedContainerTree(Collections.singletonList(blockTree)); + + // Use the ContainerMerkleTree to build the same tree, calling addBlock in between adding chunks. + ContainerMerkleTree actualTree = new ContainerMerkleTree(); + actualTree.addBlock(blockID); + actualTree.addChunks(blockID, true, chunk1); + // This should not overwrite the chunk already added to the block. + actualTree.addBlock(blockID); + + // Ensure the trees match. + ContainerProtos.ContainerMerkleTree actualTreeProto = actualTree.toProto(); + assertTreesSortedAndMatch(expectedTree, actualTreeProto); + } + /** * A container is a set of blocks. Make sure the tree implementation is not dependent on continuity of block IDs. */ From 7550a3c85d28fd49a3fb746e0962fe67fec9f6be Mon Sep 17 00:00:00 2001 From: Ethan Rose Date: Mon, 6 Jan 2025 20:22:51 -0500 Subject: [PATCH 08/62] Test that SCM gets a checksum with the container report --- ...stBackgroundContainerDataScannerIntegration.java | 5 +++++ .../TestContainerScannerIntegrationAbstract.java | 13 ++++--------- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/dn/scanner/TestBackgroundContainerDataScannerIntegration.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/dn/scanner/TestBackgroundContainerDataScannerIntegration.java index 80dbda64bfcb..de6818cfda32 100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/dn/scanner/TestBackgroundContainerDataScannerIntegration.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/dn/scanner/TestBackgroundContainerDataScannerIntegration.java @@ -35,6 +35,7 @@ import java.util.concurrent.TimeUnit; import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertNotEquals; /** * Integration tests for the background container data scanner. This scanner @@ -95,6 +96,10 @@ void testCorruptionDetected(TestContainerCorruptions corruption) corruption == TestContainerCorruptions.CORRUPT_BLOCK) { // These errors will affect multiple chunks and result in multiple log messages. corruption.assertLogged(containerID, logCapturer); + // Check a corresponding checksum reported to SCM. This would be zero for metadata errors. + // TODO HDDS-11942 This check can be made generic for all faults because every fault provided to the test will + // declare its expected checksum. + assertNotEquals(0, getContainerReplica(containerID).getDataChecksum()); } else { // Other corruption types will only lead to a single error. 
corruption.assertLogged(containerID, 1, logCapturer); diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/dn/scanner/TestContainerScannerIntegrationAbstract.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/dn/scanner/TestContainerScannerIntegrationAbstract.java index 5f6c14bcde7a..cfaaa32fbca4 100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/dn/scanner/TestContainerScannerIntegrationAbstract.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/dn/scanner/TestContainerScannerIntegrationAbstract.java @@ -121,11 +121,8 @@ static void shutdown() throws IOException { protected void waitForScmToSeeUnhealthyReplica(long containerID) throws Exception { - ContainerManager scmContainerManager = cluster.getStorageContainerManager() - .getContainerManager(); LambdaTestUtils.await(5000, 500, - () -> getContainerReplica(scmContainerManager, containerID) - .getState() == State.UNHEALTHY); + () -> getContainerReplica(containerID).getState() == State.UNHEALTHY); } protected void waitForScmToCloseContainer(long containerID) throws Exception { @@ -188,11 +185,9 @@ protected byte[] getTestData() { .getBytes(UTF_8); } - protected ContainerReplica getContainerReplica( - ContainerManager cm, long containerId) throws ContainerNotFoundException { - Set containerReplicas = cm.getContainerReplicas( - ContainerID.valueOf( - containerId)); + protected ContainerReplica getContainerReplica(long containerId) throws ContainerNotFoundException { + ContainerManager cm = cluster.getStorageContainerManager().getContainerManager(); + Set containerReplicas = cm.getContainerReplicas(ContainerID.valueOf(containerId)); // Only using a single datanode cluster. assertEquals(1, containerReplicas.size()); return containerReplicas.iterator().next(); From 847f8d86abd318ea3354bc04d7b6c32b258eb380 Mon Sep 17 00:00:00 2001 From: Ethan Rose Date: Mon, 6 Jan 2025 21:08:24 -0500 Subject: [PATCH 09/62] Add (failing) tests that SCM sees updated checksums --- .../ContainerChecksumTreeManager.java | 2 +- .../ContainerMerkleTreeTestUtils.java | 5 ++--- .../hadoop/hdds/scm/TestCloseContainer.java | 16 +++++++-------- ...groundContainerDataScannerIntegration.java | 20 +++++++++++++------ ...ndContainerMetadataScannerIntegration.java | 16 +++++++++++++-- ...stContainerScannerIntegrationAbstract.java | 12 +++++++++-- ...DemandContainerDataScannerIntegration.java | 11 +++++++++- 7 files changed, 59 insertions(+), 23 deletions(-) diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/checksum/ContainerChecksumTreeManager.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/checksum/ContainerChecksumTreeManager.java index 9a739668eaa5..036208efeb29 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/checksum/ContainerChecksumTreeManager.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/checksum/ContainerChecksumTreeManager.java @@ -390,7 +390,7 @@ public ContainerMerkleTreeMetrics getMetrics() { return this.metrics; } - public static boolean checksumFileExist(Container container) { + public static boolean checksumFileExist(Container container) { File checksumFile = getContainerChecksumFile(container.getContainerData()); return checksumFile.exists(); } diff --git a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/checksum/ContainerMerkleTreeTestUtils.java 
b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/checksum/ContainerMerkleTreeTestUtils.java index 95aff8ab1e04..140fff1517b2 100644 --- a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/checksum/ContainerMerkleTreeTestUtils.java +++ b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/checksum/ContainerMerkleTreeTestUtils.java @@ -333,10 +333,9 @@ private static void assertEqualsChunkMerkleTree(List container = ozoneContainer.getController().getContainer(containerID); return ContainerChecksumTreeManager.checksumFileExist(container); } diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/hdds/scm/TestCloseContainer.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/hdds/scm/TestCloseContainer.java index 35434500391b..96a60429a232 100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/hdds/scm/TestCloseContainer.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/hdds/scm/TestCloseContainer.java @@ -133,14 +133,14 @@ public void testReplicasAreReportedForClosedContainerAfterRestart() // Checksum file doesn't exist before container close List hddsDatanodes = cluster.getHddsDatanodes(); for (HddsDatanodeService hddsDatanode: hddsDatanodes) { - assertFalse(containerChecksumFileExists(hddsDatanode, container)); + assertFalse(containerChecksumFileExists(hddsDatanode, container.getContainerID())); } OzoneTestUtils.closeContainer(scm, container); // Checksum file exists after container close for (HddsDatanodeService hddsDatanode: hddsDatanodes) { GenericTestUtils.waitFor(() -> checkContainerCloseInDatanode(hddsDatanode, container), 100, 5000); - assertTrue(containerChecksumFileExists(hddsDatanode, container)); + assertTrue(containerChecksumFileExists(hddsDatanode, container.getContainerID())); } long originalSeq = container.getSequenceId(); @@ -187,7 +187,7 @@ public void testCloseClosedContainer() // Checksum file doesn't exist before container close List hddsDatanodes = cluster.getHddsDatanodes(); for (HddsDatanodeService hddsDatanode: hddsDatanodes) { - assertFalse(containerChecksumFileExists(hddsDatanode, container)); + assertFalse(containerChecksumFileExists(hddsDatanode, container.getContainerID())); } // Close container OzoneTestUtils.closeContainer(scm, container); @@ -195,7 +195,7 @@ public void testCloseClosedContainer() // Checksum file exists after container close for (HddsDatanodeService hddsDatanode: hddsDatanodes) { GenericTestUtils.waitFor(() -> checkContainerCloseInDatanode(hddsDatanode, container), 100, 5000); - assertTrue(containerChecksumFileExists(hddsDatanode, container)); + assertTrue(containerChecksumFileExists(hddsDatanode, container.getContainerID())); } assertThrows(IOException.class, @@ -215,7 +215,7 @@ public void testContainerChecksumForClosedContainer() throws Exception { // Checksum file doesn't exist before container close List hddsDatanodes = cluster.getHddsDatanodes(); for (HddsDatanodeService hddsDatanode : hddsDatanodes) { - assertFalse(containerChecksumFileExists(hddsDatanode, containerInfo1)); + assertFalse(containerChecksumFileExists(hddsDatanode, containerInfo1.getContainerID())); } // Close container. 
OzoneTestUtils.closeContainer(scm, containerInfo1); @@ -224,7 +224,7 @@ public void testContainerChecksumForClosedContainer() throws Exception { // merkle tree for all the datanodes for (HddsDatanodeService hddsDatanode : hddsDatanodes) { GenericTestUtils.waitFor(() -> checkContainerCloseInDatanode(hddsDatanode, containerInfo1), 100, 5000); - assertTrue(containerChecksumFileExists(hddsDatanode, containerInfo1)); + assertTrue(containerChecksumFileExists(hddsDatanode, containerInfo1.getContainerID())); OzoneContainer ozoneContainer = hddsDatanode.getDatanodeStateMachine().getContainer(); Container container1 = ozoneContainer.getController().getContainer(containerInfo1.getContainerID()); ContainerProtos.ContainerChecksumInfo containerChecksumInfo = ContainerMerkleTreeTestUtils.readChecksumFile( @@ -242,7 +242,7 @@ public void testContainerChecksumForClosedContainer() throws Exception { ReplicationType.RATIS, "this is the different content"); ContainerInfo containerInfo2 = scm.getContainerManager().getContainers().get(1); for (HddsDatanodeService hddsDatanode : hddsDatanodes) { - assertFalse(containerChecksumFileExists(hddsDatanode, containerInfo2)); + assertFalse(containerChecksumFileExists(hddsDatanode, containerInfo2.getContainerID())); } // Close container. @@ -252,7 +252,7 @@ public void testContainerChecksumForClosedContainer() throws Exception { // merkle tree for all the datanodes for (HddsDatanodeService hddsDatanode : hddsDatanodes) { GenericTestUtils.waitFor(() -> checkContainerCloseInDatanode(hddsDatanode, containerInfo2), 100, 5000); - assertTrue(containerChecksumFileExists(hddsDatanode, containerInfo2)); + assertTrue(containerChecksumFileExists(hddsDatanode, containerInfo2.getContainerID())); OzoneContainer ozoneContainer = hddsDatanode.getDatanodeStateMachine().getContainer(); Container container2 = ozoneContainer.getController().getContainer(containerInfo2.getContainerID()); ContainerProtos.ContainerChecksumInfo containerChecksumInfo = ContainerMerkleTreeTestUtils.readChecksumFile( diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/dn/scanner/TestBackgroundContainerDataScannerIntegration.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/dn/scanner/TestBackgroundContainerDataScannerIntegration.java index de6818cfda32..b4910278257a 100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/dn/scanner/TestBackgroundContainerDataScannerIntegration.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/dn/scanner/TestBackgroundContainerDataScannerIntegration.java @@ -20,7 +20,10 @@ package org.apache.hadoop.ozone.dn.scanner; import org.apache.hadoop.hdds.conf.OzoneConfiguration; +import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos; import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos.ContainerDataProto.State; +import org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos; +import org.apache.hadoop.ozone.container.checksum.ContainerMerkleTreeTestUtils; import org.apache.hadoop.ozone.container.common.interfaces.Container; import org.apache.hadoop.ozone.container.common.utils.ContainerLogger; import org.apache.hadoop.ozone.container.keyvalue.TestContainerCorruptions; @@ -34,8 +37,11 @@ import java.util.concurrent.TimeUnit; +import static org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos.ContainerReplicaProto.State.CLOSED; +import static 
org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos.ContainerReplicaProto.State.UNHEALTHY; import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertNotEquals; +import static org.junit.jupiter.api.Assertions.assertTrue; /** * Integration tests for the background container data scanner. This scanner @@ -79,6 +85,10 @@ void testCorruptionDetected(TestContainerCorruptions corruption) // Container corruption has not yet been introduced. Container container = getDnContainer(containerID); assertEquals(State.CLOSED, container.getContainerState()); + assertTrue(containerChecksumFileExists(containerID)); + + waitForScmToSeeReplicaState(containerID, CLOSED); + long initialReportedDataChecksum = getContainerReplica(containerID).getDataChecksum(); corruption.applyTo(container); @@ -89,17 +99,15 @@ void testCorruptionDetected(TestContainerCorruptions corruption) () -> container.getContainerState() == State.UNHEALTHY, 500, 15_000); - // Wait for SCM to get a report of the unhealthy replica. - waitForScmToSeeUnhealthyReplica(containerID); + // Wait for SCM to get a report of the unhealthy replica with a different checksum than before. + waitForScmToSeeReplicaState(containerID, UNHEALTHY); + long newReportedDataChecksum = getContainerReplica(containerID).getDataChecksum(); + assertNotEquals(initialReportedDataChecksum, newReportedDataChecksum); if (corruption == TestContainerCorruptions.TRUNCATED_BLOCK || corruption == TestContainerCorruptions.CORRUPT_BLOCK) { // These errors will affect multiple chunks and result in multiple log messages. corruption.assertLogged(containerID, logCapturer); - // Check a corresponding checksum reported to SCM. This would be zero for metadata errors. - // TODO HDDS-11942 This check can be made generic for all faults because every fault provided to the test will - // declare its expected checksum. - assertNotEquals(0, getContainerReplica(containerID).getDataChecksum()); } else { // Other corruption types will only lead to a single error. corruption.assertLogged(containerID, 1, logCapturer); diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/dn/scanner/TestBackgroundContainerMetadataScannerIntegration.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/dn/scanner/TestBackgroundContainerMetadataScannerIntegration.java index 0678190d47c3..9c21aa5836c7 100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/dn/scanner/TestBackgroundContainerMetadataScannerIntegration.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/dn/scanner/TestBackgroundContainerMetadataScannerIntegration.java @@ -36,7 +36,11 @@ import java.util.Collection; import java.util.concurrent.TimeUnit; +import static org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos.ContainerReplicaProto.State.CLOSED; +import static org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos.ContainerReplicaProto.State.UNHEALTHY; import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertNotEquals; +import static org.junit.jupiter.api.Assertions.assertTrue; /** * Integration tests for the background container metadata scanner. 
This @@ -92,10 +96,16 @@ void testCorruptionDetected(TestContainerCorruptions corruption) long closedContainerID = writeDataThenCloseContainer(); Container closedContainer = getDnContainer(closedContainerID); assertEquals(State.CLOSED, closedContainer.getContainerState()); + assertTrue(containerChecksumFileExists(closedContainerID)); + waitForScmToSeeReplicaState(closedContainerID, CLOSED); + long initialReportedClosedChecksum = getContainerReplica(closedContainerID).getDataChecksum(); long openContainerID = writeDataToOpenContainer(); Container openContainer = getDnContainer(openContainerID); assertEquals(State.OPEN, openContainer.getContainerState()); + long initialReportedOpenChecksum = getContainerReplica(openContainerID).getDataChecksum(); + // Open containers should not yet have a checksum generated. + assertEquals(0, initialReportedOpenChecksum); // Corrupt both containers. corruption.applyTo(closedContainer); @@ -110,8 +120,10 @@ void testCorruptionDetected(TestContainerCorruptions corruption) 500, 5000); // Wait for SCM to get reports of the unhealthy replicas. - waitForScmToSeeUnhealthyReplica(closedContainerID); - waitForScmToSeeUnhealthyReplica(openContainerID); + waitForScmToSeeReplicaState(closedContainerID, UNHEALTHY); + assertNotEquals(initialReportedClosedChecksum, getContainerReplica(closedContainerID).getDataChecksum()); + waitForScmToSeeReplicaState(openContainerID, UNHEALTHY); + assertNotEquals(initialReportedOpenChecksum, getContainerReplica(openContainerID).getDataChecksum()); // Once the unhealthy replica is reported, the open container's lifecycle // state in SCM should move to closed. diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/dn/scanner/TestContainerScannerIntegrationAbstract.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/dn/scanner/TestContainerScannerIntegrationAbstract.java index cfaaa32fbca4..c2befa60d74b 100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/dn/scanner/TestContainerScannerIntegrationAbstract.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/dn/scanner/TestContainerScannerIntegrationAbstract.java @@ -38,6 +38,7 @@ import org.apache.hadoop.ozone.client.io.OzoneOutputStream; import org.apache.hadoop.ozone.container.ContainerTestHelper; import org.apache.hadoop.ozone.container.TestHelper; +import org.apache.hadoop.ozone.container.checksum.ContainerMerkleTreeTestUtils; import org.apache.hadoop.ozone.container.common.interfaces.Container; import org.apache.hadoop.ozone.container.ozoneimpl.ContainerScannerConfiguration; import org.apache.hadoop.ozone.container.ozoneimpl.OzoneContainer; @@ -59,6 +60,7 @@ import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertThrows; +import static org.junit.jupiter.api.Assertions.assertTrue; /** * This class tests the data scanner functionality. 
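The second hunk of this file, below, generalizes the UNHEALTHY-only wait into a wait for an arbitrary replica state. That is what lets the data scanner tests in this patch snapshot the checksum SCM holds for a CLOSED replica before corrupting it, then assert that the reported value changes once the replica turns UNHEALTHY. A condensed sketch of the resulting idiom (names come from these patches; the variables are supplied by the enclosing test):

    // Sketch only: containerID, corruption and container come from the test body.
    waitForScmToSeeReplicaState(containerID, CLOSED);
    long before = getContainerReplica(containerID).getDataChecksum();
    corruption.applyTo(container);                        // damage the on-disk replica
    waitForScmToSeeReplicaState(containerID, UNHEALTHY);  // scanner detects, datanode reports
    assertNotEquals(before, getContainerReplica(containerID).getDataChecksum());
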
@@ -119,10 +121,10 @@ static void shutdown() throws IOException { } } - protected void waitForScmToSeeUnhealthyReplica(long containerID) + protected void waitForScmToSeeReplicaState(long containerID, State state) throws Exception { LambdaTestUtils.await(5000, 500, - () -> getContainerReplica(containerID).getState() == State.UNHEALTHY); + () -> getContainerReplica(containerID).getState() == state); } protected void waitForScmToCloseContainer(long containerID) throws Exception { @@ -143,6 +145,12 @@ protected Container getDnContainer(long containerID) { return getOzoneContainer().getContainerSet().getContainer(containerID); } + protected boolean containerChecksumFileExists(long containerID) { + assertEquals(1, cluster.getHddsDatanodes().size()); + HddsDatanodeService dn = cluster.getHddsDatanodes().get(0); + return ContainerMerkleTreeTestUtils.containerChecksumFileExists(dn, containerID); + } + protected long writeDataThenCloseContainer() throws Exception { return writeDataThenCloseContainer("keyName"); } diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/dn/scanner/TestOnDemandContainerDataScannerIntegration.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/dn/scanner/TestOnDemandContainerDataScannerIntegration.java index af94506c8272..36a92e870455 100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/dn/scanner/TestOnDemandContainerDataScannerIntegration.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/dn/scanner/TestOnDemandContainerDataScannerIntegration.java @@ -21,6 +21,7 @@ import org.apache.hadoop.hdds.conf.OzoneConfiguration; import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos.ContainerDataProto.State; +import org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos; import org.apache.hadoop.ozone.container.common.interfaces.Container; import org.apache.hadoop.ozone.container.common.utils.ContainerLogger; import org.apache.hadoop.ozone.container.keyvalue.TestContainerCorruptions; @@ -33,7 +34,10 @@ import java.util.Collection; +import static org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos.ContainerReplicaProto.State.UNHEALTHY; import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertNotEquals; +import static org.junit.jupiter.api.Assertions.assertTrue; /** * Integration tests for the on demand container data scanner. This scanner @@ -98,6 +102,9 @@ void testCorruptionDetected(TestContainerCorruptions corruption) // Container corruption has not yet been introduced. Container container = getDnContainer(containerID); assertEquals(State.CLOSED, container.getContainerState()); + long initialReportedDataChecksum = getContainerReplica(containerID).getDataChecksum(); + assertTrue(containerChecksumFileExists(containerID)); + // Corrupt the container. corruption.applyTo(container); // This method will check that reading from the corrupted key returns an @@ -110,7 +117,9 @@ void testCorruptionDetected(TestContainerCorruptions corruption) 500, 5000); // Wait for SCM to get a report of the unhealthy replica. 
- waitForScmToSeeUnhealthyReplica(containerID); + waitForScmToSeeReplicaState(containerID, UNHEALTHY); corruption.assertLogged(containerID, 1, logCapturer); + long newReportedDataChecksum = getContainerReplica(containerID).getDataChecksum(); + assertNotEquals(initialReportedDataChecksum, newReportedDataChecksum); } } From 452c2940559a92c5d01b8760b9603c116630f676 Mon Sep 17 00:00:00 2001 From: Ethan Rose Date: Wed, 8 Jan 2025 14:16:47 -0500 Subject: [PATCH 10/62] Update acceptance test --- .../main/smoketest/admincli/container.robot | 26 ++++++++++--------- 1 file changed, 14 insertions(+), 12 deletions(-) diff --git a/hadoop-ozone/dist/src/main/smoketest/admincli/container.robot b/hadoop-ozone/dist/src/main/smoketest/admincli/container.robot index ec77b1a8566b..307fb2431957 100644 --- a/hadoop-ozone/dist/src/main/smoketest/admincli/container.robot +++ b/hadoop-ozone/dist/src/main/smoketest/admincli/container.robot @@ -34,11 +34,14 @@ Container is closed ${output} = Execute ozone admin container info "${container}" Should contain ${output} CLOSED -Reconciliation complete - [arguments] ${container} - ${data_checksum} = Execute ozone admin container info "${container}" --json | jq -r '.replicas[].dataChecksum' | head -n1 - Should not be empty ${data_checksum} - Should not be equal as strings 0 ${data_checksum} +Container checksums should match + [arguments] ${container} ${expected_checksum} + ${data_checksum1} = Execute ozone admin container info "${container}" --json | jq -r '.replicas[0].dataChecksum' | head -n1 + ${data_checksum2} = Execute ozone admin container info "${container}" --json | jq -r '.replicas[1].dataChecksum' | head -n1 + ${data_checksum3} = Execute ozone admin container info "${container}" --json | jq -r '.replicas[2].dataChecksum' | head -n1 + Should be equal as strings ${data_checksum1} ${expected_checksum} + Should be equal as strings ${data_checksum2} ${expected_checksum} + Should be equal as strings ${data_checksum3} ${expected_checksum} *** Test Cases *** Create container @@ -122,9 +125,8 @@ Cannot reconcile open container ${container} = Execute ozone admin container list --state OPEN | jq -r 'select(.replicationConfig.replicationFactor == "THREE") | .containerID' | head -n1 Execute and check rc ozone admin container reconcile "${container}" 255 # The container should not yet have any replica checksums since it is still open. - ${data_checksum} = Execute ozone admin container info "${container}" --json | jq -r '.replicas[].dataChecksum' | head -n1 # 0 is the hex value of an empty checksum. - Should Be Equal As Strings 0 ${data_checksum} + Container checksums should match ${container} 0 Close container ${container} = Execute ozone admin container list --state OPEN | jq -r 'select(.replicationConfig.replicationFactor == "THREE") | .containerID' | head -1 @@ -137,10 +139,10 @@ Close container Reconcile closed container ${container} = Execute ozone admin container list --state CLOSED | jq -r 'select(.replicationConfig.replicationFactor == "THREE") | .containerID' | head -1 - # TODO wait for container close to populate the checksum. ${data_checksum} = Execute ozone admin container info "${container}" --json | jq -r '.replicas[].dataChecksum' | head -n1 - # 0 is the hex value of an empty checksum. - Should Be Equal As Strings 0 ${data_checksum} - # When reconciliation finishes, replica checksums should be shown. 
+ # Once the container is closed, the data checksum should be populated + Should Not Be Equal As Strings 0 ${data_checksum} + Container checksums should match ${container} ${data_checksum} + # Check that reconcile CLI returns success. Without fault injection, there is no change expected to the + # container's checksums to inidcate it made a difference Execute ozone admin container reconcile ${container} - Wait until keyword succeeds 1min 5sec Reconciliation complete ${container} From dc45eca6f871a48f9c24061d7fdbfea8dc09b3a9 Mon Sep 17 00:00:00 2001 From: Ethan Rose Date: Wed, 8 Jan 2025 14:22:49 -0500 Subject: [PATCH 11/62] Add javadoc for tree generation from metadata --- .../hadoop/ozone/container/keyvalue/KeyValueHandler.java | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/KeyValueHandler.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/KeyValueHandler.java index 4babdd144f8c..45813747737e 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/KeyValueHandler.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/KeyValueHandler.java @@ -547,6 +547,15 @@ ContainerCommandResponseProto handleCloseContainer( return getSuccessResponse(request); } + /** + * Write the merkle tree for this container using the existing checksum metadata only. The data is not read or + * validated by this method, so it is expected to run quickly. + * + * If a checksum file already exists on the disk, this method will do nothing. The existing file would have either + * been made from the metadata or data itself so there is no need to recreate it from the metadata. + * + * @param container The container which will have a tree generated. + */ private void createContainerMerkleTree(Container container) { if (ContainerChecksumTreeManager.checksumFileExist(container)) { return; From 1cb291f2e0e76e327f0920a018b773e89b49301e Mon Sep 17 00:00:00 2001 From: Ethan Rose Date: Wed, 8 Jan 2025 15:49:37 -0500 Subject: [PATCH 12/62] Data integration tests passing --- .../checksum/ContainerChecksumTreeManager.java | 9 +++++++-- .../ozoneimpl/BackgroundContainerDataScanner.java | 8 +++++++- .../ozoneimpl/OnDemandContainerDataScanner.java | 7 ++++++- ...ackgroundContainerMetadataScannerIntegration.java | 12 +++++++----- .../TestOnDemandContainerDataScannerIntegration.java | 5 ++++- 5 files changed, 31 insertions(+), 10 deletions(-) diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/checksum/ContainerChecksumTreeManager.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/checksum/ContainerChecksumTreeManager.java index 036208efeb29..298742eda4dc 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/checksum/ContainerChecksumTreeManager.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/checksum/ContainerChecksumTreeManager.java @@ -89,6 +89,9 @@ public void writeContainerDataTree(ContainerData data, ContainerMerkleTree tree) long containerID = data.getContainerID(); Lock writeLock = getLock(containerID); writeLock.lock(); + // If there is an error generating the tree and we cannot obtain a final checksum, use 0 to indicate a metadata + // failure. 
+ long dataChecksum = 0; try { ContainerProtos.ContainerChecksumInfo.Builder checksumInfoBuilder = null; try { @@ -103,14 +106,16 @@ public void writeContainerDataTree(ContainerData data, ContainerMerkleTree tree) ContainerProtos.ContainerMerkleTree treeProto = captureLatencyNs(metrics.getCreateMerkleTreeLatencyNS(), tree::toProto); - long dataChecksum = treeProto.getDataChecksum(); - data.setDataChecksum(dataChecksum); checksumInfoBuilder .setContainerID(containerID) .setContainerMerkleTree(treeProto); write(data, checksumInfoBuilder.build()); + // If write succeeds, update the checksum in memory. Otherwise 0 will be used to indicate the metadata failure. + dataChecksum = treeProto.getDataChecksum(); LOG.debug("Data merkle tree for container {} updated with container checksum {}", containerID, dataChecksum); } finally { + // Even if persisting the tree fails, we should still update the data checksum in memory to report back to SCM. + data.setDataChecksum(dataChecksum); writeLock.unlock(); } } diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ozoneimpl/BackgroundContainerDataScanner.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ozoneimpl/BackgroundContainerDataScanner.java index bf4d88626d8c..f3d34279ddfc 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ozoneimpl/BackgroundContainerDataScanner.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ozoneimpl/BackgroundContainerDataScanner.java @@ -92,6 +92,13 @@ public void scanContainer(Container c) if (result.isDeleted()) { LOG.debug("Container [{}] has been deleted during the data scan.", containerId); } else { + // Merkle tree write failure should not abort the scanning process. Continue marking the scan as completed. + try { + checksumManager.writeContainerDataTree(containerData, result.getDataTree()); + } catch (IOException ex) { + LOG.error("Failed to write container merkle tree for container {}", containerId, ex); + } + if (!result.isHealthy()) { logUnhealthyScanResult(containerId, result, LOG); @@ -104,7 +111,6 @@ public void scanContainer(Container c) } } metrics.incNumContainersScanned(); - checksumManager.writeContainerDataTree(containerData, result.getDataTree()); } Instant now = Instant.now(); diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ozoneimpl/OnDemandContainerDataScanner.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ozoneimpl/OnDemandContainerDataScanner.java index 427104d9a730..eb7fe82ed5b5 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ozoneimpl/OnDemandContainerDataScanner.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ozoneimpl/OnDemandContainerDataScanner.java @@ -141,6 +141,12 @@ private static void performOnDemandScan(Container container) { if (result.isDeleted()) { LOG.debug("Container [{}] has been deleted during the data scan.", containerId); } else { + // Merkle tree write failure should not abort the scanning process. Continue marking the scan as completed. 
+ try { + instance.checksumManager.writeContainerDataTree(containerData, result.getDataTree()); + } catch (IOException ex) { + LOG.error("Failed to write container merkle tree for container {}", containerId, ex); + } if (!result.isHealthy()) { logUnhealthyScanResult(containerId, result, LOG); boolean containerMarkedUnhealthy = instance.containerController @@ -150,7 +156,6 @@ private static void performOnDemandScan(Container container) { } } instance.metrics.incNumContainersScanned(); - instance.checksumManager.writeContainerDataTree(containerData, result.getDataTree()); } Instant now = Instant.now(); diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/dn/scanner/TestBackgroundContainerMetadataScannerIntegration.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/dn/scanner/TestBackgroundContainerMetadataScannerIntegration.java index 9c21aa5836c7..ed92a61a8463 100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/dn/scanner/TestBackgroundContainerMetadataScannerIntegration.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/dn/scanner/TestBackgroundContainerMetadataScannerIntegration.java @@ -98,14 +98,14 @@ void testCorruptionDetected(TestContainerCorruptions corruption) assertEquals(State.CLOSED, closedContainer.getContainerState()); assertTrue(containerChecksumFileExists(closedContainerID)); waitForScmToSeeReplicaState(closedContainerID, CLOSED); - long initialReportedClosedChecksum = getContainerReplica(closedContainerID).getDataChecksum(); + long initialClosedChecksum = getContainerReplica(closedContainerID).getDataChecksum(); + assertNotEquals(0, initialClosedChecksum); long openContainerID = writeDataToOpenContainer(); Container openContainer = getDnContainer(openContainerID); assertEquals(State.OPEN, openContainer.getContainerState()); - long initialReportedOpenChecksum = getContainerReplica(openContainerID).getDataChecksum(); // Open containers should not yet have a checksum generated. - assertEquals(0, initialReportedOpenChecksum); + assertEquals(0, getContainerReplica(openContainerID).getDataChecksum()); // Corrupt both containers. corruption.applyTo(closedContainer); @@ -120,10 +120,12 @@ void testCorruptionDetected(TestContainerCorruptions corruption) 500, 5000); // Wait for SCM to get reports of the unhealthy replicas. + // The metadata scanner does not generate data checksums and the other scanners have been turned off for this + // test, so the data checksums should not change. waitForScmToSeeReplicaState(closedContainerID, UNHEALTHY); - assertNotEquals(initialReportedClosedChecksum, getContainerReplica(closedContainerID).getDataChecksum()); + assertEquals(initialClosedChecksum, getContainerReplica(closedContainerID).getDataChecksum()); waitForScmToSeeReplicaState(openContainerID, UNHEALTHY); - assertNotEquals(initialReportedOpenChecksum, getContainerReplica(openContainerID).getDataChecksum()); + assertEquals(0, getContainerReplica(openContainerID).getDataChecksum()); // Once the unhealthy replica is reported, the open container's lifecycle // state in SCM should move to closed. 
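The ContainerChecksumTreeManager change at the top of this patch is what makes these checksum assertions reliable: the in-memory value that datanodes report to SCM is now set in a finally block, so a failure to persist the tree degrades to the 0 sentinel instead of leaving a stale checksum behind. Stripped of locking and metrics, the contract is roughly this (buildChecksumInfo is a hypothetical stand-in for the builder plumbing in the real method):

    long dataChecksum = 0;                       // 0 signals a metadata failure to SCM
    try {
      ContainerProtos.ContainerMerkleTree proto = tree.toProto();
      write(data, buildChecksumInfo(containerID, proto));  // may throw IOException
      dataChecksum = proto.getDataChecksum();    // trusted only after a successful write
    } finally {
      data.setDataChecksum(dataChecksum);        // always reflected in the next report
    }
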
diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/dn/scanner/TestOnDemandContainerDataScannerIntegration.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/dn/scanner/TestOnDemandContainerDataScannerIntegration.java index 36a92e870455..99e5967f38aa 100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/dn/scanner/TestOnDemandContainerDataScannerIntegration.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/dn/scanner/TestOnDemandContainerDataScannerIntegration.java @@ -34,6 +34,7 @@ import java.util.Collection; +import static org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos.ContainerReplicaProto.State.CLOSED; import static org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos.ContainerReplicaProto.State.UNHEALTHY; import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertNotEquals; @@ -102,9 +103,11 @@ void testCorruptionDetected(TestContainerCorruptions corruption) // Container corruption has not yet been introduced. Container container = getDnContainer(containerID); assertEquals(State.CLOSED, container.getContainerState()); - long initialReportedDataChecksum = getContainerReplica(containerID).getDataChecksum(); assertTrue(containerChecksumFileExists(containerID)); + waitForScmToSeeReplicaState(containerID, CLOSED); + long initialReportedDataChecksum = getContainerReplica(containerID).getDataChecksum(); + // Corrupt the container. corruption.applyTo(container); // This method will check that reading from the corrupted key returns an From d6b21d255eba6228a7cd20f07370371cb60bb0a6 Mon Sep 17 00:00:00 2001 From: Ethan Rose Date: Wed, 8 Jan 2025 21:50:10 -0500 Subject: [PATCH 13/62] Don't generate tree from metadata for unhealthy container Metadata scanner integration tests are passing --- .../apache/hadoop/ozone/container/keyvalue/KeyValueHandler.java | 1 - 1 file changed, 1 deletion(-) diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/KeyValueHandler.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/KeyValueHandler.java index 45813747737e..962501e454d5 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/KeyValueHandler.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/KeyValueHandler.java @@ -1358,7 +1358,6 @@ public void markContainerUnhealthy(Container container, ScanResult reason) } finally { container.writeUnlock(); } - createContainerMerkleTree(container); // Even if the container file is corrupted/missing and the unhealthy // update fails, the unhealthy state is kept in memory and sent to // SCM. Write a corresponding entry to the container log as well. 
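Taken together with the scanner changes in patch 12, this removal settles where the tree for an unhealthy container comes from: the data scanner persists the tree built from what it actually read off disk and only then marks the container unhealthy, so regenerating a tree from possibly stale RocksDB metadata at mark time would clobber the scan result. Condensed from the patch 12 hunks, the scanner-side ordering is now:

    // Tree write failure is logged but does not abort the scan.
    try {
      checksumManager.writeContainerDataTree(containerData, result.getDataTree());
    } catch (IOException ex) {
      LOG.error("Failed to write container merkle tree for container {}", containerId, ex);
    }
    if (!result.isHealthy()) {
      logUnhealthyScanResult(containerId, result, LOG);
      // markContainerUnhealthy no longer writes a metadata-based tree of its own.
      containerController.markContainerUnhealthy(containerId, result);
    }
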
From 2a2dbbd76631f66e1299c5b449b70d5118900ef0 Mon Sep 17 00:00:00 2001 From: Ethan Rose Date: Wed, 8 Jan 2025 21:53:06 -0500 Subject: [PATCH 14/62] Checkstyle --- .../hadoop/ozone/container/checksum/ContainerMerkleTree.java | 1 - .../ozone/container/checksum/ContainerMerkleTreeTestUtils.java | 2 -- .../ozone/container/keyvalue/TestKeyValueContainerCheck.java | 1 - .../ozoneimpl/TestBackgroundContainerDataScanner.java | 1 - .../scanner/TestBackgroundContainerDataScannerIntegration.java | 3 --- .../dn/scanner/TestContainerScannerIntegrationAbstract.java | 1 - .../scanner/TestOnDemandContainerDataScannerIntegration.java | 1 - 7 files changed, 10 deletions(-) diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/checksum/ContainerMerkleTree.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/checksum/ContainerMerkleTree.java index 651f6fbb0a74..1a8d78bf793f 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/checksum/ContainerMerkleTree.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/checksum/ContainerMerkleTree.java @@ -22,7 +22,6 @@ import org.apache.ratis.thirdparty.com.google.protobuf.ByteString; import java.nio.ByteBuffer; -import java.util.Collection; import java.util.SortedMap; import java.util.TreeMap; diff --git a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/checksum/ContainerMerkleTreeTestUtils.java b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/checksum/ContainerMerkleTreeTestUtils.java index 140fff1517b2..e568e6051019 100644 --- a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/checksum/ContainerMerkleTreeTestUtils.java +++ b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/checksum/ContainerMerkleTreeTestUtils.java @@ -23,7 +23,6 @@ import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos; import org.apache.hadoop.hdds.scm.OzoneClientConfig; import org.apache.hadoop.hdds.scm.ScmConfigKeys; -import org.apache.hadoop.hdds.scm.container.ContainerInfo; import org.apache.hadoop.ozone.HddsDatanodeService; import org.apache.hadoop.ozone.container.common.impl.ContainerData; import org.apache.hadoop.ozone.container.common.interfaces.Container; @@ -35,7 +34,6 @@ import java.io.FileOutputStream; import java.io.IOException; import java.nio.ByteBuffer; -import java.util.ArrayList; import java.util.Arrays; import java.util.Comparator; import java.util.HashMap; diff --git a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/keyvalue/TestKeyValueContainerCheck.java b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/keyvalue/TestKeyValueContainerCheck.java index e4d0206bd5d3..d88b400c8d05 100644 --- a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/keyvalue/TestKeyValueContainerCheck.java +++ b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/keyvalue/TestKeyValueContainerCheck.java @@ -26,7 +26,6 @@ import org.apache.hadoop.hdfs.util.DataTransferThrottler; import org.apache.hadoop.ozone.container.checksum.ContainerChecksumTreeManager; import org.apache.hadoop.ozone.container.checksum.ContainerDiffReport; -import org.apache.hadoop.ozone.container.checksum.ContainerMerkleTree; import org.apache.hadoop.ozone.container.common.helpers.BlockData; import 
org.apache.hadoop.ozone.container.common.impl.ContainerLayoutVersion; import org.apache.hadoop.ozone.container.common.interfaces.BlockIterator; diff --git a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/ozoneimpl/TestBackgroundContainerDataScanner.java b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/ozoneimpl/TestBackgroundContainerDataScanner.java index 576a0477be4d..a03d32a96810 100644 --- a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/ozoneimpl/TestBackgroundContainerDataScanner.java +++ b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/ozoneimpl/TestBackgroundContainerDataScanner.java @@ -19,7 +19,6 @@ */ package org.apache.hadoop.ozone.container.ozoneimpl; -import org.apache.hadoop.hdds.conf.OzoneConfiguration; import org.apache.hadoop.hdfs.util.Canceler; import org.apache.hadoop.hdfs.util.DataTransferThrottler; import org.apache.hadoop.metrics2.lib.DefaultMetricsSystem; diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/dn/scanner/TestBackgroundContainerDataScannerIntegration.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/dn/scanner/TestBackgroundContainerDataScannerIntegration.java index b4910278257a..0f29f4179e71 100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/dn/scanner/TestBackgroundContainerDataScannerIntegration.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/dn/scanner/TestBackgroundContainerDataScannerIntegration.java @@ -20,10 +20,7 @@ package org.apache.hadoop.ozone.dn.scanner; import org.apache.hadoop.hdds.conf.OzoneConfiguration; -import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos; import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos.ContainerDataProto.State; -import org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos; -import org.apache.hadoop.ozone.container.checksum.ContainerMerkleTreeTestUtils; import org.apache.hadoop.ozone.container.common.interfaces.Container; import org.apache.hadoop.ozone.container.common.utils.ContainerLogger; import org.apache.hadoop.ozone.container.keyvalue.TestContainerCorruptions; diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/dn/scanner/TestContainerScannerIntegrationAbstract.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/dn/scanner/TestContainerScannerIntegrationAbstract.java index c2befa60d74b..4a0fec534427 100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/dn/scanner/TestContainerScannerIntegrationAbstract.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/dn/scanner/TestContainerScannerIntegrationAbstract.java @@ -60,7 +60,6 @@ import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertThrows; -import static org.junit.jupiter.api.Assertions.assertTrue; /** * This class tests the data scanner functionality. 
diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/dn/scanner/TestOnDemandContainerDataScannerIntegration.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/dn/scanner/TestOnDemandContainerDataScannerIntegration.java index 99e5967f38aa..82fc1b42f328 100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/dn/scanner/TestOnDemandContainerDataScannerIntegration.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/dn/scanner/TestOnDemandContainerDataScannerIntegration.java @@ -21,7 +21,6 @@ import org.apache.hadoop.hdds.conf.OzoneConfiguration; import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos.ContainerDataProto.State; -import org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos; import org.apache.hadoop.ozone.container.common.interfaces.Container; import org.apache.hadoop.ozone.container.common.utils.ContainerLogger; import org.apache.hadoop.ozone.container.keyvalue.TestContainerCorruptions; From c9a077c2d00ecf112018715fe4e0a32b5b086543 Mon Sep 17 00:00:00 2001 From: Ethan Rose Date: Wed, 8 Jan 2025 22:35:25 -0500 Subject: [PATCH 15/62] Marking container unhealthy should not write a merkle tree (test fix) --- .../keyvalue/TestKeyValueHandlerWithUnhealthyContainer.java | 1 - 1 file changed, 1 deletion(-) diff --git a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/keyvalue/TestKeyValueHandlerWithUnhealthyContainer.java b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/keyvalue/TestKeyValueHandlerWithUnhealthyContainer.java index af0c430c86d5..9652e37a4487 100644 --- a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/keyvalue/TestKeyValueHandlerWithUnhealthyContainer.java +++ b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/keyvalue/TestKeyValueHandlerWithUnhealthyContainer.java @@ -255,7 +255,6 @@ public void testMarkContainerUnhealthyInFailedVolume() throws IOException { // unhealthy. 
hddsVolume.setState(StorageVolume.VolumeState.NORMAL); handler.markContainerUnhealthy(container, ContainerTestUtils.getUnhealthyDataScanResult()); - assertTrue(ContainerChecksumTreeManager.checksumFileExist(container)); verify(mockIcrSender, atMostOnce()).send(any()); } From 0bbbdc59c370515d37a3dd7ba9740f022024ccff Mon Sep 17 00:00:00 2001 From: Ethan Rose Date: Thu, 9 Jan 2025 09:38:33 -0500 Subject: [PATCH 16/62] Checkstyle --- .../keyvalue/TestKeyValueHandlerWithUnhealthyContainer.java | 1 - 1 file changed, 1 deletion(-) diff --git a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/keyvalue/TestKeyValueHandlerWithUnhealthyContainer.java b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/keyvalue/TestKeyValueHandlerWithUnhealthyContainer.java index 9652e37a4487..b9911874b166 100644 --- a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/keyvalue/TestKeyValueHandlerWithUnhealthyContainer.java +++ b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/keyvalue/TestKeyValueHandlerWithUnhealthyContainer.java @@ -64,7 +64,6 @@ import static org.apache.hadoop.ozone.container.ContainerTestHelper.getWriteChunkRequest; import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertFalse; -import static org.junit.jupiter.api.Assertions.assertTrue; import static org.mockito.Mockito.any; import static org.mockito.Mockito.atMostOnce; import static org.mockito.Mockito.mock; From 7b971a9491f04642f3d22e0aa83e405c1bb784a1 Mon Sep 17 00:00:00 2001 From: Ethan Rose Date: Mon, 13 Jan 2025 16:40:29 -0500 Subject: [PATCH 17/62] Address review comments --- .../ozone/container/keyvalue/KeyValueContainerCheck.java | 2 ++ .../hadoop/ozone/container/keyvalue/KeyValueHandler.java | 8 ++++---- .../dist/src/main/smoketest/admincli/container.robot | 2 +- 3 files changed, 7 insertions(+), 5 deletions(-) diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/KeyValueContainerCheck.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/KeyValueContainerCheck.java index 3b4e507e1984..ab404cc7c462 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/KeyValueContainerCheck.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/KeyValueContainerCheck.java @@ -384,6 +384,8 @@ private List scanBlock(DBHandle db, File dbFile, BlockData b blockErrors.add(error); } } else if (chunk.getChecksumData().getType() != ContainerProtos.ChecksumType.NONE) { + // Before adding chunks, add a block entry to the tree to represent cases where the block exists but has no + // chunks. 
currentTree.addBlock(block.getBlockID().getLocalID()); int bytesPerChecksum = chunk.getChecksumData().getBytesPerChecksum(); ByteBuffer buffer = BUFFER_POOL.getBuffer(bytesPerChecksum); diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/KeyValueHandler.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/KeyValueHandler.java index 962501e454d5..06f8304a8950 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/KeyValueHandler.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/KeyValueHandler.java @@ -556,7 +556,7 @@ ContainerCommandResponseProto handleCloseContainer( * * @param container The container which will have a tree generated. */ - private void createContainerMerkleTree(Container container) { + private void createContainerMerkleTreeFromMetadata(Container container) { if (ContainerChecksumTreeManager.checksumFileExist(container)) { return; } @@ -1325,7 +1325,7 @@ public void markContainerForClose(Container container) } finally { container.writeUnlock(); } - createContainerMerkleTree(container); + createContainerMerkleTreeFromMetadata(container); ContainerLogger.logClosing(container.getContainerData()); sendICR(container); } @@ -1388,7 +1388,7 @@ public void quasiCloseContainer(Container container, String reason) } finally { container.writeUnlock(); } - createContainerMerkleTree(container); + createContainerMerkleTreeFromMetadata(container); ContainerLogger.logQuasiClosed(container.getContainerData(), reason); sendICR(container); } @@ -1422,7 +1422,7 @@ public void closeContainer(Container container) } finally { container.writeUnlock(); } - createContainerMerkleTree(container); + createContainerMerkleTreeFromMetadata(container); ContainerLogger.logClosed(container.getContainerData()); sendICR(container); } diff --git a/hadoop-ozone/dist/src/main/smoketest/admincli/container.robot b/hadoop-ozone/dist/src/main/smoketest/admincli/container.robot index 307fb2431957..33fca4cd220f 100644 --- a/hadoop-ozone/dist/src/main/smoketest/admincli/container.robot +++ b/hadoop-ozone/dist/src/main/smoketest/admincli/container.robot @@ -144,5 +144,5 @@ Reconcile closed container Should Not Be Equal As Strings 0 ${data_checksum} Container checksums should match ${container} ${data_checksum} # Check that reconcile CLI returns success. Without fault injection, there is no change expected to the - # container's checksums to inidcate it made a difference + # container's checksums to indicate it made a difference Execute ozone admin container reconcile ${container} From 0989881ad9627a174e67510cdb3baa93962631f2 Mon Sep 17 00:00:00 2001 From: Ethan Rose Date: Fri, 11 Apr 2025 15:57:40 -0400 Subject: [PATCH 18/62] Initial use of on demand scan in TestKeyValueHandler Test is flaking when run multiple times with different inputs. 
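For context on the mechanism this patch leans on: OnDemandContainerDataScanner.scanContainer returns an Optional<Future<?>> that is empty when the scan is skipped (scanner not initialized, container scanned too recently, or its volume failed), so a test can force a synchronous rescan and know the merkle tree write has finished before asserting on it. The idiom, as used in the hunks below:

    Optional<Future<?>> scanFuture = OnDemandContainerDataScanner.scanContainer(container);
    assertTrue(scanFuture.isPresent());   // empty would mean the scan was skipped
    scanFuture.get().get();               // block until the scan and its tree write complete
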
--- .../container/common/impl/ContainerSet.java | 1 +
 .../container/common/interfaces/Handler.java | 3 +-
 .../container/keyvalue/KeyValueHandler.java | 3 +-
 .../OnDemandContainerDataScanner.java | 2 +-
 .../ContainerMerkleTreeTestUtils.java | 12 ---
 .../keyvalue/TestKeyValueHandler.java | 87 +++++++++++++------
 6 files changed, 65 insertions(+), 43 deletions(-)

diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/impl/ContainerSet.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/impl/ContainerSet.java
index 2124bd9c4700..da0df252b12a 100644
--- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/impl/ContainerSet.java
+++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/impl/ContainerSet.java
@@ -47,6 +47,7 @@
 import org.apache.hadoop.ozone.container.common.statemachine.StateContext;
 import org.apache.hadoop.ozone.container.common.utils.ContainerLogger;
 import org.apache.hadoop.ozone.container.common.volume.HddsVolume;
+import org.apache.hadoop.ozone.container.keyvalue.KeyValueContainer;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;

diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/interfaces/Handler.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/interfaces/Handler.java
index 76e3673ce67f..38efe92147b8 100644
--- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/interfaces/Handler.java
+++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/interfaces/Handler.java
@@ -21,6 +21,7 @@
 import java.io.IOException;
 import java.io.InputStream;
 import java.io.OutputStream;
+import java.util.Collection;
 import java.util.Set;
 import org.apache.hadoop.hdds.conf.ConfigurationSource;
 import org.apache.hadoop.hdds.protocol.DatanodeDetails;
@@ -201,7 +202,7 @@ public abstract void deleteContainer(Container container, boolean force)
 * @param peers The other datanodes with a copy of this container whose data should be checked.
 */
 public abstract void reconcileContainer(DNContainerOperationClient dnClient, Container container,
- Set<DatanodeDetails> peers) throws IOException;
+ Collection<DatanodeDetails> peers) throws IOException;

 /**
 * Deletes the given files associated with a block of the container.
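Widening peers from Set to Collection follows the usual rule of accepting the most general parameter type: reconcileContainer only iterates the peers, so callers no longer have to copy a List into a Set first. Illustratively, the test changes later in this patch go from

    kvHandler.reconcileContainer(dnClient, container, Sets.newHashSet(datanodes));

to simply

    kvHandler.reconcileContainer(dnClient, container, datanodes);  // datanodes is already a List
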
diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/KeyValueHandler.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/KeyValueHandler.java
index a7ef24a34f3a..c142b93df5d3 100644
--- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/KeyValueHandler.java
+++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/KeyValueHandler.java
@@ -74,6 +74,7 @@
 import java.time.Clock;
 import java.time.Instant;
 import java.util.ArrayList;
+import java.util.Collection;
 import java.util.HashMap;
 import java.util.LinkedList;
 import java.util.List;
@@ -1508,7 +1509,7 @@ public void deleteContainer(Container container, boolean force)
 @Override
 public void reconcileContainer(DNContainerOperationClient dnClient, Container container,
- Set<DatanodeDetails> peers) throws IOException {
+ Collection<DatanodeDetails> peers) throws IOException {
 KeyValueContainer kvContainer = (KeyValueContainer) container;
 KeyValueContainerData containerData = (KeyValueContainerData) container.getContainerData();
 Optional<ContainerProtos.ContainerChecksumInfo> optionalChecksumInfo = checksumManager.read(containerData);
diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ozoneimpl/OnDemandContainerDataScanner.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ozoneimpl/OnDemandContainerDataScanner.java
index 5c39b714f959..3f1197292b0e 100644
--- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ozoneimpl/OnDemandContainerDataScanner.java
+++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ozoneimpl/OnDemandContainerDataScanner.java
@@ -87,7 +87,7 @@ private static boolean shouldScan(Container container) {
 }
 long containerID = container.getContainerData().getContainerID();
 if (instance == null) {
- LOG.debug("Skipping on demand scan for container {} since scanner was " +
+ LOG.warn("Skipping on demand scan for container {} since scanner was " +
 "not initialized.", containerID);
 return false;
 }
diff --git a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/checksum/ContainerMerkleTreeTestUtils.java b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/checksum/ContainerMerkleTreeTestUtils.java
index c0d5ef60f517..b6cd35773b45 100644
--- a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/checksum/ContainerMerkleTreeTestUtils.java
+++ b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/checksum/ContainerMerkleTreeTestUtils.java
@@ -47,18 +47,6 @@
 import java.util.Random;
 import java.util.Set;
 import java.util.stream.Collectors;
-import org.apache.commons.lang3.tuple.Pair;
-import org.apache.hadoop.hdds.conf.ConfigurationSource;
-import org.apache.hadoop.hdds.conf.StorageUnit;
-import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos;
-import org.apache.hadoop.hdds.scm.OzoneClientConfig;
-import org.apache.hadoop.hdds.scm.ScmConfigKeys;
-import org.apache.hadoop.hdds.scm.container.ContainerInfo;
-import org.apache.hadoop.ozone.HddsDatanodeService;
-import org.apache.hadoop.ozone.container.common.impl.ContainerData;
-import org.apache.hadoop.ozone.container.common.interfaces.Container;
-import org.apache.hadoop.ozone.container.ozoneimpl.OzoneContainer;
-import org.apache.ratis.thirdparty.com.google.protobuf.ByteString;

 /**
 * Helper methods for testing container checksum tree files and
container reconciliation. diff --git a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/keyvalue/TestKeyValueHandler.java b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/keyvalue/TestKeyValueHandler.java index 20e6b85a0008..31ac4580ec34 100644 --- a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/keyvalue/TestKeyValueHandler.java +++ b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/keyvalue/TestKeyValueHandler.java @@ -27,8 +27,6 @@ import static org.apache.hadoop.hdds.scm.ScmConfigKeys.HDDS_DATANODE_DIR_KEY; import static org.apache.hadoop.hdds.scm.ScmConfigKeys.OZONE_SCM_CONTAINER_LAYOUT_KEY; import static org.apache.hadoop.ozone.OzoneConsts.GB; -import static org.apache.hadoop.ozone.container.checksum.ContainerChecksumTreeManager.getContainerChecksumFile; -import static org.apache.hadoop.ozone.container.checksum.ContainerMerkleTreeTestUtils.writeContainerDataTreeProto; import static org.apache.hadoop.ozone.container.common.ContainerTestUtils.WRITE_STAGE; import static org.apache.hadoop.ozone.container.common.ContainerTestUtils.createBlockMetaData; import static org.apache.hadoop.ozone.container.common.ContainerTestUtils.createDbInstancesForTestIfNeeded; @@ -72,8 +70,10 @@ import java.util.Random; import java.util.Set; import java.util.UUID; +import java.util.concurrent.Future; import java.util.concurrent.atomic.AtomicInteger; import java.util.stream.Stream; + import org.apache.commons.io.FileUtils; import org.apache.commons.io.IOUtils; import org.apache.commons.lang3.RandomStringUtils; @@ -120,6 +120,9 @@ import org.apache.hadoop.ozone.container.common.volume.StorageVolume; import org.apache.hadoop.ozone.container.common.volume.VolumeSet; import org.apache.hadoop.ozone.container.keyvalue.helpers.BlockUtils; +import org.apache.hadoop.ozone.container.ozoneimpl.ContainerController; +import org.apache.hadoop.ozone.container.ozoneimpl.ContainerScannerConfiguration; +import org.apache.hadoop.ozone.container.ozoneimpl.OnDemandContainerDataScanner; import org.apache.hadoop.util.Sets; import org.apache.ozone.test.GenericTestUtils; import org.apache.ratis.thirdparty.com.google.protobuf.ByteString; @@ -131,6 +134,7 @@ import org.junit.jupiter.params.ParameterizedTest; import org.junit.jupiter.params.provider.Arguments; import org.junit.jupiter.params.provider.MethodSource; +import org.mockito.Mock; import org.mockito.MockedStatic; import org.mockito.Mockito; import org.mockito.invocation.InvocationOnMock; @@ -156,6 +160,7 @@ public class TestKeyValueHandler { private HddsDispatcher dispatcher; private KeyValueHandler handler; private OzoneConfiguration conf; + private ContainerSet mockContainerSet; /** * Number of corrupt blocks and chunks. 
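One subtlety the setup hunk below works around: the on-demand scanner resolves containers through the ContainerController, which in turn looks them up in the ContainerSet by ID. Because this test builds its KeyValueContainer instances by hand instead of registering them, the dispatcher's throwaway mock ContainerSet is promoted to a field so each scan can first point it at the right instance:

    // Sketch: make the mocked set hand back the hand-built container for its ID.
    long containerID = container.getContainerData().getContainerID();
    Mockito.doReturn(container).when(mockContainerSet).getContainer(containerID);
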
@@ -188,9 +193,11 @@ public void setup() throws IOException {
 HashMap<ContainerType, Handler> handlers = new HashMap<>();
 handlers.put(ContainerType.KeyValueContainer, handler);
+ mockContainerSet = Mockito.mock(ContainerSet.class);
+
 dispatcher = new HddsDispatcher(
 new OzoneConfiguration(),
- mock(ContainerSet.class),
+ mockContainerSet,
 mock(VolumeSet.class),
 handlers,
 mock(StateContext.class),
@@ -591,6 +598,11 @@ public void testContainerChecksumInvocation(ContainerLayoutVersion layoutVersion
 public void testFullContainerReconciliation(int numBlocks, int numChunks) throws Exception {
 KeyValueHandler kvHandler = createKeyValueHandler(tempDir);
 ContainerChecksumTreeManager checksumManager = kvHandler.getChecksumManager();
+
+ ContainerController controller = new ContainerController(mockContainerSet,
+ Collections.singletonMap(ContainerType.KeyValueContainer, kvHandler));
+ OnDemandContainerDataScanner.init(conf.getObject(ContainerScannerConfiguration.class), controller, checksumManager);
+
 DNContainerOperationClient dnClient = new DNContainerOperationClient(conf, null, null);
 final long containerID = 100L;
 // Create 3 containers with 15 blocks each and 3 replicas.
@@ -600,6 +612,8 @@
 // Introduce corruption in each container on different replicas.
 introduceCorruption(kvHandler, containers.get(1), numBlocks, numChunks, false);
 introduceCorruption(kvHandler, containers.get(2), numBlocks, numChunks, true);
+ // Use synchronous on-demand scans to re-build the merkle trees after corruption.
+ waitForContainerScans(containers);

 // Without reconciliation, checksums should be different because of the corruption.
 Set<Long> checksumsBeforeReconciliation = new HashSet<>();
@@ -626,11 +640,13 @@
 Mockito.mockStatic(ContainerProtocolCalls.class)) {
 mockContainerProtocolCalls(containerProtocolMock, dnToContainerMap, checksumManager, kvHandler, containerID);
- kvHandler.reconcileContainer(dnClient, containers.get(0), Sets.newHashSet(datanodes));
- kvHandler.reconcileContainer(dnClient, containers.get(1), Sets.newHashSet(datanodes));
- kvHandler.reconcileContainer(dnClient, containers.get(2), Sets.newHashSet(datanodes));
+ kvHandler.reconcileContainer(dnClient, containers.get(0), datanodes);
+ kvHandler.reconcileContainer(dnClient, containers.get(1), datanodes);
+ kvHandler.reconcileContainer(dnClient, containers.get(2), datanodes);

 // After reconciliation, checksums should be the same for all containers.
+ // Reconciliation should have updated the tree based on the updated metadata that was obtained for the
+ // previously corrupted data. We do not need to wait for the full data scan to complete.
 ContainerProtos.ContainerChecksumInfo prevContainerChecksumInfo = null;
 for (KeyValueContainer kvContainer : containers) {
 kvHandler.createContainerMerkleTreeFromMetadata(kvContainer);
@@ -646,6 +662,21 @@ public void testFullContainerReconciliation(int numBlocks, int numChunks) throws
 }
 }
 }
+
+ public void waitForContainerScans(List<KeyValueContainer> containers) throws Exception {
+ for (KeyValueContainer container: containers) {
+ // The on-demand scanner has been initialized to pull from the mock container set.
+ // Make it pull the corresponding container instance to scan in this run based on ID.
+ long containerID = container.getContainerData().getContainerID(); + Mockito.doReturn(container).when(mockContainerSet).getContainer(containerID); + + Optional> scanFuture = OnDemandContainerDataScanner.scanContainer(container); + assertTrue(scanFuture.isPresent()); + // Wait for on-demand scan to complete. + scanFuture.get().get(); + } + } + private void mockContainerProtocolCalls(MockedStatic containerProtocolMock, Map dnToContainerMap, ContainerChecksumTreeManager checksumManager, @@ -920,8 +951,8 @@ private void introduceCorruption(KeyValueHandler kvHandler, KeyValueContainer ke } handle.getStore().getBatchHandler().commitBatchOperation(batch); } - Files.deleteIfExists(getContainerChecksumFile(keyValueContainer.getContainerData()).toPath()); - kvHandler.createContainerMerkleTreeFromMetadata(keyValueContainer); +// Files.deleteIfExists(getContainerChecksumFile(keyValueContainer.getContainerData()).toPath()); +// kvHandler.createContainerMerkleTreeFromMetadata(keyValueContainer); // Corrupt chunks at an offset. List blockDataList = kvHandler.getBlockManager().listBlock(keyValueContainer, -1, 100); @@ -936,26 +967,26 @@ private void introduceCorruption(KeyValueHandler kvHandler, KeyValueContainer ke corruptFileAtOffset(blockFile, (int) chunkInfo.getOffset(), (int) chunkInfo.getLen()); // TODO: On-demand scanner (HDDS-10374) should detect this corruption and generate container merkle tree. - ContainerProtos.ContainerChecksumInfo.Builder builder = kvHandler.getChecksumManager() - .read(containerData).get().toBuilder(); - List blockMerkleTreeList = builder.getContainerMerkleTree() - .getBlockMerkleTreeList(); - assertEquals(size, blockMerkleTreeList.size()); - - builder.getContainerMerkleTreeBuilder().clearBlockMerkleTree(); - for (int j = 0; j < blockMerkleTreeList.size(); j++) { - ContainerProtos.BlockMerkleTree.Builder blockMerkleTreeBuilder = blockMerkleTreeList.get(j).toBuilder(); - if (j == blockIndex) { - List chunkMerkleTreeBuilderList = - blockMerkleTreeBuilder.getChunkMerkleTreeBuilderList(); - chunkMerkleTreeBuilderList.get(chunkIndex).setIsHealthy(false).setDataChecksum(random.nextLong()); - blockMerkleTreeBuilder.setDataChecksum(random.nextLong()); - } - builder.getContainerMerkleTreeBuilder().addBlockMerkleTree(blockMerkleTreeBuilder.build()); - } - builder.getContainerMerkleTreeBuilder().setDataChecksum(random.nextLong()); - Files.deleteIfExists(getContainerChecksumFile(keyValueContainer.getContainerData()).toPath()); - writeContainerDataTreeProto(keyValueContainer.getContainerData(), builder.getContainerMerkleTree()); +// ContainerProtos.ContainerChecksumInfo.Builder builder = kvHandler.getChecksumManager() +// .read(containerData).get().toBuilder(); +// List blockMerkleTreeList = builder.getContainerMerkleTree() +// .getBlockMerkleTreeList(); +// assertEquals(size, blockMerkleTreeList.size()); + +// builder.getContainerMerkleTreeBuilder().clearBlockMerkleTree(); +// for (int j = 0; j < blockMerkleTreeList.size(); j++) { +// ContainerProtos.BlockMerkleTree.Builder blockMerkleTreeBuilder = blockMerkleTreeList.get(j).toBuilder(); +// if (j == blockIndex) { +// List chunkMerkleTreeBuilderList = +// blockMerkleTreeBuilder.getChunkMerkleTreeBuilderList(); +// chunkMerkleTreeBuilderList.get(chunkIndex).setIsHealthy(false).setDataChecksum(random.nextLong()); +// blockMerkleTreeBuilder.setDataChecksum(random.nextLong()); +// } +// builder.getContainerMerkleTreeBuilder().addBlockMerkleTree(blockMerkleTreeBuilder.build()); +// } +// 
builder.getContainerMerkleTreeBuilder().setDataChecksum(random.nextLong()); +// Files.deleteIfExists(getContainerChecksumFile(keyValueContainer.getContainerData()).toPath()); +// writeContainerDataTreeProto(keyValueContainer.getContainerData(), builder.getContainerMerkleTree()); } } From 834be96e55e3e0ca6fd51012c72164f8a198d114 Mon Sep 17 00:00:00 2001 From: Ethan Rose Date: Tue, 15 Apr 2025 16:41:47 -0400 Subject: [PATCH 19/62] Make on-demand scanner a normal instance --- .../OnDemandContainerDataScanner.java | 71 ++++++------------- 1 file changed, 23 insertions(+), 48 deletions(-) diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ozoneimpl/OnDemandContainerDataScanner.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ozoneimpl/OnDemandContainerDataScanner.java index 3a9c620fee62..aa2d47490d0b 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ozoneimpl/OnDemandContainerDataScanner.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ozoneimpl/OnDemandContainerDataScanner.java @@ -45,8 +45,6 @@ public final class OnDemandContainerDataScanner { public static final Logger LOG = LoggerFactory.getLogger(OnDemandContainerDataScanner.class); - private static volatile OnDemandContainerDataScanner instance; - private final ExecutorService scanExecutor; private final ContainerController containerController; private final DataTransferThrottler throttler; @@ -68,26 +66,11 @@ private OnDemandContainerDataScanner( minScanGap = conf.getContainerScanMinGap(); } - public static synchronized void init( - ContainerScannerConfiguration conf, ContainerController controller) { - if (instance != null) { - LOG.warn("Trying to initialize on demand scanner" + - " a second time on a datanode."); - return; - } - instance = new OnDemandContainerDataScanner(conf, controller); - } - - private static boolean shouldScan(Container container) { + private boolean shouldScan(Container container) { if (container == null) { return false; } long containerID = container.getContainerData().getContainerID(); - if (instance == null) { - LOG.debug("Skipping on demand scan for container {} since scanner was " + - "not initialized.", containerID); - return false; - } HddsVolume containerVolume = container.getContainerData().getVolume(); if (containerVolume.isFailed()) { @@ -96,11 +79,11 @@ private static boolean shouldScan(Container container) { return false; } - return !ContainerUtils.recentlyScanned(container, instance.minScanGap, + return !ContainerUtils.recentlyScanned(container, minScanGap, LOG) && container.shouldScanData(); } - public static Optional> scanContainer(Container container) { + public Optional> scanContainer(Container container) { if (!shouldScan(container)) { return Optional.empty(); } @@ -108,7 +91,7 @@ public static Optional> scanContainer(Container container) { Future resultFuture = null; long containerId = container.getContainerData().getContainerID(); if (addContainerToScheduledContainers(containerId)) { - resultFuture = instance.scanExecutor.submit(() -> { + resultFuture = scanExecutor.submit(() -> { performOnDemandScan(container); removeContainerFromScheduledContainers(containerId); }); @@ -116,16 +99,16 @@ public static Optional> scanContainer(Container container) { return Optional.ofNullable(resultFuture); } - private static boolean addContainerToScheduledContainers(long containerId) { - return instance.containerRescheduleCheckSet.add(containerId); + 
private boolean addContainerToScheduledContainers(long containerId) { + return containerRescheduleCheckSet.add(containerId); } - private static void removeContainerFromScheduledContainers( + private void removeContainerFromScheduledContainers( long containerId) { - instance.containerRescheduleCheckSet.remove(containerId); + containerRescheduleCheckSet.remove(containerId); } - private static void performOnDemandScan(Container container) { + private void performOnDemandScan(Container container) { if (!shouldScan(container)) { return; } @@ -135,21 +118,21 @@ private static void performOnDemandScan(Container container) { ContainerData containerData = container.getContainerData(); logScanStart(containerData); - ScanResult result = container.scanData(instance.throttler, instance.canceler); + ScanResult result = container.scanData(throttler, canceler); // Metrics for skipped containers should not be updated. if (result.isDeleted()) { LOG.debug("Container [{}] has been deleted during the data scan.", containerId); } else { if (!result.isHealthy()) { logUnhealthyScanResult(containerId, result, LOG); - boolean containerMarkedUnhealthy = instance.containerController + boolean containerMarkedUnhealthy = containerController .markContainerUnhealthy(containerId, result); if (containerMarkedUnhealthy) { - instance.metrics.incNumUnHealthyContainers(); + metrics.incNumUnHealthyContainers(); } } // TODO HDDS-10374 will need to update the merkle tree here as well. - instance.metrics.incNumContainersScanned(); + metrics.incNumContainersScanned(); } // Even if the container was deleted, mark the scan as completed since we already logged it as starting. @@ -157,7 +140,7 @@ private static void performOnDemandScan(Container container) { logScanCompleted(containerData, now); if (!result.isDeleted()) { - instance.containerController.updateDataScanTimestamp(containerId, now); + containerController.updateDataScanTimestamp(containerId, now); } } catch (IOException e) { LOG.warn("Unexpected exception while scanning container " @@ -169,7 +152,7 @@ private static void performOnDemandScan(Container container) { } } - private static void logScanStart(ContainerData containerData) { + private void logScanStart(ContainerData containerData) { if (LOG.isDebugEnabled()) { Optional scanTimestamp = containerData.lastDataScanTime(); Object lastScanTime = scanTimestamp.map(ts -> "at " + ts).orElse("never"); @@ -178,35 +161,27 @@ private static void logScanStart(ContainerData containerData) { } } - private static void logScanCompleted( + private void logScanCompleted( ContainerData containerData, Instant timestamp) { LOG.debug("Completed scan of container {} at {}", containerData.getContainerID(), timestamp); } - public static OnDemandScannerMetrics getMetrics() { - return instance.metrics; + public OnDemandScannerMetrics getMetrics() { + return metrics; } @VisibleForTesting - public static DataTransferThrottler getThrottler() { - return instance.throttler; + public DataTransferThrottler getThrottler() { + return throttler; } @VisibleForTesting - public static Canceler getCanceler() { - return instance.canceler; - } - - public static synchronized void shutdown() { - if (instance == null) { - return; - } - instance.shutdownScanner(); + public Canceler getCanceler() { + return canceler; } - private synchronized void shutdownScanner() { - instance = null; + public synchronized void shutdown() { metrics.unregister(); String shutdownMessage = "On-demand container scanner is shutting down."; LOG.info(shutdownMessage); From 
e73757ebe813a41c645a5b30800753d256977682 Mon Sep 17 00:00:00 2001 From: Ethan Rose Date: Tue, 15 Apr 2025 19:02:08 -0400 Subject: [PATCH 20/62] Register on-demand scan callback in ContainerSet --- .../container/common/impl/ContainerSet.java | 17 +++++++++++++++++ .../ozoneimpl/OnDemandContainerDataScanner.java | 2 +- .../container/ozoneimpl/OzoneContainer.java | 4 +++- 3 files changed, 21 insertions(+), 2 deletions(-) diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/impl/ContainerSet.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/impl/ContainerSet.java index 2124bd9c4700..0e431f7784b0 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/impl/ContainerSet.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/impl/ContainerSet.java @@ -37,6 +37,8 @@ import java.util.concurrent.ConcurrentSkipListSet; import java.util.concurrent.atomic.AtomicBoolean; import java.util.concurrent.atomic.AtomicInteger; +import java.util.function.Consumer; + import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos; import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos.ContainerDataProto.State; import org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos.ContainerReportsProto; @@ -66,6 +68,8 @@ public class ContainerSet implements Iterable> { private Clock clock; private long recoveringTimeout; private final Table containerIdsTable; + // Handler that will be invoked when a user of a container reports an error. + private Consumer> containerErrorHandler; @VisibleForTesting public ContainerSet(long recoveringTimeout) { @@ -128,6 +132,19 @@ public void ensureContainerNotMissing(long containerId, State state) throws Stor } } + /** + * @param handler All callback that will be invoked when an error is reported with a member of this container set. + */ + public void registerContainerErrorHandler(Consumer> handler) { + this.containerErrorHandler = handler; + } + + public void reportError(long containerID) { + if (containerErrorHandler != null) { + containerErrorHandler.accept(getContainer(containerID)); + } + } + /** * Add Container to container map. 
* @param container container to be added diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ozoneimpl/OnDemandContainerDataScanner.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ozoneimpl/OnDemandContainerDataScanner.java index aa2d47490d0b..46da3f4521eb 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ozoneimpl/OnDemandContainerDataScanner.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ozoneimpl/OnDemandContainerDataScanner.java @@ -54,7 +54,7 @@ public final class OnDemandContainerDataScanner { private final OnDemandScannerMetrics metrics; private final long minScanGap; - private OnDemandContainerDataScanner( + public OnDemandContainerDataScanner( ContainerScannerConfiguration conf, ContainerController controller) { containerController = controller; throttler = new DataTransferThrottler( diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ozoneimpl/OzoneContainer.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ozoneimpl/OzoneContainer.java index 370b455e3367..6d2f0a5d07ba 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ozoneimpl/OzoneContainer.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ozoneimpl/OzoneContainer.java @@ -121,6 +121,7 @@ public class OzoneContainer { private final XceiverServerSpi readChannel; private final ContainerController controller; private BackgroundContainerMetadataScanner metadataScanner; + private OnDemandContainerDataScanner onDemandScanner; private List dataScanners; private List backgroundScanners; private final BlockDeletingService blockDeletingService; @@ -432,7 +433,8 @@ private void initOnDemandContainerScanner(ContainerScannerConfiguration c) { "so the on-demand container data scanner will not start."); return; } - OnDemandContainerDataScanner.init(c, controller); + onDemandScanner = new OnDemandContainerDataScanner(c, controller); + containerSet.registerContainerErrorHandler(onDemandScanner::scanContainer); } /** From f0d8efee73a18736a8dbad7068ca7e016eb025ac Mon Sep 17 00:00:00 2001 From: Ethan Rose Date: Tue, 15 Apr 2025 19:05:41 -0400 Subject: [PATCH 21/62] Migrate scanContainer usage in prod code --- .../container/common/impl/HddsDispatcher.java | 2 +- .../container/keyvalue/KeyValueHandler.java | 17 +++++++++-------- 2 files changed, 10 insertions(+), 9 deletions(-) diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/impl/HddsDispatcher.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/impl/HddsDispatcher.java index 0d113d467d94..9572186cd1f1 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/impl/HddsDispatcher.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/impl/HddsDispatcher.java @@ -428,7 +428,7 @@ && getMissingContainerSet().contains(containerID)) { // Create a specific exception that signals for on demand scanning // and move this general scan to where it is more appropriate. // Add integration tests to test the full functionality. 
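Taken together with the ContainerSet callback above, the dispatcher change below reduces to a two-line contract. Both lines appear verbatim in these patches (the pair is renamed to registerContainerScanHandler/scanContainer in a later patch):

    // Startup wiring in OzoneContainer:
    containerSet.registerContainerErrorHandler(onDemandScanner::scanContainer);
    // Any holder of the ContainerSet can then request a scan without depending on the scanner class:
    containerSet.reportError(containerID);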
- OnDemandContainerDataScanner.scanContainer(container); + containerSet.reportError(containerID); audit(action, eventType, msg, dispatcherContext, AuditEventStatus.FAILURE, new Exception(responseProto.getMessage())); } diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/KeyValueHandler.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/KeyValueHandler.java index 43926ca5e282..ef4a430fd363 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/KeyValueHandler.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/KeyValueHandler.java @@ -1507,6 +1507,7 @@ public void reconcileContainer(DNContainerOperationClient dnClient, Container Set peers) throws IOException { KeyValueContainer kvContainer = (KeyValueContainer) container; KeyValueContainerData containerData = (KeyValueContainerData) container.getContainerData(); + long containerID = containerData.getContainerID(); Optional optionalChecksumInfo = checksumManager.read(containerData); ContainerProtos.ContainerChecksumInfo checksumInfo; @@ -1521,10 +1522,10 @@ public void reconcileContainer(DNContainerOperationClient dnClient, Container for (DatanodeDetails peer : peers) { long start = Instant.now().toEpochMilli(); ContainerProtos.ContainerChecksumInfo peerChecksumInfo = dnClient.getContainerChecksumInfo( - containerData.getContainerID(), peer); + containerID, peer); if (peerChecksumInfo == null) { LOG.warn("Cannot reconcile container {} with peer {} which has not yet generated a checksum", - containerData.getContainerID(), peer); + containerID, peer); continue; } @@ -1538,7 +1539,7 @@ public void reconcileContainer(DNContainerOperationClient dnClient, Container handleMissingBlock(kvContainer, pipeline, dnClient, missingBlock, chunkByteBuffer); } catch (IOException e) { LOG.error("Error while reconciling missing block for block {} in container {}", missingBlock.getBlockID(), - containerData.getContainerID(), e); + containerID, e); } } @@ -1548,7 +1549,7 @@ public void reconcileContainer(DNContainerOperationClient dnClient, Container reconcileChunksPerBlock(kvContainer, pipeline, dnClient, entry.getKey(), entry.getValue(), chunkByteBuffer); } catch (IOException e) { LOG.error("Error while reconciling missing chunk for block {} in container {}", entry.getKey(), - containerData.getContainerID(), e); + containerID, e); } } @@ -1558,7 +1559,7 @@ public void reconcileContainer(DNContainerOperationClient dnClient, Container reconcileChunksPerBlock(kvContainer, pipeline, dnClient, entry.getKey(), entry.getValue(), chunkByteBuffer); } catch (IOException e) { LOG.error("Error while reconciling corrupt chunk for block {} in container {}", entry.getKey(), - containerData.getContainerID(), e); + containerID, e); } } // Update checksum based on RocksDB metadata. The read chunk validates the checksum of the data @@ -1570,18 +1571,18 @@ public void reconcileContainer(DNContainerOperationClient dnClient, Container if (dataChecksum == oldDataChecksum) { metrics.incContainerReconciledWithoutChanges(); LOG.info("Container {} reconciled with peer {}. No change in checksum. Current checksum {}. Time taken {} ms", - containerData.getContainerID(), peer.toString(), checksumToString(dataChecksum), duration); + containerID, peer.toString(), checksumToString(dataChecksum), duration); } else { metrics.incContainerReconciledWithChanges(); LOG.warn("Container {} reconciled with peer {}. 
Checksum updated from {} to {}. Time taken {} ms", - containerData.getContainerID(), peer.toString(), checksumToString(oldDataChecksum), + containerID, peer.toString(), checksumToString(oldDataChecksum), checksumToString(dataChecksum), duration); } ContainerLogger.logReconciled(container.getContainerData(), oldDataChecksum, peer); } // Trigger manual on demand scanner - OnDemandContainerDataScanner.scanContainer(container); + containerSet.reportError(containerID); sendICR(container); } From 4cb054c8e806c47fa5e6b72f46dec0df652e30e8 Mon Sep 17 00:00:00 2001 From: Ethan Rose Date: Tue, 15 Apr 2025 19:13:35 -0400 Subject: [PATCH 22/62] Switch terminology from error to scan. Add existence checks --- .../container/common/impl/ContainerSet.java | 24 +++++++++++++------ .../container/common/impl/HddsDispatcher.java | 3 +-- .../container/keyvalue/KeyValueHandler.java | 3 +-- .../container/ozoneimpl/OzoneContainer.java | 2 +- 4 files changed, 20 insertions(+), 12 deletions(-) diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/impl/ContainerSet.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/impl/ContainerSet.java index 0e431f7784b0..28412fa100cc 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/impl/ContainerSet.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/impl/ContainerSet.java @@ -69,7 +69,7 @@ public class ContainerSet implements Iterable> { private long recoveringTimeout; private final Table containerIdsTable; // Handler that will be invoked when a user of a container reports an error. - private Consumer> containerErrorHandler; + private Consumer> containerScanHandler; @VisibleForTesting public ContainerSet(long recoveringTimeout) { @@ -133,15 +133,25 @@ public void ensureContainerNotMissing(long containerId, State state) throws Stor } /** - * @param handler All callback that will be invoked when an error is reported with a member of this container set. + * @param scanner A callback that will be invoked when a scan of a container in this set is requested. */ - public void registerContainerErrorHandler(Consumer> handler) { - this.containerErrorHandler = handler; + public void registerContainerScanHandler(Consumer> scanner) { + this.containerScanHandler = scanner; } - public void reportError(long containerID) { - if (containerErrorHandler != null) { - containerErrorHandler.accept(getContainer(containerID)); + /** + * Triggers a scan of a container in this set using the registered scan handler. This is a no-op if no scan handler + * is registered or the container does not exist in the set. + * @param containerID The container in this set to scan. 
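+   *                    Whether the scan actually runs is still up to the registered handler,
+   *                    which applies its own gating (for example volume health and the
+   *                    recent-scan window in OnDemandContainerDataScanner).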
+ */ + public void scanContainer(long containerID) { + if (containerScanHandler != null) { + Container container = getContainer(containerID); + if (container != null) { + containerScanHandler.accept(container); + } else { + LOG.warn("Request to scan container {} which was not found in the container set", containerID); + } } } diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/impl/HddsDispatcher.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/impl/HddsDispatcher.java index 9572186cd1f1..e240ff6bf478 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/impl/HddsDispatcher.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/impl/HddsDispatcher.java @@ -74,7 +74,6 @@ import org.apache.hadoop.ozone.container.common.volume.VolumeUsage; import org.apache.hadoop.ozone.container.ozoneimpl.ContainerScanError; import org.apache.hadoop.ozone.container.ozoneimpl.DataScanResult; -import org.apache.hadoop.ozone.container.ozoneimpl.OnDemandContainerDataScanner; import org.apache.hadoop.util.Time; import org.apache.ratis.statemachine.StateMachine; import org.apache.ratis.thirdparty.com.google.protobuf.ProtocolMessageEnum; @@ -428,7 +427,7 @@ && getMissingContainerSet().contains(containerID)) { // Create a specific exception that signals for on demand scanning // and move this general scan to where it is more appropriate. // Add integration tests to test the full functionality. - containerSet.reportError(containerID); + containerSet.scanContainer(containerID); audit(action, eventType, msg, dispatcherContext, AuditEventStatus.FAILURE, new Exception(responseProto.getMessage())); } diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/KeyValueHandler.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/KeyValueHandler.java index ef4a430fd363..ab07edb6b78e 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/KeyValueHandler.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/KeyValueHandler.java @@ -155,7 +155,6 @@ import org.apache.hadoop.ozone.container.keyvalue.impl.ChunkManagerFactory; import org.apache.hadoop.ozone.container.keyvalue.interfaces.BlockManager; import org.apache.hadoop.ozone.container.keyvalue.interfaces.ChunkManager; -import org.apache.hadoop.ozone.container.ozoneimpl.OnDemandContainerDataScanner; import org.apache.hadoop.ozone.container.upgrade.VersionedDatanodeFeatures; import org.apache.hadoop.security.token.Token; import org.apache.hadoop.util.Time; @@ -1582,7 +1581,7 @@ public void reconcileContainer(DNContainerOperationClient dnClient, Container } // Trigger manual on demand scanner - containerSet.reportError(containerID); + containerSet.scanContainer(containerID); sendICR(container); } diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ozoneimpl/OzoneContainer.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ozoneimpl/OzoneContainer.java index 6d2f0a5d07ba..e1477e87484a 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ozoneimpl/OzoneContainer.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ozoneimpl/OzoneContainer.java @@ -434,7 +434,7 @@ private void 
initOnDemandContainerScanner(ContainerScannerConfiguration c) { return; } onDemandScanner = new OnDemandContainerDataScanner(c, controller); - containerSet.registerContainerErrorHandler(onDemandScanner::scanContainer); + containerSet.registerContainerScanHandler(onDemandScanner::scanContainer); } /** From 8abedb64325e5ded9ae2ea6ffc21668637f293d8 Mon Sep 17 00:00:00 2001 From: Ethan Rose Date: Tue, 15 Apr 2025 19:27:04 -0400 Subject: [PATCH 23/62] Update tests --- .../container/ozoneimpl/OzoneContainer.java | 2 +- .../TestOnDemandContainerDataScanner.java | 67 ++++++++----------- 2 files changed, 29 insertions(+), 40 deletions(-) diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ozoneimpl/OzoneContainer.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ozoneimpl/OzoneContainer.java index e1477e87484a..bb9ac58c8fe6 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ozoneimpl/OzoneContainer.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ozoneimpl/OzoneContainer.java @@ -453,7 +453,7 @@ private void stopContainerScrub() { for (BackgroundContainerDataScanner s : dataScanners) { s.shutdown(); } - OnDemandContainerDataScanner.shutdown(); + onDemandScanner.shutdown(); } @VisibleForTesting diff --git a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/ozoneimpl/TestOnDemandContainerDataScanner.java b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/ozoneimpl/TestOnDemandContainerDataScanner.java index 87a77e6d8f97..75dfae3b822c 100644 --- a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/ozoneimpl/TestOnDemandContainerDataScanner.java +++ b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/ozoneimpl/TestOnDemandContainerDataScanner.java @@ -60,10 +60,13 @@ @MockitoSettings(strictness = Strictness.LENIENT) public class TestOnDemandContainerDataScanner extends TestContainerScannersAbstract { + + private OnDemandContainerDataScanner onDemandScanner; @BeforeEach public void setup() { super.setup(); + onDemandScanner = new OnDemandContainerDataScanner(conf, controller); } @Test @@ -96,14 +99,13 @@ public void testUnscannedContainerIsScanned() throws Exception { @AfterEach public void tearDown() { - OnDemandContainerDataScanner.shutdown(); + onDemandScanner.shutdown(); } @Test public void testScanTimestampUpdated() throws Exception { - OnDemandContainerDataScanner.init(conf, controller); Optional> scanFuture = - OnDemandContainerDataScanner.scanContainer(healthy); + onDemandScanner.scanContainer(healthy); assertTrue(scanFuture.isPresent()); scanFuture.get().get(); verify(controller, atLeastOnce()) @@ -112,7 +114,7 @@ public void testScanTimestampUpdated() throws Exception { // Metrics for deleted container should not be updated. 
scanFuture = - OnDemandContainerDataScanner.scanContainer(healthy); + onDemandScanner.scanContainer(healthy); assertTrue(scanFuture.isPresent()); scanFuture.get().get(); verify(controller, never()) @@ -121,35 +123,28 @@ public void testScanTimestampUpdated() throws Exception { } @Test - public void testContainerScannerMultipleInitsAndShutdowns() throws Exception { - OnDemandContainerDataScanner.init(conf, controller); - OnDemandContainerDataScanner.init(conf, controller); - OnDemandContainerDataScanner.shutdown(); - OnDemandContainerDataScanner.shutdown(); - //There shouldn't be an interaction after shutdown: - OnDemandContainerDataScanner.scanContainer(corruptData); - verifyContainerMarkedUnhealthy(corruptData, never()); + public void testContainerScannerMultipleShutdowns() { + // No runtime exceptions should be thrown. + onDemandScanner.shutdown(); + onDemandScanner.shutdown(); } @Test public void testSameContainerQueuedMultipleTimes() throws Exception { - OnDemandContainerDataScanner.init(conf, controller); //Given a container that has not finished scanning CountDownLatch latch = new CountDownLatch(1); when(corruptData.scanData( - OnDemandContainerDataScanner.getThrottler(), - OnDemandContainerDataScanner.getCanceler())) + any(), + any())) .thenAnswer((Answer) invocation -> { latch.await(); return getUnhealthyDataScanResult(); }); - Optional> onGoingScan = OnDemandContainerDataScanner - .scanContainer(corruptData); + Optional> onGoingScan = onDemandScanner.scanContainer(corruptData); assertTrue(onGoingScan.isPresent()); assertFalse(onGoingScan.get().isDone()); //When scheduling the same container again - Optional> secondScan = OnDemandContainerDataScanner - .scanContainer(corruptData); + Optional> secondScan = onDemandScanner.scanContainer(corruptData); //Then the second scan is not scheduled and the first scan can still finish assertFalse(secondScan.isPresent()); latch.countDown(); @@ -161,19 +156,18 @@ public void testSameContainerQueuedMultipleTimes() throws Exception { @Test @Override public void testScannerMetrics() throws Exception { - OnDemandContainerDataScanner.init(conf, controller); ArrayList>> resultFutureList = Lists.newArrayList(); - resultFutureList.add(OnDemandContainerDataScanner.scanContainer( + resultFutureList.add(onDemandScanner.scanContainer( corruptData)); resultFutureList.add( - OnDemandContainerDataScanner.scanContainer(openContainer)); + onDemandScanner.scanContainer(openContainer)); resultFutureList.add( - OnDemandContainerDataScanner.scanContainer(openCorruptMetadata)); - resultFutureList.add(OnDemandContainerDataScanner.scanContainer(healthy)); + onDemandScanner.scanContainer(openCorruptMetadata)); + resultFutureList.add(onDemandScanner.scanContainer(healthy)); // Deleted containers will not count towards the scan count metric. 
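+    // (The scan itself still runs; the scanner only skips the metrics update when the
+    // scan result reports that the container was deleted mid-scan.)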
- resultFutureList.add(OnDemandContainerDataScanner.scanContainer(deletedContainer)); + resultFutureList.add(onDemandScanner.scanContainer(deletedContainer)); waitOnScannerToFinish(resultFutureList); - OnDemandScannerMetrics metrics = OnDemandContainerDataScanner.getMetrics(); + OnDemandScannerMetrics metrics = onDemandScanner.getMetrics(); //Containers with shouldScanData = false shouldn't increase // the number of scanned containers assertEquals(0, metrics.getNumUnHealthyContainers()); @@ -183,11 +177,9 @@ public void testScannerMetrics() throws Exception { @Test @Override public void testScannerMetricsUnregisters() { - OnDemandContainerDataScanner.init(conf, controller); - String metricsName = OnDemandContainerDataScanner.getMetrics().getName(); + String metricsName = onDemandScanner.getMetrics().getName(); assertNotNull(DefaultMetricsSystem.instance().getSource(metricsName)); - OnDemandContainerDataScanner.shutdown(); - OnDemandContainerDataScanner.scanContainer(healthy); + onDemandScanner.shutdown(); assertNull(DefaultMetricsSystem.instance().getSource(metricsName)); } @@ -196,7 +188,7 @@ public void testScannerMetricsUnregisters() { public void testUnhealthyContainersDetected() throws Exception { // Without initialization, // there shouldn't be interaction with containerController - OnDemandContainerDataScanner.scanContainer(corruptData); + onDemandScanner.scanContainer(corruptData); verifyNoInteractions(controller); scanContainer(healthy); @@ -223,8 +215,7 @@ public void testUnhealthyContainersDetected() throws Exception { public void testWithVolumeFailure() throws Exception { when(vol.isFailed()).thenReturn(true); - OnDemandContainerDataScanner.init(conf, controller); - OnDemandScannerMetrics metrics = OnDemandContainerDataScanner.getMetrics(); + OnDemandScannerMetrics metrics = onDemandScanner.getMetrics(); scanContainer(healthy); verifyContainerMarkedUnhealthy(healthy, never()); @@ -248,11 +239,10 @@ public void testShutdownDuringScan() throws Exception { }); // Start the blocking scan. - OnDemandContainerDataScanner.init(conf, controller); - OnDemandContainerDataScanner.scanContainer(healthy); + onDemandScanner.scanContainer(healthy); // Shut down the on demand scanner. This will interrupt the blocked scan // on the healthy container. - OnDemandContainerDataScanner.shutdown(); + onDemandScanner.shutdown(); // Interrupting the healthy container's scan should not mark it unhealthy. verifyContainerMarkedUnhealthy(healthy, never()); } @@ -271,7 +261,7 @@ public void testUnhealthyContainerRescanned() throws Exception { // First iteration should find the unhealthy container. 
scanContainer(unhealthy); verifyContainerMarkedUnhealthy(unhealthy, atMostOnce()); - OnDemandScannerMetrics metrics = OnDemandContainerDataScanner.getMetrics(); + OnDemandScannerMetrics metrics = onDemandScanner.getMetrics(); assertEquals(1, metrics.getNumContainersScanned()); assertEquals(1, metrics.getNumUnHealthyContainers()); @@ -296,9 +286,8 @@ public void testUnhealthyContainerRescanned() throws Exception { } private void scanContainer(Container container) throws Exception { - OnDemandContainerDataScanner.init(conf, controller); Optional> scanFuture = - OnDemandContainerDataScanner.scanContainer(container); + onDemandScanner.scanContainer(container); if (scanFuture.isPresent()) { scanFuture.get().get(); } From 577a075fa182ee80c3b407975b1b6b53bc9d5f8e Mon Sep 17 00:00:00 2001 From: Ethan Rose Date: Wed, 16 Apr 2025 12:00:23 -0400 Subject: [PATCH 24/62] Add unit test for ContainerSet --- .../container/common/impl/TestContainerSet.java | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/impl/TestContainerSet.java b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/impl/TestContainerSet.java index 84d44534525f..45e8e6577895 100644 --- a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/impl/TestContainerSet.java +++ b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/impl/TestContainerSet.java @@ -37,6 +37,7 @@ import java.util.Optional; import java.util.Random; import java.util.UUID; +import java.util.concurrent.atomic.AtomicLong; import java.util.stream.LongStream; import org.apache.hadoop.conf.StorageUnit; import org.apache.hadoop.hdds.conf.OzoneConfiguration; @@ -48,6 +49,7 @@ import org.apache.hadoop.ozone.container.keyvalue.ContainerLayoutTestInfo; import org.apache.hadoop.ozone.container.keyvalue.KeyValueContainer; import org.apache.hadoop.ozone.container.keyvalue.KeyValueContainerData; +import org.junit.jupiter.api.Test; /** * Class used to test ContainerSet operations. @@ -284,6 +286,21 @@ public void testListContainerFromFirstKey(ContainerLayoutVersion layout) assertContainerIds(FIRST_ID, count, result); } + @ContainerLayoutTestInfo.ContainerTest + public void testContainerScanHandler(ContainerLayoutVersion layout) throws Exception { + setLayoutVersion(layout); + ContainerSet containerSet = createContainerSet(); + // Atomic long required since lambda modification must be effectively final. + AtomicLong invocationCount = new AtomicLong(); + containerSet.registerContainerScanHandler(c -> { + assertEquals(c.getContainerData().getContainerID(), FIRST_ID); + invocationCount.getAndIncrement(); + }); + + containerSet.scanContainer(FIRST_ID); + assertEquals(1, invocationCount.get()); + } + /** * Verify that {@code result} contains {@code count} containers * with IDs in increasing order starting at {@code startId}. 
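Before the checkstyle and comment cleanups that follow, the contract the new test exercises can be pinned down in a few lines. A minimal, hedged usage sketch of the callback API from patches 20-22 (the container ID here is hypothetical, and the AtomicLong stands in for a real scanner):

    // Hedged sketch of the ContainerSet scan-handler contract; not code from this series.
    ContainerSet containerSet = new ContainerSet(1000);  // recovering-timeout constructor used by these tests
    AtomicLong scanRequests = new AtomicLong();
    // Production wiring registers onDemandScanner::scanContainer instead of this lambda.
    containerSet.registerContainerScanHandler(c -> scanRequests.incrementAndGet());
    containerSet.scanContainer(12345L);  // unknown ID: logged with a warning, handler not invoked
    // Once addContainer(...) has stored a container with that ID, the same call hands the
    // Container to the handler, which may schedule an asynchronous scan.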
From 4c8d8436a1f86c82c675d6f166d7e62c624994ef Mon Sep 17 00:00:00 2001 From: Ethan Rose Date: Wed, 16 Apr 2025 12:02:02 -0400 Subject: [PATCH 25/62] Checkstyle --- .../apache/hadoop/ozone/container/common/impl/ContainerSet.java | 1 - .../hadoop/ozone/container/common/impl/TestContainerSet.java | 1 - 2 files changed, 2 deletions(-) diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/impl/ContainerSet.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/impl/ContainerSet.java index 28412fa100cc..ca0ea191fcf3 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/impl/ContainerSet.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/impl/ContainerSet.java @@ -38,7 +38,6 @@ import java.util.concurrent.atomic.AtomicBoolean; import java.util.concurrent.atomic.AtomicInteger; import java.util.function.Consumer; - import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos; import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos.ContainerDataProto.State; import org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos.ContainerReportsProto; diff --git a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/impl/TestContainerSet.java b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/impl/TestContainerSet.java index 45e8e6577895..b14ff219eecc 100644 --- a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/impl/TestContainerSet.java +++ b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/impl/TestContainerSet.java @@ -49,7 +49,6 @@ import org.apache.hadoop.ozone.container.keyvalue.ContainerLayoutTestInfo; import org.apache.hadoop.ozone.container.keyvalue.KeyValueContainer; import org.apache.hadoop.ozone.container.keyvalue.KeyValueContainerData; -import org.junit.jupiter.api.Test; /** * Class used to test ContainerSet operations. From 0bd4127052ee2abb55301ff3e81a950305bf0a41 Mon Sep 17 00:00:00 2001 From: Ethan Rose Date: Wed, 16 Apr 2025 12:15:58 -0400 Subject: [PATCH 26/62] Improve comments and test --- .../hadoop/ozone/container/common/impl/ContainerSet.java | 2 +- .../ozone/container/common/impl/TestContainerSet.java | 9 ++++++++- 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/impl/ContainerSet.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/impl/ContainerSet.java index ca0ea191fcf3..8204f58953c8 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/impl/ContainerSet.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/impl/ContainerSet.java @@ -67,7 +67,7 @@ public class ContainerSet implements Iterable> { private Clock clock; private long recoveringTimeout; private final Table containerIdsTable; - // Handler that will be invoked when a user of a container reports an error. + // Handler that will be invoked when a scan of a container in this set is requested. 
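+  // Registration is optional: scanContainer() below is a no-op (with a warning for unknown IDs)
+  // until a handler has been installed.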
private Consumer> containerScanHandler; @VisibleForTesting diff --git a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/impl/TestContainerSet.java b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/impl/TestContainerSet.java index b14ff219eecc..377b921297d3 100644 --- a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/impl/TestContainerSet.java +++ b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/impl/TestContainerSet.java @@ -289,13 +289,20 @@ public void testListContainerFromFirstKey(ContainerLayoutVersion layout) public void testContainerScanHandler(ContainerLayoutVersion layout) throws Exception { setLayoutVersion(layout); ContainerSet containerSet = createContainerSet(); - // Atomic long required since lambda modification must be effectively final. + // Scan when no handler is registered should not throw an exception. + containerSet.scanContainer(FIRST_ID); + + // Scan of non-existent container should not throw exception or trigger the handler. + containerSet.scanContainer(FIRST_ID - 1); AtomicLong invocationCount = new AtomicLong(); containerSet.registerContainerScanHandler(c -> { + // If the handler was incorrectly triggered for a non-existent container, this assert would fail. assertEquals(c.getContainerData().getContainerID(), FIRST_ID); invocationCount.getAndIncrement(); }); + assertEquals(0, invocationCount.get()); + // Only scan of an existing container when a handler is registered should trigger a scan. containerSet.scanContainer(FIRST_ID); assertEquals(1, invocationCount.get()); } From 61f30f304e3f6828081645bf4f30a5a957d38ef9 Mon Sep 17 00:00:00 2001 From: Ethan Rose Date: Thu, 17 Apr 2025 16:12:33 -0400 Subject: [PATCH 27/62] WIP migrate reconciliation unit tests --- .../keyvalue/TestKeyValueHandler.java | 334 +----------- ...eyValueHandlerContainerReconciliation.java | 503 ++++++++++++++++++ 2 files changed, 511 insertions(+), 326 deletions(-) create mode 100644 hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/keyvalue/TestKeyValueHandlerContainerReconciliation.java diff --git a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/keyvalue/TestKeyValueHandler.java b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/keyvalue/TestKeyValueHandler.java index 31ac4580ec34..667f266c97ad 100644 --- a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/keyvalue/TestKeyValueHandler.java +++ b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/keyvalue/TestKeyValueHandler.java @@ -152,8 +152,6 @@ public class TestKeyValueHandler { private static final long DUMMY_CONTAINER_ID = 9999; private static final String DUMMY_PATH = "dummy/dir/doesnt/exist"; - private static final int CHUNK_LEN = 3 * (int) OzoneConsts.KB; - private static final int CHUNKS_PER_BLOCK = 4; private static final String DATANODE_UUID = UUID.randomUUID().toString(); private static final String CLUSTER_ID = UUID.randomUUID().toString(); @@ -162,26 +160,6 @@ public class TestKeyValueHandler { private OzoneConfiguration conf; private ContainerSet mockContainerSet; - /** - * Number of corrupt blocks and chunks. 
- */ - public static Stream corruptionValues() { - return Stream.of( - Arguments.of(5, 0), - Arguments.of(0, 5), - Arguments.of(0, 10), - Arguments.of(10, 0), - Arguments.of(5, 10), - Arguments.of(10, 5), - Arguments.of(2, 3), - Arguments.of(3, 2), - Arguments.of(4, 6), - Arguments.of(6, 4), - Arguments.of(6, 9), - Arguments.of(9, 6) - ); - } - @BeforeEach public void setup() throws IOException { // Create mock HddsDispatcher and KeyValueHandler. @@ -204,7 +182,6 @@ public void setup() throws IOException { mock(ContainerMetrics.class), mock(TokenVerifier.class) ); - } /** @@ -593,151 +570,6 @@ public void testContainerChecksumInvocation(ContainerLayoutVersion layoutVersion Assertions.assertEquals(1, icrCount.get()); } - @ParameterizedTest - @MethodSource("corruptionValues") - public void testFullContainerReconciliation(int numBlocks, int numChunks) throws Exception { - KeyValueHandler kvHandler = createKeyValueHandler(tempDir); - ContainerChecksumTreeManager checksumManager = kvHandler.getChecksumManager(); - - ContainerController controller = new ContainerController(mockContainerSet, - Collections.singletonMap(ContainerType.KeyValueContainer, kvHandler)); - OnDemandContainerDataScanner.init(conf.getObject(ContainerScannerConfiguration.class), controller, checksumManager); - - DNContainerOperationClient dnClient = new DNContainerOperationClient(conf, null, null); - final long containerID = 100L; - // Create 3 containers with 15 blocks each and 3 replicas. - List containers = createContainerWithBlocks(kvHandler, containerID, 15, 3); - assertEquals(3, containers.size()); - - // Introduce corruption in each container on different replicas. - introduceCorruption(kvHandler, containers.get(1), numBlocks, numChunks, false); - introduceCorruption(kvHandler, containers.get(2), numBlocks, numChunks, true); - // Use synchronous on-demand scans to re-build the merkle trees after corruption. - waitForContainerScans(containers); - - // Without reconciliation, checksums should be different because of the corruption. - Set checksumsBeforeReconciliation = new HashSet<>(); - for (KeyValueContainer kvContainer : containers) { - Optional containerChecksumInfo = - checksumManager.read(kvContainer.getContainerData()); - assertTrue(containerChecksumInfo.isPresent()); - long dataChecksum = containerChecksumInfo.get().getContainerMerkleTree().getDataChecksum(); - assertEquals(kvContainer.getContainerData().getDataChecksum(), dataChecksum); - checksumsBeforeReconciliation.add(dataChecksum); - } - // There should be more than 1 checksum because of the corruption. - assertTrue(checksumsBeforeReconciliation.size() > 1); - - List datanodes = ImmutableList.of(randomDatanodeDetails(), randomDatanodeDetails(), - randomDatanodeDetails()); - Map dnToContainerMap = new HashMap<>(); - dnToContainerMap.put(datanodes.get(0).getUuidString(), containers.get(0)); - dnToContainerMap.put(datanodes.get(1).getUuidString(), containers.get(1)); - dnToContainerMap.put(datanodes.get(2).getUuidString(), containers.get(2)); - - // Setup mock for each datanode network calls needed for reconciliation. 
- try (MockedStatic containerProtocolMock = - Mockito.mockStatic(ContainerProtocolCalls.class)) { - mockContainerProtocolCalls(containerProtocolMock, dnToContainerMap, checksumManager, kvHandler, containerID); - - kvHandler.reconcileContainer(dnClient, containers.get(0), datanodes); - kvHandler.reconcileContainer(dnClient, containers.get(1), datanodes); - kvHandler.reconcileContainer(dnClient, containers.get(2), datanodes); - - // After reconciliation, checksums should be the same for all containers. - // Reconciliation should have updated the tree based on the updated metadata that was obtained for the - // previously corrupted data. We do not need to wait for the full data scan to complete. - ContainerProtos.ContainerChecksumInfo prevContainerChecksumInfo = null; - for (KeyValueContainer kvContainer : containers) { - kvHandler.createContainerMerkleTreeFromMetadata(kvContainer); - Optional containerChecksumInfo = - checksumManager.read(kvContainer.getContainerData()); - assertTrue(containerChecksumInfo.isPresent()); - long dataChecksum = containerChecksumInfo.get().getContainerMerkleTree().getDataChecksum(); - assertEquals(kvContainer.getContainerData().getDataChecksum(), dataChecksum); - if (prevContainerChecksumInfo != null) { - assertEquals(prevContainerChecksumInfo.getContainerMerkleTree().getDataChecksum(), dataChecksum); - } - prevContainerChecksumInfo = containerChecksumInfo.get(); - } - } - } - - public void waitForContainerScans(List containers) throws Exception { - for (KeyValueContainer container: containers) { - // The on-demand scanner has been initialized to pull from the mock container set. - // Make it pull the corresponding container instance to scan in this run based on ID. - long containerID = container.getContainerData().getContainerID(); - Mockito.doReturn(container).when(mockContainerSet).getContainer(containerID); - - Optional> scanFuture = OnDemandContainerDataScanner.scanContainer(container); - assertTrue(scanFuture.isPresent()); - // Wait for on-demand scan to complete. 
- scanFuture.get().get(); - } - } - - private void mockContainerProtocolCalls(MockedStatic containerProtocolMock, - Map dnToContainerMap, - ContainerChecksumTreeManager checksumManager, - KeyValueHandler kvHandler, - long containerID) { - // Mock getContainerChecksumInfo - containerProtocolMock.when(() -> ContainerProtocolCalls.getContainerChecksumInfo(any(), anyLong(), any())) - .thenAnswer(inv -> { - XceiverClientSpi xceiverClientSpi = inv.getArgument(0); - Pipeline pipeline = xceiverClientSpi.getPipeline(); - assertEquals(1, pipeline.size()); - DatanodeDetails dn = pipeline.getFirstNode(); - KeyValueContainer container = dnToContainerMap.get(dn.getUuidString()); - ByteString checksumInfo = checksumManager.getContainerChecksumInfo(container.getContainerData()); - return ContainerProtos.GetContainerChecksumInfoResponseProto.newBuilder() - .setContainerID(containerID) - .setContainerChecksumInfo(checksumInfo) - .build(); - }); - - // Mock getBlock - containerProtocolMock.when(() -> ContainerProtocolCalls.getBlock(any(), any(), any(), any(), anyMap())) - .thenAnswer(inv -> { - XceiverClientSpi xceiverClientSpi = inv.getArgument(0); - Pipeline pipeline = xceiverClientSpi.getPipeline(); - assertEquals(1, pipeline.size()); - DatanodeDetails dn = pipeline.getFirstNode(); - KeyValueContainer container = dnToContainerMap.get(dn.getUuidString()); - ContainerProtos.BlockData blockData = kvHandler.getBlockManager().getBlock(container, inv.getArgument(2)) - .getProtoBufMessage(); - return ContainerProtos.GetBlockResponseProto.newBuilder() - .setBlockData(blockData) - .build(); - }); - - // Mock readChunk - containerProtocolMock.when(() -> ContainerProtocolCalls.readChunk(any(), any(), any(), any(), any())) - .thenAnswer(inv -> { - XceiverClientSpi xceiverClientSpi = inv.getArgument(0); - Pipeline pipeline = xceiverClientSpi.getPipeline(); - assertEquals(1, pipeline.size()); - DatanodeDetails dn = pipeline.getFirstNode(); - KeyValueContainer container = dnToContainerMap.get(dn.getUuidString()); - return createReadChunkResponse(inv, container, kvHandler); - }); - } - - // Helper method to create readChunk responses - private ContainerProtos.ReadChunkResponseProto createReadChunkResponse(InvocationOnMock inv, - KeyValueContainer container, - KeyValueHandler kvHandler) throws IOException { - ContainerProtos.DatanodeBlockID blockId = inv.getArgument(2); - ContainerProtos.ChunkInfo chunkInfo = inv.getArgument(1); - return ContainerProtos.ReadChunkResponseProto.newBuilder() - .setBlockID(blockId) - .setChunkData(chunkInfo) - .setData(kvHandler.getChunkManager().readChunk(container, BlockID.getFromProtobuf(blockId), - ChunkInfo.getFromProtoBuf(chunkInfo), null).toByteString()) - .build(); - } - @Test public void testGetContainerChecksumInfoOnInvalidContainerStates() { when(handler.handleGetContainerChecksumInfo(any(), any())).thenCallRealMethod(); @@ -842,6 +674,7 @@ private static ContainerCommandRequestProto createContainerRequest( private KeyValueHandler createKeyValueHandler(Path path) throws IOException { final ContainerSet containerSet = new ContainerSet(1000); + final MutableVolumeSet volumeSet = mock(MutableVolumeSet.class); HddsVolume hddsVolume = new HddsVolume.Builder(path.toString()).conf(conf) @@ -859,165 +692,14 @@ private KeyValueHandler createKeyValueHandler(Path path) throws IOException { hddsVolume.getVolumeInfoStats().unregister(); hddsVolume.getVolumeIOStats().unregister(); ContainerMetrics.remove(); - return kvHandler; - } - /** - * Creates a container with normal and deleted blocks. 
- * First it will insert normal blocks, and then it will insert - * deleted blocks. - */ - protected List createContainerWithBlocks(KeyValueHandler kvHandler, long containerId, - int blocks, int numContainerCopy) - throws Exception { - String strBlock = "block"; - String strChunk = "chunkFile"; - List containers = new ArrayList<>(); - MutableVolumeSet volumeSet = new MutableVolumeSet(DATANODE_UUID, conf, null, - StorageVolume.VolumeType.DATA_VOLUME, null); - createDbInstancesForTestIfNeeded(volumeSet, CLUSTER_ID, CLUSTER_ID, conf); - int bytesPerChecksum = 2 * (int) OzoneConsts.KB; - Checksum checksum = new Checksum(ContainerProtos.ChecksumType.SHA256, - bytesPerChecksum); - byte[] chunkData = RandomStringUtils.randomAscii(CHUNK_LEN).getBytes(UTF_8); - ChecksumData checksumData = checksum.computeChecksum(chunkData); - - for (int j = 0; j < numContainerCopy; j++) { - KeyValueContainerData containerData = new KeyValueContainerData(containerId, - ContainerLayoutVersion.FILE_PER_BLOCK, (long) CHUNKS_PER_BLOCK * CHUNK_LEN * blocks, - UUID.randomUUID().toString(), UUID.randomUUID().toString()); - Path kvContainerPath = Files.createDirectory(tempDir.resolve(containerId + "-" + j)); - containerData.setMetadataPath(kvContainerPath.toString()); - containerData.setDbFile(kvContainerPath.toFile()); - - KeyValueContainer container = new KeyValueContainer(containerData, conf); - StorageVolumeUtil.getHddsVolumesList(volumeSet.getVolumesList()) - .forEach(hddsVolume -> hddsVolume.setDbParentDir(kvContainerPath.toFile())); - container.create(volumeSet, new RoundRobinVolumeChoosingPolicy(), UUID.randomUUID().toString()); - assertNotNull(containerData.getChunksPath()); - File chunksPath = new File(containerData.getChunksPath()); - ContainerLayoutTestInfo.FILE_PER_BLOCK.validateFileCount(chunksPath, 0, 0); - - List chunkList = new ArrayList<>(); - for (int i = 0; i < blocks; i++) { - BlockID blockID = new BlockID(containerId, i); - BlockData blockData = new BlockData(blockID); - - chunkList.clear(); - for (long chunkCount = 0; chunkCount < CHUNKS_PER_BLOCK; chunkCount++) { - String chunkName = strBlock + i + strChunk + chunkCount; - long offset = chunkCount * CHUNK_LEN; - ChunkInfo info = new ChunkInfo(chunkName, offset, CHUNK_LEN); - info.setChecksumData(checksumData); - chunkList.add(info.getProtoBufMessage()); - kvHandler.getChunkManager().writeChunk(container, blockID, info, - ByteBuffer.wrap(chunkData), WRITE_STAGE); - } - kvHandler.getChunkManager().finishWriteChunks(container, blockData); - blockData.setChunks(chunkList); - blockData.setBlockCommitSequenceId(i); - kvHandler.getBlockManager().putBlock(container, blockData); - } - - ContainerLayoutTestInfo.FILE_PER_BLOCK.validateFileCount(chunksPath, blocks, (long) blocks * CHUNKS_PER_BLOCK); - container.markContainerForClose(); - kvHandler.closeContainer(container); - containers.add(container); - } - - return containers; - } - - /** - * Introduce corruption in the container. - * 1. Delete blocks from the container. - * 2. Corrupt chunks at an offset. - * If revers is true, the blocks and chunks are deleted in reverse order. 
- */ - private void introduceCorruption(KeyValueHandler kvHandler, KeyValueContainer keyValueContainer, int numBlocks, - int numChunks, boolean reverse) throws IOException { - Random random = new Random(); - KeyValueContainerData containerData = keyValueContainer.getContainerData(); - // Simulate missing blocks - try (DBHandle handle = BlockUtils.getDB(containerData, conf); - BatchOperation batch = handle.getStore().getBatchHandler().initBatchOperation()) { - List blockDataList = kvHandler.getBlockManager().listBlock(keyValueContainer, -1, 100); - int size = blockDataList.size(); - for (int i = 0; i < numBlocks; i++) { - BlockData blockData = reverse ? blockDataList.get(size - 1 - i) : blockDataList.get(i); - File blockFile = getBlock(keyValueContainer, blockData.getBlockID().getLocalID()); - Assertions.assertTrue(blockFile.delete()); - handle.getStore().getBlockDataTable().deleteWithBatch(batch, containerData.getBlockKey(blockData.getLocalID())); - } - handle.getStore().getBatchHandler().commitBatchOperation(batch); - } -// Files.deleteIfExists(getContainerChecksumFile(keyValueContainer.getContainerData()).toPath()); -// kvHandler.createContainerMerkleTreeFromMetadata(keyValueContainer); - - // Corrupt chunks at an offset. - List blockDataList = kvHandler.getBlockManager().listBlock(keyValueContainer, -1, 100); - int size = blockDataList.size(); - for (int i = 0; i < numChunks; i++) { - int blockIndex = reverse ? size - 1 - (i % size) : i % size; - BlockData blockData = blockDataList.get(blockIndex); - int chunkIndex = i / size; - File blockFile = getBlock(keyValueContainer, blockData.getBlockID().getLocalID()); - List chunks = new ArrayList<>(blockData.getChunks()); - ContainerProtos.ChunkInfo chunkInfo = chunks.remove(chunkIndex); - corruptFileAtOffset(blockFile, (int) chunkInfo.getOffset(), (int) chunkInfo.getLen()); - - // TODO: On-demand scanner (HDDS-10374) should detect this corruption and generate container merkle tree. -// ContainerProtos.ContainerChecksumInfo.Builder builder = kvHandler.getChecksumManager() -// .read(containerData).get().toBuilder(); -// List blockMerkleTreeList = builder.getContainerMerkleTree() -// .getBlockMerkleTreeList(); -// assertEquals(size, blockMerkleTreeList.size()); - -// builder.getContainerMerkleTreeBuilder().clearBlockMerkleTree(); -// for (int j = 0; j < blockMerkleTreeList.size(); j++) { -// ContainerProtos.BlockMerkleTree.Builder blockMerkleTreeBuilder = blockMerkleTreeList.get(j).toBuilder(); -// if (j == blockIndex) { -// List chunkMerkleTreeBuilderList = -// blockMerkleTreeBuilder.getChunkMerkleTreeBuilderList(); -// chunkMerkleTreeBuilderList.get(chunkIndex).setIsHealthy(false).setDataChecksum(random.nextLong()); -// blockMerkleTreeBuilder.setDataChecksum(random.nextLong()); -// } -// builder.getContainerMerkleTreeBuilder().addBlockMerkleTree(blockMerkleTreeBuilder.build()); -// } -// builder.getContainerMerkleTreeBuilder().setDataChecksum(random.nextLong()); -// Files.deleteIfExists(getContainerChecksumFile(keyValueContainer.getContainerData()).toPath()); -// writeContainerDataTreeProto(keyValueContainer.getContainerData(), builder.getContainerMerkleTree()); - } - } - - /** - * Overwrite the file with random bytes at an offset within the given length. 
- */ - public static void corruptFileAtOffset(File file, int offset, int chunkLength) { - try { - final int fileLength = (int) file.length(); - assertTrue(fileLength >= offset + chunkLength); - final int chunkEnd = offset + chunkLength; - - Path path = file.toPath(); - final byte[] original = IOUtils.readFully(Files.newInputStream(path), fileLength); - - // Corrupt the last byte and middle bytes of the block. The scanner should log this as two errors. - final byte[] corruptedBytes = Arrays.copyOf(original, fileLength); - corruptedBytes[chunkEnd - 1] = (byte) (original[chunkEnd - 1] << 1); - final long chunkMid = offset + ((long) chunkLength - offset) / 2; - corruptedBytes[(int) (chunkMid / 2)] = (byte) (original[(int) (chunkMid / 2)] << 1); - - - Files.write(path, corruptedBytes, - StandardOpenOption.TRUNCATE_EXISTING, StandardOpenOption.SYNC); + // Register the on-demand container scanner with the container set used by the KeyValueHandler. + ContainerController controller = new ContainerController(containerSet, + Collections.singletonMap(ContainerType.KeyValueContainer, kvHandler)); + OnDemandContainerDataScanner onDemandScanner = new OnDemandContainerDataScanner( + conf.getObject(ContainerScannerConfiguration.class), controller, kvHandler.getChecksumManager()); + containerSet.registerContainerScanHandler(onDemandScanner::scanContainer); - assertThat(IOUtils.readFully(Files.newInputStream(path), fileLength)) - .isEqualTo(corruptedBytes) - .isNotEqualTo(original); - } catch (IOException ex) { - // Fail the test. - throw new UncheckedIOException(ex); - } + return kvHandler; } } diff --git a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/keyvalue/TestKeyValueHandlerContainerReconciliation.java b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/keyvalue/TestKeyValueHandlerContainerReconciliation.java new file mode 100644 index 000000000000..a4b63d9c2cd5 --- /dev/null +++ b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/keyvalue/TestKeyValueHandlerContainerReconciliation.java @@ -0,0 +1,503 @@ +package org.apache.hadoop.ozone.container.keyvalue; + +import com.google.common.collect.ImmutableList; +import org.apache.avro.generic.GenericData; +import org.apache.commons.io.IOUtils; +import org.apache.commons.lang3.RandomStringUtils; +import org.apache.hadoop.hdds.client.BlockID; +import org.apache.hadoop.hdds.conf.ConfigurationSource; +import org.apache.hadoop.hdds.conf.OzoneConfiguration; +import org.apache.hadoop.hdds.protocol.DatanodeDetails; +import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos; +import org.apache.hadoop.hdds.scm.XceiverClientSpi; +import org.apache.hadoop.hdds.scm.container.common.helpers.StorageContainerException; +import org.apache.hadoop.hdds.scm.pipeline.Pipeline; +import org.apache.hadoop.hdds.scm.storage.ContainerProtocolCalls; +import org.apache.hadoop.hdds.utils.db.BatchOperation; +import org.apache.hadoop.ozone.OzoneConsts; +import org.apache.hadoop.ozone.common.Checksum; +import org.apache.hadoop.ozone.common.ChecksumData; +import org.apache.hadoop.ozone.container.checksum.ContainerChecksumTreeManager; +import org.apache.hadoop.ozone.container.checksum.DNContainerOperationClient; +import org.apache.hadoop.ozone.container.common.ContainerTestUtils; +import org.apache.hadoop.ozone.container.common.helpers.BlockData; +import org.apache.hadoop.ozone.container.common.helpers.ChunkInfo; +import org.apache.hadoop.ozone.container.common.helpers.ContainerMetrics; 
+import org.apache.hadoop.ozone.container.common.impl.ContainerLayoutVersion; +import org.apache.hadoop.ozone.container.common.impl.ContainerSet; +import org.apache.hadoop.ozone.container.common.interfaces.Container; +import org.apache.hadoop.ozone.container.common.interfaces.DBHandle; +import org.apache.hadoop.ozone.container.common.utils.StorageVolumeUtil; +import org.apache.hadoop.ozone.container.common.volume.HddsVolume; +import org.apache.hadoop.ozone.container.common.volume.MutableVolumeSet; +import org.apache.hadoop.ozone.container.common.volume.RoundRobinVolumeChoosingPolicy; +import org.apache.hadoop.ozone.container.common.volume.StorageVolume; +import org.apache.hadoop.ozone.container.keyvalue.helpers.BlockUtils; +import org.apache.hadoop.ozone.container.ozoneimpl.ContainerController; +import org.apache.hadoop.ozone.container.ozoneimpl.ContainerScannerConfiguration; +import org.apache.hadoop.ozone.container.ozoneimpl.OnDemandContainerDataScanner; +import org.apache.ratis.thirdparty.com.google.protobuf.ByteString; +import org.junit.jupiter.api.AfterAll; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.io.TempDir; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.Arguments; +import org.junit.jupiter.params.provider.MethodSource; +import org.mockito.MockedStatic; +import org.mockito.Mockito; +import org.mockito.invocation.InvocationOnMock; + +import java.io.File; +import java.io.IOException; +import java.io.UncheckedIOException; +import java.nio.ByteBuffer; +import java.nio.file.Files; +import java.nio.file.Path; +import java.nio.file.StandardOpenOption; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collection; +import java.util.Collections; +import java.util.HashMap; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Optional; +import java.util.Random; +import java.util.Set; +import java.util.UUID; +import java.util.concurrent.Future; +import java.util.stream.Stream; + +import static java.nio.charset.StandardCharsets.UTF_8; +import static org.apache.hadoop.hdds.HddsConfigKeys.OZONE_METADATA_DIRS; +import static org.apache.hadoop.hdds.protocol.MockDatanodeDetails.randomDatanodeDetails; +import static org.apache.hadoop.hdds.scm.ScmConfigKeys.HDDS_DATANODE_DIR_KEY; +import static org.apache.hadoop.ozone.container.common.ContainerTestUtils.WRITE_STAGE; +import static org.apache.hadoop.ozone.container.common.ContainerTestUtils.createDbInstancesForTestIfNeeded; +import static org.apache.hadoop.ozone.container.keyvalue.TestContainerCorruptions.getBlock; +import static org.assertj.core.api.Assertions.assertThat; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertNotNull; +import static org.junit.jupiter.api.Assertions.assertTrue; +import static org.mockito.ArgumentMatchers.any; +import static org.mockito.ArgumentMatchers.anyLong; +import static org.mockito.ArgumentMatchers.anyMap; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.when; + +/** + * This unit test simulates three datanodes with replicas of a container that need to be reconciled. + * It creates three KeyValueHandler instances to represent each datanode, and each instance is working on a container + * replica that is stored in a local directory. The reconciliation client is mocked to return the corresponding local + * container for each datanode peer. 
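+ * Network calls between replicas are statically mocked: each ContainerProtocolCalls RPC is routed to the
+ * MockDatanode that owns the single node in the client's pipeline, so no real networking is involved.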
+ */ +public class TestKeyValueHandlerContainerReconciliation { + /** + * Number of corrupt blocks and chunks. + */ + public static Stream corruptionValues() { + return Stream.of( + Arguments.of(5, 0), + Arguments.of(0, 5), + Arguments.of(0, 10), + Arguments.of(10, 0), + Arguments.of(5, 10), + Arguments.of(10, 5), + Arguments.of(2, 3), + Arguments.of(3, 2), + Arguments.of(4, 6), + Arguments.of(6, 4), + Arguments.of(6, 9), + Arguments.of(9, 6) + ); + } + + // All container replicas will be placed in this directory, and the same replicas will be re-used for each test run. + @TempDir + private static Path containerDir; + private static DNContainerOperationClient dnClient; + private static MockedStatic containerProtocolMock; + private static Map datanodes; + private static OzoneConfiguration conf; + + private static final String CLUSTER_ID = UUID.randomUUID().toString(); + private static final long CONTAINER_ID = 100L; + private static final int CHUNK_LEN = 3 * (int) OzoneConsts.KB; + private static final int CHUNKS_PER_BLOCK = 4; + private static final int NUM_DATANODES = 3; + + /** + * Use the same container instances throughout the tests. Each reconciliation should make a full repair, resetting + * the state for the next test. + */ + @BeforeAll + public static void setup() throws Exception { + conf = new OzoneConfiguration(); + conf.set(HDDS_DATANODE_DIR_KEY, containerDir.toString()); + conf.set(OZONE_METADATA_DIRS, containerDir.toString()); + dnClient = new DNContainerOperationClient(conf, null, null); + datanodes = new HashMap<>(); + + // Create a container with 3 replicas and 15 blocks each. + for (int i = 0; i < NUM_DATANODES; i++) { + DatanodeDetails dnDetails = randomDatanodeDetails(); + MockDatanode dn = new MockDatanode(dnDetails, conf, containerDir); + dn.addContainerWithBlocks(CONTAINER_ID, 15); + datanodes.put(dnDetails, dn); + } + + containerProtocolMock = Mockito.mockStatic(ContainerProtocolCalls.class); + mockContainerProtocolCalls(); + } + + @AfterAll + public static void teardown() { + containerProtocolMock.close(); + } + + + @ParameterizedTest + @MethodSource("corruptionValues") + public void testFullContainerReconciliation(int numBlocks, int numChunks) throws Exception { + KeyValueHandler kvHandler = createKeyValueHandler(containerDir); + ContainerChecksumTreeManager checksumManager = kvHandler.getChecksumManager(); + + // Introduce corruption in each container on different replicas. + introduceCorruption(kvHandler, containers.get(1), numBlocks, numChunks, false); + introduceCorruption(kvHandler, containers.get(2), numBlocks, numChunks, true); + // Use synchronous on-demand scans to re-build the merkle trees after corruption. + waitForContainerScans(containers); + + // Without reconciliation, checksums should be different because of the corruption. + Set checksumsBeforeReconciliation = new HashSet<>(); + for (KeyValueContainer kvContainer : containers) { + Optional containerChecksumInfo = + checksumManager.read(kvContainer.getContainerData()); + assertTrue(containerChecksumInfo.isPresent()); + long dataChecksum = containerChecksumInfo.get().getContainerMerkleTree().getDataChecksum(); + assertEquals(kvContainer.getContainerData().getDataChecksum(), dataChecksum); + checksumsBeforeReconciliation.add(dataChecksum); + } + // There should be more than 1 checksum because of the corruption. + assertTrue(checksumsBeforeReconciliation.size() > 1); + + + // Setup mock for each datanode network calls needed for reconciliation. 
+ try (MockedStatic containerProtocolMock = + Mockito.mockStatic(ContainerProtocolCalls.class)) { + mockContainerProtocolCalls(containerProtocolMock, dnToContainerMap, checksumManager, kvHandler, CONTAINER_ID); + + kvHandler.reconcileContainer(dnClient, containers.get(0), datanodes); + kvHandler.reconcileContainer(dnClient, containers.get(1), datanodes); + kvHandler.reconcileContainer(dnClient, containers.get(2), datanodes); + + // After reconciliation, checksums should be the same for all containers. + // Reconciliation should have updated the tree based on the updated metadata that was obtained for the + // previously corrupted data. We do not need to wait for the full data scan to complete. + ContainerProtos.ContainerChecksumInfo prevContainerChecksumInfo = null; + for (KeyValueContainer kvContainer : containers) { + kvHandler.createContainerMerkleTreeFromMetadata(kvContainer); + Optional containerChecksumInfo = + checksumManager.read(kvContainer.getContainerData()); + assertTrue(containerChecksumInfo.isPresent()); + long dataChecksum = containerChecksumInfo.get().getContainerMerkleTree().getDataChecksum(); + assertEquals(kvContainer.getContainerData().getDataChecksum(), dataChecksum); + if (prevContainerChecksumInfo != null) { + assertEquals(prevContainerChecksumInfo.getContainerMerkleTree().getDataChecksum(), dataChecksum); + } + prevContainerChecksumInfo = containerChecksumInfo.get(); + } + } + } + + public void waitForContainerScans(List containers) throws Exception { + for (KeyValueContainer container: containers) { + // The on-demand scanner has been initialized to pull from the mock container set. + // Make it pull the corresponding container instance to scan in this run based on ID. + long containerID = container.getContainerData().getContainerID(); + Mockito.doReturn(container).when(mockContainerSet).getContainer(containerID); + + Optional> scanFuture = OnDemandContainerDataScanner.scanContainer(container); + assertTrue(scanFuture.isPresent()); + // Wait for on-demand scan to complete. 
+ scanFuture.get().get(); + } + } + + private static void mockContainerProtocolCalls() { + // Mock getContainerChecksumInfo + containerProtocolMock.when(() -> ContainerProtocolCalls.getContainerChecksumInfo(any(), anyLong(), any())) + .thenAnswer(inv -> { + XceiverClientSpi xceiverClientSpi = inv.getArgument(0); + long containerID = inv.getArgument(1); + Pipeline pipeline = xceiverClientSpi.getPipeline(); + assertEquals(1, pipeline.size()); + DatanodeDetails dn = pipeline.getFirstNode(); + return datanodes.get(dn).getChecksumInfo(containerID); + }); + + // Mock getBlock + containerProtocolMock.when(() -> ContainerProtocolCalls.getBlock(any(), any(), any(), any(), anyMap())) + .thenAnswer(inv -> { + XceiverClientSpi xceiverClientSpi = inv.getArgument(0); + BlockID blockID = inv.getArgument(2); + Pipeline pipeline = xceiverClientSpi.getPipeline(); + assertEquals(1, pipeline.size()); + DatanodeDetails dn = pipeline.getFirstNode(); + return datanodes.get(dn).getBlock(blockID); + }); + + // Mock readChunk + containerProtocolMock.when(() -> ContainerProtocolCalls.readChunk(any(), any(), any(), any(), any())) + .thenAnswer(inv -> { + XceiverClientSpi xceiverClientSpi = inv.getArgument(0); + ContainerProtos.ChunkInfo chunkInfo = inv.getArgument(1); + ContainerProtos.DatanodeBlockID blockId = inv.getArgument(2); + Pipeline pipeline = xceiverClientSpi.getPipeline(); + assertEquals(1, pipeline.size()); + DatanodeDetails dn = pipeline.getFirstNode(); + return datanodes.get(dn).readChunk(blockId, chunkInfo); + }); + } + + /** + * Introduce corruption in the container. + * 1. Delete blocks from the container. + * 2. Corrupt chunks at an offset. + * If revers is true, the blocks and chunks are deleted in reverse order. + */ + private void introduceCorruption(KeyValueHandler kvHandler, KeyValueContainer keyValueContainer, int numBlocks, + int numChunks, boolean reverse) throws IOException { + KeyValueContainerData containerData = keyValueContainer.getContainerData(); + // Simulate missing blocks + try (DBHandle handle = BlockUtils.getDB(containerData, conf); + BatchOperation batch = handle.getStore().getBatchHandler().initBatchOperation()) { + List blockDataList = kvHandler.getBlockManager().listBlock(keyValueContainer, -1, 100); + int size = blockDataList.size(); + for (int i = 0; i < numBlocks; i++) { + BlockData blockData = reverse ? blockDataList.get(size - 1 - i) : blockDataList.get(i); + File blockFile = getBlock(keyValueContainer, blockData.getBlockID().getLocalID()); + Assertions.assertTrue(blockFile.delete()); + handle.getStore().getBlockDataTable().deleteWithBatch(batch, containerData.getBlockKey(blockData.getLocalID())); + } + handle.getStore().getBatchHandler().commitBatchOperation(batch); + } +// Files.deleteIfExists(getContainerChecksumFile(keyValueContainer.getContainerData()).toPath()); +// kvHandler.createContainerMerkleTreeFromMetadata(keyValueContainer); + + // Corrupt chunks at an offset. + List blockDataList = kvHandler.getBlockManager().listBlock(keyValueContainer, -1, 100); + int size = blockDataList.size(); + for (int i = 0; i < numChunks; i++) { + int blockIndex = reverse ? 
size - 1 - (i % size) : i % size; + BlockData blockData = blockDataList.get(blockIndex); + int chunkIndex = i / size; + File blockFile = getBlock(keyValueContainer, blockData.getBlockID().getLocalID()); + List chunks = new ArrayList<>(blockData.getChunks()); + ContainerProtos.ChunkInfo chunkInfo = chunks.remove(chunkIndex); + corruptFileAtOffset(blockFile, (int) chunkInfo.getOffset(), (int) chunkInfo.getLen()); + + // TODO: On-demand scanner (HDDS-10374) should detect this corruption and generate container merkle tree. +// ContainerProtos.ContainerChecksumInfo.Builder builder = kvHandler.getChecksumManager() +// .read(containerData).get().toBuilder(); +// List blockMerkleTreeList = builder.getContainerMerkleTree() +// .getBlockMerkleTreeList(); +// assertEquals(size, blockMerkleTreeList.size()); + +// builder.getContainerMerkleTreeBuilder().clearBlockMerkleTree(); +// for (int j = 0; j < blockMerkleTreeList.size(); j++) { +// ContainerProtos.BlockMerkleTree.Builder blockMerkleTreeBuilder = blockMerkleTreeList.get(j).toBuilder(); +// if (j == blockIndex) { +// List chunkMerkleTreeBuilderList = +// blockMerkleTreeBuilder.getChunkMerkleTreeBuilderList(); +// chunkMerkleTreeBuilderList.get(chunkIndex).setIsHealthy(false).setDataChecksum(random.nextLong()); +// blockMerkleTreeBuilder.setDataChecksum(random.nextLong()); +// } +// builder.getContainerMerkleTreeBuilder().addBlockMerkleTree(blockMerkleTreeBuilder.build()); +// } +// builder.getContainerMerkleTreeBuilder().setDataChecksum(random.nextLong()); +// Files.deleteIfExists(getContainerChecksumFile(keyValueContainer.getContainerData()).toPath()); +// writeContainerDataTreeProto(keyValueContainer.getContainerData(), builder.getContainerMerkleTree()); + } + } + + /** + * Overwrite the file with random bytes at an offset within the given length. + */ + public static void corruptFileAtOffset(File file, int offset, int chunkLength) { + try { + final int fileLength = (int) file.length(); + assertTrue(fileLength >= offset + chunkLength); + final int chunkEnd = offset + chunkLength; + + Path path = file.toPath(); + final byte[] original = IOUtils.readFully(Files.newInputStream(path), fileLength); + + // Corrupt the last byte and middle bytes of the block. The scanner should log this as two errors. + final byte[] corruptedBytes = Arrays.copyOf(original, fileLength); + corruptedBytes[chunkEnd - 1] = (byte) (original[chunkEnd - 1] << 1); + final long chunkMid = offset + ((long) chunkLength - offset) / 2; + corruptedBytes[(int) (chunkMid / 2)] = (byte) (original[(int) (chunkMid / 2)] << 1); + + + Files.write(path, corruptedBytes, + StandardOpenOption.TRUNCATE_EXISTING, StandardOpenOption.SYNC); + + assertThat(IOUtils.readFully(Files.newInputStream(path), fileLength)) + .isEqualTo(corruptedBytes) + .isNotEqualTo(original); + } catch (IOException ex) { + // Fail the test. 
+ throw new UncheckedIOException(ex); + } + } + + private static class MockDatanode { + private final KeyValueHandler handler; + private final DatanodeDetails dnDetails; + private final OnDemandContainerDataScanner onDemandScanner; + private final ContainerSet containerSet; + private final ConfigurationSource conf; + + public MockDatanode(DatanodeDetails dnDetails, ConfigurationSource conf, Path tempDir) throws IOException { + this.dnDetails = dnDetails; + this.conf = conf; + containerSet = new ContainerSet(1000); + handler = createKeyValueHandler(tempDir); + ContainerController controller = new ContainerController(containerSet, + Collections.singletonMap(ContainerProtos.ContainerType.KeyValueContainer, handler)); + onDemandScanner = new OnDemandContainerDataScanner( + conf.getObject(ContainerScannerConfiguration.class), controller, handler.getChecksumManager()); + // Register the on-demand container scanner with the container set used by the KeyValueHandler. + containerSet.registerContainerScanHandler(onDemandScanner::scanContainer); + } + + public ContainerProtos.GetContainerChecksumInfoResponseProto getChecksumInfo(long containerID) throws IOException { + KeyValueContainer container = getContainer(containerID); + ByteString checksumInfo = handler.getChecksumManager().getContainerChecksumInfo(container.getContainerData()); + return ContainerProtos.GetContainerChecksumInfoResponseProto.newBuilder() + .setContainerID(containerID) + .setContainerChecksumInfo(checksumInfo) + .build(); + } + + public ContainerProtos.GetBlockResponseProto getBlock(BlockID blockID) throws IOException { + KeyValueContainer container = getContainer(blockID.getContainerID()); + ContainerProtos.BlockData blockData = handler.getBlockManager().getBlock(container, blockID).getProtoBufMessage(); + return ContainerProtos.GetBlockResponseProto.newBuilder() + .setBlockData(blockData) + .build(); + } + + public ContainerProtos.ReadChunkResponseProto readChunk(ContainerProtos.DatanodeBlockID blockId, + ContainerProtos.ChunkInfo chunkInfo) throws IOException { + KeyValueContainer container = getContainer(blockId.getContainerID()); + return ContainerProtos.ReadChunkResponseProto.newBuilder() + .setBlockID(blockId) + .setChunkData(chunkInfo) + .setData(handler.getChunkManager().readChunk(container, BlockID.getFromProtobuf(blockId), + ChunkInfo.getFromProtoBuf(chunkInfo), null).toByteString()) + .build(); + } + + public KeyValueContainer getContainer(long containerID) { + return (KeyValueContainer) containerSet.getContainer(containerID); + } + + public void scanContainer(long containerID) { +// onDemandScanner.scanContainer(containerSet.getContainer(containerID)); + } + + public void reconcileContainer(DNContainerOperationClient dnClient, Collection peers, + long containerID) throws IOException { + handler.reconcileContainer(dnClient, containerSet.getContainer(containerID), peers); + } + + private KeyValueHandler createKeyValueHandler(Path path) throws IOException { + final String dnUUID = dnDetails.getUuidString(); + final MutableVolumeSet volumeSet = mock(MutableVolumeSet.class); + + // TODO this path and addContainer in this class may be using different parts of the temp dir + HddsVolume hddsVolume = new HddsVolume.Builder(path.toString()) + .conf(conf) + .clusterID(CLUSTER_ID) + .datanodeUuid(dnUUID) + .volumeSet(volumeSet) + .build(); + hddsVolume.format(CLUSTER_ID); + hddsVolume.createWorkingDir(CLUSTER_ID, null); + hddsVolume.createTmpDirs(CLUSTER_ID); + 
when(volumeSet.getVolumesList()).thenReturn(Collections.singletonList(hddsVolume)); + final KeyValueHandler kvHandler = ContainerTestUtils.getKeyValueHandler(conf, + dnUUID, containerSet, volumeSet); + kvHandler.setClusterID(CLUSTER_ID); + // Clean up metrics for next tests. + hddsVolume.getVolumeInfoStats().unregister(); + hddsVolume.getVolumeIOStats().unregister(); + ContainerMetrics.remove(); + + return kvHandler; + } + + /** + * Creates a container with normal and deleted blocks. + * First it will insert normal blocks, and then it will insert + * deleted blocks. + */ + public void addContainerWithBlocks(long containerId, int blocks) throws Exception { + String strBlock = "block"; + String strChunk = "chunkFile"; + MutableVolumeSet volumeSet = new MutableVolumeSet(dnDetails.getUuidString(), conf, null, + StorageVolume.VolumeType.DATA_VOLUME, null); + createDbInstancesForTestIfNeeded(volumeSet, CLUSTER_ID, CLUSTER_ID, conf); + int bytesPerChecksum = 2 * (int) OzoneConsts.KB; + Checksum checksum = new Checksum(ContainerProtos.ChecksumType.SHA256, + bytesPerChecksum); + byte[] chunkData = RandomStringUtils.randomAscii(CHUNK_LEN).getBytes(UTF_8); + ChecksumData checksumData = checksum.computeChecksum(chunkData); + + KeyValueContainerData containerData = new KeyValueContainerData(containerId, + ContainerLayoutVersion.FILE_PER_BLOCK, (long) CHUNKS_PER_BLOCK * CHUNK_LEN * blocks, + UUID.randomUUID().toString(), UUID.randomUUID().toString()); + Path kvContainerPath = Files.createDirectory(containerDir.resolve(UUID.randomUUID().toString())); + containerData.setMetadataPath(kvContainerPath.toString()); + containerData.setDbFile(kvContainerPath.toFile()); + + KeyValueContainer container = new KeyValueContainer(containerData, conf); + StorageVolumeUtil.getHddsVolumesList(volumeSet.getVolumesList()) + .forEach(hddsVolume -> hddsVolume.setDbParentDir(kvContainerPath.toFile())); + container.create(volumeSet, new RoundRobinVolumeChoosingPolicy(), UUID.randomUUID().toString()); + assertNotNull(containerData.getChunksPath()); + File chunksPath = new File(containerData.getChunksPath()); + ContainerLayoutTestInfo.FILE_PER_BLOCK.validateFileCount(chunksPath, 0, 0); + + List chunkList = new ArrayList<>(); + for (int i = 0; i < blocks; i++) { + BlockID blockID = new BlockID(containerId, i); + BlockData blockData = new BlockData(blockID); + + chunkList.clear(); + for (long chunkCount = 0; chunkCount < CHUNKS_PER_BLOCK; chunkCount++) { + String chunkName = strBlock + i + strChunk + chunkCount; + long offset = chunkCount * CHUNK_LEN; + ChunkInfo info = new ChunkInfo(chunkName, offset, CHUNK_LEN); + info.setChecksumData(checksumData); + chunkList.add(info.getProtoBufMessage()); + handler.getChunkManager().writeChunk(container, blockID, info, + ByteBuffer.wrap(chunkData), WRITE_STAGE); + } + handler.getChunkManager().finishWriteChunks(container, blockData); + blockData.setChunks(chunkList); + blockData.setBlockCommitSequenceId(i); + handler.getBlockManager().putBlock(container, blockData); + + ContainerLayoutTestInfo.FILE_PER_BLOCK.validateFileCount(chunksPath, blocks, (long) blocks * CHUNKS_PER_BLOCK); + container.markContainerForClose(); + handler.closeContainer(container); + } + containerSet.addContainer(container); + } + } +} From 192eb7b3b3b520fe16774e9ba59b707cc9247497 Mon Sep 17 00:00:00 2001 From: Ethan Rose Date: Wed, 23 Apr 2025 19:52:30 -0400 Subject: [PATCH 28/62] Most tests passing Using seeded byte stream for each container On demand scan cool off is manually removed for now --- 
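Note: the "seeded byte stream" above works because java.util.Random is deterministic for a fixed seed, so seeding
each generator with the container ID lets every simulated datanode produce byte-identical replicas without sharing
any state. The cool-off removal disables the recentlyScanned gate in shouldScan(), so repeated synchronous
on-demand scans in the same test run are not skipped. A minimal standalone sketch of the seeding idea
(illustrative names, not part of this patch):

    import java.util.Arrays;
    import java.util.Random;

    class SeededReplicaSketch {
      public static void main(String[] args) {
        long containerId = 100L;
        byte[] replicaA = new byte[16];
        byte[] replicaB = new byte[16];
        // Two independently constructed generators with the same seed emit the same byte sequence.
        new Random(containerId).nextBytes(replicaA);
        new Random(containerId).nextBytes(replicaB);
        System.out.println(Arrays.equals(replicaA, replicaB)); // prints: true
      }
    }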
.../OnDemandContainerDataScanner.java | 5 +- ...eyValueHandlerContainerReconciliation.java | 516 +++++++++--------- 2 files changed, 268 insertions(+), 253 deletions(-) diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ozoneimpl/OnDemandContainerDataScanner.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ozoneimpl/OnDemandContainerDataScanner.java index 40d12af8e991..abdec1abfe0f 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ozoneimpl/OnDemandContainerDataScanner.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ozoneimpl/OnDemandContainerDataScanner.java @@ -82,8 +82,9 @@ private boolean shouldScan(Container container) { return false; } - return !ContainerUtils.recentlyScanned(container, minScanGap, - LOG) && container.shouldScanData(); + return container.shouldScanData(); +// return !ContainerUtils.recentlyScanned(container, minScanGap, +// LOG) && container.shouldScanData(); } public Optional> scanContainer(Container container) { diff --git a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/keyvalue/TestKeyValueHandlerContainerReconciliation.java b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/keyvalue/TestKeyValueHandlerContainerReconciliation.java index a4b63d9c2cd5..b944c6e959f5 100644 --- a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/keyvalue/TestKeyValueHandlerContainerReconciliation.java +++ b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/keyvalue/TestKeyValueHandlerContainerReconciliation.java @@ -1,36 +1,24 @@ package org.apache.hadoop.ozone.container.keyvalue; -import com.google.common.collect.ImmutableList; -import org.apache.avro.generic.GenericData; import org.apache.commons.io.IOUtils; -import org.apache.commons.lang3.RandomStringUtils; import org.apache.hadoop.hdds.client.BlockID; -import org.apache.hadoop.hdds.conf.ConfigurationSource; import org.apache.hadoop.hdds.conf.OzoneConfiguration; import org.apache.hadoop.hdds.protocol.DatanodeDetails; import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos; import org.apache.hadoop.hdds.scm.XceiverClientSpi; -import org.apache.hadoop.hdds.scm.container.common.helpers.StorageContainerException; import org.apache.hadoop.hdds.scm.pipeline.Pipeline; import org.apache.hadoop.hdds.scm.storage.ContainerProtocolCalls; import org.apache.hadoop.hdds.utils.db.BatchOperation; import org.apache.hadoop.ozone.OzoneConsts; import org.apache.hadoop.ozone.common.Checksum; import org.apache.hadoop.ozone.common.ChecksumData; -import org.apache.hadoop.ozone.container.checksum.ContainerChecksumTreeManager; import org.apache.hadoop.ozone.container.checksum.DNContainerOperationClient; import org.apache.hadoop.ozone.container.common.ContainerTestUtils; import org.apache.hadoop.ozone.container.common.helpers.BlockData; import org.apache.hadoop.ozone.container.common.helpers.ChunkInfo; -import org.apache.hadoop.ozone.container.common.helpers.ContainerMetrics; -import org.apache.hadoop.ozone.container.common.impl.ContainerLayoutVersion; import org.apache.hadoop.ozone.container.common.impl.ContainerSet; -import org.apache.hadoop.ozone.container.common.interfaces.Container; import org.apache.hadoop.ozone.container.common.interfaces.DBHandle; -import org.apache.hadoop.ozone.container.common.utils.StorageVolumeUtil; -import 
org.apache.hadoop.ozone.container.common.volume.HddsVolume; import org.apache.hadoop.ozone.container.common.volume.MutableVolumeSet; -import org.apache.hadoop.ozone.container.common.volume.RoundRobinVolumeChoosingPolicy; import org.apache.hadoop.ozone.container.common.volume.StorageVolume; import org.apache.hadoop.ozone.container.keyvalue.helpers.BlockUtils; import org.apache.hadoop.ozone.container.ozoneimpl.ContainerController; @@ -40,13 +28,11 @@ import org.junit.jupiter.api.AfterAll; import org.junit.jupiter.api.Assertions; import org.junit.jupiter.api.BeforeAll; -import org.junit.jupiter.api.io.TempDir; import org.junit.jupiter.params.ParameterizedTest; import org.junit.jupiter.params.provider.Arguments; import org.junit.jupiter.params.provider.MethodSource; import org.mockito.MockedStatic; import org.mockito.Mockito; -import org.mockito.invocation.InvocationOnMock; import java.io.File; import java.io.IOException; @@ -54,38 +40,36 @@ import java.nio.ByteBuffer; import java.nio.file.Files; import java.nio.file.Path; +import java.nio.file.Paths; import java.nio.file.StandardOpenOption; import java.util.ArrayList; import java.util.Arrays; import java.util.Collection; import java.util.Collections; -import java.util.HashMap; -import java.util.HashSet; +import java.util.Comparator; import java.util.List; import java.util.Map; import java.util.Optional; import java.util.Random; -import java.util.Set; import java.util.UUID; +import java.util.concurrent.ExecutionException; import java.util.concurrent.Future; +import java.util.function.Function; +import java.util.stream.Collectors; import java.util.stream.Stream; -import static java.nio.charset.StandardCharsets.UTF_8; import static org.apache.hadoop.hdds.HddsConfigKeys.OZONE_METADATA_DIRS; import static org.apache.hadoop.hdds.protocol.MockDatanodeDetails.randomDatanodeDetails; import static org.apache.hadoop.hdds.scm.ScmConfigKeys.HDDS_DATANODE_DIR_KEY; import static org.apache.hadoop.ozone.container.common.ContainerTestUtils.WRITE_STAGE; import static org.apache.hadoop.ozone.container.common.ContainerTestUtils.createDbInstancesForTestIfNeeded; -import static org.apache.hadoop.ozone.container.keyvalue.TestContainerCorruptions.getBlock; import static org.assertj.core.api.Assertions.assertThat; +import static org.assertj.core.api.Assertions.fail; import static org.junit.jupiter.api.Assertions.assertEquals; -import static org.junit.jupiter.api.Assertions.assertNotNull; import static org.junit.jupiter.api.Assertions.assertTrue; import static org.mockito.ArgumentMatchers.any; import static org.mockito.ArgumentMatchers.anyLong; import static org.mockito.ArgumentMatchers.anyMap; -import static org.mockito.Mockito.mock; -import static org.mockito.Mockito.when; /** * This unit test simulates three datanodes with replicas of a container that need to be reconciled. @@ -96,6 +80,8 @@ public class TestKeyValueHandlerContainerReconciliation { /** * Number of corrupt blocks and chunks. + * + * TODO HDDS-11942 support more combinations of corruptions. */ public static Stream corruptionValues() { return Stream.of( @@ -115,12 +101,12 @@ public static Stream corruptionValues() { } // All container replicas will be placed in this directory, and the same replicas will be re-used for each test run. 
- @TempDir +// @TempDir private static Path containerDir; private static DNContainerOperationClient dnClient; private static MockedStatic containerProtocolMock; - private static Map datanodes; - private static OzoneConfiguration conf; + private static List datanodes; + private static long healthyDataChecksum; private static final String CLUSTER_ID = UUID.randomUUID().toString(); private static final long CONTAINER_ID = 100L; @@ -134,99 +120,72 @@ public static Stream corruptionValues() { */ @BeforeAll public static void setup() throws Exception { - conf = new OzoneConfiguration(); - conf.set(HDDS_DATANODE_DIR_KEY, containerDir.toString()); - conf.set(OZONE_METADATA_DIRS, containerDir.toString()); - dnClient = new DNContainerOperationClient(conf, null, null); - datanodes = new HashMap<>(); + containerDir = Files.createTempDirectory("reconcile"); + dnClient = new DNContainerOperationClient(new OzoneConfiguration(), null, null); + datanodes = new ArrayList<>(); - // Create a container with 3 replicas and 15 blocks each. + // Create a container with 15 blocks and 3 replicas. for (int i = 0; i < NUM_DATANODES; i++) { DatanodeDetails dnDetails = randomDatanodeDetails(); - MockDatanode dn = new MockDatanode(dnDetails, conf, containerDir); + MockDatanode dn = new MockDatanode(dnDetails, containerDir); dn.addContainerWithBlocks(CONTAINER_ID, 15); - datanodes.put(dnDetails, dn); + datanodes.add(dn); } + datanodes.forEach(d -> d.scanContainer(CONTAINER_ID)); + healthyDataChecksum = assertUniqueChecksumCount(CONTAINER_ID, datanodes, 1); + containerProtocolMock = Mockito.mockStatic(ContainerProtocolCalls.class); mockContainerProtocolCalls(); } @AfterAll public static void teardown() { - containerProtocolMock.close(); + if (containerProtocolMock != null) { + containerProtocolMock.close(); + } } - @ParameterizedTest @MethodSource("corruptionValues") - public void testFullContainerReconciliation(int numBlocks, int numChunks) throws Exception { - KeyValueHandler kvHandler = createKeyValueHandler(containerDir); - ContainerChecksumTreeManager checksumManager = kvHandler.getChecksumManager(); - + public void testContainerReconciliation(int numBlocksToDelete, int numChunksToCorrupt) throws Exception { // Introduce corruption in each container on different replicas. - introduceCorruption(kvHandler, containers.get(1), numBlocks, numChunks, false); - introduceCorruption(kvHandler, containers.get(2), numBlocks, numChunks, true); - // Use synchronous on-demand scans to re-build the merkle trees after corruption. - waitForContainerScans(containers); + List dnsToCorrupt = datanodes.stream().limit(2).collect(Collectors.toList()); + dnsToCorrupt.get(0).introduceCorruption(CONTAINER_ID, numBlocksToDelete, numChunksToCorrupt, false); + dnsToCorrupt.get(1).introduceCorruption(CONTAINER_ID, numBlocksToDelete, numChunksToCorrupt, true); + // Use synchronous on-demand scans to re-build the merkle trees after corruption. + dnsToCorrupt.forEach(d -> d.scanContainer(CONTAINER_ID)); // Without reconciliation, checksums should be different because of the corruption. 
- Set checksumsBeforeReconciliation = new HashSet<>(); - for (KeyValueContainer kvContainer : containers) { - Optional containerChecksumInfo = - checksumManager.read(kvContainer.getContainerData()); - assertTrue(containerChecksumInfo.isPresent()); - long dataChecksum = containerChecksumInfo.get().getContainerMerkleTree().getDataChecksum(); - assertEquals(kvContainer.getContainerData().getDataChecksum(), dataChecksum); - checksumsBeforeReconciliation.add(dataChecksum); - } - // There should be more than 1 checksum because of the corruption. - assertTrue(checksumsBeforeReconciliation.size() > 1); - - - // Setup mock for each datanode network calls needed for reconciliation. - try (MockedStatic containerProtocolMock = - Mockito.mockStatic(ContainerProtocolCalls.class)) { - mockContainerProtocolCalls(containerProtocolMock, dnToContainerMap, checksumManager, kvHandler, CONTAINER_ID); - - kvHandler.reconcileContainer(dnClient, containers.get(0), datanodes); - kvHandler.reconcileContainer(dnClient, containers.get(1), datanodes); - kvHandler.reconcileContainer(dnClient, containers.get(2), datanodes); - - // After reconciliation, checksums should be the same for all containers. - // Reconciliation should have updated the tree based on the updated metadata that was obtained for the - // previously corrupted data. We do not need to wait for the full data scan to complete. - ContainerProtos.ContainerChecksumInfo prevContainerChecksumInfo = null; - for (KeyValueContainer kvContainer : containers) { - kvHandler.createContainerMerkleTreeFromMetadata(kvContainer); - Optional containerChecksumInfo = - checksumManager.read(kvContainer.getContainerData()); - assertTrue(containerChecksumInfo.isPresent()); - long dataChecksum = containerChecksumInfo.get().getContainerMerkleTree().getDataChecksum(); - assertEquals(kvContainer.getContainerData().getDataChecksum(), dataChecksum); - if (prevContainerChecksumInfo != null) { - assertEquals(prevContainerChecksumInfo.getContainerMerkleTree().getDataChecksum(), dataChecksum); - } - prevContainerChecksumInfo = containerChecksumInfo.get(); - } - } + assertUniqueChecksumCount(CONTAINER_ID, datanodes, 3); + + List peers = datanodes.stream().map(MockDatanode::getDnDetails).collect(Collectors.toList()); + datanodes.forEach(d -> d.reconcileContainer(dnClient, peers, CONTAINER_ID)); + // After reconciliation, checksums should be the same for all containers. + // Reconciliation should have updated the tree based on the updated metadata that was obtained for the + // previously corrupted data. We do not need to wait for the full data scan to complete. + long repairedDataChecksum = assertUniqueChecksumCount(CONTAINER_ID, datanodes, 1); + assertEquals(healthyDataChecksum, repairedDataChecksum); } - public void waitForContainerScans(List containers) throws Exception { - for (KeyValueContainer container: containers) { - // The on-demand scanner has been initialized to pull from the mock container set. - // Make it pull the corresponding container instance to scan in this run based on ID. - long containerID = container.getContainerData().getContainerID(); - Mockito.doReturn(container).when(mockContainerSet).getContainer(containerID); - - Optional> scanFuture = OnDemandContainerDataScanner.scanContainer(container); - assertTrue(scanFuture.isPresent()); - // Wait for on-demand scan to complete. - scanFuture.get().get(); - } + /** + * Checks for the expected number of unique checksums among a container on the provided datanodes. + * @return The data checksum from one of the nodes. 
Useful if expectedUniqueChecksums = 1. + */ + private static long assertUniqueChecksumCount(long containerID, Collection datanodes, + long expectedUniqueChecksums) { + long actualUniqueChecksums = datanodes.stream() + .mapToLong(d -> d.checkAndGetDataChecksum(containerID)) + .distinct() + .count(); + assertEquals(expectedUniqueChecksums, actualUniqueChecksums); + return datanodes.stream().findAny().get().checkAndGetDataChecksum(containerID); } private static void mockContainerProtocolCalls() { + Map dnMap = datanodes.stream() + .collect(Collectors.toMap(MockDatanode::getDnDetails, Function.identity())); + // Mock getContainerChecksumInfo containerProtocolMock.when(() -> ContainerProtocolCalls.getContainerChecksumInfo(any(), anyLong(), any())) .thenAnswer(inv -> { @@ -235,7 +194,7 @@ private static void mockContainerProtocolCalls() { Pipeline pipeline = xceiverClientSpi.getPipeline(); assertEquals(1, pipeline.size()); DatanodeDetails dn = pipeline.getFirstNode(); - return datanodes.get(dn).getChecksumInfo(containerID); + return dnMap.get(dn).getChecksumInfo(containerID); }); // Mock getBlock @@ -246,7 +205,7 @@ private static void mockContainerProtocolCalls() { Pipeline pipeline = xceiverClientSpi.getPipeline(); assertEquals(1, pipeline.size()); DatanodeDetails dn = pipeline.getFirstNode(); - return datanodes.get(dn).getBlock(blockID); + return dnMap.get(dn).getBlock(blockID); }); // Mock readChunk @@ -258,114 +217,34 @@ private static void mockContainerProtocolCalls() { Pipeline pipeline = xceiverClientSpi.getPipeline(); assertEquals(1, pipeline.size()); DatanodeDetails dn = pipeline.getFirstNode(); - return datanodes.get(dn).readChunk(blockId, chunkInfo); + return dnMap.get(dn).readChunk(blockId, chunkInfo); }); } /** - * Introduce corruption in the container. - * 1. Delete blocks from the container. - * 2. Corrupt chunks at an offset. - * If revers is true, the blocks and chunks are deleted in reverse order. - */ - private void introduceCorruption(KeyValueHandler kvHandler, KeyValueContainer keyValueContainer, int numBlocks, - int numChunks, boolean reverse) throws IOException { - KeyValueContainerData containerData = keyValueContainer.getContainerData(); - // Simulate missing blocks - try (DBHandle handle = BlockUtils.getDB(containerData, conf); - BatchOperation batch = handle.getStore().getBatchHandler().initBatchOperation()) { - List blockDataList = kvHandler.getBlockManager().listBlock(keyValueContainer, -1, 100); - int size = blockDataList.size(); - for (int i = 0; i < numBlocks; i++) { - BlockData blockData = reverse ? blockDataList.get(size - 1 - i) : blockDataList.get(i); - File blockFile = getBlock(keyValueContainer, blockData.getBlockID().getLocalID()); - Assertions.assertTrue(blockFile.delete()); - handle.getStore().getBlockDataTable().deleteWithBatch(batch, containerData.getBlockKey(blockData.getLocalID())); - } - handle.getStore().getBatchHandler().commitBatchOperation(batch); - } -// Files.deleteIfExists(getContainerChecksumFile(keyValueContainer.getContainerData()).toPath()); -// kvHandler.createContainerMerkleTreeFromMetadata(keyValueContainer); - - // Corrupt chunks at an offset. - List blockDataList = kvHandler.getBlockManager().listBlock(keyValueContainer, -1, 100); - int size = blockDataList.size(); - for (int i = 0; i < numChunks; i++) { - int blockIndex = reverse ? 
size - 1 - (i % size) : i % size; - BlockData blockData = blockDataList.get(blockIndex); - int chunkIndex = i / size; - File blockFile = getBlock(keyValueContainer, blockData.getBlockID().getLocalID()); - List chunks = new ArrayList<>(blockData.getChunks()); - ContainerProtos.ChunkInfo chunkInfo = chunks.remove(chunkIndex); - corruptFileAtOffset(blockFile, (int) chunkInfo.getOffset(), (int) chunkInfo.getLen()); - - // TODO: On-demand scanner (HDDS-10374) should detect this corruption and generate container merkle tree. -// ContainerProtos.ContainerChecksumInfo.Builder builder = kvHandler.getChecksumManager() -// .read(containerData).get().toBuilder(); -// List blockMerkleTreeList = builder.getContainerMerkleTree() -// .getBlockMerkleTreeList(); -// assertEquals(size, blockMerkleTreeList.size()); - -// builder.getContainerMerkleTreeBuilder().clearBlockMerkleTree(); -// for (int j = 0; j < blockMerkleTreeList.size(); j++) { -// ContainerProtos.BlockMerkleTree.Builder blockMerkleTreeBuilder = blockMerkleTreeList.get(j).toBuilder(); -// if (j == blockIndex) { -// List chunkMerkleTreeBuilderList = -// blockMerkleTreeBuilder.getChunkMerkleTreeBuilderList(); -// chunkMerkleTreeBuilderList.get(chunkIndex).setIsHealthy(false).setDataChecksum(random.nextLong()); -// blockMerkleTreeBuilder.setDataChecksum(random.nextLong()); -// } -// builder.getContainerMerkleTreeBuilder().addBlockMerkleTree(blockMerkleTreeBuilder.build()); -// } -// builder.getContainerMerkleTreeBuilder().setDataChecksum(random.nextLong()); -// Files.deleteIfExists(getContainerChecksumFile(keyValueContainer.getContainerData()).toPath()); -// writeContainerDataTreeProto(keyValueContainer.getContainerData(), builder.getContainerMerkleTree()); - } - } - - /** - * Overwrite the file with random bytes at an offset within the given length. + * This class wraps a KeyValueHandler instance with just enough features to test its reconciliation functionality. */ - public static void corruptFileAtOffset(File file, int offset, int chunkLength) { - try { - final int fileLength = (int) file.length(); - assertTrue(fileLength >= offset + chunkLength); - final int chunkEnd = offset + chunkLength; - - Path path = file.toPath(); - final byte[] original = IOUtils.readFully(Files.newInputStream(path), fileLength); - - // Corrupt the last byte and middle bytes of the block. The scanner should log this as two errors. - final byte[] corruptedBytes = Arrays.copyOf(original, fileLength); - corruptedBytes[chunkEnd - 1] = (byte) (original[chunkEnd - 1] << 1); - final long chunkMid = offset + ((long) chunkLength - offset) / 2; - corruptedBytes[(int) (chunkMid / 2)] = (byte) (original[(int) (chunkMid / 2)] << 1); - - - Files.write(path, corruptedBytes, - StandardOpenOption.TRUNCATE_EXISTING, StandardOpenOption.SYNC); - - assertThat(IOUtils.readFully(Files.newInputStream(path), fileLength)) - .isEqualTo(corruptedBytes) - .isNotEqualTo(original); - } catch (IOException ex) { - // Fail the test. 
- throw new UncheckedIOException(ex); - } - } - private static class MockDatanode { private final KeyValueHandler handler; private final DatanodeDetails dnDetails; private final OnDemandContainerDataScanner onDemandScanner; private final ContainerSet containerSet; - private final ConfigurationSource conf; + private final OzoneConfiguration conf; - public MockDatanode(DatanodeDetails dnDetails, ConfigurationSource conf, Path tempDir) throws IOException { + public MockDatanode(DatanodeDetails dnDetails, Path tempDir) throws IOException { this.dnDetails = dnDetails; - this.conf = conf; + Path dataVolume = Paths.get(tempDir.toString(), dnDetails.getUuidString(), "data"); + Path metadataVolume = Paths.get(tempDir.toString(), dnDetails.getUuidString(), "metadata"); + + this.conf = new OzoneConfiguration(); + conf.set(HDDS_DATANODE_DIR_KEY, dataVolume.toString()); + conf.set(OZONE_METADATA_DIRS, metadataVolume.toString()); + containerSet = new ContainerSet(1000); - handler = createKeyValueHandler(tempDir); + MutableVolumeSet volumeSet = createVolumeSet(); + handler = ContainerTestUtils.getKeyValueHandler(conf, dnDetails.getUuidString(), containerSet, volumeSet); + handler.setClusterID(CLUSTER_ID); + ContainerController controller = new ContainerController(containerSet, Collections.singletonMap(ContainerProtos.ContainerType.KeyValueContainer, handler)); onDemandScanner = new OnDemandContainerDataScanner( @@ -374,6 +253,14 @@ public MockDatanode(DatanodeDetails dnDetails, ConfigurationSource conf, Path te containerSet.registerContainerScanHandler(onDemandScanner::scanContainer); } + public DatanodeDetails getDnDetails() { + return dnDetails; + } + + /** + * @throws IOException for general IO errors accessing the checksum file + * @throws java.io.FileNotFoundException When the checksum file does not exist. + */ public ContainerProtos.GetContainerChecksumInfoResponseProto getChecksumInfo(long containerID) throws IOException { KeyValueContainer container = getContainer(containerID); ByteString checksumInfo = handler.getChecksumManager().getContainerChecksumInfo(container.getContainerData()); @@ -383,6 +270,25 @@ public ContainerProtos.GetContainerChecksumInfoResponseProto getChecksumInfo(lon .build(); } + /** + * Verifies that the data checksum on disk matches the one in memory, and returns the data checksum. + */ + public long checkAndGetDataChecksum(long containerID) { + KeyValueContainer container = getContainer(containerID); + long dataChecksum = 0; + try { + Optional containerChecksumInfo = + handler.getChecksumManager().read(container.getContainerData()); + assertTrue(containerChecksumInfo.isPresent()); + dataChecksum = containerChecksumInfo.get().getContainerMerkleTree().getDataChecksum(); + assertEquals(container.getContainerData().getDataChecksum(), dataChecksum); + } catch (IOException ex) { + fail("Failed to read container checksum from disk", ex); + } + System.err.println("data checksum on DN " + dnDetails.getUuidString() + ": " + dataChecksum); + return dataChecksum; + } + public ContainerProtos.GetBlockResponseProto getBlock(BlockID blockID) throws IOException { KeyValueContainer container = getContainer(blockID.getContainerID()); ContainerProtos.BlockData blockData = handler.getBlockManager().getBlock(container, blockID).getProtoBufMessage(); @@ -406,39 +312,49 @@ public KeyValueContainer getContainer(long containerID) { return (KeyValueContainer) containerSet.getContainer(containerID); } + /** + * Triggers a synchronous scan of the container. 
This method will block until the scan completes. + */ public void scanContainer(long containerID) { -// onDemandScanner.scanContainer(containerSet.getContainer(containerID)); - } + Optional> scanFuture = onDemandScanner.scanContainer(containerSet.getContainer(containerID)); + assertTrue(scanFuture.isPresent()); - public void reconcileContainer(DNContainerOperationClient dnClient, Collection peers, - long containerID) throws IOException { - handler.reconcileContainer(dnClient, containerSet.getContainer(containerID), peers); - } + try { + scanFuture.get().get(); + } catch (InterruptedException | ExecutionException e) { + fail("On demand container scan failed", e); + } - private KeyValueHandler createKeyValueHandler(Path path) throws IOException { - final String dnUUID = dnDetails.getUuidString(); - final MutableVolumeSet volumeSet = mock(MutableVolumeSet.class); + // TODO: On-demand scanner (HDDS-10374) should detect this corruption and generate container merkle tree. +// ContainerProtos.ContainerChecksumInfo.Builder builder = kvHandler.getChecksumManager() +// .read(containerData).get().toBuilder(); +// List blockMerkleTreeList = builder.getContainerMerkleTree() +// .getBlockMerkleTreeList(); +// assertEquals(size, blockMerkleTreeList.size()); +// +// builder.getContainerMerkleTreeBuilder().clearBlockMerkleTree(); +// for (int j = 0; j < blockMerkleTreeList.size(); j++) { +// ContainerProtos.BlockMerkleTree.Builder blockMerkleTreeBuilder = blockMerkleTreeList.get(j).toBuilder(); +// if (j == blockIndex) { +// List chunkMerkleTreeBuilderList = +// blockMerkleTreeBuilder.getChunkMerkleTreeBuilderList(); +// chunkMerkleTreeBuilderList.get(chunkIndex).setIsHealthy(false).setDataChecksum(random.nextLong()); +// blockMerkleTreeBuilder.setDataChecksum(random.nextLong()); +// } +// builder.getContainerMerkleTreeBuilder().addBlockMerkleTree(blockMerkleTreeBuilder.build()); +// } +// builder.getContainerMerkleTreeBuilder().setDataChecksum(random.nextLong()); +// Files.deleteIfExists(getContainerChecksumFile(keyValueContainer.getContainerData()).toPath()); +// writeContainerDataTreeProto(keyValueContainer.getContainerData(), builder.getContainerMerkleTree()); + } - // TODO this path and addContainer in this class may be using different parts of the temp dir - HddsVolume hddsVolume = new HddsVolume.Builder(path.toString()) - .conf(conf) - .clusterID(CLUSTER_ID) - .datanodeUuid(dnUUID) - .volumeSet(volumeSet) - .build(); - hddsVolume.format(CLUSTER_ID); - hddsVolume.createWorkingDir(CLUSTER_ID, null); - hddsVolume.createTmpDirs(CLUSTER_ID); - when(volumeSet.getVolumesList()).thenReturn(Collections.singletonList(hddsVolume)); - final KeyValueHandler kvHandler = ContainerTestUtils.getKeyValueHandler(conf, - dnUUID, containerSet, volumeSet); - kvHandler.setClusterID(CLUSTER_ID); - // Clean up metrics for next tests. - hddsVolume.getVolumeInfoStats().unregister(); - hddsVolume.getVolumeIOStats().unregister(); - ContainerMetrics.remove(); - - return kvHandler; + public void reconcileContainer(DNContainerOperationClient dnClient, Collection peers, + long containerID) { + try { + handler.reconcileContainer(dnClient, containerSet.getContainer(containerID), peers); + } catch (IOException ex) { + fail("Container reconciliation failed", ex); + } } /** @@ -447,32 +363,33 @@ private KeyValueHandler createKeyValueHandler(Path path) throws IOException { * deleted blocks. 
   */
  public void addContainerWithBlocks(long containerId, int blocks) throws Exception {
-      String strBlock = "block";
-      String strChunk = "chunkFile";
-      MutableVolumeSet volumeSet = new MutableVolumeSet(dnDetails.getUuidString(), conf, null,
-          StorageVolume.VolumeType.DATA_VOLUME, null);
-      createDbInstancesForTestIfNeeded(volumeSet, CLUSTER_ID, CLUSTER_ID, conf);
-      int bytesPerChecksum = 2 * (int) OzoneConsts.KB;
-      Checksum checksum = new Checksum(ContainerProtos.ChecksumType.SHA256,
-          bytesPerChecksum);
-      byte[] chunkData = RandomStringUtils.randomAscii(CHUNK_LEN).getBytes(UTF_8);
-      ChecksumData checksumData = checksum.computeChecksum(chunkData);
-
-      KeyValueContainerData containerData = new KeyValueContainerData(containerId,
-          ContainerLayoutVersion.FILE_PER_BLOCK, (long) CHUNKS_PER_BLOCK * CHUNK_LEN * blocks,
-          UUID.randomUUID().toString(), UUID.randomUUID().toString());
-      Path kvContainerPath = Files.createDirectory(containerDir.resolve(UUID.randomUUID().toString()));
-      containerData.setMetadataPath(kvContainerPath.toString());
-      containerData.setDbFile(kvContainerPath.toFile());
-
-      KeyValueContainer container = new KeyValueContainer(containerData, conf);
-      StorageVolumeUtil.getHddsVolumesList(volumeSet.getVolumesList())
-          .forEach(hddsVolume -> hddsVolume.setDbParentDir(kvContainerPath.toFile()));
-      container.create(volumeSet, new RoundRobinVolumeChoosingPolicy(), UUID.randomUUID().toString());
-      assertNotNull(containerData.getChunksPath());
-      File chunksPath = new File(containerData.getChunksPath());
+      ContainerProtos.CreateContainerRequestProto createRequest =
+          ContainerProtos.CreateContainerRequestProto.newBuilder()
+              .setContainerType(ContainerProtos.ContainerType.KeyValueContainer)
+              .build();
+      ContainerProtos.ContainerCommandRequestProto request =
+          ContainerProtos.ContainerCommandRequestProto.newBuilder()
+              .setCmdType(ContainerProtos.Type.CreateContainer)
+              .setCreateContainer(createRequest)
+              .setContainerID(containerId)
+              .setDatanodeUuid(dnDetails.getUuidString())
+              .build();
+
+      handler.handleCreateContainer(request, null);
+      KeyValueContainer container = getContainer(containerId);
+
+      // Verify container is initially empty.
+      File chunksPath = new File(container.getContainerData().getChunksPath());
       ContainerLayoutTestInfo.FILE_PER_BLOCK.validateFileCount(chunksPath, 0, 0);
+      // Create data to put in the container.
+      // Seed using the container ID so that all replicas are identical.
+      Random byteGenerator = new Random(containerId);
+      // This array will keep getting populated with new bytes for each chunk.
+      byte[] chunkData = new byte[CHUNK_LEN];
+      int bytesPerChecksum = 2 * (int) OzoneConsts.KB;
+
+      // Add data to the container.
       List chunkList = new ArrayList<>();
       for (int i = 0; i < blocks; i++) {
         BlockID blockID = new BlockID(containerId, i);
         BlockData blockData = new BlockData(blockID);

         chunkList.clear();
         for (long chunkCount = 0; chunkCount < CHUNKS_PER_BLOCK; chunkCount++) {
-          String chunkName = strBlock + i + strChunk + chunkCount;
-          long offset = chunkCount * CHUNK_LEN;
-          ChunkInfo info = new ChunkInfo(chunkName, offset, CHUNK_LEN);
+          String chunkName = "chunk" + chunkCount;
+          long offset = chunkCount * chunkData.length;
+          ChunkInfo info = new ChunkInfo(chunkName, offset, chunkData.length);
+          // Generate data for the chunk and compute its checksum.
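+          // Note: with CHUNK_LEN of 3 KB and bytesPerChecksum of 2 KB, each chunk ends up carrying two
+          // checksums in its ChecksumData: one for the full 2 KB window and one for the trailing 1 KB.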
+          byteGenerator.nextBytes(chunkData);
+          Checksum checksum = new Checksum(ContainerProtos.ChecksumType.SHA256, bytesPerChecksum);
+          ChecksumData checksumData = checksum.computeChecksum(chunkData);
           info.setChecksumData(checksumData);
+          // Write chunk and checksum into the container.
           chunkList.add(info.getProtoBufMessage());
           handler.getChunkManager().writeChunk(container, blockID, info,
               ByteBuffer.wrap(chunkData), WRITE_STAGE);
@@ -492,12 +414,104 @@ public void addContainerWithBlocks(long containerId, int blocks) throws Exceptio
         blockData.setChunks(chunkList);
         blockData.setBlockCommitSequenceId(i);
         handler.getBlockManager().putBlock(container, blockData);
+      }

+      ContainerLayoutTestInfo.FILE_PER_BLOCK.validateFileCount(chunksPath, blocks, (long) blocks * CHUNKS_PER_BLOCK);
+      container.markContainerForClose();
+      handler.closeContainer(container);
+    }
+
+    /**
+     * Returns a list of all blocks in the container sorted numerically by blockID.
+     * For example, the unsorted list would have the first blocks as 1, 10, 11...
+     * The list returned by this method would have the first blocks as 1, 2, 3...
+     */
+    private List<BlockData> getSortedBlocks(KeyValueContainer container) throws IOException {
+      List<BlockData> blockDataList = handler.getBlockManager().listBlock(container, -1, 100);
+      blockDataList.sort(Comparator.comparingLong(BlockData::getLocalID));
+      return blockDataList;
+    }
+
+    /**
+     * Introduce corruption in the container:
+     * 1. Delete blocks from the container.
+     * 2. Corrupt chunks at an offset.
+     * If reverse is true, blocks are deleted and chunks are corrupted starting from the end of the container.
+     */
+    public void introduceCorruption(long containerID, int numBlocksToDelete, int numChunksToCorrupt, boolean reverse)
+        throws IOException {
+      KeyValueContainer container = getContainer(containerID);
+      KeyValueContainerData containerData = container.getContainerData();
+      // Simulate missing blocks
+      try (DBHandle handle = BlockUtils.getDB(containerData, conf);
+           BatchOperation batch = handle.getStore().getBatchHandler().initBatchOperation()) {
+        List<BlockData> blockDataList = getSortedBlocks(container);
+        int size = blockDataList.size();
+        for (int i = 0; i < numBlocksToDelete; i++) {
+          BlockData blockData = reverse ? blockDataList.get(size - 1 - i) : blockDataList.get(i);
+          File blockFile = TestContainerCorruptions.getBlock(container, blockData.getBlockID().getLocalID());
+          Assertions.assertTrue(blockFile.delete());
+          handle.getStore().getBlockDataTable().deleteWithBatch(batch, containerData.getBlockKey(blockData.getLocalID()));
+        }
+        handle.getStore().getBatchHandler().commitBatchOperation(batch);

+        // Verify that the expected number of blocks was deleted.
+        blockDataList = getSortedBlocks(container);
+        assertEquals(numBlocksToDelete, size - blockDataList.size());
+        System.err.println(blockDataList);
+      }
+
+      // Corrupt chunks at an offset.
+      List<BlockData> blockDataList = getSortedBlocks(container);
+      int size = blockDataList.size();
+      for (int i = 0; i < numChunksToCorrupt; i++) {
+        int blockIndex = reverse ?
size - 1 - (i % size) : i % size; + BlockData blockData = blockDataList.get(blockIndex); + int chunkIndex = i / size; + File blockFile = TestContainerCorruptions.getBlock(container, blockData.getBlockID().getLocalID()); + List<ContainerProtos.ChunkInfo> chunks = new ArrayList<>(blockData.getChunks()); + ContainerProtos.ChunkInfo chunkInfo = chunks.remove(chunkIndex); + corruptFileAtOffset(blockFile, chunkInfo.getOffset(), chunkInfo.getLen()); + System.err.println("datanode " + dnDetails.getUuidString() + " corrupting block " + blockData.getBlockID() + " at " + + "offset " + chunkInfo.getOffset()); + } + } + + private MutableVolumeSet createVolumeSet() throws IOException { + MutableVolumeSet volumeSet = new MutableVolumeSet(dnDetails.getUuidString(), conf, null, + StorageVolume.VolumeType.DATA_VOLUME, null); + createDbInstancesForTestIfNeeded(volumeSet, CLUSTER_ID, CLUSTER_ID, conf); + return volumeSet; + } - + /** + * Corrupt the file by altering bytes within the chunk defined by the given offset and length. + */ + private static void corruptFileAtOffset(File file, long offset, long chunkLength) { + try { + final int fileLength = (int) file.length(); + assertTrue(fileLength >= offset + chunkLength); + final int chunkEnd = (int) (offset + chunkLength); + + Path path = file.toPath(); + final byte[] original = IOUtils.readFully(Files.newInputStream(path), fileLength); + + // Corrupt the last byte and a middle byte of the chunk. The scanner should log this as two errors. + final byte[] corruptedBytes = Arrays.copyOf(original, fileLength); + corruptedBytes[chunkEnd - 1] = (byte) (original[chunkEnd - 1] << 1); + final long chunkMid = offset + chunkLength / 2; + corruptedBytes[(int) chunkMid] = (byte) (original[(int) chunkMid] << 1); + + Files.write(path, corruptedBytes, + StandardOpenOption.TRUNCATE_EXISTING, StandardOpenOption.SYNC); + + assertThat(IOUtils.readFully(Files.newInputStream(path), fileLength)) + .isEqualTo(corruptedBytes) + .isNotEqualTo(original); + } catch (IOException ex) { + // Fail the test. + throw new UncheckedIOException(ex); } - containerSet.addContainer(container); } } } From 0cf79f6620177a16995a7c11191f63c0e78ed8e6 Mon Sep 17 00:00:00 2001 From: Ethan Rose Date: Mon, 28 Apr 2025 13:15:51 -0400 Subject: [PATCH 29/62] Improve logging in test and prod code --- .../container/keyvalue/KeyValueHandler.java | 23 +++++-- ...eyValueHandlerContainerReconciliation.java | 65 ++++++++++++------- 2 files changed, 60 insertions(+), 28 deletions(-) diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/KeyValueHandler.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/KeyValueHandler.java index 8ee3245d80bc..20307120f17d 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/KeyValueHandler.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/KeyValueHandler.java @@ -1521,9 +1521,16 @@ public void reconcileContainer(DNContainerOperationClient dnClient, Container // Try creating the checksum info from RocksDB metadata if it is not present. checksumInfo = updateAndGetContainerChecksum(containerData); } - long oldDataChecksum = checksumInfo.getContainerMerkleTree().getDataChecksum(); + // Data checksum before reconciling with any peers. 
+ long originalDataChecksum = checksumInfo.getContainerMerkleTree().getDataChecksum(); + // Final checksum after reconciling with all peers. + long dataChecksum = originalDataChecksum; + + int successfulPeerCount = 0; for (DatanodeDetails peer : peers) { + // Data checksum updated after each peer reconciles. + long previousDataChecksum = dataChecksum; long start = Instant.now().toEpochMilli(); ContainerProtos.ContainerChecksumInfo peerChecksumInfo = dnClient.getContainerChecksumInfo( containerID, peer); @@ -1569,21 +1576,24 @@ public void reconcileContainer(DNContainerOperationClient dnClient, Container // Update checksum based on RocksDB metadata. The read chunk validates the checksum of the data // we read. So we can update the checksum only based on the RocksDB metadata. ContainerProtos.ContainerChecksumInfo updatedChecksumInfo = updateAndGetContainerChecksum(containerData); - long dataChecksum = updatedChecksumInfo.getContainerMerkleTree().getDataChecksum(); + dataChecksum = updatedChecksumInfo.getContainerMerkleTree().getDataChecksum(); long duration = Instant.now().toEpochMilli() - start; - if (dataChecksum == oldDataChecksum) { + if (dataChecksum == previousDataChecksum) { metrics.incContainerReconciledWithoutChanges(); LOG.info("Container {} reconciled with peer {}. No change in checksum. Current checksum {}. Time taken {} ms", containerID, peer.toString(), checksumToString(dataChecksum), duration); } else { metrics.incContainerReconciledWithChanges(); LOG.warn("Container {} reconciled with peer {}. Checksum updated from {} to {}. Time taken {} ms", - containerID, peer.toString(), checksumToString(oldDataChecksum), + containerID, peer.toString(), checksumToString(previousDataChecksum), checksumToString(dataChecksum), duration); } - ContainerLogger.logReconciled(container.getContainerData(), oldDataChecksum, peer); + ContainerLogger.logReconciled(container.getContainerData(), previousDataChecksum, peer); + successfulPeerCount++; } + LOG.info("Completed reconciliation for container {} with {}/{} peers. Checksum updated from {} to {}", containerID, + successfulPeerCount, peers.size(), checksumToString(originalDataChecksum), checksumToString(dataChecksum)); // Trigger manual on demand scanner containerSet.scanContainer(containerID); @@ -1626,7 +1636,8 @@ private void handleMissingBlock(KeyValueContainer container, Pipeline pipeline, Token blockToken = dnClient.getTokenHelper().getBlockToken(blockID, 0L); if (getBlockManager().blockExists(container, blockID)) { LOG.warn("Block {} already exists in container {}. The block should not exist and our container merkle tree" + - " is stale. Skipping reconciliation for this block.", blockID, containerData.getContainerID()); + " is stale. 
Skipping reconciliation for this block.", blockID.getLocalID(), + containerData.getContainerID()); return; } diff --git a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/keyvalue/TestKeyValueHandlerContainerReconciliation.java b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/keyvalue/TestKeyValueHandlerContainerReconciliation.java index b944c6e959f5..377f24131cd1 100644 --- a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/keyvalue/TestKeyValueHandlerContainerReconciliation.java +++ b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/keyvalue/TestKeyValueHandlerContainerReconciliation.java @@ -1,6 +1,7 @@ package org.apache.hadoop.ozone.container.keyvalue; import org.apache.commons.io.IOUtils; +import org.apache.hadoop.hdds.HddsUtils; import org.apache.hadoop.hdds.client.BlockID; import org.apache.hadoop.hdds.conf.OzoneConfiguration; import org.apache.hadoop.hdds.protocol.DatanodeDetails; @@ -28,11 +29,14 @@ import org.junit.jupiter.api.AfterAll; import org.junit.jupiter.api.Assertions; import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.io.TempDir; import org.junit.jupiter.params.ParameterizedTest; import org.junit.jupiter.params.provider.Arguments; import org.junit.jupiter.params.provider.MethodSource; import org.mockito.MockedStatic; import org.mockito.Mockito; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import java.io.File; import java.io.IOException; @@ -51,6 +55,7 @@ import java.util.Map; import java.util.Optional; import java.util.Random; +import java.util.Set; import java.util.UUID; import java.util.concurrent.ExecutionException; import java.util.concurrent.Future; @@ -85,23 +90,26 @@ public class TestKeyValueHandlerContainerReconciliation { */ public static Stream corruptionValues() { return Stream.of( - Arguments.of(5, 0), - Arguments.of(0, 5), - Arguments.of(0, 10), - Arguments.of(10, 0), - Arguments.of(5, 10), - Arguments.of(10, 5), - Arguments.of(2, 3), - Arguments.of(3, 2), - Arguments.of(4, 6), - Arguments.of(6, 4), - Arguments.of(6, 9), - Arguments.of(9, 6) +// Arguments.of(5, 0), +// Arguments.of(0, 5), +// Arguments.of(0, 10), +// Arguments.of(10, 0), +// Arguments.of(5, 10), + // TODO + Arguments.of(10, 5) +// Arguments.of(2, 3), +// Arguments.of(3, 2), +// Arguments.of(4, 6), +// Arguments.of(6, 4), +// Arguments.of(6, 9), +// Arguments.of(9, 6) ); } + public static final Logger LOG = LoggerFactory.getLogger(TestKeyValueHandlerContainerReconciliation.class); + // All container replicas will be placed in this directory, and the same replicas will be re-used for each test run. 
-// @TempDir + @TempDir private static Path containerDir; private static DNContainerOperationClient dnClient; private static MockedStatic containerProtocolMock; @@ -120,7 +128,7 @@ public static Stream corruptionValues() { */ @BeforeAll public static void setup() throws Exception { - containerDir = Files.createTempDirectory("reconcile"); +// containerDir = Files.createTempDirectory("reconcile"); dnClient = new DNContainerOperationClient(new OzoneConfiguration(), null, null); datanodes = new ArrayList<>(); @@ -149,6 +157,8 @@ public static void teardown() { @ParameterizedTest @MethodSource("corruptionValues") public void testContainerReconciliation(int numBlocksToDelete, int numChunksToCorrupt) throws Exception { + LOG.info("Healthy data checksum for container {} in this test is {}", CONTAINER_ID, + HddsUtils.checksumToString(healthyDataChecksum)); // Introduce corruption in each container on different replicas. List dnsToCorrupt = datanodes.stream().limit(2).collect(Collectors.toList()); @@ -159,8 +169,15 @@ public void testContainerReconciliation(int numBlocksToDelete, int numChunksToCo // Without reconciliation, checksums should be different because of the corruption. assertUniqueChecksumCount(CONTAINER_ID, datanodes, 3); - List peers = datanodes.stream().map(MockDatanode::getDnDetails).collect(Collectors.toList()); - datanodes.forEach(d -> d.reconcileContainer(dnClient, peers, CONTAINER_ID)); + // Reconcile each datanode with its peers. + // In a real cluster, SCM will not send a command to reconcile a datanode with itself. + for (MockDatanode current : datanodes) { + List peers = datanodes.stream() + .map(MockDatanode::getDnDetails) + .filter(other -> !current.getDnDetails().equals(other)) + .collect(Collectors.toList()); + current.reconcileContainer(dnClient, peers, CONTAINER_ID); + } // After reconciliation, checksums should be the same for all containers. // Reconciliation should have updated the tree based on the updated metadata that was obtained for the // previously corrupted data. We do not need to wait for the full data scan to complete. 
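The assertUniqueChecksumCount helper used above is defined elsewhere in this test class and is not part of this hunk. A minimal sketch of what such a helper can look like, assuming it only needs the checkAndGetDataChecksum method that MockDatanode exposes later in this file (the name and signature here are illustrative):

  private static void assertUniqueChecksumCount(long containerID, List<MockDatanode> datanodes, int expectedCount) {
    // Collect the data checksum reported by each replica; the Set keeps only distinct values.
    Set<Long> distinctChecksums = datanodes.stream()
        .map(dn -> dn.checkAndGetDataChecksum(containerID))
        .collect(Collectors.toSet());
    assertEquals(expectedCount, distinctChecksums.size());
  }

With three replicas and two of them corrupted differently, the distinct count is 3 before reconciliation and should drop to 1 once every replica has reconciled with its peers.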
@@ -231,8 +248,11 @@ private static class MockDatanode { private final ContainerSet containerSet; private final OzoneConfiguration conf; + public final Logger log; + public MockDatanode(DatanodeDetails dnDetails, Path tempDir) throws IOException { this.dnDetails = dnDetails; + log = LoggerFactory.getLogger("mock-datanode-" + dnDetails.getUuidString()); Path dataVolume = Paths.get(tempDir.toString(), dnDetails.getUuidString(), "data"); Path metadataVolume = Paths.get(tempDir.toString(), dnDetails.getUuidString(), "metadata"); @@ -285,7 +305,8 @@ public long checkAndGetDataChecksum(long containerID) { } catch (IOException ex) { fail("Failed to read container checksum from disk", ex); } - System.err.println("data checksum on DN " + dnDetails.getUuidString() + ": " + dataChecksum); + log.info("Retrieved data checksum {} from container {}", HddsUtils.checksumToString(dataChecksum), + containerID); return dataChecksum; } @@ -350,6 +371,7 @@ public void scanContainer(long containerID) { public void reconcileContainer(DNContainerOperationClient dnClient, Collection<DatanodeDetails> peers, long containerID) { + log.info("Beginning reconciliation on this mock datanode"); try { handler.reconcileContainer(dnClient, containerSet.getContainer(containerID), peers); } catch (IOException ex) { @@ -451,13 +473,12 @@ public void introduceCorruption(long containerID, int numBlocksToDelete, int num File blockFile = TestContainerCorruptions.getBlock(container, blockData.getBlockID().getLocalID()); Assertions.assertTrue(blockFile.delete()); handle.getStore().getBlockDataTable().deleteWithBatch(batch, containerData.getBlockKey(blockData.getLocalID())); + log.info("Deleting block {} from container {}", blockData.getBlockID().getLocalID(), containerID); } handle.getStore().getBatchHandler().commitBatchOperation(batch); - - // Check the op + // Check that the correct number of blocks were deleted. blockDataList = getSortedBlocks(container); assertEquals(numBlocksToDelete, size - blockDataList.size()); - System.err.println(blockDataList); } // Corrupt chunks at an offset. 
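The index arithmetic in the chunk corruption loop above distributes corruption round-robin across the blocks before advancing to the next chunk index. A small standalone illustration with hypothetical values (3 blocks, 5 chunks to corrupt, forward order):

  public class CorruptionIndexDemo {
    public static void main(String[] args) {
      int size = 3;                 // number of blocks in the container
      int numChunksToCorrupt = 5;
      for (int i = 0; i < numChunksToCorrupt; i++) {
        int blockIndex = i % size;  // cycle through the blocks
        int chunkIndex = i / size;  // advance to the next chunk after a full pass over the blocks
        // Prints (0,0), (1,0), (2,0), (0,1), (1,1)
        System.out.println("(" + blockIndex + "," + chunkIndex + ")");
      }
    }
  }

The reverse flag only mirrors the block index (size - 1 - (i % size)); the chunk index progression is unchanged.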
@@ -471,8 +492,8 @@ public void introduceCorruption(long containerID, int numBlocksToDelete, int num List chunks = new ArrayList<>(blockData.getChunks()); ContainerProtos.ChunkInfo chunkInfo = chunks.remove(chunkIndex); corruptFileAtOffset(blockFile, chunkInfo.getOffset(), chunkInfo.getLen()); - System.err.println("datanode " + dnDetails.getUuidString() + " corrupting block " + blockData.getBlockID() + " at " + - "offset " + chunkInfo.getOffset()); + log.info("Corrupting block {} at offset {} in container {}", blockData.getBlockID().getLocalID(), + chunkInfo.getOffset(), containerID); } } From 8b30f54d5e7b1767510891afae9692110d9c443f Mon Sep 17 00:00:00 2001 From: Ethan Rose Date: Mon, 28 Apr 2025 15:32:11 -0400 Subject: [PATCH 30/62] Fix tree tracking during reconcile process --- .../ContainerChecksumTreeManager.java | 4 ++ .../container/keyvalue/KeyValueHandler.java | 56 ++++++++++++------- ...eyValueHandlerContainerReconciliation.java | 8 ++- 3 files changed, 46 insertions(+), 22 deletions(-) diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/checksum/ContainerChecksumTreeManager.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/checksum/ContainerChecksumTreeManager.java index 90a2d0f52ac5..24b095892af4 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/checksum/ContainerChecksumTreeManager.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/checksum/ContainerChecksumTreeManager.java @@ -306,6 +306,10 @@ private void compareBlockMerkleTree(ContainerProtos.BlockMerkleTree thisBlockMer // chunks from us when they reconcile. } + public static long getDatachecksum(ContainerProtos.ContainerChecksumInfo checksumInfo) { + return checksumInfo.getContainerMerkleTree().getDataChecksum(); + } + /** * Returns the container checksum tree file for the specified container without deserializing it. */ diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/KeyValueHandler.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/KeyValueHandler.java index 20307120f17d..730a7034e6d7 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/KeyValueHandler.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/KeyValueHandler.java @@ -1512,25 +1512,27 @@ public void reconcileContainer(DNContainerOperationClient dnClient, Container KeyValueContainer kvContainer = (KeyValueContainer) container; KeyValueContainerData containerData = (KeyValueContainerData) container.getContainerData(); long containerID = containerData.getContainerID(); - Optional optionalChecksumInfo = checksumManager.read(containerData); - ContainerProtos.ContainerChecksumInfo checksumInfo; + // Obtain the original checksum info before reconciling with any peers. + Optional optionalChecksumInfo = checksumManager.read(containerData); + ContainerProtos.ContainerChecksumInfo originalChecksumInfo; if (optionalChecksumInfo.isPresent()) { - checksumInfo = optionalChecksumInfo.get(); + originalChecksumInfo = optionalChecksumInfo.get(); } else { // Try creating the checksum info from RocksDB metadata if it is not present. - checksumInfo = updateAndGetContainerChecksum(containerData); + originalChecksumInfo = updateAndGetContainerChecksum(containerData); } - // Data checksum before reconciling with any peers. 
- long originalDataChecksum = checksumInfo.getContainerMerkleTree().getDataChecksum(); - // Final checksum after reconciling with all peers. - long dataChecksum = originalDataChecksum; + // This holds our last checksum info after reconciling with the previous peer for logging purposes. + ContainerProtos.ContainerChecksumInfo previousChecksumInfo; + // This holds our current most up to date checksum info that we are using for the container. + ContainerProtos.ContainerChecksumInfo latestChecksumInfo = originalChecksumInfo; int successfulPeerCount = 0; for (DatanodeDetails peer : peers) { + LOG.info("Beginning reconciliation for container {} with peer {}. Current data checksum is {}", + containerID, peer, checksumToString(ContainerChecksumTreeManager.getDatachecksum(latestChecksumInfo))); // Data checksum updated after each peer reconciles. - long previousDataChecksum = dataChecksum; long start = Instant.now().toEpochMilli(); ContainerProtos.ContainerChecksumInfo peerChecksumInfo = dnClient.getContainerChecksumInfo( containerID, peer); @@ -1540,7 +1542,7 @@ public void reconcileContainer(DNContainerOperationClient dnClient, Container continue; } - ContainerDiffReport diffReport = checksumManager.diff(checksumInfo, peerChecksumInfo); + ContainerDiffReport diffReport = checksumManager.diff(latestChecksumInfo, peerChecksumInfo); Pipeline pipeline = createSingleNodePipeline(peer); ByteBuffer chunkByteBuffer = ByteBuffer.allocate(chunkSize); @@ -1573,27 +1575,43 @@ public void reconcileContainer(DNContainerOperationClient dnClient, Container containerID, e); } } + // Update checksum based on RocksDB metadata. The read chunk validates the checksum of the data // we read. So we can update the checksum only based on the RocksDB metadata. - ContainerProtos.ContainerChecksumInfo updatedChecksumInfo = updateAndGetContainerChecksum(containerData); - dataChecksum = updatedChecksumInfo.getContainerMerkleTree().getDataChecksum(); + previousChecksumInfo = latestChecksumInfo; + latestChecksumInfo = updateAndGetContainerChecksum(containerData); long duration = Instant.now().toEpochMilli() - start; - if (dataChecksum == previousDataChecksum) { + if (ContainerChecksumTreeManager.getDatachecksum(latestChecksumInfo) == + ContainerChecksumTreeManager.getDatachecksum(previousChecksumInfo)) { metrics.incContainerReconciledWithoutChanges(); LOG.info("Container {} reconciled with peer {}. No change in checksum. Current checksum {}. Time taken {} ms", - containerID, peer.toString(), checksumToString(dataChecksum), duration); + containerID, peer.toString(), + checksumToString(latestChecksumInfo.getContainerMerkleTree().getDataChecksum()), duration); } else { metrics.incContainerReconciledWithChanges(); LOG.warn("Container {} reconciled with peer {}. Checksum updated from {} to {}. Time taken {} ms", - containerID, peer.toString(), checksumToString(previousDataChecksum), - checksumToString(dataChecksum), duration); + containerID, peer.toString(), + checksumToString(ContainerChecksumTreeManager.getDatachecksum(previousChecksumInfo)), + checksumToString(ContainerChecksumTreeManager.getDatachecksum(latestChecksumInfo)), duration); } - ContainerLogger.logReconciled(container.getContainerData(), previousDataChecksum, peer); + ContainerLogger.logReconciled(container.getContainerData(), + ContainerChecksumTreeManager.getDatachecksum(previousChecksumInfo), peer); successfulPeerCount++; } - LOG.info("Completed reconciliation for container {} with {}/{} peers. 
Checksum updated from {} to {}", containerID, - successfulPeerCount, peers.size(), checksumToString(originalDataChecksum), checksumToString(dataChecksum)); + + // Log a summary after reconciling with all peers. + if (ContainerChecksumTreeManager.getDatachecksum(originalChecksumInfo) == + ContainerChecksumTreeManager.getDatachecksum(latestChecksumInfo)) { + LOG.info("Completed reconciliation for container {} with {}/{} peers. Original data checksum {} was not updated", + containerID, successfulPeerCount, peers.size(), + checksumToString(ContainerChecksumTreeManager.getDatachecksum(latestChecksumInfo))); + } else { + LOG.info("Completed reconciliation for container {} with {}/{} peers. Data checksum updated from {} to {}", + containerID, successfulPeerCount, peers.size(), + checksumToString(ContainerChecksumTreeManager.getDatachecksum(originalChecksumInfo)), + checksumToString(ContainerChecksumTreeManager.getDatachecksum(latestChecksumInfo))); + } // Trigger manual on demand scanner containerSet.scanContainer(containerID); diff --git a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/keyvalue/TestKeyValueHandlerContainerReconciliation.java b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/keyvalue/TestKeyValueHandlerContainerReconciliation.java index 377f24131cd1..fa8de54cd152 100644 --- a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/keyvalue/TestKeyValueHandlerContainerReconciliation.java +++ b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/keyvalue/TestKeyValueHandlerContainerReconciliation.java @@ -135,6 +135,8 @@ public static void setup() throws Exception { // Create a container with 15 blocks and 3 replicas. for (int i = 0; i < NUM_DATANODES; i++) { DatanodeDetails dnDetails = randomDatanodeDetails(); + // Use this fake host name to track the node through the test since it's easier to visualize than a UUID. 
+ dnDetails.setHostName("dn" + (i + 1)); MockDatanode dn = new MockDatanode(dnDetails, containerDir); dn.addContainerWithBlocks(CONTAINER_ID, 15); datanodes.add(dn); @@ -252,9 +254,9 @@ private static class MockDatanode { public MockDatanode(DatanodeDetails dnDetails, Path tempDir) throws IOException { this.dnDetails = dnDetails; - log = LoggerFactory.getLogger("mock-datanode-" + dnDetails.getUuidString()); - Path dataVolume = Paths.get(tempDir.toString(), dnDetails.getUuidString(), "data"); - Path metadataVolume = Paths.get(tempDir.toString(), dnDetails.getUuidString(), "metadata"); + log = LoggerFactory.getLogger("mock-datanode-" + dnDetails.getHostName()); + Path dataVolume = Paths.get(tempDir.toString(), dnDetails.getHostName(), "data"); + Path metadataVolume = Paths.get(tempDir.toString(), dnDetails.getHostName(), "metadata"); this.conf = new OzoneConfiguration(); conf.set(HDDS_DATANODE_DIR_KEY, dataVolume.toString()); From 9c74f4b6796a36fe87ec86843fdc39bde5b850c5 Mon Sep 17 00:00:00 2001 From: Ethan Rose Date: Tue, 29 Apr 2025 00:49:11 -0400 Subject: [PATCH 31/62] Use mixin to standardize scanner operations, log checksum changes in scanner --- .../ContainerChecksumTreeManager.java | 7 + .../common/helpers/ContainerUtils.java | 23 --- .../common/utils/ContainerLogger.java | 12 ++ .../AbstractBackgroundContainerScanner.java | 10 - .../BackgroundContainerDataScanner.java | 58 +----- .../BackgroundContainerMetadataScanner.java | 30 +-- .../ozoneimpl/ContainerScannerMixin.java | 178 ++++++++++++++++++ .../OnDemandContainerDataScanner.java | 98 +--------- 8 files changed, 210 insertions(+), 206 deletions(-) create mode 100644 hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ozoneimpl/ContainerScannerMixin.java diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/checksum/ContainerChecksumTreeManager.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/checksum/ContainerChecksumTreeManager.java index 24b095892af4..77a0fa7ceaa6 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/checksum/ContainerChecksumTreeManager.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/checksum/ContainerChecksumTreeManager.java @@ -310,6 +310,13 @@ public static long getDatachecksum(ContainerProtos.ContainerChecksumInfo checksu return checksumInfo.getContainerMerkleTree().getDataChecksum(); } + /** + * Returns whether the container checksum tree file for the specified container exists without deserializing it. + */ + public static boolean hasContainerChecksumFile(ContainerData data) { + return getContainerChecksumFile(data).exists(); + } + /** * Returns the container checksum tree file for the specified container without deserializing it. 
*/ diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/helpers/ContainerUtils.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/helpers/ContainerUtils.java index c06c02f0bc7e..1c2dcb1bde76 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/helpers/ContainerUtils.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/helpers/ContainerUtils.java @@ -235,29 +235,6 @@ public static String getContainerFileChecksum(String containerDataYamlStr) } } - public static boolean recentlyScanned(Container container, - long minScanGap, Logger log) { - Optional lastScanTime = - container.getContainerData().lastDataScanTime(); - Instant now = Instant.now(); - // Container is considered recently scanned if it was scanned within the - // configured time frame. If the optional is empty, the container was - // never scanned. - boolean recentlyScanned = lastScanTime.map(scanInstant -> - Duration.between(now, scanInstant).abs() - .compareTo(Duration.ofMillis(minScanGap)) < 0) - .orElse(false); - - if (recentlyScanned && log.isDebugEnabled()) { - log.debug("Skipping scan for container {} which was last " + - "scanned at {}. Current time is {}.", - container.getContainerData().getContainerID(), lastScanTime.get(), - now); - } - - return recentlyScanned; - } - /** * Get the .container file from the containerBaseDir. * @param containerBaseDir container base directory. The name of this diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/utils/ContainerLogger.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/utils/ContainerLogger.java index 6f20f22a8bb3..3db90773f10a 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/utils/ContainerLogger.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/utils/ContainerLogger.java @@ -148,6 +148,18 @@ public static void logRecovered(ContainerData containerData) { LOG.info(getMessage(containerData)); } + /** + * Logged when a container's checksum is updated. + * + * @param containerData The container which has the updated data checksum. + * @param oldDataChecksum The old data checksum. + */ + public static void logChecksumUpdated(ContainerData containerData, long oldDataChecksum) { + LOG.warn(getMessage(containerData, + "Container data checksum updated from " + checksumToString(oldDataChecksum) + " to " + + checksumToString(containerData.getDataChecksum()))); + } + /** * Logged when a container is reconciled. 
* diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ozoneimpl/AbstractBackgroundContainerScanner.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ozoneimpl/AbstractBackgroundContainerScanner.java index b51c2c6ca54a..dd7e30a5d9d2 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ozoneimpl/AbstractBackgroundContainerScanner.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ozoneimpl/AbstractBackgroundContainerScanner.java @@ -23,7 +23,6 @@ import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicBoolean; import org.apache.hadoop.ozone.container.common.interfaces.Container; -import org.apache.hadoop.ozone.container.common.interfaces.ScanResult; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -135,15 +134,6 @@ public final void handleRemainingSleep(long remainingSleep) { } } - public static void logUnhealthyScanResult(long containerID, ScanResult result, Logger log) { - LOG.error("Corruption detected in container [{}]. Marking it UNHEALTHY. {}", containerID, result); - if (log.isDebugEnabled()) { - StringBuilder allErrorString = new StringBuilder(); - result.getErrors().forEach(r -> allErrorString.append(r).append('\n')); - log.debug("Complete list of errors detected while scanning container {}:\n{}", containerID, allErrorString); - } - } - /** * Shutdown the current container scanning thread. * If the thread is already being shutdown, the call will block until the diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ozoneimpl/BackgroundContainerDataScanner.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ozoneimpl/BackgroundContainerDataScanner.java index f67b21c8c7e1..a15409f66a76 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ozoneimpl/BackgroundContainerDataScanner.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ozoneimpl/BackgroundContainerDataScanner.java @@ -25,7 +25,6 @@ import org.apache.hadoop.hdfs.util.Canceler; import org.apache.hadoop.hdfs.util.DataTransferThrottler; import org.apache.hadoop.ozone.container.checksum.ContainerChecksumTreeManager; -import org.apache.hadoop.ozone.container.common.helpers.ContainerUtils; import org.apache.hadoop.ozone.container.common.impl.ContainerData; import org.apache.hadoop.ozone.container.common.interfaces.Container; import org.apache.hadoop.ozone.container.common.volume.HddsVolume; @@ -49,8 +48,8 @@ public class BackgroundContainerDataScanner extends private final Canceler canceler; private static final String NAME_FORMAT = "ContainerDataScanner(%s)"; private final ContainerDataScannerMetrics metrics; - private final long minScanGap; private final ContainerChecksumTreeManager checksumManager; + private final ContainerScannerMixin scannerMixin; public BackgroundContainerDataScanner(ContainerScannerConfiguration conf, ContainerController controller, @@ -62,13 +61,8 @@ public BackgroundContainerDataScanner(ContainerScannerConfiguration conf, canceler = new Canceler(); this.metrics = ContainerDataScannerMetrics.create(volume.toString()); this.metrics.setStorageDirectory(volume.toString()); - this.minScanGap = conf.getContainerScanMinGap(); this.checksumManager = checksumManager; - } - - private boolean shouldScan(Container container) { - return container.shouldScanData() && - 
!ContainerUtils.recentlyScanned(container, minScanGap, LOG); + this.scannerMixin = new ContainerScannerMixin(LOG, controller, metrics, conf); } @Override @@ -80,45 +74,7 @@ public void scanContainer(Container c) shutdown("The volume has failed."); return; } - - if (!shouldScan(c)) { - return; - } - ContainerData containerData = c.getContainerData(); - long containerId = containerData.getContainerID(); - logScanStart(containerData); - DataScanResult result = c.scanData(throttler, canceler); - - if (result.isDeleted()) { - LOG.debug("Container [{}] has been deleted during the data scan.", containerId); - } else { - // Merkle tree write failure should not abort the scanning process. Continue marking the scan as completed. - try { - checksumManager.writeContainerDataTree(containerData, result.getDataTree()); - } catch (IOException ex) { - LOG.error("Failed to write container merkle tree for container {}", containerId, ex); - } - - if (!result.isHealthy()) { - logUnhealthyScanResult(containerId, result, LOG); - - // Only increment the number of unhealthy containers if the container was not already unhealthy. - // TODO HDDS-11593 (to be merged in to the feature branch from master): Scanner counters will start from zero - // at the beginning of each run, so this will need to be incremented for every unhealthy container seen - // regardless of its previous state. - if (controller.markContainerUnhealthy(containerId, result)) { - metrics.incNumUnHealthyContainers(); - } - } - metrics.incNumContainersScanned(); - } - - Instant now = Instant.now(); - if (!result.isDeleted()) { - controller.updateDataScanTimestamp(containerId, now); - } - // Even if the container was deleted, mark the scan as completed since we already logged it as starting. - logScanCompleted(containerData, now); + scannerMixin.scanData(c, checksumManager, throttler, canceler); } @Override @@ -135,14 +91,6 @@ private static void logScanStart(ContainerData containerData) { } } - private static void logScanCompleted( - ContainerData containerData, Instant timestamp) { - if (LOG.isDebugEnabled()) { - LOG.debug("Completed scan of container {} at {}", - containerData.getContainerID(), timestamp); - } - } - @Override public synchronized void shutdown() { shutdown(""); diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ozoneimpl/BackgroundContainerMetadataScanner.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ozoneimpl/BackgroundContainerMetadataScanner.java index c06017b7e258..aeb9a0e077dc 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ozoneimpl/BackgroundContainerMetadataScanner.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ozoneimpl/BackgroundContainerMetadataScanner.java @@ -36,17 +36,16 @@ public class BackgroundContainerMetadataScanner extends AbstractBackgroundContainerScanner { public static final Logger LOG = LoggerFactory.getLogger(BackgroundContainerMetadataScanner.class); - private final ContainerMetadataScannerMetrics metrics; private final ContainerController controller; - private final long minScanGap; + private final ContainerScannerMixin scannerMixin; public BackgroundContainerMetadataScanner(ContainerScannerConfiguration conf, ContainerController controller) { super("ContainerMetadataScanner", conf.getMetadataScanInterval()); this.controller = controller; this.metrics = ContainerMetadataScannerMetrics.create(); - this.minScanGap = conf.getContainerScanMinGap(); 
+ this.scannerMixin = new ContainerScannerMixin(LOG, controller, metrics, conf); } @Override @@ -58,21 +57,11 @@ public Iterator> getContainerIterator() { @Override public void scanContainer(Container container) throws IOException, InterruptedException { - // There is one background container metadata scanner per datanode. - // If this container's volume has failed, skip the container. - // The iterator returned by getContainerIterator may have stale results. - ContainerData data = container.getContainerData(); - long containerID = data.getContainerID(); - HddsVolume containerVolume = data.getVolume(); - if (containerVolume.isFailed()) { - LOG.debug("Skipping scan of container {}. Its volume {} has failed.", - containerID, containerVolume); + if (!scannerMixin.shouldScanMetadata(container)) { return; } - if (!shouldScan(container)) { - return; - } + long containerID = container.getContainerData().getContainerID(); MetadataScanResult result = container.scanMetaData(); if (result.isDeleted()) { @@ -80,11 +69,7 @@ public void scanContainer(Container container) return; } if (!result.isHealthy()) { - logUnhealthyScanResult(containerID, result, LOG); - boolean containerMarkedUnhealthy = controller.markContainerUnhealthy(containerID, result); - if (containerMarkedUnhealthy) { - metrics.incNumUnHealthyContainers(); - } + scannerMixin.handleUnhealthyScanResult(containerID, result); } // Do not update the scan timestamp after the scan since this was just a @@ -97,9 +82,4 @@ public ContainerMetadataScannerMetrics getMetrics() { return this.metrics; } - private boolean shouldScan(Container container) { - // Full data scan also does a metadata scan. If a full data scan was done - // recently, we can skip this metadata scan. - return !ContainerUtils.recentlyScanned(container, minScanGap, LOG); - } } diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ozoneimpl/ContainerScannerMixin.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ozoneimpl/ContainerScannerMixin.java new file mode 100644 index 000000000000..3b005cb39f10 --- /dev/null +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ozoneimpl/ContainerScannerMixin.java @@ -0,0 +1,178 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.ozone.container.ozoneimpl; + +import java.io.IOException; +import java.time.Duration; +import java.time.Instant; +import java.util.Optional; + +import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos; +import org.apache.hadoop.hdfs.util.Canceler; +import org.apache.hadoop.hdfs.util.DataTransferThrottler; +import org.apache.hadoop.ozone.container.checksum.ContainerChecksumTreeManager; +import org.apache.hadoop.ozone.container.common.impl.ContainerData; +import org.apache.hadoop.ozone.container.common.interfaces.Container; +import org.apache.hadoop.ozone.container.common.interfaces.ScanResult; +import org.apache.hadoop.ozone.container.common.utils.ContainerLogger; +import org.apache.hadoop.ozone.container.common.volume.HddsVolume; +import org.slf4j.Logger; + +/** + * Mixin to handle common data and metadata scan operations among background and on-demand scanners. + */ +public class ContainerScannerMixin { + private final Logger log; + private final ContainerController controller; + private final AbstractContainerScannerMetrics metrics; + private final long minScanGap; + + public ContainerScannerMixin(Logger log, ContainerController controller, + AbstractContainerScannerMetrics metrics, ContainerScannerConfiguration conf) { + this.log = log; + this.controller = controller; + this.metrics = metrics; + this.minScanGap = conf.getContainerScanMinGap(); + } + + public void scanData(Container container, ContainerChecksumTreeManager checksumManager, + DataTransferThrottler throttler, Canceler canceler) + throws IOException, InterruptedException { + if (!shouldScanData(container)) { + return; + } + ContainerData containerData = container.getContainerData(); + long containerId = containerData.getContainerID(); + long originalDataChecksum = containerData.getDataChecksum(); + long updatedDataChecksum = originalDataChecksum; + boolean hasChecksumFile = ContainerChecksumTreeManager.hasContainerChecksumFile(containerData); + logScanStart(containerData); + DataScanResult result = container.scanData(throttler, canceler); + + if (result.isDeleted()) { + log.debug("Container [{}] has been deleted during the data scan.", containerId); + } else { + // Merkle tree write failure should not abort the scanning process. Continue marking the scan as completed. + try { + // Also updates the data checksum in containerData. + checksumManager.writeContainerDataTree(containerData, result.getDataTree()); + updatedDataChecksum = containerData.getDataChecksum(); + } catch (IOException ex) { + log.error("Failed to write container merkle tree for container {}", containerId, ex); + } + + if (updatedDataChecksum != originalDataChecksum) { + String message = "Container data checksum updated from " + originalDataChecksum + " to " + updatedDataChecksum; + if (hasChecksumFile) { + // If this is the first time the scanner has run with the feature to generate a checksum file, don't + // log a warning for the checksum update. + log.debug(message); + } else { + log.warn(message); + ContainerLogger.logChecksumUpdated(containerData, originalDataChecksum); + } + } + + if (!result.isHealthy()) { + handleUnhealthyScanResult(containerId, result); + } + metrics.incNumContainersScanned(); + } + + Instant now = Instant.now(); + if (!result.isDeleted()) { + controller.updateDataScanTimestamp(containerId, now); + } + // Even if the container was deleted, mark the scan as completed since we already logged it as starting. 
+ logScanCompleted(containerData, now); + } + + public void handleUnhealthyScanResult(long containerID, ScanResult result) throws IOException { + + log.error("Corruption detected in container [{}]. Marking it UNHEALTHY. {}", containerID, result); + if (log.isDebugEnabled()) { + StringBuilder allErrorString = new StringBuilder(); + result.getErrors().forEach(r -> allErrorString.append(r).append('\n')); + log.debug("Complete list of errors detected while scanning container {}:\n{}", containerID, allErrorString); + } + + // Only increment the number of unhealthy containers if the container was not already unhealthy. + // TODO HDDS-11593 (to be merged in to the feature branch from master): Scanner counters will start from zero + // at the beginning of each run, so this will need to be incremented for every unhealthy container seen + // regardless of its previous state. + boolean containerMarkedUnhealthy = controller.markContainerUnhealthy(containerID, result); + if (containerMarkedUnhealthy) { + metrics.incNumUnHealthyContainers(); + } + } + + public boolean shouldScanMetadata(Container container) { + if (container == null) { + return false; + } + long containerID = container.getContainerData().getContainerID(); + + HddsVolume containerVolume = container.getContainerData().getVolume(); + if (containerVolume.isFailed()) { + log.debug("Skipping scan for container {} since its volume {} has failed.", containerID, containerVolume); + return false; + } + + return !recentlyScanned(container.getContainerData()); + } + + public boolean shouldScanData(Container container) { + return shouldScanMetadata(container) && container.shouldScanData(); + } + + private boolean recentlyScanned(ContainerData containerData) { + Optional lastScanTime = containerData.lastDataScanTime(); + Instant now = Instant.now(); + // Container is considered recently scanned if it was scanned within the + // configured time frame. If the optional is empty, the container was + // never scanned. + boolean recentlyScanned = lastScanTime.map(scanInstant -> + Duration.between(now, scanInstant).abs() + .compareTo(Duration.ofMillis(minScanGap)) < 0) + .orElse(false); + + if (recentlyScanned && log.isDebugEnabled()) { + log.debug("Skipping scan for container {} which was last " + + "scanned at {}. 
Current time is {}.", + containerData.getContainerID(), lastScanTime.get(), + now); + } + + return recentlyScanned; + } + + private void logScanStart(ContainerData containerData) { + if (log.isDebugEnabled()) { + Optional scanTimestamp = containerData.lastDataScanTime(); + Object lastScanTime = scanTimestamp.map(ts -> "at " + ts).orElse("never"); + log.debug("Scanning container {}, last scanned {}", + containerData.getContainerID(), lastScanTime); + } + } + + private void logScanCompleted( + ContainerData containerData, Instant timestamp) { + log.debug("Completed scan of container {} at {}", + containerData.getContainerID(), timestamp); + } +} diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ozoneimpl/OnDemandContainerDataScanner.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ozoneimpl/OnDemandContainerDataScanner.java index abdec1abfe0f..b26f45dec981 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ozoneimpl/OnDemandContainerDataScanner.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ozoneimpl/OnDemandContainerDataScanner.java @@ -17,11 +17,7 @@ package org.apache.hadoop.ozone.container.ozoneimpl; -import static org.apache.hadoop.ozone.container.ozoneimpl.AbstractBackgroundContainerScanner.logUnhealthyScanResult; - -import com.google.common.annotations.VisibleForTesting; import java.io.IOException; -import java.time.Instant; import java.util.Optional; import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.ExecutorService; @@ -31,10 +27,7 @@ import org.apache.hadoop.hdfs.util.Canceler; import org.apache.hadoop.hdfs.util.DataTransferThrottler; import org.apache.hadoop.ozone.container.checksum.ContainerChecksumTreeManager; -import org.apache.hadoop.ozone.container.common.helpers.ContainerUtils; -import org.apache.hadoop.ozone.container.common.impl.ContainerData; import org.apache.hadoop.ozone.container.common.interfaces.Container; -import org.apache.hadoop.ozone.container.common.volume.HddsVolume; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -46,49 +39,29 @@ public final class OnDemandContainerDataScanner { LoggerFactory.getLogger(OnDemandContainerDataScanner.class); private final ExecutorService scanExecutor; - private final ContainerController containerController; private final DataTransferThrottler throttler; private final Canceler canceler; private final ConcurrentHashMap .KeySetView containerRescheduleCheckSet; private final OnDemandScannerMetrics metrics; - private final long minScanGap; private final ContainerChecksumTreeManager checksumManager; + private final ContainerScannerMixin scannerMixin; public OnDemandContainerDataScanner( ContainerScannerConfiguration conf, ContainerController controller, ContainerChecksumTreeManager checksumManager) { - containerController = controller; throttler = new DataTransferThrottler( conf.getOnDemandBandwidthPerVolume()); canceler = new Canceler(); metrics = OnDemandScannerMetrics.create(); scanExecutor = Executors.newSingleThreadExecutor(); containerRescheduleCheckSet = ConcurrentHashMap.newKeySet(); - minScanGap = conf.getContainerScanMinGap(); this.checksumManager = checksumManager; - } - - private boolean shouldScan(Container container) { - if (container == null) { - return false; - } - long containerID = container.getContainerData().getContainerID(); - - HddsVolume containerVolume = container.getContainerData().getVolume(); - if 
(containerVolume.isFailed()) { - LOG.debug("Skipping on demand scan for container {} since its volume {}" + - " has failed.", containerID, containerVolume); - return false; - } - - return container.shouldScanData(); -// return !ContainerUtils.recentlyScanned(container, minScanGap, -// LOG) && container.shouldScanData(); + this.scannerMixin = new ContainerScannerMixin(LOG, controller, metrics, conf); } public Optional> scanContainer(Container container) { - if (!shouldScan(container)) { + if (!scannerMixin.shouldScanData(container)) { return Optional.empty(); } @@ -113,47 +86,11 @@ private void removeContainerFromScheduledContainers( } private void performOnDemandScan(Container container) { - if (!shouldScan(container)) { - return; - } - - long containerId = container.getContainerData().getContainerID(); try { - ContainerData containerData = container.getContainerData(); - logScanStart(containerData); - - DataScanResult result = container.scanData(throttler, canceler); - // Metrics for skipped containers should not be updated. - if (result.isDeleted()) { - LOG.debug("Container [{}] has been deleted during the data scan.", containerId); - } else { - // Merkle tree write failure should not abort the scanning process. Continue marking the scan as completed. - try { - checksumManager.writeContainerDataTree(containerData, result.getDataTree()); - } catch (IOException ex) { - LOG.error("Failed to write container merkle tree for container {}", containerId, ex); - } - if (!result.isHealthy()) { - logUnhealthyScanResult(containerId, result, LOG); - boolean containerMarkedUnhealthy = containerController - .markContainerUnhealthy(containerId, result); - if (containerMarkedUnhealthy) { - metrics.incNumUnHealthyContainers(); - } - } - metrics.incNumContainersScanned(); - } - - Instant now = Instant.now(); - if (!result.isDeleted()) { - containerController.updateDataScanTimestamp(containerId, now); - } - - // Even if the container was deleted, mark the scan as completed since we already logged it as starting. - logScanCompleted(containerData, now); + scannerMixin.scanData(container, checksumManager, throttler, canceler); } catch (IOException e) { LOG.warn("Unexpected exception while scanning container " - + containerId, e); + + container.getContainerData().getContainerID(), e); } catch (InterruptedException ex) { // This should only happen as part of shutdown, which will stop the // ExecutorService. 
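Both scanners now defer their gating decisions to the mixin shown above. The "recently scanned" check it centralizes reduces to a java.time gap comparison; a self-contained sketch of that logic, with names kept close to the mixin's (the wrapper class is illustrative):

  import java.time.Duration;
  import java.time.Instant;
  import java.util.Optional;

  final class MinScanGapCheck {
    // An empty Optional means the container was never scanned, so it is never "recently scanned".
    static boolean recentlyScanned(Optional<Instant> lastScanTime, long minScanGapMillis) {
      Instant now = Instant.now();
      return lastScanTime
          .map(scan -> Duration.between(now, scan).abs()
              .compareTo(Duration.ofMillis(minScanGapMillis)) < 0)
          .orElse(false);
    }
  }

Taking the absolute value of the duration keeps the check robust even if the recorded scan time is slightly ahead of the current clock.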
@@ -161,35 +98,10 @@ private void performOnDemandScan(Container container) { } } - private void logScanStart(ContainerData containerData) { - if (LOG.isDebugEnabled()) { - Optional scanTimestamp = containerData.lastDataScanTime(); - Object lastScanTime = scanTimestamp.map(ts -> "at " + ts).orElse("never"); - LOG.debug("Scanning container {}, last scanned {}", - containerData.getContainerID(), lastScanTime); - } - } - - private void logScanCompleted( - ContainerData containerData, Instant timestamp) { - LOG.debug("Completed scan of container {} at {}", - containerData.getContainerID(), timestamp); - } - public OnDemandScannerMetrics getMetrics() { return metrics; } - @VisibleForTesting - public DataTransferThrottler getThrottler() { - return throttler; - } - - @VisibleForTesting - public Canceler getCanceler() { - return canceler; - } - public synchronized void shutdown() { metrics.unregister(); String shutdownMessage = "On-demand container scanner is shutting down."; From d550669d37775b2ba900c96f4a450a7ac0481944 Mon Sep 17 00:00:00 2001 From: Ethan Rose Date: Tue, 29 Apr 2025 15:45:55 -0400 Subject: [PATCH 32/62] Logging improvements --- .../container/ozoneimpl/ContainerController.java | 12 +++++++----- .../ozoneimpl/ContainerScannerMixin.java | 15 ++++++++++----- 2 files changed, 17 insertions(+), 10 deletions(-) diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ozoneimpl/ContainerController.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ozoneimpl/ContainerController.java index f781fe20db42..37e50953f050 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ozoneimpl/ContainerController.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ozoneimpl/ContainerController.java @@ -112,14 +112,16 @@ public void markContainerForClose(final long containerId) public boolean markContainerUnhealthy(final long containerId, ScanResult reason) throws IOException { Container container = getContainer(containerId); - if (container != null && container.getContainerState() != State.UNHEALTHY) { + if (container == null) { + LOG.warn("Container {} not found, may be deleted, skip marking UNHEALTHY", containerId); + return false; + } else if (container.getContainerState() == State.UNHEALTHY) { + LOG.debug("Container {} is already UNHEALTHY, skip marking UNHEALTHY", containerId); + return false; + } else { getHandler(container).markContainerUnhealthy(container, reason); return true; - } else { - LOG.warn("Container {} not found, may be deleted, skip mark UNHEALTHY", - containerId); } - return false; } /** diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ozoneimpl/ContainerScannerMixin.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ozoneimpl/ContainerScannerMixin.java index 3b005cb39f10..e21db9ed61bc 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ozoneimpl/ContainerScannerMixin.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ozoneimpl/ContainerScannerMixin.java @@ -33,6 +33,8 @@ import org.apache.hadoop.ozone.container.common.volume.HddsVolume; import org.slf4j.Logger; +import static org.apache.hadoop.hdds.HddsUtils.checksumToString; + /** * Mixin to handle common data and metadata scan operations among background and on-demand scanners. 
*/ @@ -77,14 +79,16 @@ public void scanData(Container container, ContainerChecksumTreeManager checks } if (updatedDataChecksum != originalDataChecksum) { - String message = "Container data checksum updated from " + originalDataChecksum + " to " + updatedDataChecksum; + String message = + "Container data checksum updated from " + checksumToString(originalDataChecksum) + " to " + + checksumToString(updatedDataChecksum); if (hasChecksumFile) { + log.warn(message); + ContainerLogger.logChecksumUpdated(containerData, originalDataChecksum); + } else { // If this is the first time the scanner has run with the feature to generate a checksum file, don't // log a warning for the checksum update. log.debug(message); - } else { - log.warn(message); - ContainerLogger.logChecksumUpdated(containerData, originalDataChecksum); } } @@ -133,7 +137,8 @@ public boolean shouldScanMetadata(Container container) { return false; } - return !recentlyScanned(container.getContainerData()); +// return !recentlyScanned(container.getContainerData()); + return true; } public boolean shouldScanData(Container container) { From 97e02ea114c66d300463f16bb885d16c13fe651d Mon Sep 17 00:00:00 2001 From: Ethan Rose Date: Wed, 30 Apr 2025 10:54:44 -0400 Subject: [PATCH 33/62] Add checksum validation, generate readable data --- ...eyValueHandlerContainerReconciliation.java | 85 +++++++++++-------- 1 file changed, 51 insertions(+), 34 deletions(-) diff --git a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/keyvalue/TestKeyValueHandlerContainerReconciliation.java b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/keyvalue/TestKeyValueHandlerContainerReconciliation.java index fa8de54cd152..372e45f09337 100644 --- a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/keyvalue/TestKeyValueHandlerContainerReconciliation.java +++ b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/keyvalue/TestKeyValueHandlerContainerReconciliation.java @@ -71,6 +71,7 @@ import static org.assertj.core.api.Assertions.assertThat; import static org.assertj.core.api.Assertions.fail; import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertFalse; import static org.junit.jupiter.api.Assertions.assertTrue; import static org.mockito.ArgumentMatchers.any; import static org.mockito.ArgumentMatchers.anyLong; @@ -109,7 +110,7 @@ public static Stream corruptionValues() { public static final Logger LOG = LoggerFactory.getLogger(TestKeyValueHandlerContainerReconciliation.class); // All container replicas will be placed in this directory, and the same replicas will be re-used for each test run. 
- @TempDir +// @TempDir private static Path containerDir; private static DNContainerOperationClient dnClient; private static MockedStatic containerProtocolMock; @@ -128,7 +129,8 @@ public static Stream corruptionValues() { */ @BeforeAll public static void setup() throws Exception { -// containerDir = Files.createTempDirectory("reconcile"); + containerDir = Files.createTempDirectory("reconcile"); + LOG.info("Data written to {}", containerDir); dnClient = new DNContainerOperationClient(new OzoneConfiguration(), null, null); datanodes = new ArrayList<>(); @@ -233,11 +235,14 @@ private static void mockContainerProtocolCalls() { XceiverClientSpi xceiverClientSpi = inv.getArgument(0); ContainerProtos.ChunkInfo chunkInfo = inv.getArgument(1); ContainerProtos.DatanodeBlockID blockId = inv.getArgument(2); + List checksumValidators = inv.getArgument(3); Pipeline pipeline = xceiverClientSpi.getPipeline(); assertEquals(1, pipeline.size()); DatanodeDetails dn = pipeline.getFirstNode(); - return dnMap.get(dn).readChunk(blockId, chunkInfo); + return dnMap.get(dn).readChunk(blockId, chunkInfo, checksumValidators); }); + + containerProtocolMock.when(() -> ContainerProtocolCalls.toValidatorList(any())).thenCallRealMethod(); } /** @@ -321,14 +326,42 @@ public ContainerProtos.GetBlockResponseProto getBlock(BlockID blockID) throws IO } public ContainerProtos.ReadChunkResponseProto readChunk(ContainerProtos.DatanodeBlockID blockId, - ContainerProtos.ChunkInfo chunkInfo) throws IOException { + ContainerProtos.ChunkInfo chunkInfo, List validators) throws IOException { KeyValueContainer container = getContainer(blockId.getContainerID()); - return ContainerProtos.ReadChunkResponseProto.newBuilder() - .setBlockID(blockId) - .setChunkData(chunkInfo) - .setData(handler.getChunkManager().readChunk(container, BlockID.getFromProtobuf(blockId), - ChunkInfo.getFromProtoBuf(chunkInfo), null).toByteString()) - .build(); + ContainerProtos.ReadChunkResponseProto readChunkResponseProto = + ContainerProtos.ReadChunkResponseProto.newBuilder() + .setBlockID(blockId) + .setChunkData(chunkInfo) + .setData(handler.getChunkManager().readChunk(container, BlockID.getFromProtobuf(blockId), + ChunkInfo.getFromProtoBuf(chunkInfo), null).toByteString()) + .build(); + verifyChecksums(readChunkResponseProto, blockId, chunkInfo, validators); + return readChunkResponseProto; + } + + public void verifyChecksums(ContainerProtos.ReadChunkResponseProto readChunkResponseProto, + ContainerProtos.DatanodeBlockID blockId, ContainerProtos.ChunkInfo chunkInfo, + List validators) throws IOException { + assertFalse(validators.isEmpty()); + ContainerProtos.ContainerCommandRequestProto requestProto = + ContainerProtos.ContainerCommandRequestProto.newBuilder() + .setCmdType(ContainerProtos.Type.ReadChunk) + .setContainerID(blockId.getContainerID()) + .setDatanodeUuid(dnDetails.getUuidString()) + .setReadChunk( + ContainerProtos.ReadChunkRequestProto.newBuilder() + .setBlockID(blockId) + .setChunkData(chunkInfo) + .build()) + .build(); + ContainerProtos.ContainerCommandResponseProto responseProto = + ContainerProtos.ContainerCommandResponseProto.newBuilder() + .setCmdType(ContainerProtos.Type.ReadChunk) + .setResult(ContainerProtos.Result.SUCCESS) + .setReadChunk(readChunkResponseProto).build(); + for (XceiverClientSpi.Validator function : validators) { + function.accept(requestProto, responseProto); + } } public KeyValueContainer getContainer(long containerID) { @@ -347,28 +380,6 @@ public void scanContainer(long containerID) { } catch (InterruptedException | 
ExecutionException e) { fail("On demand container scan failed", e); } - - // TODO: On-demand scanner (HDDS-10374) should detect this corruption and generate container merkle tree. -// ContainerProtos.ContainerChecksumInfo.Builder builder = kvHandler.getChecksumManager() -// .read(containerData).get().toBuilder(); -// List blockMerkleTreeList = builder.getContainerMerkleTree() -// .getBlockMerkleTreeList(); -// assertEquals(size, blockMerkleTreeList.size()); -// -// builder.getContainerMerkleTreeBuilder().clearBlockMerkleTree(); -// for (int j = 0; j < blockMerkleTreeList.size(); j++) { -// ContainerProtos.BlockMerkleTree.Builder blockMerkleTreeBuilder = blockMerkleTreeList.get(j).toBuilder(); -// if (j == blockIndex) { -// List chunkMerkleTreeBuilderList = -// blockMerkleTreeBuilder.getChunkMerkleTreeBuilderList(); -// chunkMerkleTreeBuilderList.get(chunkIndex).setIsHealthy(false).setDataChecksum(random.nextLong()); -// blockMerkleTreeBuilder.setDataChecksum(random.nextLong()); -// } -// builder.getContainerMerkleTreeBuilder().addBlockMerkleTree(blockMerkleTreeBuilder.build()); -// } -// builder.getContainerMerkleTreeBuilder().setDataChecksum(random.nextLong()); -// Files.deleteIfExists(getContainerChecksumFile(keyValueContainer.getContainerData()).toPath()); -// writeContainerDataTreeProto(keyValueContainer.getContainerData(), builder.getContainerMerkleTree()); } public void reconcileContainer(DNContainerOperationClient dnClient, Collection peers, @@ -424,9 +435,15 @@ public void addContainerWithBlocks(long containerId, int blocks) throws Exceptio String chunkName = "chunk" + chunkCount; long offset = chunkCount * chunkData.length; ChunkInfo info = new ChunkInfo(chunkName, offset, chunkData.length); + // Generate data for the chunk and compute its checksum. - byteGenerator.nextBytes(chunkData); - Checksum checksum = new Checksum(ContainerProtos.ChecksumType.SHA256, bytesPerChecksum); +// byteGenerator.nextBytes(chunkData); + // Guard against odd chunk lengths so the newline byte never indexes past the end of the array. + for (int c = 0; c + 1 < chunkData.length; c += 2) { + chunkData[c] = (byte) (byteGenerator.nextInt(95) + 32); + chunkData[c + 1] = (byte) '\n'; + } + + Checksum checksum = new Checksum(ContainerProtos.ChecksumType.CRC32, bytesPerChecksum); ChecksumData checksumData = checksum.computeChecksum(chunkData); info.setChecksumData(checksumData); // Write chunk and checksum into the container. From 22b41b80b1925c8e9d691440df978175091e6b37 Mon Sep 17 00:00:00 2001 From: Ethan Rose Date: Mon, 5 May 2025 12:09:24 -0400 Subject: [PATCH 34/62] Use tree writer between peer updates. 
All tests pass --- .../ContainerChecksumTreeManager.java | 3 +- .../checksum/ContainerMerkleTreeWriter.java | 57 ++- .../container/keyvalue/KeyValueHandler.java | 361 ++++++++++-------- ...eyValueHandlerContainerReconciliation.java | 27 +- 4 files changed, 265 insertions(+), 183 deletions(-) diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/checksum/ContainerChecksumTreeManager.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/checksum/ContainerChecksumTreeManager.java index 77a0fa7ceaa6..d442449d6974 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/checksum/ContainerChecksumTreeManager.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/checksum/ContainerChecksumTreeManager.java @@ -37,6 +37,7 @@ import java.util.TreeSet; import java.util.concurrent.locks.Lock; import java.util.concurrent.locks.ReentrantLock; + import org.apache.hadoop.hdds.conf.ConfigurationSource; import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos; import org.apache.hadoop.hdds.scm.container.common.helpers.StorageContainerException; @@ -306,7 +307,7 @@ private void compareBlockMerkleTree(ContainerProtos.BlockMerkleTree thisBlockMer // chunks from us when they reconcile. } - public static long getDatachecksum(ContainerProtos.ContainerChecksumInfo checksumInfo) { + public static long getDataChecksum(ContainerProtos.ContainerChecksumInfo checksumInfo) { return checksumInfo.getContainerMerkleTree().getDataChecksum(); } diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/checksum/ContainerMerkleTreeWriter.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/checksum/ContainerMerkleTreeWriter.java index 57c5788d61c3..e6821e9fefe0 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/checksum/ContainerMerkleTreeWriter.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/checksum/ContainerMerkleTreeWriter.java @@ -49,12 +49,27 @@ public class ContainerMerkleTreeWriter { public static final Supplier CHECKSUM_BUFFER_SUPPLIER = ChecksumByteBufferFactory::crc32CImpl; /** - * Constructs an empty Container merkle tree object. + * Constructs a writer for an initially empty container merkle tree. */ public ContainerMerkleTreeWriter() { id2Block = new TreeMap<>(); } + /** + * Constructs a writer for a container merkle tree which initially contains all the information from the specified + * proto. + */ + public ContainerMerkleTreeWriter(ContainerProtos.ContainerMerkleTree fromTree) { + id2Block = new TreeMap<>(); + for (ContainerProtos.BlockMerkleTree blockTree: fromTree.getBlockMerkleTreeList()) { + long blockID = blockTree.getBlockID(); + addBlock(blockID); + for (ContainerProtos.ChunkMerkleTree chunkTree: blockTree.getChunkMerkleTreeList()) { + addChunks(blockID, chunkTree); + } + } + } + /** * Adds chunks to a block in the tree. The block entry will be created if it is the first time adding chunks to it. * If the block entry already exists, the chunks will be added to the existing chunks for that block. @@ -64,16 +79,29 @@ public ContainerMerkleTreeWriter() { * being added had errors. * @param chunks A list of chunks to add to this block. The chunks will be sorted internally by their offset. */ - public void addChunks(long blockID, boolean healthy, ContainerProtos.ChunkInfo... 
chunks) { - id2Block.computeIfAbsent(blockID, BlockMerkleTreeWriter::new).addChunks(healthy, chunks); - } - public void addChunks(long blockID, boolean healthy, Collection chunks) { for (ContainerProtos.ChunkInfo chunk: chunks) { addChunks(blockID, healthy, chunk); } } + public void addChunks(long blockID, boolean healthy, ContainerProtos.ChunkInfo... chunks) { + for (ContainerProtos.ChunkInfo chunk: chunks) { + addChunks(blockID, new ChunkMerkleTreeWriter(chunk, healthy)); + } + } + + + public void addChunks(long blockID, ContainerProtos.ChunkMerkleTree... chunks) { + for (ContainerProtos.ChunkMerkleTree chunkTree: chunks) { + addChunks(blockID, new ChunkMerkleTreeWriter(chunkTree)); + } + } + + private void addChunks(long blockID, ChunkMerkleTreeWriter chunkWriter) { + id2Block.computeIfAbsent(blockID, BlockMerkleTreeWriter::new).addChunks(chunkWriter); + } + /** * Adds an empty block to the tree. This method is not a pre-requisite to {@code addChunks}. * If the block entry already exists, it will not be modified. @@ -128,13 +156,11 @@ private static class BlockMerkleTreeWriter { * Adds the specified chunks to this block. The offset value of the chunk must be unique within the block, * otherwise it will overwrite the previous value at that offset. * - * @param healthy True if there were no errors detected with these chunks. False indicates that all the chunks - * being added had errors. * @param chunks A list of chunks to add to this block. */ - public void addChunks(boolean healthy, ContainerProtos.ChunkInfo... chunks) { - for (ContainerProtos.ChunkInfo chunk: chunks) { - offset2Chunk.put(chunk.getOffset(), new ChunkMerkleTreeWriter(chunk, healthy)); + public void addChunks(ChunkMerkleTreeWriter... chunks) { + for (ChunkMerkleTreeWriter chunk: chunks) { + offset2Chunk.put(chunk.getOffset(), chunk); } } @@ -191,6 +217,17 @@ private static class ChunkMerkleTreeWriter { this.dataChecksum = checksumImpl.getValue(); } + ChunkMerkleTreeWriter(ContainerProtos.ChunkMerkleTree chunkTree) { + length = chunkTree.getLength(); + offset = chunkTree.getOffset(); + isHealthy = chunkTree.getIsHealthy(); + dataChecksum = chunkTree.getDataChecksum(); + } + + public long getOffset() { + return offset; + } + /** * Computes a single hash for this ChunkInfo object. All chunk level checksum computation happens within this * method. 
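
Taken together, the new constructor and the proto-based addChunks overloads let a caller rehydrate a writer from the last persisted merkle tree and then layer fresh results on top of it, which is how the reconciliation path in the next file uses it. Below is a minimal sketch of that round trip, not code from this series: the class name and repaired-chunk arguments are invented, and it assumes the writer keeps a toProto() conversion like the read-only tree it replaces.

import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos;
import org.apache.hadoop.ozone.container.checksum.ContainerMerkleTreeWriter;

public final class TreeRebuildSketch {
  private TreeRebuildSketch() {
  }

  /** Rebuild a writer from a persisted tree, record one repaired chunk, and re-emit the proto. */
  public static ContainerProtos.ContainerMerkleTree rebuildAndRepair(
      ContainerProtos.ContainerChecksumInfo persisted, long repairedBlockID,
      ContainerProtos.ChunkInfo repairedChunk) {
    // Seed the writer with everything previously recorded for this container.
    ContainerMerkleTreeWriter writer = new ContainerMerkleTreeWriter(persisted.getContainerMerkleTree());
    // Overwrite the entry at the repaired chunk's offset and mark it healthy.
    writer.addChunks(repairedBlockID, true, repairedChunk);
    // Assumed conversion step: block and container checksums are recomputed over the merged state.
    return writer.toProto();
  }
}

Because a ChunkMerkleTreeWriter built from a ChunkMerkleTree proto carries the precomputed dataChecksum, re-adding existing chunks this way never re-reads or re-hashes chunk data.
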
diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/KeyValueHandler.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/KeyValueHandler.java index 730a7034e6d7..0c4a1b72ccdc 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/KeyValueHandler.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/KeyValueHandler.java @@ -76,12 +76,13 @@ import java.util.ArrayList; import java.util.Collection; import java.util.HashMap; +import java.util.HashSet; import java.util.LinkedList; import java.util.List; import java.util.Map; +import java.util.NavigableMap; import java.util.Optional; import java.util.Set; -import java.util.SortedMap; import java.util.TreeMap; import java.util.concurrent.locks.Lock; import java.util.function.Function; @@ -1506,9 +1507,10 @@ public void deleteContainer(Container container, boolean force) deleteInternal(container, force); } + @SuppressWarnings("checkstyle:MethodLength") @Override public void reconcileContainer(DNContainerOperationClient dnClient, Container container, - Collection peers) throws IOException { + Collection peers) throws IOException { KeyValueContainer kvContainer = (KeyValueContainer) container; KeyValueContainerData containerData = (KeyValueContainerData) container.getContainerData(); long containerID = containerData.getContainerID(); @@ -1522,16 +1524,24 @@ public void reconcileContainer(DNContainerOperationClient dnClient, Container // Try creating the checksum info from RocksDB metadata if it is not present. originalChecksumInfo = updateAndGetContainerChecksum(containerData); } - - // This holds our last checksum info after reconciling with the previous peer for logging purposes. - ContainerProtos.ContainerChecksumInfo previousChecksumInfo; - // This holds our current most up to date checksum info that we are using for the container. + // This holds our current most up-to-date checksum info that we are using for the container. ContainerProtos.ContainerChecksumInfo latestChecksumInfo = originalChecksumInfo; int successfulPeerCount = 0; + Set allBlocksUpdated = new HashSet<>(); + ByteBuffer chunkByteBuffer = ByteBuffer.allocate(chunkSize); + for (DatanodeDetails peer : peers) { + long numMissingBlocksRepaired = 0; + long numCorruptChunksRepaired = 0; + long numMissingChunksRepaired = 0; + // This will be updated as we do repairs with this peer, then used to write the updated tree for the diff with the + // next peer. + ContainerMerkleTreeWriter updatedTreeWriter = + new ContainerMerkleTreeWriter(latestChecksumInfo.getContainerMerkleTree()); + LOG.info("Beginning reconciliation for container {} with peer {}. Current data checksum is {}", - containerID, peer, checksumToString(ContainerChecksumTreeManager.getDatachecksum(latestChecksumInfo))); + containerID, peer, checksumToString(ContainerChecksumTreeManager.getDataChecksum(latestChecksumInfo))); // Data checksum updated after each peer reconciles. 
long start = Instant.now().toEpochMilli(); ContainerProtos.ContainerChecksumInfo peerChecksumInfo = dnClient.getContainerChecksumInfo( @@ -1544,22 +1554,39 @@ public void reconcileContainer(DNContainerOperationClient dnClient, Container ContainerDiffReport diffReport = checksumManager.diff(latestChecksumInfo, peerChecksumInfo); Pipeline pipeline = createSingleNodePipeline(peer); - ByteBuffer chunkByteBuffer = ByteBuffer.allocate(chunkSize); // Handle missing blocks for (ContainerProtos.BlockMerkleTree missingBlock : diffReport.getMissingBlocks()) { - try { - handleMissingBlock(kvContainer, pipeline, dnClient, missingBlock, chunkByteBuffer); - } catch (IOException e) { - LOG.error("Error while reconciling missing block for block {} in container {}", missingBlock.getBlockID(), - containerID, e); + long localID = missingBlock.getBlockID(); + BlockID blockID = new BlockID(containerID, localID); + if (getBlockManager().blockExists(container, blockID)) { + LOG.warn("Cannot reconcile block {} in container {} which was previously reported missing but is now " + + "present. Our container merkle tree is stale.", localID, containerID); + } else { + try { + long chunksInBlockRetrieved = reconcileChunksPerBlock(kvContainer, pipeline, dnClient, localID, + missingBlock.getChunkMerkleTreeList(), updatedTreeWriter, chunkByteBuffer); + if (chunksInBlockRetrieved != 0) { + allBlocksUpdated.add(localID); + numMissingBlocksRepaired++; + } + } catch (IOException e) { + LOG.error("Error while reconciling missing block for block {} in container {}", missingBlock.getBlockID(), + containerID, e); + } } } // Handle missing chunks for (Map.Entry> entry : diffReport.getMissingChunks().entrySet()) { + long localID = entry.getKey(); try { - reconcileChunksPerBlock(kvContainer, pipeline, dnClient, entry.getKey(), entry.getValue(), chunkByteBuffer); + long missingChunksRepaired = reconcileChunksPerBlock(kvContainer, pipeline, dnClient, entry.getKey(), + entry.getValue(), updatedTreeWriter, chunkByteBuffer); + if (missingChunksRepaired != 0) { + allBlocksUpdated.add(localID); + numMissingChunksRepaired += missingChunksRepaired; + } } catch (IOException e) { LOG.error("Error while reconciling missing chunk for block {} in container {}", entry.getKey(), containerID, e); @@ -1568,52 +1595,70 @@ public void reconcileContainer(DNContainerOperationClient dnClient, Container // Handle corrupt chunks for (Map.Entry> entry : diffReport.getCorruptChunks().entrySet()) { + long localID = entry.getKey(); try { - reconcileChunksPerBlock(kvContainer, pipeline, dnClient, entry.getKey(), entry.getValue(), chunkByteBuffer); + long corruptChunksRepaired = reconcileChunksPerBlock(kvContainer, pipeline, dnClient, entry.getKey(), + entry.getValue(), updatedTreeWriter, chunkByteBuffer); + if (corruptChunksRepaired != 0) { + allBlocksUpdated.add(localID); + numCorruptChunksRepaired += corruptChunksRepaired; + } } catch (IOException e) { LOG.error("Error while reconciling corrupt chunk for block {} in container {}", entry.getKey(), containerID, e); } } - // Update checksum based on RocksDB metadata. The read chunk validates the checksum of the data - // we read. So we can update the checksum only based on the RocksDB metadata. - previousChecksumInfo = latestChecksumInfo; - latestChecksumInfo = updateAndGetContainerChecksum(containerData); + // Based on repairs done with this peer, write the updated merkle tree to the container. + // This updated tree will be used when we reconcile with the next peer. 
+ ContainerProtos.ContainerChecksumInfo previousChecksumInfo = latestChecksumInfo; + latestChecksumInfo = checksumManager.writeContainerDataTree(containerData, updatedTreeWriter); + // Log the results of reconciliation with this peer. long duration = Instant.now().toEpochMilli() - start; - if (ContainerChecksumTreeManager.getDatachecksum(latestChecksumInfo) == - ContainerChecksumTreeManager.getDatachecksum(previousChecksumInfo)) { - metrics.incContainerReconciledWithoutChanges(); - LOG.info("Container {} reconciled with peer {}. No change in checksum. Current checksum {}. Time taken {} ms", - containerID, peer.toString(), - checksumToString(latestChecksumInfo.getContainerMerkleTree().getDataChecksum()), duration); + long previousDataChecksum = ContainerChecksumTreeManager.getDataChecksum(previousChecksumInfo); + long latestDataChecksum = ContainerChecksumTreeManager.getDataChecksum(latestChecksumInfo); + if (previousDataChecksum == latestDataChecksum) { + if (numCorruptChunksRepaired != 0 || numMissingBlocksRepaired != 0 || numMissingChunksRepaired != 0) { + // This condition should never happen. + LOG.error("Checksum of container was not updated but blocks were repaired."); + } + LOG.info("Container {} reconciled with peer {}. Data checksum {} was not updated. Time taken: {} ms", + containerID, peer, checksumToString(previousDataChecksum), duration); } else { - metrics.incContainerReconciledWithChanges(); - LOG.warn("Container {} reconciled with peer {}. Checksum updated from {} to {}. Time taken {} ms", - containerID, peer.toString(), - checksumToString(ContainerChecksumTreeManager.getDatachecksum(previousChecksumInfo)), - checksumToString(ContainerChecksumTreeManager.getDatachecksum(latestChecksumInfo)), duration); + LOG.warn("Container {} reconciled with peer {}. Data checksum updated from {} to {}" + + ".\nMissing blocks repaired: {}/{}\n" + + "Missing chunks repaired: {}/{}\n" + + "Corrupt chunks repaired: {}/{}\n" + + "Time taken: {} ms", + containerID, peer, checksumToString(previousDataChecksum), checksumToString(latestDataChecksum), + numMissingBlocksRepaired, diffReport.getMissingBlocks().size(), + numMissingChunksRepaired, diffReport.getMissingChunks().size(), + numCorruptChunksRepaired, diffReport.getCorruptChunks().size(), + duration); } - ContainerLogger.logReconciled(container.getContainerData(), - ContainerChecksumTreeManager.getDatachecksum(previousChecksumInfo), peer); + + ContainerLogger.logReconciled(container.getContainerData(), previousDataChecksum, peer); successfulPeerCount++; } // Log a summary after reconciling with all peers. - if (ContainerChecksumTreeManager.getDatachecksum(originalChecksumInfo) == - ContainerChecksumTreeManager.getDatachecksum(latestChecksumInfo)) { + long originalDataChecksum = ContainerChecksumTreeManager.getDataChecksum(originalChecksumInfo); + long latestDataChecksum = ContainerChecksumTreeManager.getDataChecksum(latestChecksumInfo); + if (originalDataChecksum == latestDataChecksum) { LOG.info("Completed reconciliation for container {} with {}/{} peers. Original data checksum {} was not updated", - containerID, successfulPeerCount, peers.size(), - checksumToString(ContainerChecksumTreeManager.getDatachecksum(latestChecksumInfo))); + containerID, successfulPeerCount, peers.size(), checksumToString(latestDataChecksum)); } else { - LOG.info("Completed reconciliation for container {} with {}/{} peers. 
Data checksum updated from {} to {}", - containerID, successfulPeerCount, peers.size(), - checksumToString(ContainerChecksumTreeManager.getDatachecksum(originalChecksumInfo)), - checksumToString(ContainerChecksumTreeManager.getDatachecksum(latestChecksumInfo))); + LOG.warn("Completed reconciliation for container {} with {}/{} peers. {} blocks were updated. Data checksum " + + "updated from {} to {}", containerID, successfulPeerCount, peers.size(), allBlocksUpdated.size(), + checksumToString(originalDataChecksum), checksumToString(latestDataChecksum)); + if (LOG.isDebugEnabled()) { + LOG.debug("Blocks updated in container {} after reconciling with {} peers: {}", containerID, + successfulPeerCount, allBlocksUpdated); + } } - // Trigger manual on demand scanner + // Trigger on demand scanner, which will build the merkle tree based on the newly ingested data. containerSet.scanContainer(containerID); sendICR(container); } @@ -1641,111 +1686,47 @@ private ContainerProtos.ContainerChecksumInfo updateAndGetContainerChecksum(KeyV } /** - * Handle missing block. It reads the missing block data from the peer datanode and writes it to the local container. - * If the block write fails, the block commit sequence id of the container and the block are not updated. + * Read chunks from a peer datanode and use them to repair our container. + * + * We will keep pulling chunks from the peer until we encounter an error. At that point we will stop reconciling + * this block rather than try to write it with holes. Whatever data we have pulled up to that point will + * be committed. Block commit sequence ID of the block and container are only updated if the entire block is read + * and written successfully. + * + * To avoid verbose logging during reconciliation, this method should not log successful operations above the debug + * level. + * + * @return The number of chunks successfully pulled from the peer and written to this block. Zero if the block was + * not updated. */ - private void handleMissingBlock(KeyValueContainer container, Pipeline pipeline, DNContainerOperationClient dnClient, - ContainerProtos.BlockMerkleTree missingBlock, ByteBuffer chunkByteBuffer) - throws IOException { - ContainerData containerData = container.getContainerData(); - BlockID blockID = new BlockID(containerData.getContainerID(), missingBlock.getBlockID()); + private long reconcileChunksPerBlock(KeyValueContainer container, Pipeline pipeline, + DNContainerOperationClient dnClient, long localID, List peerChunkList, + ContainerMerkleTreeWriter treeWriter, ByteBuffer chunkByteBuffer) throws IOException { + long containerID = container.getContainerData().getContainerID(); + DatanodeDetails peer = pipeline.getFirstNode(); + + BlockID blockID = new BlockID(containerID, localID); // The length of the block is not known, so instead of passing the default block length we pass 0. As the length // is not used to validate the token for getBlock call. Token blockToken = dnClient.getTokenHelper().getBlockToken(blockID, 0L); - if (getBlockManager().blockExists(container, blockID)) { - LOG.warn("Block {} already exists in container {}. The block should not exist and our container merkle tree" + - " is stale. 
Skipping reconciliation for this block.", blockID.getLocalID(), - containerData.getContainerID()); - return; - } - - List successfulChunksList = new ArrayList<>(); - boolean overwriteBcsId = true; - BlockLocationInfo blkInfo = new BlockLocationInfo.Builder() - .setBlockID(blockID) - .setPipeline(pipeline) - .setToken(blockToken) - .build(); - // Under construction is set here, during BlockInputStream#initialize() it is used to update the block length. - blkInfo.setUnderConstruction(true); - try (BlockInputStream blockInputStream = (BlockInputStream) blockInputStreamFactory.create( - RatisReplicationConfig.getInstance(HddsProtos.ReplicationFactor.ONE), - blkInfo, pipeline, blockToken, dnClient.getXceiverClientManager(), - null, conf.getObject(OzoneClientConfig.class))) { - // Initialize the BlockInputStream. Gets the blockData from the peer, sets the block length and - // initializes ChunkInputStream for each chunk. - blockInputStream.initialize(); - ContainerProtos.BlockData peerBlockData = blockInputStream.getStreamBlockData(); - // The maxBcsId is the peer's bcsId as there is no block for this blockID in the local container. - long maxBcsId = peerBlockData.getBlockID().getBlockCommitSequenceId(); - List peerChunksList = peerBlockData.getChunksList(); - - // Don't update bcsId if chunk read fails - for (ContainerProtos.ChunkInfo chunkInfoProto : peerChunksList) { - try { - // Seek to the offset of the chunk. Seek updates the chunkIndex in the BlockInputStream. - blockInputStream.seek(chunkInfoProto.getOffset()); - - // Read the chunk data from the BlockInputStream and write it to the container. - int chunkLength = (int) chunkInfoProto.getLen(); - if (chunkByteBuffer.capacity() < chunkLength) { - chunkByteBuffer = ByteBuffer.allocate(chunkLength); - } - - chunkByteBuffer.clear(); - chunkByteBuffer.limit(chunkLength); - int bytesRead = blockInputStream.read(chunkByteBuffer); - if (bytesRead != chunkLength) { - throw new IOException("Error while reading chunk data from block input stream. Expected length: " + - chunkLength + ", Actual length: " + bytesRead); - } - - chunkByteBuffer.flip(); - ChunkBuffer chunkBuffer = ChunkBuffer.wrap(chunkByteBuffer); - ChunkInfo chunkInfo = ChunkInfo.getFromProtoBuf(chunkInfoProto); - chunkInfo.addMetadata(OzoneConsts.CHUNK_OVERWRITE, "true"); - writeChunkForClosedContainer(chunkInfo, blockID, chunkBuffer, container); - // If the chunk read/write fails, we are expected to have holes in the blockData's chunk list. - // But that is okay, if the read fails it means there might be a hole in the peer datanode as well. - // If the chunk write fails then we don't want to add the metadata without the actual data as there is - // no data to verify the chunk checksum. - successfulChunksList.add(chunkInfoProto); - } catch (IOException ex) { - overwriteBcsId = false; - LOG.error("Error while reconciling missing block {} for offset {} in container {}", - blockID, chunkInfoProto.getOffset(), containerData.getContainerID(), ex); - } - } - - BlockData putBlockData = BlockData.getFromProtoBuf(peerBlockData); - putBlockData.setChunks(successfulChunksList); - putBlockForClosedContainer(container, putBlockData, maxBcsId, overwriteBcsId); - chunkManager.finishWriteChunks(container, putBlockData); + // Contains all the chunks we currently have for this block. + // This should be empty if we do not have the block. + // As reconciliation progresses, we will add any updated chunks here and commit the resulting list back to the + // block. 
+ NavigableMap localOffset2Chunk; + long localBcsid = 0; + if (blockManager.blockExists(container, blockID)) { + BlockData localBlockData = blockManager.getBlock(container, blockID); + localOffset2Chunk = localBlockData.getChunks().stream() + .collect(Collectors.toMap(ContainerProtos.ChunkInfo::getOffset, + Function.identity(), (chunk1, chunk2) -> chunk1, TreeMap::new)); + localBcsid = localBlockData.getBlockCommitSequenceId(); + } else { + localOffset2Chunk = new TreeMap<>(); } - /** - * This method reconciles chunks per block. It reads the missing/corrupt chunk data from the peer - * datanode and writes it to the local container. If the chunk write fails, the block commit sequence - * id is not updated. - */ - private void reconcileChunksPerBlock(KeyValueContainer container, Pipeline pipeline, - DNContainerOperationClient dnClient, long blockId, - List chunkList, ByteBuffer chunkByteBuffer) - throws IOException { - - ContainerData containerData = container.getContainerData(); - BlockID blockID = new BlockID(containerData.getContainerID(), blockId); - // The length of the block is not known, so instead of passing the default block length we pass 0. As the length - // is not used to validate the token for getBlock call. - Token blockToken = dnClient.getTokenHelper().getBlockToken(blockID, 0L); - BlockData localBlockData = getBlockManager().getBlock(container, blockID); - - SortedMap localChunksMap = localBlockData.getChunks().stream() - .collect(Collectors.toMap(ContainerProtos.ChunkInfo::getOffset, - Function.identity(), (chunk1, chunk2) -> chunk1, TreeMap::new)); - boolean overwriteBcsId = true; + boolean allChunksSuccessful = true; + int numSuccessfulChunks = 0; BlockLocationInfo blkInfo = new BlockLocationInfo.Builder() .setBlockID(blockID) @@ -1762,21 +1743,30 @@ private void reconcileChunksPerBlock(KeyValueContainer container, Pipeline pipel // initializes ChunkInputStream for each chunk. blockInputStream.initialize(); ContainerProtos.BlockData peerBlockData = blockInputStream.getStreamBlockData(); - // Check the local bcsId with the one from the bcsId from the peer datanode. - long maxBcsId = Math.max(peerBlockData.getBlockID().getBlockCommitSequenceId(), - localBlockData.getBlockCommitSequenceId()); + long maxBcsId = Math.max(localBcsid, peerBlockData.getBlockID().getBlockCommitSequenceId()); - for (ContainerProtos.ChunkMerkleTree chunkMerkleTree : chunkList) { + for (ContainerProtos.ChunkMerkleTree chunkMerkleTree : peerChunkList) { long chunkOffset = chunkMerkleTree.getOffset(); + if (!previousChunkPresent(blockID, chunkOffset, localOffset2Chunk)) { + break; + } + + if (!chunkMerkleTree.getIsHealthy()) { + LOG.warn("Skipping chunk at offset {} in block {} of container {} from peer {} since peer reported it as " + + "unhealthy.", chunkOffset, localID, containerID, peer); + continue; + } try { // Seek to the offset of the chunk. Seek updates the chunkIndex in the BlockInputStream. blockInputStream.seek(chunkOffset); ChunkInputStream currentChunkStream = blockInputStream.getChunkStreams().get( blockInputStream.getChunkIndex()); ContainerProtos.ChunkInfo chunkInfoProto = currentChunkStream.getChunkInfo(); - ChunkInfo chunkInfo = ChunkInfo.getFromProtoBuf(chunkInfoProto); - chunkInfo.addMetadata(OzoneConsts.CHUNK_OVERWRITE, "true"); - verifyChunksLength(chunkInfoProto, localChunksMap.get(chunkOffset)); + + // If we are overwriting a chunk, make sure it is the same size as the current chunk we are replacing. 
+ if (localOffset2Chunk.containsKey(chunkOffset)) { + verifyChunksLength(chunkInfoProto, localOffset2Chunk.get(chunkOffset)); + } // Read the chunk data from the BlockInputStream and write it to the container. int chunkLength = (int) chunkInfoProto.getLen(); @@ -1787,30 +1777,57 @@ private void reconcileChunksPerBlock(KeyValueContainer container, Pipeline pipel chunkByteBuffer.clear(); chunkByteBuffer.limit(chunkLength); int bytesRead = blockInputStream.read(chunkByteBuffer); + // Make sure we read exactly the same amount of data we expected so it fits in the block. if (bytesRead != chunkLength) { - throw new IOException("Error while reading chunk data from block input stream. Expected length: " + + throw new IOException("Error while reading chunk data from peer " + peer + ". Expected length: " + chunkLength + ", Actual length: " + bytesRead); } chunkByteBuffer.flip(); ChunkBuffer chunkBuffer = ChunkBuffer.wrap(chunkByteBuffer); + ChunkInfo chunkInfo = ChunkInfo.getFromProtoBuf(chunkInfoProto); + chunkInfo.addMetadata(OzoneConsts.CHUNK_OVERWRITE, "true"); writeChunkForClosedContainer(chunkInfo, blockID, chunkBuffer, container); - // In reconciling missing chunks which happens at the end of the block, we are expected to have holes in - // the blockData's chunk list because we continue to reconcile even if there are failures while reconciling - // chunks which is fine as we don't update the bcsId. - localChunksMap.put(chunkInfo.getOffset(), chunkInfoProto); + localOffset2Chunk.put(chunkOffset, chunkInfoProto); + if (LOG.isDebugEnabled()) { + LOG.debug("Successfully ingested chunk at offset {} into block {} of container {} from peer {}", + chunkOffset, localID, containerID, peer); + } + numSuccessfulChunks++; } catch (IOException ex) { - overwriteBcsId = false; - LOG.error("Error while reconciling chunk {} for block {} in container {}", - chunkOffset, blockID, containerData.getContainerID(), ex); + // The peer's chunk was expected to be healthy. Log a stack trace for more info as to why this failed. + LOG.error("Failed to ingest chunk at offset {} for block {} in container {} from peer {}", + chunkOffset, localID, containerID, peer, ex); + allChunksSuccessful = false; + } + // Stop block repair once we fail to pull a chunk from the peer. + // Our write chunk API currently does not have a good way to handle writing around holes in a block. + if (!allChunksSuccessful) { + break; } } - List localChunkList = new ArrayList<>(localChunksMap.values()); - localBlockData.setChunks(localChunkList); - putBlockForClosedContainer(container, localBlockData, maxBcsId, overwriteBcsId); - chunkManager.finishWriteChunks(container, localBlockData); + // Do not update block metadata in this container if we did not ingest any chunks for the block. + if (!localOffset2Chunk.isEmpty()) { + BlockData putBlockData = BlockData.getFromProtoBuf(peerBlockData); + List allChunks = new ArrayList<>(localOffset2Chunk.values()); + putBlockData.setChunks(allChunks); + putBlockForClosedContainer(container, putBlockData, maxBcsId, allChunksSuccessful); + treeWriter.addChunks(localID, true, allChunks); + // Invalidate the file handle cache, so new read requests get the new file if one was created. + chunkManager.finishWriteChunks(container, putBlockData); + } } + + if (!allChunksSuccessful) { + LOG.warn("Partially reconciled block {} in container {} with peer {}. 
{}/{} chunks were " + + "obtained successfully", localID, containerID, peer, numSuccessfulChunks, peerChunkList.size()); + } else if (LOG.isDebugEnabled()) { + LOG.debug("Reconciled all {} chunks in block {} in container {} from peer {}", + peerChunkList.size(), localID, containerID, peer); + } + + return numSuccessfulChunks; } private void verifyChunksLength(ContainerProtos.ChunkInfo peerChunkInfo, ContainerProtos.ChunkInfo localChunkInfo) @@ -1830,6 +1847,34 @@ private void verifyChunksLength(ContainerProtos.ChunkInfo peerChunkInfo, Contain } } + /** + * If we do not have the previous chunk for the current entry, abort the reconciliation here. Currently we do + * not support repairing around holes in a block; the missing chunk must be obtained first. + */ + private boolean previousChunkPresent(BlockID blockID, long chunkOffset, + NavigableMap localOffset2Chunk) { + long localID = blockID.getLocalID(); + long containerID = blockID.getContainerID(); + if (chunkOffset != 0) { + Map.Entry prevEntry = localOffset2Chunk.lowerEntry(chunkOffset); + if (prevEntry == null) { + // We are trying to write a chunk that is not the first, but we currently have no chunks in the block. + LOG.warn("Exiting reconciliation for block {} in container {} at length {}. The previous chunk is not " + + "present locally.", localID, containerID, 0); + return false; + } else { + long prevOffset = prevEntry.getKey(); + long prevLength = prevEntry.getValue().getLen(); + if (prevOffset + prevLength != chunkOffset) { + LOG.warn("Exiting reconciliation for block {} in container {} at length {}. The previous chunk is not " + + "present locally.", localID, containerID, prevOffset + prevLength); + return false; + } + } + } + return true; + } + /** * Called by BlockDeletingService to delete all the chunks in a block * before proceeding to delete the block info from DB. 
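
The offset arithmetic in previousChunkPresent is easy to exercise in isolation. The following self-contained model uses plain offset-to-length entries in place of the ChunkInfo protos; it is an illustration of the contiguity rule above, not the production method.

import java.util.Map;
import java.util.NavigableMap;
import java.util.TreeMap;

public final class ContiguityCheckSketch {
  private ContiguityCheckSketch() {
  }

  /** A chunk at `offset` may only be written if the chunks already held extend exactly to that offset. */
  static boolean previousChunkPresent(long offset, NavigableMap<Long, Long> offset2Length) {
    if (offset == 0) {
      return true; // The first chunk never needs a predecessor.
    }
    Map.Entry<Long, Long> prev = offset2Length.lowerEntry(offset);
    // Rejects both "no chunks at all" and "previous chunk does not reach this offset".
    return prev != null && prev.getKey() + prev.getValue() == offset;
  }

  public static void main(String[] args) {
    NavigableMap<Long, Long> have = new TreeMap<>();
    have.put(0L, 4L);                                   // one 4-byte chunk at offset 0
    System.out.println(previousChunkPresent(4L, have)); // true: contiguous
    System.out.println(previousChunkPresent(8L, have)); // false: hole at [4, 8)
  }
}

Stopping at the first hole matches the repair loop above, which breaks out of the chunk loop rather than committing a block with gaps the write-chunk path cannot represent.
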
diff --git a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/keyvalue/TestKeyValueHandlerContainerReconciliation.java b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/keyvalue/TestKeyValueHandlerContainerReconciliation.java index 372e45f09337..26ffe23a4cec 100644 --- a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/keyvalue/TestKeyValueHandlerContainerReconciliation.java +++ b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/keyvalue/TestKeyValueHandlerContainerReconciliation.java @@ -91,19 +91,18 @@ public class TestKeyValueHandlerContainerReconciliation { */ public static Stream corruptionValues() { return Stream.of( -// Arguments.of(5, 0), -// Arguments.of(0, 5), -// Arguments.of(0, 10), -// Arguments.of(10, 0), -// Arguments.of(5, 10), - // TODO - Arguments.of(10, 5) -// Arguments.of(2, 3), -// Arguments.of(3, 2), -// Arguments.of(4, 6), -// Arguments.of(6, 4), -// Arguments.of(6, 9), -// Arguments.of(9, 6) + Arguments.of(5, 0), + Arguments.of(0, 5), + Arguments.of(0, 10), + Arguments.of(10, 0), + Arguments.of(5, 10), + Arguments.of(10, 5), + Arguments.of(2, 3), + Arguments.of(3, 2), + Arguments.of(4, 6), + Arguments.of(6, 4), + Arguments.of(6, 9), + Arguments.of(9, 6) ); } @@ -312,7 +311,7 @@ public long checkAndGetDataChecksum(long containerID) { } catch (IOException ex) { fail("Failed to read container checksum from disk", ex); } - log.info("Retrieved data checksum {} from container {}", HddsUtils.checksumToString(healthyDataChecksum), + log.info("Retrieved data checksum {} from container {}", HddsUtils.checksumToString(dataChecksum), containerID); return dataChecksum; } From f49a9dd15c97fc49d22187426bc36757c8945b3c Mon Sep 17 00:00:00 2001 From: Ethan Rose Date: Mon, 5 May 2025 13:13:12 -0400 Subject: [PATCH 35/62] Wait for on-demand scans to complete in test --- ...eyValueHandlerContainerReconciliation.java | 40 +++++++++++++++++-- 1 file changed, 37 insertions(+), 3 deletions(-) diff --git a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/keyvalue/TestKeyValueHandlerContainerReconciliation.java b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/keyvalue/TestKeyValueHandlerContainerReconciliation.java index 26ffe23a4cec..2b3e86579f5f 100644 --- a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/keyvalue/TestKeyValueHandlerContainerReconciliation.java +++ b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/keyvalue/TestKeyValueHandlerContainerReconciliation.java @@ -25,6 +25,7 @@ import org.apache.hadoop.ozone.container.ozoneimpl.ContainerController; import org.apache.hadoop.ozone.container.ozoneimpl.ContainerScannerConfiguration; import org.apache.hadoop.ozone.container.ozoneimpl.OnDemandContainerDataScanner; +import org.apache.ozone.test.GenericTestUtils; import org.apache.ratis.thirdparty.com.google.protobuf.ByteString; import org.junit.jupiter.api.AfterAll; import org.junit.jupiter.api.Assertions; @@ -59,6 +60,7 @@ import java.util.UUID; import java.util.concurrent.ExecutionException; import java.util.concurrent.Future; +import java.util.concurrent.TimeoutException; import java.util.function.Function; import java.util.stream.Collectors; import java.util.stream.Stream; @@ -168,10 +170,15 @@ public void testContainerReconciliation(int numBlocksToDelete, int numChunksToCo dnsToCorrupt.get(0).introduceCorruption(CONTAINER_ID, numBlocksToDelete, 
numChunksToCorrupt, false); dnsToCorrupt.get(1).introduceCorruption(CONTAINER_ID, numBlocksToDelete, numChunksToCorrupt, true); // Use synchronous on-demand scans to re-build the merkle trees after corruption. - dnsToCorrupt.forEach(d -> d.scanContainer(CONTAINER_ID)); + datanodes.forEach(d -> d.scanContainer(CONTAINER_ID)); // Without reconciliation, checksums should be different because of the corruption. assertUniqueChecksumCount(CONTAINER_ID, datanodes, 3); + // Each datanode should have had one on-demand scan during test setup, and a second one after corruption was + // introduced. + final int originalScanCount = 2; + waitForExpectedScanCount(originalScanCount); + // Reconcile each datanode with its peers. // In a real cluster, SCM will not send a command to reconcile a datanode with itself. for (MockDatanode current : datanodes) { @@ -181,13 +188,31 @@ public void testContainerReconciliation(int numBlocksToDelete, int numChunksToCo .collect(Collectors.toList()); current.reconcileContainer(dnClient, peers, CONTAINER_ID); } + // Reconciliation should have triggered a second on-demand scan for each replica. Wait for them to finish before + // checking the results. + waitForExpectedScanCount(originalScanCount + 1); // After reconciliation, checksums should be the same for all containers. - // Reconciliation should have updated the tree based on the updated metadata that was obtained for the - // previously corrupted data. We do not need to wait for the full data scan to complete. long repairedDataChecksum = assertUniqueChecksumCount(CONTAINER_ID, datanodes, 1); assertEquals(healthyDataChecksum, repairedDataChecksum); } + /** + * Uses the on-demand container scanner metrics to wait for the expected number of on demand scans to complete. + * Since the metrics are static and shared across all datanodes in this test, this count should be the total number + * of scans across all nodes. + */ + private void waitForExpectedScanCount(int expectedCount) throws Exception { + for (MockDatanode datanode: datanodes) { + try { + GenericTestUtils.waitFor(() -> datanode.getOnDemandScanCount() == expectedCount, 100, 5_000); + } catch (TimeoutException ex) { + LOG.error("Timed out waiting for on-demand scan count {} to reach expected count {} on datanode {}", + datanode.getOnDemandScanCount(), expectedCount, datanode); + throw ex; + } + } + } + /** * Checks for the expected number of unique checksums among a container on the provided datanodes. * @return The data checksum from one of the nodes. Useful if expectedUniqueChecksums = 1. @@ -381,6 +406,10 @@ public void scanContainer(long containerID) { } } + public int getOnDemandScanCount() { + return onDemandScanner.getMetrics().getNumContainersScanned(); + } + public void reconcileContainer(DNContainerOperationClient dnClient, Collection peers, long containerID) { log.info("Beginning reconciliation on this mock datanode"); @@ -460,6 +489,11 @@ public void addContainerWithBlocks(long containerId, int blocks) throws Exceptio handler.closeContainer(container); } + @Override + public String toString() { + return dnDetails.toString(); + } + /** * Returns a list of all blocks in the container sorted numerically by blockID. * For example, the unsorted list would have the first blocks as 1, 10, 11... 
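
The wait helper added in this patch is a poll loop over the scanner's completed-scan metric, with GenericTestUtils.waitFor supplying the retry and timeout plumbing. For readers without the Ozone test utilities on hand, the pattern reduces to roughly the sketch below; the class and parameter names are invented for illustration.

import java.util.concurrent.TimeoutException;
import java.util.function.IntSupplier;

public final class ScanCountWaiter {
  private ScanCountWaiter() {
  }

  /** Polls a scan counter until it reaches the expected value or the timeout elapses. */
  static void waitForCount(IntSupplier currentCount, int expected, long pollMillis, long timeoutMillis)
      throws InterruptedException, TimeoutException {
    long deadline = System.currentTimeMillis() + timeoutMillis;
    while (currentCount.getAsInt() != expected) {
      if (System.currentTimeMillis() > deadline) {
        throw new TimeoutException("Scan count " + currentCount.getAsInt() + " never reached " + expected);
      }
      Thread.sleep(pollMillis); // Poll the metric rather than blocking on the scanner internals.
    }
  }
}

Polling a completed-scan counter, rather than hooking the scanner's internal futures, keeps the test decoupled from how the on-demand scanner schedules its work.
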
From f5d4dbf3e796ebe35c108b0889ae541b1f76f4be Mon Sep 17 00:00:00 2001 From: Ethan Rose Date: Mon, 5 May 2025 14:03:23 -0400 Subject: [PATCH 36/62] Improve char data generation, reset scan metrics --- ...eyValueHandlerContainerReconciliation.java | 45 ++++++++++++------- 1 file changed, 30 insertions(+), 15 deletions(-) diff --git a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/keyvalue/TestKeyValueHandlerContainerReconciliation.java b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/keyvalue/TestKeyValueHandlerContainerReconciliation.java index 2b3e86579f5f..d4bb5f6180b7 100644 --- a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/keyvalue/TestKeyValueHandlerContainerReconciliation.java +++ b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/keyvalue/TestKeyValueHandlerContainerReconciliation.java @@ -1,6 +1,7 @@ package org.apache.hadoop.ozone.container.keyvalue; import org.apache.commons.io.IOUtils; +import org.apache.commons.text.RandomStringGenerator; import org.apache.hadoop.hdds.HddsUtils; import org.apache.hadoop.hdds.client.BlockID; import org.apache.hadoop.hdds.conf.OzoneConfiguration; @@ -28,6 +29,7 @@ import org.apache.ozone.test.GenericTestUtils; import org.apache.ratis.thirdparty.com.google.protobuf.ByteString; import org.junit.jupiter.api.AfterAll; +import org.junit.jupiter.api.AfterEach; import org.junit.jupiter.api.Assertions; import org.junit.jupiter.api.BeforeAll; import org.junit.jupiter.api.io.TempDir; @@ -111,7 +113,7 @@ public static Stream corruptionValues() { public static final Logger LOG = LoggerFactory.getLogger(TestKeyValueHandlerContainerReconciliation.class); // All container replicas will be placed in this directory, and the same replicas will be re-used for each test run. -// @TempDir + @TempDir private static Path containerDir; private static DNContainerOperationClient dnClient; private static MockedStatic containerProtocolMock; @@ -130,7 +132,6 @@ public static Stream corruptionValues() { */ @BeforeAll public static void setup() throws Exception { - containerDir = Files.createTempDirectory("reconcile"); LOG.info("Data written to {}", containerDir); dnClient = new DNContainerOperationClient(new OzoneConfiguration(), null, null); datanodes = new ArrayList<>(); @@ -147,11 +148,19 @@ public static void setup() throws Exception { datanodes.forEach(d -> d.scanContainer(CONTAINER_ID)); healthyDataChecksum = assertUniqueChecksumCount(CONTAINER_ID, datanodes, 1); + // Do not count the initial synchronous scan to build the merkle tree towards the scan count in the tests. + // This lets each test run start counting the number of scans from zero. + datanodes.forEach(MockDatanode::resetOnDemandScanCount); containerProtocolMock = Mockito.mockStatic(ContainerProtocolCalls.class); mockContainerProtocolCalls(); } + @AfterEach + public void reset() { + datanodes.forEach(MockDatanode::resetOnDemandScanCount); + } + @AfterAll public static void teardown() { if (containerProtocolMock != null) { @@ -176,8 +185,7 @@ public void testContainerReconciliation(int numBlocksToDelete, int numChunksToCo // Each datanode should have had one on-demand scan during test setup, and a second one after corruption was // introduced. - final int originalScanCount = 2; - waitForExpectedScanCount(originalScanCount); + waitForExpectedScanCount(1); // Reconcile each datanode with its peers. // In a real cluster, SCM will not send a command to reconcile a datanode with itself. 
@@ -190,21 +198,20 @@ public void testContainerReconciliation(int numBlocksToDelete, int numChunksToCo } // Reconciliation should have triggered a second on-demand scan for each replica. Wait for them to finish before // checking the results. - waitForExpectedScanCount(originalScanCount + 1); + waitForExpectedScanCount(2); // After reconciliation, checksums should be the same for all containers. long repairedDataChecksum = assertUniqueChecksumCount(CONTAINER_ID, datanodes, 1); assertEquals(healthyDataChecksum, repairedDataChecksum); } /** - * Uses the on-demand container scanner metrics to wait for the expected number of on demand scans to complete. - * Since the metrics are static and shared across all datanodes in this test, this count should be the total number - * of scans across all nodes. + * Uses the on-demand container scanner metrics to wait for the expected number of on-demand scans to complete on + * every datanode. */ private void waitForExpectedScanCount(int expectedCount) throws Exception { for (MockDatanode datanode: datanodes) { try { - GenericTestUtils.waitFor(() -> datanode.getOnDemandScanCount() == expectedCount, 100, 5_000); + GenericTestUtils.waitFor(() -> datanode.getOnDemandScanCount() == expectedCount, 100, 10_000); } catch (TimeoutException ex) { LOG.error("Timed out waiting for on-demand scan count {} to reach expected count {} on datanode {}", datanode.getOnDemandScanCount(), expectedCount, datanode); @@ -410,6 +417,10 @@ public int getOnDemandScanCount() { return onDemandScanner.getMetrics().getNumContainersScanned(); } + public void resetOnDemandScanCount() { + onDemandScanner.getMetrics().resetNumContainersScanned(); + } + public void reconcileContainer(DNContainerOperationClient dnClient, Collection peers, long containerID) { log.info("Beginning reconciliation on this mock datanode"); @@ -421,9 +432,8 @@ public void reconcileContainer(DNContainerOperationClient dnClient, Collection From: Ethan Rose Date: Mon, 5 May 2025 14:10:04 -0400 Subject: [PATCH 37/62] Update test name --- ...java => TestContainerReconciliationWithMockDatanodes.java} | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) rename hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/keyvalue/{TestKeyValueHandlerContainerReconciliation.java => TestContainerReconciliationWithMockDatanodes.java} (99%) diff --git a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/keyvalue/TestKeyValueHandlerContainerReconciliation.java b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/keyvalue/TestContainerReconciliationWithMockDatanodes.java similarity index 99% rename from hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/keyvalue/TestKeyValueHandlerContainerReconciliation.java rename to hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/keyvalue/TestContainerReconciliationWithMockDatanodes.java index d4bb5f6180b7..d44b898c3158 100644 --- a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/keyvalue/TestKeyValueHandlerContainerReconciliation.java +++ b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/keyvalue/TestContainerReconciliationWithMockDatanodes.java @@ -87,7 +87,7 @@ * replica that is stored in a local directory. The reconciliation client is mocked to return the corresponding local * container for each datanode peer. 
*/ -public class TestKeyValueHandlerContainerReconciliation { +public class TestContainerReconciliationWithMockDatanodes { /** * Number of corrupt blocks and chunks. * @@ -110,7 +110,7 @@ public static Stream corruptionValues() { ); } - public static final Logger LOG = LoggerFactory.getLogger(TestKeyValueHandlerContainerReconciliation.class); + public static final Logger LOG = LoggerFactory.getLogger(TestContainerReconciliationWithMockDatanodes.class); // All container replicas will be placed in this directory, and the same replicas will be re-used for each test run. @TempDir From e0aa7cb8f5e5d677608b710351b4230098a4db9c Mon Sep 17 00:00:00 2001 From: Ethan Rose Date: Mon, 5 May 2025 14:19:24 -0400 Subject: [PATCH 38/62] Checkstyle --- .../ContainerChecksumTreeManager.java | 1 - .../common/helpers/ContainerUtils.java | 4 -- .../container/common/impl/ContainerSet.java | 1 - .../container/common/interfaces/Handler.java | 1 - .../BackgroundContainerMetadataScanner.java | 3 -- .../ozoneimpl/ContainerScannerMixin.java | 6 +-- ...tainerReconciliationWithMockDatanodes.java | 2 - .../keyvalue/TestKeyValueHandler.java | 39 ------------------- 8 files changed, 2 insertions(+), 55 deletions(-) diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/checksum/ContainerChecksumTreeManager.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/checksum/ContainerChecksumTreeManager.java index d442449d6974..615afec9e6ef 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/checksum/ContainerChecksumTreeManager.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/checksum/ContainerChecksumTreeManager.java @@ -37,7 +37,6 @@ import java.util.TreeSet; import java.util.concurrent.locks.Lock; import java.util.concurrent.locks.ReentrantLock; - import org.apache.hadoop.hdds.conf.ConfigurationSource; import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos; import org.apache.hadoop.hdds.scm.container.common.helpers.StorageContainerException; diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/helpers/ContainerUtils.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/helpers/ContainerUtils.java index 1c2dcb1bde76..88309df8d3f2 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/helpers/ContainerUtils.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/helpers/ContainerUtils.java @@ -33,9 +33,6 @@ import java.nio.file.Paths; import java.security.MessageDigest; import java.security.NoSuchAlgorithmException; -import java.time.Duration; -import java.time.Instant; -import java.util.Optional; import java.util.UUID; import java.util.regex.Matcher; import java.util.regex.Pattern; @@ -53,7 +50,6 @@ import org.apache.hadoop.ozone.container.common.impl.ContainerData; import org.apache.hadoop.ozone.container.common.impl.ContainerDataYaml; import org.apache.hadoop.ozone.container.common.impl.ContainerSet; -import org.apache.hadoop.ozone.container.common.interfaces.Container; import org.apache.hadoop.ozone.container.keyvalue.KeyValueContainerData; import org.slf4j.Logger; import org.slf4j.LoggerFactory; diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/impl/ContainerSet.java 
b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/impl/ContainerSet.java index 250d4f62d3d4..8204f58953c8 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/impl/ContainerSet.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/impl/ContainerSet.java @@ -48,7 +48,6 @@ import org.apache.hadoop.ozone.container.common.statemachine.StateContext; import org.apache.hadoop.ozone.container.common.utils.ContainerLogger; import org.apache.hadoop.ozone.container.common.volume.HddsVolume; -import org.apache.hadoop.ozone.container.keyvalue.KeyValueContainer; import org.slf4j.Logger; import org.slf4j.LoggerFactory; diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/interfaces/Handler.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/interfaces/Handler.java index 38efe92147b8..5feec61a667d 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/interfaces/Handler.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/interfaces/Handler.java @@ -22,7 +22,6 @@ import java.io.InputStream; import java.io.OutputStream; import java.util.Collection; -import java.util.Set; import org.apache.hadoop.hdds.conf.ConfigurationSource; import org.apache.hadoop.hdds.protocol.DatanodeDetails; import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos; diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ozoneimpl/BackgroundContainerMetadataScanner.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ozoneimpl/BackgroundContainerMetadataScanner.java index aeb9a0e077dc..f2b6e295b1b4 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ozoneimpl/BackgroundContainerMetadataScanner.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ozoneimpl/BackgroundContainerMetadataScanner.java @@ -20,10 +20,7 @@ import com.google.common.annotations.VisibleForTesting; import java.io.IOException; import java.util.Iterator; -import org.apache.hadoop.ozone.container.common.helpers.ContainerUtils; -import org.apache.hadoop.ozone.container.common.impl.ContainerData; import org.apache.hadoop.ozone.container.common.interfaces.Container; -import org.apache.hadoop.ozone.container.common.volume.HddsVolume; import org.slf4j.Logger; import org.slf4j.LoggerFactory; diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ozoneimpl/ContainerScannerMixin.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ozoneimpl/ContainerScannerMixin.java index e21db9ed61bc..e44e99227bc6 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ozoneimpl/ContainerScannerMixin.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ozoneimpl/ContainerScannerMixin.java @@ -17,12 +17,12 @@ package org.apache.hadoop.ozone.container.ozoneimpl; +import static org.apache.hadoop.hdds.HddsUtils.checksumToString; + import java.io.IOException; import java.time.Duration; import java.time.Instant; import java.util.Optional; - -import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos; import org.apache.hadoop.hdfs.util.Canceler; import org.apache.hadoop.hdfs.util.DataTransferThrottler; import 
org.apache.hadoop.ozone.container.checksum.ContainerChecksumTreeManager; @@ -33,8 +33,6 @@ import org.apache.hadoop.ozone.container.common.volume.HddsVolume; import org.slf4j.Logger; -import static org.apache.hadoop.hdds.HddsUtils.checksumToString; - /** * Mixin to handle common data and metadata scan operations among background and on-demand scanners. */ diff --git a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/keyvalue/TestContainerReconciliationWithMockDatanodes.java b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/keyvalue/TestContainerReconciliationWithMockDatanodes.java index d44b898c3158..eda42d638373 100644 --- a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/keyvalue/TestContainerReconciliationWithMockDatanodes.java +++ b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/keyvalue/TestContainerReconciliationWithMockDatanodes.java @@ -40,7 +40,6 @@ import org.mockito.Mockito; import org.slf4j.Logger; import org.slf4j.LoggerFactory; - import java.io.File; import java.io.IOException; import java.io.UncheckedIOException; @@ -58,7 +57,6 @@ import java.util.Map; import java.util.Optional; import java.util.Random; -import java.util.Set; import java.util.UUID; import java.util.concurrent.ExecutionException; import java.util.concurrent.Future; diff --git a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/keyvalue/TestKeyValueHandler.java b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/keyvalue/TestKeyValueHandler.java index 667f266c97ad..98579721f554 100644 --- a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/keyvalue/TestKeyValueHandler.java +++ b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/keyvalue/TestKeyValueHandler.java @@ -17,7 +17,6 @@ package org.apache.hadoop.ozone.container.keyvalue; -import static java.nio.charset.StandardCharsets.UTF_8; import static org.apache.hadoop.hdds.HddsConfigKeys.HDDS_DATANODE_VOLUME_CHOOSING_POLICY; import static org.apache.hadoop.hdds.HddsConfigKeys.OZONE_METADATA_DIRS; import static org.apache.hadoop.hdds.protocol.MockDatanodeDetails.randomDatanodeDetails; @@ -27,10 +26,7 @@ import static org.apache.hadoop.hdds.scm.ScmConfigKeys.HDDS_DATANODE_DIR_KEY; import static org.apache.hadoop.hdds.scm.ScmConfigKeys.OZONE_SCM_CONTAINER_LAYOUT_KEY; import static org.apache.hadoop.ozone.OzoneConsts.GB; -import static org.apache.hadoop.ozone.container.common.ContainerTestUtils.WRITE_STAGE; import static org.apache.hadoop.ozone.container.common.ContainerTestUtils.createBlockMetaData; -import static org.apache.hadoop.ozone.container.common.ContainerTestUtils.createDbInstancesForTestIfNeeded; -import static org.apache.hadoop.ozone.container.keyvalue.TestContainerCorruptions.getBlock; import static org.assertj.core.api.Assertions.assertThat; import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertNotEquals; @@ -39,7 +35,6 @@ import static org.junit.jupiter.api.Assertions.assertThrows; import static org.junit.jupiter.api.Assertions.assertTrue; import static org.mockito.ArgumentMatchers.anyLong; -import static org.mockito.ArgumentMatchers.anyMap; import static org.mockito.ArgumentMatchers.eq; import static org.mockito.Mockito.any; import static org.mockito.Mockito.atMostOnce; @@ -49,37 +44,22 @@ import static org.mockito.Mockito.verify; import static 
org.mockito.Mockito.when; -import com.google.common.collect.ImmutableList; import java.io.File; import java.io.IOException; -import java.io.UncheckedIOException; -import java.nio.ByteBuffer; import java.nio.file.Files; import java.nio.file.Path; -import java.nio.file.StandardOpenOption; import java.time.Clock; -import java.util.ArrayList; -import java.util.Arrays; import java.util.Collections; import java.util.EnumSet; import java.util.HashMap; -import java.util.HashSet; import java.util.List; -import java.util.Map; -import java.util.Optional; -import java.util.Random; import java.util.Set; import java.util.UUID; -import java.util.concurrent.Future; import java.util.concurrent.atomic.AtomicInteger; -import java.util.stream.Stream; import org.apache.commons.io.FileUtils; -import org.apache.commons.io.IOUtils; -import org.apache.commons.lang3.RandomStringUtils; import org.apache.hadoop.conf.StorageUnit; import org.apache.hadoop.fs.FileUtil; -import org.apache.hadoop.hdds.client.BlockID; import org.apache.hadoop.hdds.conf.OzoneConfiguration; import org.apache.hadoop.hdds.protocol.DatanodeDetails; import org.apache.hadoop.hdds.protocol.MockDatanodeDetails; @@ -88,27 +68,17 @@ import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos.ContainerDataProto.State; import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos.ContainerType; import org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos.ContainerReplicaProto; -import org.apache.hadoop.hdds.scm.XceiverClientSpi; import org.apache.hadoop.hdds.scm.container.common.helpers.StorageContainerException; -import org.apache.hadoop.hdds.scm.pipeline.Pipeline; import org.apache.hadoop.hdds.scm.pipeline.PipelineID; -import org.apache.hadoop.hdds.scm.storage.ContainerProtocolCalls; import org.apache.hadoop.hdds.security.token.TokenVerifier; -import org.apache.hadoop.hdds.utils.db.BatchOperation; -import org.apache.hadoop.ozone.OzoneConsts; -import org.apache.hadoop.ozone.common.Checksum; -import org.apache.hadoop.ozone.common.ChecksumData; import org.apache.hadoop.ozone.container.checksum.ContainerChecksumTreeManager; import org.apache.hadoop.ozone.container.checksum.DNContainerOperationClient; import org.apache.hadoop.ozone.container.common.ContainerTestUtils; -import org.apache.hadoop.ozone.container.common.helpers.BlockData; -import org.apache.hadoop.ozone.container.common.helpers.ChunkInfo; import org.apache.hadoop.ozone.container.common.helpers.ContainerMetrics; import org.apache.hadoop.ozone.container.common.impl.ContainerLayoutVersion; import org.apache.hadoop.ozone.container.common.impl.ContainerSet; import org.apache.hadoop.ozone.container.common.impl.HddsDispatcher; import org.apache.hadoop.ozone.container.common.interfaces.Container; -import org.apache.hadoop.ozone.container.common.interfaces.DBHandle; import org.apache.hadoop.ozone.container.common.interfaces.Handler; import org.apache.hadoop.ozone.container.common.report.IncrementalReportSender; import org.apache.hadoop.ozone.container.common.statemachine.DatanodeConfiguration; @@ -116,28 +86,19 @@ import org.apache.hadoop.ozone.container.common.utils.StorageVolumeUtil; import org.apache.hadoop.ozone.container.common.volume.HddsVolume; import org.apache.hadoop.ozone.container.common.volume.MutableVolumeSet; -import org.apache.hadoop.ozone.container.common.volume.RoundRobinVolumeChoosingPolicy; import org.apache.hadoop.ozone.container.common.volume.StorageVolume; import org.apache.hadoop.ozone.container.common.volume.VolumeSet; -import 
org.apache.hadoop.ozone.container.keyvalue.helpers.BlockUtils; import org.apache.hadoop.ozone.container.ozoneimpl.ContainerController; import org.apache.hadoop.ozone.container.ozoneimpl.ContainerScannerConfiguration; import org.apache.hadoop.ozone.container.ozoneimpl.OnDemandContainerDataScanner; import org.apache.hadoop.util.Sets; import org.apache.ozone.test.GenericTestUtils; -import org.apache.ratis.thirdparty.com.google.protobuf.ByteString; import org.junit.jupiter.api.Assertions; import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; import org.junit.jupiter.api.Timeout; import org.junit.jupiter.api.io.TempDir; -import org.junit.jupiter.params.ParameterizedTest; -import org.junit.jupiter.params.provider.Arguments; -import org.junit.jupiter.params.provider.MethodSource; -import org.mockito.Mock; -import org.mockito.MockedStatic; import org.mockito.Mockito; -import org.mockito.invocation.InvocationOnMock; /** * Unit tests for {@link KeyValueHandler}. From 9322b4a4eb902034614c75f9b7051fd81cb38a43 Mon Sep 17 00:00:00 2001 From: Ethan Rose Date: Mon, 12 May 2025 22:49:07 -0400 Subject: [PATCH 39/62] Fix TODOs dependent on this patch Clean up checksum update invocation using container controller. --- .../ContainerChecksumTreeManager.java | 10 +-- .../container/common/interfaces/Handler.java | 14 ++++ .../container/keyvalue/KeyValueHandler.java | 81 +++++++++++++------ .../BackgroundContainerDataScanner.java | 15 +--- .../ozoneimpl/ContainerController.java | 19 +++++ .../ozoneimpl/ContainerScannerMixin.java | 30 +------ .../OnDemandContainerDataScanner.java | 7 +- .../container/ozoneimpl/OzoneContainer.java | 4 +- ...tainerReconciliationWithMockDatanodes.java | 2 +- .../keyvalue/TestKeyValueHandler.java | 2 +- .../TestBackgroundContainerDataScanner.java | 12 ++- .../TestOnDemandContainerDataScanner.java | 12 ++- .../TestContainerCommandReconciliation.java | 57 ++----------- 13 files changed, 119 insertions(+), 146 deletions(-) diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/checksum/ContainerChecksumTreeManager.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/checksum/ContainerChecksumTreeManager.java index 6d9e1a3d5731..81eb1ddb863f 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/checksum/ContainerChecksumTreeManager.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/checksum/ContainerChecksumTreeManager.java @@ -87,9 +87,6 @@ public void stop() { public ContainerProtos.ContainerChecksumInfo writeContainerDataTree(ContainerData data, ContainerMerkleTreeWriter tree) throws IOException { long containerID = data.getContainerID(); - // If there is an error generating the tree and we cannot obtain a final checksum, use 0 to indicate a metadata - // failure. - long dataChecksum = 0; ContainerProtos.ContainerChecksumInfo checksumInfo = null; Lock writeLock = getLock(containerID); writeLock.lock(); @@ -111,12 +108,9 @@ public ContainerProtos.ContainerChecksumInfo writeContainerDataTree(ContainerDat .setContainerMerkleTree(treeProto); checksumInfo = checksumInfoBuilder.build(); write(data, checksumInfo); - // If write succeeds, update the checksum in memory. Otherwise 0 will be used to indicate the metadata failure. 
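With this change, writeContainerDataTree only persists the tree and returns the resulting ContainerChecksumInfo; it no longer falls back to a checksum of 0 or mutates the in-memory ContainerData. Callers that previously relied on the manager for the in-memory update must now apply it themselves. A minimal sketch of the new calling pattern, matching what KeyValueHandler does later in this series (variable names are illustrative):

    // Persist the tree; the manager no longer touches containerData on success or failure.
    ContainerProtos.ContainerChecksumInfo info =
        checksumManager.writeContainerDataTree(containerData, treeWriter);
    // The caller decides whether and when to update the cached checksum.
    containerData.setDataChecksum(info.getContainerMerkleTree().getDataChecksum());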
- dataChecksum = treeProto.getDataChecksum(); - LOG.debug("Data merkle tree for container {} updated with container checksum {}", containerID, dataChecksum); + LOG.debug("Data merkle tree for container {} updated with container checksum {}", containerID, + treeProto.getDataChecksum()); } finally { - // Even if persisting the tree fails, we should still update the data checksum in memory to report back to SCM. - data.setDataChecksum(dataChecksum); writeLock.unlock(); } return checksumInfo; diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/interfaces/Handler.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/interfaces/Handler.java index 5feec61a667d..ee6e702db561 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/interfaces/Handler.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/interfaces/Handler.java @@ -29,7 +29,9 @@ import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos.ContainerCommandResponseProto; import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos.ContainerType; import org.apache.hadoop.hdds.scm.container.common.helpers.StorageContainerException; +import org.apache.hadoop.ozone.common.Storage; import org.apache.hadoop.ozone.container.checksum.ContainerChecksumTreeManager; +import org.apache.hadoop.ozone.container.checksum.ContainerMerkleTreeWriter; import org.apache.hadoop.ozone.container.checksum.DNContainerOperationClient; import org.apache.hadoop.ozone.container.common.helpers.BlockData; import org.apache.hadoop.ozone.container.common.helpers.ContainerMetrics; @@ -38,6 +40,7 @@ import org.apache.hadoop.ozone.container.common.report.IncrementalReportSender; import org.apache.hadoop.ozone.container.common.transport.server.ratis.DispatcherContext; import org.apache.hadoop.ozone.container.common.volume.VolumeSet; +import org.apache.hadoop.ozone.container.keyvalue.KeyValueContainer; import org.apache.hadoop.ozone.container.keyvalue.KeyValueHandler; import org.apache.hadoop.ozone.container.keyvalue.TarContainerPacker; import org.apache.ratis.statemachine.StateMachine; @@ -152,6 +155,17 @@ public abstract void exportContainer( public abstract void markContainerForClose(Container container) throws IOException; + /** + * Updates the container checksum information on disk and in memory. + * + * @param container The container to update + * @param treeWriter The container merkle tree with the updated information about the container + * @throws IOException For errors sending an ICR. If updating the checksums on disk fails, the checksum information + * will remain unchanged with no exception thrown. + */ + public abstract void updateContainerChecksum(Container container, ContainerMerkleTreeWriter treeWriter) + throws IOException; + /** * Marks the container Unhealthy. Moves the container to UNHEALTHY state. 
* diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/KeyValueHandler.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/KeyValueHandler.java index 0c4a1b72ccdc..b9d6f0cebb21 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/KeyValueHandler.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/KeyValueHandler.java @@ -308,8 +308,8 @@ public ContainerCommandResponseProto handle( @VisibleForTesting static ContainerCommandResponseProto dispatchRequest(KeyValueHandler handler, - ContainerCommandRequestProto request, KeyValueContainer kvContainer, - DispatcherContext dispatcherContext) { + ContainerCommandRequestProto request, KeyValueContainer kvContainer, + DispatcherContext dispatcherContext) { Type cmdType = request.getCmdType(); // Validate the request has been made to the correct datanode with the node id matching. if (kvContainer != null) { @@ -509,7 +509,7 @@ ContainerCommandResponseProto handleCreateContainer( } private void populateContainerPathFields(KeyValueContainer container, - HddsVolume hddsVolume) throws IOException { + HddsVolume hddsVolume) throws IOException { volumeSet.readLock(); try { String idDir = VersionedDatanodeFeatures.ScmHA.chooseContainerPathID( @@ -630,22 +630,21 @@ ContainerCommandResponseProto handleCloseContainer( /** * Write the merkle tree for this container using the existing checksum metadata only. The data is not read or * validated by this method, so it is expected to run quickly. - * + *
* If a checksum file already exists on the disk, this method will do nothing. The existing file would have either * been made from the metadata or data itself so there is no need to recreate it from the metadata. - * - * TODO: This method should be changed to private after HDDS-10374 is merged. + *
* * @param container The container which will have a tree generated. */ - public void createContainerMerkleTreeFromMetadata(Container container) { + private void updateContainerChecksumFromMetadata(Container container) { if (ContainerChecksumTreeManager.checksumFileExist(container)) { return; } try { KeyValueContainerData containerData = (KeyValueContainerData) container.getContainerData(); - updateAndGetContainerChecksum(containerData); + updateAndGetContainerChecksumFromMetadata(containerData); } catch (IOException ex) { LOG.error("Cannot create container checksum for container {} , Exception: ", container.getContainerData().getContainerID(), ex); @@ -1008,7 +1007,7 @@ private void validateChunkChecksumData(ChunkBufferToByteString data, ChunkInfo i if (validateChunkChecksumData) { try { if (data instanceof ChunkBuffer) { - final ChunkBuffer b = (ChunkBuffer)data; + final ChunkBuffer b = (ChunkBuffer) data; Checksum.verifyChecksum(b.duplicate(b.position(), b.limit()), info.getChecksumData(), 0); } else { Checksum.verifyChecksum(data.toByteString(byteBufferToByteString), info.getChecksumData(), 0); @@ -1102,7 +1101,6 @@ ContainerCommandResponseProto handleWriteChunk( /** * Handle Write Chunk operation for closed container. Calls ChunkManager to process the request. - * */ public void writeChunkForClosedContainer(ChunkInfo chunkInfo, BlockID blockID, ChunkBuffer data, KeyValueContainer kvContainer) @@ -1128,6 +1126,7 @@ public void writeChunkForClosedContainer(ChunkInfo chunkInfo, BlockID blockID, /** * Handle Put Block operation for closed container. Calls BlockManager to process the request. * This is primarily used by container reconciliation process to persist the block data for closed container. + * * @param kvContainer - Container for which block data need to be persisted. * @param blockData - Block Data to be persisted (BlockData should have the chunks). * @param blockCommitSequenceId - Block Commit Sequence ID for the block. @@ -1137,7 +1136,7 @@ public void writeChunkForClosedContainer(ChunkInfo chunkInfo, BlockID blockID, */ public void putBlockForClosedContainer(KeyValueContainer kvContainer, BlockData blockData, long blockCommitSequenceId, boolean overwriteBscId) - throws IOException { + throws IOException { Preconditions.checkNotNull(kvContainer); Preconditions.checkNotNull(blockData); long startTime = Time.monotonicNowNanos(); @@ -1293,6 +1292,7 @@ ContainerCommandResponseProto handleUnsupportedOp( /** * Check if container is open. Throw exception otherwise. + * * @param kvContainer * @throws StorageContainerException */ @@ -1337,6 +1337,7 @@ private void checkContainerOpen(KeyValueContainer kvContainer) /** * Check if container is Closed. 
+ * * @param kvContainer */ private boolean checkContainerClose(KeyValueContainer kvContainer) { @@ -1350,8 +1351,8 @@ private boolean checkContainerClose(KeyValueContainer kvContainer) { @Override public Container importContainer(ContainerData originalContainerData, - final InputStream rawContainerStream, - final TarContainerPacker packer) throws IOException { + final InputStream rawContainerStream, + final TarContainerPacker packer) throws IOException { Preconditions.checkState(originalContainerData instanceof KeyValueContainerData, "Should be KeyValueContainerData instance"); @@ -1372,8 +1373,8 @@ public Container importContainer(ContainerData originalContainerData, @Override public void exportContainer(final Container container, - final OutputStream outputStream, - final TarContainerPacker packer) + final OutputStream outputStream, + final TarContainerPacker packer) throws IOException { final KeyValueContainer kvc = (KeyValueContainer) container; kvc.exportContainerData(outputStream, packer); @@ -1399,11 +1400,45 @@ public void markContainerForClose(Container container) } finally { container.writeUnlock(); } - createContainerMerkleTreeFromMetadata(container); + updateContainerChecksumFromMetadata(container); ContainerLogger.logClosing(container.getContainerData()); sendICR(container); } + @Override + public void updateContainerChecksum(Container container, ContainerMerkleTreeWriter treeWriter) + throws StorageContainerException { + ContainerData containerData = container.getContainerData(); + + // Attempt to write the new data checksum to disk. If persisting this fails, keep using the original data + // checksum to prevent divergence from what SCM sees in the ICR vs what datanode peers will see when pulling the + // merkle tree. + long originalDataChecksum = containerData.getDataChecksum(); + long updatedDataChecksum = originalDataChecksum; + try { + updatedDataChecksum = + checksumManager.writeContainerDataTree(containerData, treeWriter).getContainerMerkleTree().getDataChecksum(); + } catch (IOException ex) { + LOG.error("Failed to write container merkle tree for container {}", containerData.getContainerID(), ex); + } + + if (updatedDataChecksum != originalDataChecksum) { + containerData.setDataChecksum(updatedDataChecksum); + String message = + "Container data checksum updated from " + checksumToString(originalDataChecksum) + " to " + + checksumToString(updatedDataChecksum); + sendICR(container); + if (ContainerChecksumTreeManager.hasContainerChecksumFile(containerData)) { + LOG.warn(message); + ContainerLogger.logChecksumUpdated(containerData, originalDataChecksum); + } else { + // If this is the first time the scanner has run with the feature to generate a checksum file, don't + // log a warning for the checksum update. 
+ LOG.debug(message); + } + } + } + @Override public void markContainerUnhealthy(Container container, ScanResult reason) throws IOException { @@ -1462,7 +1497,7 @@ public void quasiCloseContainer(Container container, String reason) } finally { container.writeUnlock(); } - createContainerMerkleTreeFromMetadata(container); + updateContainerChecksumFromMetadata(container); ContainerLogger.logQuasiClosed(container.getContainerData(), reason); sendICR(container); } @@ -1496,7 +1531,7 @@ public void closeContainer(Container container) } finally { container.writeUnlock(); } - createContainerMerkleTreeFromMetadata(container); + updateContainerChecksumFromMetadata(container); ContainerLogger.logClosed(container.getContainerData()); sendICR(container); } @@ -1522,7 +1557,7 @@ public void reconcileContainer(DNContainerOperationClient dnClient, Container originalChecksumInfo = optionalChecksumInfo.get(); } else { // Try creating the checksum info from RocksDB metadata if it is not present. - originalChecksumInfo = updateAndGetContainerChecksum(containerData); + originalChecksumInfo = updateAndGetContainerChecksumFromMetadata(containerData); } // This holds our current most up-to-date checksum info that we are using for the container. ContainerProtos.ContainerChecksumInfo latestChecksumInfo = originalChecksumInfo; @@ -1667,18 +1702,18 @@ containerID, peer, checksumToString(previousDataChecksum), checksumToString(late * Updates the container merkle tree based on the RocksDb's block metadata and returns the updated checksum info. * @param containerData - Container data for which the container merkle tree needs to be updated. */ - private ContainerProtos.ContainerChecksumInfo updateAndGetContainerChecksum(KeyValueContainerData containerData) - throws IOException { + private ContainerProtos.ContainerChecksumInfo updateAndGetContainerChecksumFromMetadata( + KeyValueContainerData containerData) throws IOException { ContainerMerkleTreeWriter merkleTree = new ContainerMerkleTreeWriter(); try (DBHandle dbHandle = BlockUtils.getDB(containerData, conf); BlockIterator blockIterator = dbHandle.getStore(). getBlockIterator(containerData.getContainerID())) { while (blockIterator.hasNext()) { BlockData blockData = blockIterator.nextBlock(); - List chunkInfos = blockData.getChunks(); - // TODO: Add empty blocks to the merkle tree. Done in HDDS-10374, needs to be backported. + merkleTree.addBlock(blockData.getLocalID()); // Assume all chunks are healthy when building the tree from metadata. Scanner will identify corruption when // it runs after. 
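Stepping back to updateContainerChecksum above: the contract is that a failed disk write leaves both the persisted and the in-memory checksum unchanged and sends no ICR, so SCM and peer datanodes keep seeing the same value. A hedged test sketch of that contract, assuming Mockito and that the handler under test was constructed with the mocked manager (the setup shown is illustrative, not an existing test):

    // Force the tree write to fail, then verify the in-memory checksum is untouched.
    ContainerChecksumTreeManager failingManager = mock(ContainerChecksumTreeManager.class);
    when(failingManager.writeContainerDataTree(any(), any()))
        .thenThrow(new IOException("simulated disk failure"));
    long originalChecksum = container.getContainerData().getDataChecksum();
    handler.updateContainerChecksum(container, new ContainerMerkleTreeWriter());
    // An unchanged checksum means the divergence guard held and no ICR was sent.
    assertEquals(originalChecksum, container.getContainerData().getDataChecksum());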
+ List chunkInfos = blockData.getChunks(); merkleTree.addChunks(blockData.getLocalID(), true, chunkInfos); } } diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ozoneimpl/BackgroundContainerDataScanner.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ozoneimpl/BackgroundContainerDataScanner.java index a15409f66a76..0229b3c8618e 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ozoneimpl/BackgroundContainerDataScanner.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ozoneimpl/BackgroundContainerDataScanner.java @@ -48,12 +48,11 @@ public class BackgroundContainerDataScanner extends private final Canceler canceler; private static final String NAME_FORMAT = "ContainerDataScanner(%s)"; private final ContainerDataScannerMetrics metrics; - private final ContainerChecksumTreeManager checksumManager; private final ContainerScannerMixin scannerMixin; public BackgroundContainerDataScanner(ContainerScannerConfiguration conf, ContainerController controller, - HddsVolume volume, ContainerChecksumTreeManager checksumManager) { + HddsVolume volume) { super(String.format(NAME_FORMAT, volume), conf.getDataScanInterval()); this.controller = controller; this.volume = volume; @@ -61,7 +60,6 @@ public BackgroundContainerDataScanner(ContainerScannerConfiguration conf, canceler = new Canceler(); this.metrics = ContainerDataScannerMetrics.create(volume.toString()); this.metrics.setStorageDirectory(volume.toString()); - this.checksumManager = checksumManager; this.scannerMixin = new ContainerScannerMixin(LOG, controller, metrics, conf); } @@ -74,7 +72,7 @@ public void scanContainer(Container c) shutdown("The volume has failed."); return; } - scannerMixin.scanData(c, checksumManager, throttler, canceler); + scannerMixin.scanData(c, throttler, canceler); } @Override @@ -82,15 +80,6 @@ public Iterator> getContainerIterator() { return controller.getContainers(volume); } - private static void logScanStart(ContainerData containerData) { - if (LOG.isDebugEnabled()) { - Optional scanTimestamp = containerData.lastDataScanTime(); - Object lastScanTime = scanTimestamp.map(ts -> "at " + ts).orElse("never"); - LOG.debug("Scanning container {}, last scanned {}", - containerData.getContainerID(), lastScanTime); - } - } - @Override public synchronized void shutdown() { shutdown(""); diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ozoneimpl/ContainerController.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ozoneimpl/ContainerController.java index 37e50953f050..a7ddcc1eb175 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ozoneimpl/ContainerController.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ozoneimpl/ContainerController.java @@ -29,6 +29,7 @@ import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos.ContainerType; import org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos.ContainerReportsProto; import org.apache.hadoop.hdds.scm.container.ContainerNotFoundException; +import org.apache.hadoop.ozone.container.checksum.ContainerMerkleTreeWriter; import org.apache.hadoop.ozone.container.checksum.DNContainerOperationClient; import org.apache.hadoop.ozone.container.common.impl.ContainerData; import org.apache.hadoop.ozone.container.common.impl.ContainerSet; @@ -36,6 +37,7 @@ 
import org.apache.hadoop.ozone.container.common.interfaces.Handler; import org.apache.hadoop.ozone.container.common.interfaces.ScanResult; import org.apache.hadoop.ozone.container.common.volume.HddsVolume; +import org.apache.hadoop.ozone.container.keyvalue.KeyValueContainer; import org.apache.hadoop.ozone.container.keyvalue.TarContainerPacker; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -123,6 +125,23 @@ public boolean markContainerUnhealthy(final long containerId, ScanResult reason) return true; } } + /** + * Updates the container checksum information on disk and in memory. + * + * @param containerId The ID of the container to update + * @param treeWriter The container merkle tree with the updated information about the container + * @throws IOException For errors sending an ICR. If updating the checksums on disk fails, the checksum information + * will remain unchanged with no exception thrown. + */ + public void updateContainerChecksum(long containerId, ContainerMerkleTreeWriter treeWriter) + throws IOException { + Container container = getContainer(containerId); + if (container == null) { + LOG.warn("Container {} not found, may be deleted, skip updating checksums", containerId); + } else { + getHandler(container).updateContainerChecksum(container, treeWriter); + } + } /** * Returns the container report. diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ozoneimpl/ContainerScannerMixin.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ozoneimpl/ContainerScannerMixin.java index e44e99227bc6..9b1bad11c4ca 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ozoneimpl/ContainerScannerMixin.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ozoneimpl/ContainerScannerMixin.java @@ -50,46 +50,20 @@ public ContainerScannerMixin(Logger log, ContainerController controller, this.minScanGap = conf.getContainerScanMinGap(); } - public void scanData(Container container, ContainerChecksumTreeManager checksumManager, - DataTransferThrottler throttler, Canceler canceler) + public void scanData(Container container, DataTransferThrottler throttler, Canceler canceler) throws IOException, InterruptedException { if (!shouldScanData(container)) { return; } ContainerData containerData = container.getContainerData(); long containerId = containerData.getContainerID(); - long originalDataChecksum = containerData.getDataChecksum(); - long updatedDataChecksum = originalDataChecksum; - boolean hasChecksumFile = ContainerChecksumTreeManager.hasContainerChecksumFile(containerData); logScanStart(containerData); DataScanResult result = container.scanData(throttler, canceler); if (result.isDeleted()) { log.debug("Container [{}] has been deleted during the data scan.", containerId); } else { - // Merkle tree write failure should not abort the scanning process. Continue marking the scan as completed. - try { - // Also updates the data checksum in containerData. 
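Taken together with the new ContainerController method above, the scan path collapses to a single delegation: the helper scans, then hands the resulting tree to the controller, which routes it to the handler logic shown earlier. A condensed sketch of scanData after this patch (simplified from the surrounding hunks; gap checks and metrics omitted):

    // Condensed flow of the scan helper's scanData after this change.
    DataScanResult result = container.scanData(throttler, canceler);
    if (!result.isDeleted()) {
      // The controller persists the tree, updates the cached checksum,
      // and sends an ICR only if the checksum changed.
      controller.updateContainerChecksum(containerId, result.getDataTree());
      if (!result.isHealthy()) {
        handleUnhealthyScanResult(containerId, result);
      }
    }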
- checksumManager.writeContainerDataTree(containerData, result.getDataTree()); - updatedDataChecksum = containerData.getDataChecksum(); - } catch (IOException ex) { - log.error("Failed to write container merkle tree for container {}", containerId, ex); - } - - if (updatedDataChecksum != originalDataChecksum) { - String message = - "Container data checksum updated from " + checksumToString(originalDataChecksum) + " to " + - checksumToString(updatedDataChecksum); - if (hasChecksumFile) { - log.warn(message); - ContainerLogger.logChecksumUpdated(containerData, originalDataChecksum); - } else { - // If this is the first time the scanner has run with the feature to generate a checksum file, don't - // log a warning for the checksum update. - log.debug(message); - } - } - + controller.updateContainerChecksum(containerId, result.getDataTree()); if (!result.isHealthy()) { handleUnhealthyScanResult(containerId, result); } diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ozoneimpl/OnDemandContainerDataScanner.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ozoneimpl/OnDemandContainerDataScanner.java index b26f45dec981..0083f5b82224 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ozoneimpl/OnDemandContainerDataScanner.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ozoneimpl/OnDemandContainerDataScanner.java @@ -44,19 +44,16 @@ public final class OnDemandContainerDataScanner { private final ConcurrentHashMap .KeySetView containerRescheduleCheckSet; private final OnDemandScannerMetrics metrics; - private final ContainerChecksumTreeManager checksumManager; private final ContainerScannerMixin scannerMixin; public OnDemandContainerDataScanner( - ContainerScannerConfiguration conf, ContainerController controller, - ContainerChecksumTreeManager checksumManager) { + ContainerScannerConfiguration conf, ContainerController controller) { throttler = new DataTransferThrottler( conf.getOnDemandBandwidthPerVolume()); canceler = new Canceler(); metrics = OnDemandScannerMetrics.create(); scanExecutor = Executors.newSingleThreadExecutor(); containerRescheduleCheckSet = ConcurrentHashMap.newKeySet(); - this.checksumManager = checksumManager; this.scannerMixin = new ContainerScannerMixin(LOG, controller, metrics, conf); } @@ -87,7 +84,7 @@ private void removeContainerFromScheduledContainers( private void performOnDemandScan(Container container) { try { - scannerMixin.scanData(container, checksumManager, throttler, canceler); + scannerMixin.scanData(container, throttler, canceler); } catch (IOException e) { LOG.warn("Unexpected exception while scanning container " + container.getContainerData().getContainerID(), e); diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ozoneimpl/OzoneContainer.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ozoneimpl/OzoneContainer.java index bf0fcede2a24..a03339944868 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ozoneimpl/OzoneContainer.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ozoneimpl/OzoneContainer.java @@ -399,7 +399,7 @@ private void initContainerScanner(ContainerScannerConfiguration c) { dataScanners = new ArrayList<>(); for (StorageVolume v : volumeSet.getVolumesList()) { BackgroundContainerDataScanner s = - new BackgroundContainerDataScanner(c, 
controller, (HddsVolume) v, checksumTreeManager); + new BackgroundContainerDataScanner(c, controller, (HddsVolume) v); s.start(); dataScanners.add(s); backgroundScanners.add(s); @@ -433,7 +433,7 @@ private void initOnDemandContainerScanner(ContainerScannerConfiguration c) { "so the on-demand container data scanner will not start."); return; } - onDemandScanner = new OnDemandContainerDataScanner(c, controller, checksumTreeManager); + onDemandScanner = new OnDemandContainerDataScanner(c, controller); containerSet.registerContainerScanHandler(onDemandScanner::scanContainer); } diff --git a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/keyvalue/TestContainerReconciliationWithMockDatanodes.java b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/keyvalue/TestContainerReconciliationWithMockDatanodes.java index eda42d638373..9022845e3053 100644 --- a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/keyvalue/TestContainerReconciliationWithMockDatanodes.java +++ b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/keyvalue/TestContainerReconciliationWithMockDatanodes.java @@ -304,7 +304,7 @@ public MockDatanode(DatanodeDetails dnDetails, Path tempDir) throws IOException ContainerController controller = new ContainerController(containerSet, Collections.singletonMap(ContainerProtos.ContainerType.KeyValueContainer, handler)); onDemandScanner = new OnDemandContainerDataScanner( - conf.getObject(ContainerScannerConfiguration.class), controller, handler.getChecksumManager()); + conf.getObject(ContainerScannerConfiguration.class), controller); // Register the on-demand container scanner with the container set used by the KeyValueHandler. containerSet.registerContainerScanHandler(onDemandScanner::scanContainer); } diff --git a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/keyvalue/TestKeyValueHandler.java b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/keyvalue/TestKeyValueHandler.java index 98579721f554..8d50f676c059 100644 --- a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/keyvalue/TestKeyValueHandler.java +++ b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/keyvalue/TestKeyValueHandler.java @@ -658,7 +658,7 @@ private KeyValueHandler createKeyValueHandler(Path path) throws IOException { ContainerController controller = new ContainerController(containerSet, Collections.singletonMap(ContainerType.KeyValueContainer, kvHandler)); OnDemandContainerDataScanner onDemandScanner = new OnDemandContainerDataScanner( - conf.getObject(ContainerScannerConfiguration.class), controller, kvHandler.getChecksumManager()); + conf.getObject(ContainerScannerConfiguration.class), controller); containerSet.registerContainerScanHandler(onDemandScanner::scanContainer); return kvHandler; diff --git a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/ozoneimpl/TestBackgroundContainerDataScanner.java b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/ozoneimpl/TestBackgroundContainerDataScanner.java index 4d12c11a77a6..ae13cfcee90c 100644 --- a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/ozoneimpl/TestBackgroundContainerDataScanner.java +++ b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/ozoneimpl/TestBackgroundContainerDataScanner.java @@ -61,13 +61,11 @@ public class 
TestBackgroundContainerDataScanner extends TestContainerScannersAbstract { private BackgroundContainerDataScanner scanner; - private ContainerChecksumTreeManager mockChecksumManager; @BeforeEach public void setup() { super.setup(); - mockChecksumManager = mock(ContainerChecksumTreeManager.class); - scanner = new BackgroundContainerDataScanner(conf, controller, vol, mockChecksumManager); + scanner = new BackgroundContainerDataScanner(conf, controller, vol); } @Test @@ -253,14 +251,14 @@ public void testMerkleTreeWritten() throws Exception { // Merkle trees should not be written for open or deleted containers for (Container container : Arrays.asList(openContainer, openCorruptMetadata, deletedContainer)) { - verify(mockChecksumManager, times(0)) - .writeContainerDataTree(eq(container.getContainerData()), any()); + verify(controller, times(0)) + .updateContainerChecksum(eq(container.getContainerData().getContainerID()), any()); } // Merkle trees should be written for all other containers. for (Container container : Arrays.asList(healthy, corruptData)) { - verify(mockChecksumManager, times(1)) - .writeContainerDataTree(eq(container.getContainerData()), any()); + verify(controller, times(1)) + .updateContainerChecksum(eq(container.getContainerData().getContainerID()), any()); } } } diff --git a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/ozoneimpl/TestOnDemandContainerDataScanner.java b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/ozoneimpl/TestOnDemandContainerDataScanner.java index 548a6c61b034..eb87c0d94f90 100644 --- a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/ozoneimpl/TestOnDemandContainerDataScanner.java +++ b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/ozoneimpl/TestOnDemandContainerDataScanner.java @@ -68,13 +68,11 @@ public class TestOnDemandContainerDataScanner extends TestContainerScannersAbstract { private OnDemandContainerDataScanner onDemandScanner; - private ContainerChecksumTreeManager mockChecksumManager; @BeforeEach public void setup() { super.setup(); - mockChecksumManager = mock(ContainerChecksumTreeManager .class); - onDemandScanner = new OnDemandContainerDataScanner(conf, controller, mockChecksumManager); + onDemandScanner = new OnDemandContainerDataScanner(conf, controller); } @Test @@ -293,15 +291,15 @@ public void testMerkleTreeWritten() throws Exception { // Merkle trees should not be written for open or deleted containers for (Container container : Arrays.asList(openContainer, openCorruptMetadata, deletedContainer)) { scanContainer(container); - verify(mockChecksumManager, times(0)) - .writeContainerDataTree(eq(container.getContainerData()), any()); + verify(controller, times(0)) + .updateContainerChecksum(eq(container.getContainerData().getContainerID()), any()); } // Merkle trees should be written for all other containers. 
for (Container container : Arrays.asList(healthy, corruptData)) { scanContainer(container); - verify(mockChecksumManager, times(1)) - .writeContainerDataTree(eq(container.getContainerData()), any()); + verify(controller, times(1)) + .updateContainerChecksum(eq(container.getContainerData().getContainerID()), any()); } } diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/dn/checksum/TestContainerCommandReconciliation.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/dn/checksum/TestContainerCommandReconciliation.java index ad1459466b38..63d0ebbfb0f2 100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/dn/checksum/TestContainerCommandReconciliation.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/dn/checksum/TestContainerCommandReconciliation.java @@ -148,10 +148,6 @@ public static void init() throws Exception { conf.set(OZONE_METADATA_DIRS, testDir.getAbsolutePath()); conf.setStorageSize(OZONE_SCM_CHUNK_SIZE_KEY, 128 * 1024, StorageUnit.BYTES); conf.setStorageSize(OZONE_SCM_BLOCK_SIZE, 512 * 1024, StorageUnit.BYTES); - // Disable the container scanner so it does not create merkle tree files that interfere with this test. - // TODO: Currently container scrub sets the checksum to 0, Revert this after HDDS-10374 is merged. - conf.getObject(ContainerScannerConfiguration.class).setEnabled(false); - conf.setBoolean("hdds.container.scrub.enabled", false); startMiniKdc(); setSecureConfig(); @@ -376,21 +372,14 @@ public void testContainerChecksumWithBlockMissing() throws Exception { db.getStore().flushDB(); } - // TODO: Use On-demand container scanner to build the new container merkle tree. (HDDS-10374) - Files.deleteIfExists(getContainerChecksumFile(container.getContainerData()).toPath()); - kvHandler.createContainerMerkleTreeFromMetadata(container); + datanodeStateMachine.getContainer().getContainerSet().scanContainer(containerID); + waitForDataChecksumsAtSCM(containerID, 2); ContainerProtos.ContainerChecksumInfo containerChecksumAfterBlockDelete = readChecksumFile(container.getContainerData()); long dataChecksumAfterBlockDelete = containerChecksumAfterBlockDelete.getContainerMerkleTree().getDataChecksum(); // Checksum should have changed after block delete. assertNotEquals(oldDataChecksum, dataChecksumAfterBlockDelete); - // Since the container is already closed, we have manually updated the container checksum file. - // This doesn't update the checksum reported to SCM, and we need to trigger an ICR. - // Marking a container unhealthy will send an ICR. - kvHandler.markContainerUnhealthy(container, MetadataScanResult.deleted()); - waitForDataChecksumsAtSCM(containerID, 2); - // 3. Reconcile the container. cluster.getStorageContainerLocationClient().reconcileContainer(containerID); // Compare and check if dataChecksum is same on all replicas. @@ -460,9 +449,8 @@ public void testContainerChecksumChunkCorruption() throws Exception { db.getStore().flushDB(); } - Files.deleteIfExists(getContainerChecksumFile(container.getContainerData()).toPath()); - kvHandler.createContainerMerkleTreeFromMetadata(container); - // To set unhealthy for chunks that are corrupted. 
+ datanodeStateMachine.getContainer().getContainerSet().scanContainer(containerID); + waitForDataChecksumsAtSCM(containerID, 2); ContainerProtos.ContainerChecksumInfo containerChecksumAfterChunkCorruption = readChecksumFile(container.getContainerData()); long dataChecksumAfterAfterChunkCorruption = containerChecksumAfterChunkCorruption @@ -470,32 +458,6 @@ public void testContainerChecksumChunkCorruption() throws Exception { // Checksum should have changed after chunk corruption. assertNotEquals(oldDataChecksum, dataChecksumAfterAfterChunkCorruption); - // 3. Set Unhealthy for first chunk of all blocks. This should be done by the scanner, Until then this is a - // manual step. - // TODO: Use On-demand container scanner to build the new container merkle tree (HDDS-10374) - Random random = new Random(); - ContainerProtos.ContainerChecksumInfo.Builder builder = containerChecksumAfterChunkCorruption.toBuilder(); - List blockMerkleTreeList = builder.getContainerMerkleTree() - .getBlockMerkleTreeList(); - builder.getContainerMerkleTreeBuilder().clearBlockMerkleTree(); - for (ContainerProtos.BlockMerkleTree blockMerkleTree : blockMerkleTreeList) { - ContainerProtos.BlockMerkleTree.Builder blockMerkleTreeBuilder = blockMerkleTree.toBuilder(); - List chunkMerkleTreeBuilderList = - blockMerkleTreeBuilder.getChunkMerkleTreeBuilderList(); - chunkMerkleTreeBuilderList.get(0).setIsHealthy(false).setDataChecksum(random.nextLong()); - blockMerkleTreeBuilder.setDataChecksum(random.nextLong()); - builder.getContainerMerkleTreeBuilder().addBlockMerkleTree(blockMerkleTreeBuilder.build()); - } - builder.getContainerMerkleTreeBuilder().setDataChecksum(random.nextLong()); - Files.deleteIfExists(getContainerChecksumFile(container.getContainerData()).toPath()); - writeContainerDataTreeProto(container.getContainerData(), builder.getContainerMerkleTree()); - - // Since the container is already closed, we have manually updated the container checksum file. - // This doesn't update the checksum reported to SCM, and we need to trigger an ICR. - // Marking a container unhealthy will send an ICR. - kvHandler.markContainerUnhealthy(container, MetadataScanResult.deleted()); - waitForDataChecksumsAtSCM(containerID, 2); - // 4. Reconcile the container. cluster.getStorageContainerLocationClient().reconcileContainer(containerID); // Compare and check if dataChecksum is same on all replicas. @@ -557,22 +519,15 @@ public void testDataChecksumReportedAtSCM() throws Exception { db.getStore().flushDB(); } - // TODO: Use On-demand container scanner to build the new container merkle tree. (HDDS-10374) - Files.deleteIfExists(getContainerChecksumFile(container.getContainerData()).toPath()); - kvHandler.createContainerMerkleTree(container); + datanodeStateMachine.getContainer().getContainerSet().scanContainer(containerID); + waitForDataChecksumsAtSCM(containerID, 2); ContainerProtos.ContainerChecksumInfo containerChecksumAfterBlockDelete = readChecksumFile(container.getContainerData()); long dataChecksumAfterBlockDelete = containerChecksumAfterBlockDelete.getContainerMerkleTree().getDataChecksum(); // Checksum should have changed after block delete. assertNotEquals(oldDataChecksum, dataChecksumAfterBlockDelete); - // Since the container is already closed, we have manually updated the container checksum file. - // This doesn't update the checksum reported to SCM, and we need to trigger an ICR. - // Marking a container unhealthy will send an ICR. 
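Across these reconciliation tests, the old manual sequence (delete the checksum file, rebuild it from metadata, then mark the container unhealthy just to force an ICR) is replaced by one scanner-driven pattern, condensed below from the surrounding hunks:

    // Trigger the on-demand scanner; it rebuilds the merkle tree on disk.
    datanodeStateMachine.getContainer().getContainerSet().scanContainer(containerID);
    // Wait until SCM has received the two distinct data checksums via ICR.
    waitForDataChecksumsAtSCM(containerID, 2);

The scan itself rewrites the tree and triggers the ICR, so the tests no longer need to mark a healthy container unhealthy as a side channel for reporting.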
- kvHandler.markContainerUnhealthy(container, MetadataScanResult.deleted()); - waitForDataChecksumsAtSCM(containerID, 2); scmClient.reconcileContainer(containerID); - waitForDataChecksumsAtSCM(containerID, 1); // Check non-zero checksum after container reconciliation containerReplicas = scmClient.getContainerReplicas(containerID, ClientVersion.CURRENT_VERSION); From 9b75957eb1e6173e97793396faf3971efb16f3cc Mon Sep 17 00:00:00 2001 From: Ethan Rose Date: Mon, 12 May 2025 23:07:29 -0400 Subject: [PATCH 40/62] Rename container scan helper --- .../ozoneimpl/BackgroundContainerDataScanner.java | 9 +++------ .../ozoneimpl/BackgroundContainerMetadataScanner.java | 4 ++-- ...ainerScannerMixin.java => ContainerScanHelper.java} | 10 +++------- .../ozoneimpl/OnDemandContainerDataScanner.java | 5 ++--- 4 files changed, 10 insertions(+), 18 deletions(-) rename hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ozoneimpl/{ContainerScannerMixin.java => ContainerScanHelper.java} (93%) diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ozoneimpl/BackgroundContainerDataScanner.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ozoneimpl/BackgroundContainerDataScanner.java index 0229b3c8618e..a9e0e0661dc7 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ozoneimpl/BackgroundContainerDataScanner.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ozoneimpl/BackgroundContainerDataScanner.java @@ -19,13 +19,10 @@ import com.google.common.annotations.VisibleForTesting; import java.io.IOException; -import java.time.Instant; import java.util.Iterator; -import java.util.Optional; + import org.apache.hadoop.hdfs.util.Canceler; import org.apache.hadoop.hdfs.util.DataTransferThrottler; -import org.apache.hadoop.ozone.container.checksum.ContainerChecksumTreeManager; -import org.apache.hadoop.ozone.container.common.impl.ContainerData; import org.apache.hadoop.ozone.container.common.interfaces.Container; import org.apache.hadoop.ozone.container.common.volume.HddsVolume; import org.slf4j.Logger; @@ -48,7 +45,7 @@ public class BackgroundContainerDataScanner extends private final Canceler canceler; private static final String NAME_FORMAT = "ContainerDataScanner(%s)"; private final ContainerDataScannerMetrics metrics; - private final ContainerScannerMixin scannerMixin; + private final ContainerScanHelper scannerMixin; public BackgroundContainerDataScanner(ContainerScannerConfiguration conf, ContainerController controller, @@ -60,7 +57,7 @@ public BackgroundContainerDataScanner(ContainerScannerConfiguration conf, canceler = new Canceler(); this.metrics = ContainerDataScannerMetrics.create(volume.toString()); this.metrics.setStorageDirectory(volume.toString()); - this.scannerMixin = new ContainerScannerMixin(LOG, controller, metrics, conf); + this.scannerMixin = new ContainerScanHelper(LOG, controller, metrics, conf); } @Override diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ozoneimpl/BackgroundContainerMetadataScanner.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ozoneimpl/BackgroundContainerMetadataScanner.java index f2b6e295b1b4..c82d23b15359 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ozoneimpl/BackgroundContainerMetadataScanner.java +++ 
b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ozoneimpl/BackgroundContainerMetadataScanner.java @@ -35,14 +35,14 @@ public class BackgroundContainerMetadataScanner extends LoggerFactory.getLogger(BackgroundContainerMetadataScanner.class); private final ContainerMetadataScannerMetrics metrics; private final ContainerController controller; - private final ContainerScannerMixin scannerMixin; + private final ContainerScanHelper scannerMixin; public BackgroundContainerMetadataScanner(ContainerScannerConfiguration conf, ContainerController controller) { super("ContainerMetadataScanner", conf.getMetadataScanInterval()); this.controller = controller; this.metrics = ContainerMetadataScannerMetrics.create(); - this.scannerMixin = new ContainerScannerMixin(LOG, controller, metrics, conf); + this.scannerMixin = new ContainerScanHelper(LOG, controller, metrics, conf); } @Override diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ozoneimpl/ContainerScannerMixin.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ozoneimpl/ContainerScanHelper.java similarity index 93% rename from hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ozoneimpl/ContainerScannerMixin.java rename to hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ozoneimpl/ContainerScanHelper.java index 9b1bad11c4ca..b3aa61862d5f 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ozoneimpl/ContainerScannerMixin.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ozoneimpl/ContainerScanHelper.java @@ -17,33 +17,29 @@ package org.apache.hadoop.ozone.container.ozoneimpl; -import static org.apache.hadoop.hdds.HddsUtils.checksumToString; - import java.io.IOException; import java.time.Duration; import java.time.Instant; import java.util.Optional; import org.apache.hadoop.hdfs.util.Canceler; import org.apache.hadoop.hdfs.util.DataTransferThrottler; -import org.apache.hadoop.ozone.container.checksum.ContainerChecksumTreeManager; import org.apache.hadoop.ozone.container.common.impl.ContainerData; import org.apache.hadoop.ozone.container.common.interfaces.Container; import org.apache.hadoop.ozone.container.common.interfaces.ScanResult; -import org.apache.hadoop.ozone.container.common.utils.ContainerLogger; import org.apache.hadoop.ozone.container.common.volume.HddsVolume; import org.slf4j.Logger; /** * Mixin to handle common data and metadata scan operations among background and on-demand scanners. 
*/ -public class ContainerScannerMixin { +public class ContainerScanHelper { private final Logger log; private final ContainerController controller; private final AbstractContainerScannerMetrics metrics; private final long minScanGap; - public ContainerScannerMixin(Logger log, ContainerController controller, - AbstractContainerScannerMetrics metrics, ContainerScannerConfiguration conf) { + public ContainerScanHelper(Logger log, ContainerController controller, + AbstractContainerScannerMetrics metrics, ContainerScannerConfiguration conf) { this.log = log; this.controller = controller; this.metrics = metrics; diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ozoneimpl/OnDemandContainerDataScanner.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ozoneimpl/OnDemandContainerDataScanner.java index 0083f5b82224..dcc2f2f8bcf9 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ozoneimpl/OnDemandContainerDataScanner.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ozoneimpl/OnDemandContainerDataScanner.java @@ -26,7 +26,6 @@ import java.util.concurrent.TimeUnit; import org.apache.hadoop.hdfs.util.Canceler; import org.apache.hadoop.hdfs.util.DataTransferThrottler; -import org.apache.hadoop.ozone.container.checksum.ContainerChecksumTreeManager; import org.apache.hadoop.ozone.container.common.interfaces.Container; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -44,7 +43,7 @@ public final class OnDemandContainerDataScanner { private final ConcurrentHashMap .KeySetView containerRescheduleCheckSet; private final OnDemandScannerMetrics metrics; - private final ContainerScannerMixin scannerMixin; + private final ContainerScanHelper scannerMixin; public OnDemandContainerDataScanner( ContainerScannerConfiguration conf, ContainerController controller) { @@ -54,7 +53,7 @@ public OnDemandContainerDataScanner( metrics = OnDemandScannerMetrics.create(); scanExecutor = Executors.newSingleThreadExecutor(); containerRescheduleCheckSet = ConcurrentHashMap.newKeySet(); - this.scannerMixin = new ContainerScannerMixin(LOG, controller, metrics, conf); + this.scannerMixin = new ContainerScanHelper(LOG, controller, metrics, conf); } public Optional> scanContainer(Container container) { From f615275674740c98549c3c37b9fc7b29cfc2729b Mon Sep 17 00:00:00 2001 From: Ethan Rose Date: Mon, 12 May 2025 23:32:08 -0400 Subject: [PATCH 41/62] Add comment on failure type --- .../ozone/container/keyvalue/TestContainerCorruptions.java | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/keyvalue/TestContainerCorruptions.java b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/keyvalue/TestContainerCorruptions.java index d8ae48ecd021..28482088fc27 100644 --- a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/keyvalue/TestContainerCorruptions.java +++ b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/keyvalue/TestContainerCorruptions.java @@ -111,7 +111,10 @@ public enum TestContainerCorruptions { TRUNCATED_BLOCK((container, blockID) -> { File blockFile = getBlock(container, blockID); truncateFile(blockFile); - }, ContainerScanError.FailureType.MISSING_CHUNK); + }, + // This test completely removes all content from the block file. 
The scanner will see this as all the chunks in + // the block missing, hence MISSING_CHUNK instead of INCONSISTENT_CHUNK_LENGTH. + ContainerScanError.FailureType.MISSING_CHUNK); private final BiConsumer, Long> corruption; private final ContainerScanError.FailureType expectedResult; From dadc829bdaa8c1c96a3044c4c96b2908e395c982 Mon Sep 17 00:00:00 2001 From: Ethan Rose Date: Mon, 12 May 2025 23:42:01 -0400 Subject: [PATCH 42/62] Fix checkstyle unique to this PR --- .../container/common/interfaces/Handler.java | 2 - .../container/keyvalue/KeyValueHandler.java | 3 +- .../BackgroundContainerDataScanner.java | 1 - .../ozoneimpl/ContainerController.java | 1 - .../ContainerMerkleTreeTestUtils.java | 22 ++++---- .../keyvalue/TestKeyValueContainerCheck.java | 51 +++++++++---------- .../keyvalue/TestKeyValueHandler.java | 1 - .../TestBackgroundContainerDataScanner.java | 35 ++++++------- .../TestOnDemandContainerDataScanner.java | 7 +-- .../TestContainerCommandReconciliation.java | 4 -- ...groundContainerDataScannerIntegration.java | 9 ++-- ...ndContainerMetadataScannerIntegration.java | 9 ++-- ...DemandContainerDataScannerIntegration.java | 9 ++-- 13 files changed, 67 insertions(+), 87 deletions(-) diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/interfaces/Handler.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/interfaces/Handler.java index ee6e702db561..e1db3634694b 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/interfaces/Handler.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/interfaces/Handler.java @@ -29,7 +29,6 @@ import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos.ContainerCommandResponseProto; import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos.ContainerType; import org.apache.hadoop.hdds.scm.container.common.helpers.StorageContainerException; -import org.apache.hadoop.ozone.common.Storage; import org.apache.hadoop.ozone.container.checksum.ContainerChecksumTreeManager; import org.apache.hadoop.ozone.container.checksum.ContainerMerkleTreeWriter; import org.apache.hadoop.ozone.container.checksum.DNContainerOperationClient; @@ -40,7 +39,6 @@ import org.apache.hadoop.ozone.container.common.report.IncrementalReportSender; import org.apache.hadoop.ozone.container.common.transport.server.ratis.DispatcherContext; import org.apache.hadoop.ozone.container.common.volume.VolumeSet; -import org.apache.hadoop.ozone.container.keyvalue.KeyValueContainer; import org.apache.hadoop.ozone.container.keyvalue.KeyValueHandler; import org.apache.hadoop.ozone.container.keyvalue.TarContainerPacker; import org.apache.ratis.statemachine.StateMachine; diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/KeyValueHandler.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/KeyValueHandler.java index b9d6f0cebb21..0060f2a80b0a 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/KeyValueHandler.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/KeyValueHandler.java @@ -308,8 +308,7 @@ public ContainerCommandResponseProto handle( @VisibleForTesting static ContainerCommandResponseProto dispatchRequest(KeyValueHandler handler, - ContainerCommandRequestProto request, KeyValueContainer kvContainer, - DispatcherContext 
dispatcherContext) { + ContainerCommandRequestProto request, KeyValueContainer kvContainer, DispatcherContext dispatcherContext) { Type cmdType = request.getCmdType(); // Validate the request has been made to the correct datanode with the node id matching. if (kvContainer != null) { diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ozoneimpl/BackgroundContainerDataScanner.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ozoneimpl/BackgroundContainerDataScanner.java index a9e0e0661dc7..989ffb418e9d 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ozoneimpl/BackgroundContainerDataScanner.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ozoneimpl/BackgroundContainerDataScanner.java @@ -20,7 +20,6 @@ import com.google.common.annotations.VisibleForTesting; import java.io.IOException; import java.util.Iterator; - import org.apache.hadoop.hdfs.util.Canceler; import org.apache.hadoop.hdfs.util.DataTransferThrottler; import org.apache.hadoop.ozone.container.common.interfaces.Container; diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ozoneimpl/ContainerController.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ozoneimpl/ContainerController.java index a7ddcc1eb175..b14fdf3f8d5e 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ozoneimpl/ContainerController.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ozoneimpl/ContainerController.java @@ -37,7 +37,6 @@ import org.apache.hadoop.ozone.container.common.interfaces.Handler; import org.apache.hadoop.ozone.container.common.interfaces.ScanResult; import org.apache.hadoop.ozone.container.common.volume.HddsVolume; -import org.apache.hadoop.ozone.container.keyvalue.KeyValueContainer; import org.apache.hadoop.ozone.container.keyvalue.TarContainerPacker; import org.slf4j.Logger; import org.slf4j.LoggerFactory; diff --git a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/checksum/ContainerMerkleTreeTestUtils.java b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/checksum/ContainerMerkleTreeTestUtils.java index b6cd35773b45..4b969c00c87f 100644 --- a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/checksum/ContainerMerkleTreeTestUtils.java +++ b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/checksum/ContainerMerkleTreeTestUtils.java @@ -17,17 +17,6 @@ package org.apache.hadoop.ozone.container.checksum; -import org.apache.commons.lang3.tuple.Pair; -import org.apache.hadoop.hdds.conf.ConfigurationSource; -import org.apache.hadoop.hdds.conf.StorageUnit; -import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos; -import org.apache.hadoop.hdds.scm.OzoneClientConfig; -import org.apache.hadoop.hdds.scm.ScmConfigKeys; -import org.apache.hadoop.ozone.HddsDatanodeService; -import org.apache.hadoop.ozone.container.common.impl.ContainerData; -import org.apache.hadoop.ozone.container.common.interfaces.Container; -import org.apache.hadoop.ozone.container.ozoneimpl.OzoneContainer; -import org.apache.ratis.thirdparty.com.google.protobuf.ByteString; import static org.apache.hadoop.ozone.container.checksum.ContainerChecksumTreeManager.getContainerChecksumFile; import static org.junit.jupiter.api.Assertions.assertEquals; import 
static org.junit.jupiter.api.Assertions.assertNotNull; @@ -47,6 +36,17 @@ import java.util.Random; import java.util.Set; import java.util.stream.Collectors; +import org.apache.commons.lang3.tuple.Pair; +import org.apache.hadoop.hdds.conf.ConfigurationSource; +import org.apache.hadoop.hdds.conf.StorageUnit; +import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos; +import org.apache.hadoop.hdds.scm.OzoneClientConfig; +import org.apache.hadoop.hdds.scm.ScmConfigKeys; +import org.apache.hadoop.ozone.HddsDatanodeService; +import org.apache.hadoop.ozone.container.common.impl.ContainerData; +import org.apache.hadoop.ozone.container.common.interfaces.Container; +import org.apache.hadoop.ozone.container.ozoneimpl.OzoneContainer; +import org.apache.ratis.thirdparty.com.google.protobuf.ByteString; /** * Helper methods for testing container checksum tree files and container reconciliation. diff --git a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/keyvalue/TestKeyValueContainerCheck.java b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/keyvalue/TestKeyValueContainerCheck.java index f08cf10fa0cd..51a1d774ead6 100644 --- a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/keyvalue/TestKeyValueContainerCheck.java +++ b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/keyvalue/TestKeyValueContainerCheck.java @@ -17,6 +17,30 @@ package org.apache.hadoop.ozone.container.keyvalue; +import static org.apache.hadoop.ozone.container.keyvalue.TestContainerCorruptions.CORRUPT_BLOCK; +import static org.apache.hadoop.ozone.container.keyvalue.TestContainerCorruptions.CORRUPT_CONTAINER_FILE; +import static org.apache.hadoop.ozone.container.keyvalue.TestContainerCorruptions.MISSING_BLOCK; +import static org.apache.hadoop.ozone.container.keyvalue.TestContainerCorruptions.MISSING_CHUNKS_DIR; +import static org.apache.hadoop.ozone.container.keyvalue.TestContainerCorruptions.MISSING_CONTAINER_DIR; +import static org.apache.hadoop.ozone.container.keyvalue.TestContainerCorruptions.MISSING_CONTAINER_FILE; +import static org.apache.hadoop.ozone.container.keyvalue.TestContainerCorruptions.MISSING_METADATA_DIR; +import static org.apache.hadoop.ozone.container.keyvalue.TestContainerCorruptions.TRUNCATED_BLOCK; +import static org.apache.hadoop.ozone.container.keyvalue.TestContainerCorruptions.TRUNCATED_CONTAINER_FILE; +import static org.assertj.core.api.Assertions.assertThat; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertTrue; +import static org.mockito.Mockito.mock; + +import java.io.File; +import java.io.RandomAccessFile; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; +import java.util.Map; +import java.util.Optional; +import java.util.stream.Collectors; +import java.util.stream.Stream; import org.apache.commons.io.FileUtils; import org.apache.hadoop.hdds.client.BlockID; import org.apache.hadoop.hdds.conf.OzoneConfiguration; @@ -32,8 +56,8 @@ import org.apache.hadoop.ozone.container.common.interfaces.ScanResult; import org.apache.hadoop.ozone.container.keyvalue.helpers.BlockUtils; import org.apache.hadoop.ozone.container.keyvalue.helpers.KeyValueContainerLocationUtil; -import org.apache.hadoop.ozone.container.ozoneimpl.ContainerScanError.FailureType; import org.apache.hadoop.ozone.container.ozoneimpl.ContainerScanError; +import 
org.apache.hadoop.ozone.container.ozoneimpl.ContainerScanError.FailureType; import org.apache.hadoop.ozone.container.ozoneimpl.ContainerScannerConfiguration; import org.apache.hadoop.ozone.container.ozoneimpl.DataScanResult; import org.junit.jupiter.params.ParameterizedTest; @@ -42,31 +66,6 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import java.io.File; -import java.io.RandomAccessFile; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.List; -import java.util.Map; -import java.util.Optional; -import java.util.stream.Collectors; -import java.util.stream.Stream; - -import static org.apache.hadoop.ozone.container.keyvalue.TestContainerCorruptions.CORRUPT_BLOCK; -import static org.apache.hadoop.ozone.container.keyvalue.TestContainerCorruptions.CORRUPT_CONTAINER_FILE; -import static org.apache.hadoop.ozone.container.keyvalue.TestContainerCorruptions.MISSING_BLOCK; -import static org.apache.hadoop.ozone.container.keyvalue.TestContainerCorruptions.MISSING_CHUNKS_DIR; -import static org.apache.hadoop.ozone.container.keyvalue.TestContainerCorruptions.MISSING_CONTAINER_DIR; -import static org.apache.hadoop.ozone.container.keyvalue.TestContainerCorruptions.MISSING_CONTAINER_FILE; -import static org.apache.hadoop.ozone.container.keyvalue.TestContainerCorruptions.MISSING_METADATA_DIR; -import static org.apache.hadoop.ozone.container.keyvalue.TestContainerCorruptions.TRUNCATED_BLOCK; -import static org.apache.hadoop.ozone.container.keyvalue.TestContainerCorruptions.TRUNCATED_CONTAINER_FILE; -import static org.assertj.core.api.Assertions.assertThat; -import static org.junit.jupiter.api.Assertions.assertEquals; -import static org.junit.jupiter.api.Assertions.assertFalse; -import static org.junit.jupiter.api.Assertions.assertTrue; -import static org.mockito.Mockito.mock; - /** * Test the KeyValueContainerCheck class's ability to detect container errors. 
*/ diff --git a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/keyvalue/TestKeyValueHandler.java b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/keyvalue/TestKeyValueHandler.java index 8d50f676c059..7530a33327a3 100644 --- a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/keyvalue/TestKeyValueHandler.java +++ b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/keyvalue/TestKeyValueHandler.java @@ -56,7 +56,6 @@ import java.util.Set; import java.util.UUID; import java.util.concurrent.atomic.AtomicInteger; - import org.apache.commons.io.FileUtils; import org.apache.hadoop.conf.StorageUnit; import org.apache.hadoop.fs.FileUtil; diff --git a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/ozoneimpl/TestBackgroundContainerDataScanner.java b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/ozoneimpl/TestBackgroundContainerDataScanner.java index ae13cfcee90c..84b001d9d79b 100644 --- a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/ozoneimpl/TestBackgroundContainerDataScanner.java +++ b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/ozoneimpl/TestBackgroundContainerDataScanner.java @@ -17,24 +17,6 @@ package org.apache.hadoop.ozone.container.ozoneimpl; -import org.apache.hadoop.hdfs.util.Canceler; -import org.apache.hadoop.hdfs.util.DataTransferThrottler; -import org.apache.hadoop.metrics2.lib.DefaultMetricsSystem; -import org.apache.hadoop.ozone.container.checksum.ContainerChecksumTreeManager; -import org.apache.hadoop.ozone.container.common.impl.ContainerData; -import org.apache.hadoop.ozone.container.common.interfaces.Container; -import org.apache.ozone.test.GenericTestUtils; -import org.junit.jupiter.api.Test; -import org.junit.jupiter.api.BeforeEach; -import org.mockito.junit.jupiter.MockitoSettings; -import org.mockito.quality.Strictness; - -import java.time.Duration; -import java.util.Arrays; -import java.util.Optional; -import java.util.concurrent.CountDownLatch; -import java.util.concurrent.TimeUnit; - import static org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos.ContainerDataProto.State.UNHEALTHY; import static org.apache.hadoop.ozone.container.common.ContainerTestUtils.getHealthyMetadataScanResult; import static org.apache.hadoop.ozone.container.common.ContainerTestUtils.getUnhealthyDataScanResult; @@ -46,13 +28,28 @@ import static org.mockito.Mockito.atLeastOnce; import static org.mockito.Mockito.atMost; import static org.mockito.Mockito.atMostOnce; -import static org.mockito.Mockito.mock; import static org.mockito.Mockito.eq; import static org.mockito.Mockito.never; import static org.mockito.Mockito.times; import static org.mockito.Mockito.verify; import static org.mockito.Mockito.when; +import java.time.Duration; +import java.util.Arrays; +import java.util.Optional; +import java.util.concurrent.CountDownLatch; +import java.util.concurrent.TimeUnit; +import org.apache.hadoop.hdfs.util.Canceler; +import org.apache.hadoop.hdfs.util.DataTransferThrottler; +import org.apache.hadoop.metrics2.lib.DefaultMetricsSystem; +import org.apache.hadoop.ozone.container.common.impl.ContainerData; +import org.apache.hadoop.ozone.container.common.interfaces.Container; +import org.apache.ozone.test.GenericTestUtils; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; +import org.mockito.junit.jupiter.MockitoSettings; +import 
org.mockito.quality.Strictness; + /** * Unit tests for the background container data scanner. */ diff --git a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/ozoneimpl/TestOnDemandContainerDataScanner.java b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/ozoneimpl/TestOnDemandContainerDataScanner.java index eb87c0d94f90..bcdb7a173ccf 100644 --- a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/ozoneimpl/TestOnDemandContainerDataScanner.java +++ b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/ozoneimpl/TestOnDemandContainerDataScanner.java @@ -31,12 +31,14 @@ import static org.mockito.Mockito.atMostOnce; import static org.mockito.Mockito.eq; import static org.mockito.Mockito.never; +import static org.mockito.Mockito.times; import static org.mockito.Mockito.verify; import static org.mockito.Mockito.verifyNoInteractions; import static org.mockito.Mockito.when; import java.time.Duration; import java.util.ArrayList; +import java.util.Arrays; import java.util.Optional; import java.util.concurrent.CountDownLatch; import java.util.concurrent.ExecutionException; @@ -45,7 +47,6 @@ import org.apache.hadoop.hdfs.util.Canceler; import org.apache.hadoop.hdfs.util.DataTransferThrottler; import org.apache.hadoop.metrics2.lib.DefaultMetricsSystem; -import org.apache.hadoop.ozone.container.checksum.ContainerChecksumTreeManager; import org.apache.hadoop.ozone.container.common.impl.ContainerData; import org.apache.hadoop.ozone.container.common.interfaces.Container; import org.apache.hadoop.ozone.container.common.interfaces.ScanResult; @@ -56,10 +57,6 @@ import org.mockito.quality.Strictness; import org.mockito.stubbing.Answer; -import java.util.Arrays; -import static org.mockito.Mockito.mock; -import static org.mockito.Mockito.times; - /** * Unit tests for the on-demand container scanner. 
*/ diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/dn/checksum/TestContainerCommandReconciliation.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/dn/checksum/TestContainerCommandReconciliation.java index 63d0ebbfb0f2..dfdf1c9ae4b2 100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/dn/checksum/TestContainerCommandReconciliation.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/dn/checksum/TestContainerCommandReconciliation.java @@ -43,7 +43,6 @@ import static org.apache.hadoop.ozone.container.checksum.ContainerMerkleTreeTestUtils.assertTreesSortedAndMatch; import static org.apache.hadoop.ozone.container.checksum.ContainerMerkleTreeTestUtils.buildTestTree; import static org.apache.hadoop.ozone.container.checksum.ContainerMerkleTreeTestUtils.readChecksumFile; -import static org.apache.hadoop.ozone.container.checksum.ContainerMerkleTreeTestUtils.writeContainerDataTreeProto; import static org.apache.hadoop.ozone.om.OMConfigKeys.OZONE_OM_HTTP_KERBEROS_KEYTAB_FILE; import static org.apache.hadoop.ozone.om.OMConfigKeys.OZONE_OM_HTTP_KERBEROS_PRINCIPAL_KEY; import static org.apache.hadoop.ozone.om.OMConfigKeys.OZONE_OM_KERBEROS_KEYTAB_FILE_KEY; @@ -62,7 +61,6 @@ import java.nio.file.StandardOpenOption; import java.util.List; import java.util.Properties; -import java.util.Random; import java.util.Set; import java.util.UUID; import java.util.stream.Collectors; @@ -106,8 +104,6 @@ import org.apache.hadoop.ozone.container.keyvalue.KeyValueHandler; import org.apache.hadoop.ozone.container.keyvalue.helpers.BlockUtils; import org.apache.hadoop.ozone.container.keyvalue.interfaces.BlockManager; -import org.apache.hadoop.ozone.container.ozoneimpl.ContainerScannerConfiguration; -import org.apache.hadoop.ozone.container.ozoneimpl.MetadataScanResult; import org.apache.hadoop.ozone.om.OzoneManager; import org.apache.hadoop.security.UserGroupInformation; import org.apache.ozone.test.GenericTestUtils; diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/dn/scanner/TestBackgroundContainerDataScannerIntegration.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/dn/scanner/TestBackgroundContainerDataScannerIntegration.java index b00dd93a5649..e63dc94873fd 100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/dn/scanner/TestBackgroundContainerDataScannerIntegration.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/dn/scanner/TestBackgroundContainerDataScannerIntegration.java @@ -17,7 +17,11 @@ package org.apache.hadoop.ozone.dn.scanner; +import static org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos.ContainerReplicaProto.State.CLOSED; +import static org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos.ContainerReplicaProto.State.UNHEALTHY; import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertNotEquals; +import static org.junit.jupiter.api.Assertions.assertTrue; import java.util.concurrent.TimeUnit; import org.apache.hadoop.hdds.conf.OzoneConfiguration; @@ -33,11 +37,6 @@ import org.junit.jupiter.params.ParameterizedTest; import org.junit.jupiter.params.provider.EnumSource; -import static org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos.ContainerReplicaProto.State.CLOSED; -import static 
org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos.ContainerReplicaProto.State.UNHEALTHY; -import static org.junit.jupiter.api.Assertions.assertNotEquals; -import static org.junit.jupiter.api.Assertions.assertTrue; - /** * Integration tests for the background container data scanner. This scanner * checks all data and metadata in the container. diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/dn/scanner/TestBackgroundContainerMetadataScannerIntegration.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/dn/scanner/TestBackgroundContainerMetadataScannerIntegration.java index 4abbf1a303cd..1672609a74ba 100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/dn/scanner/TestBackgroundContainerMetadataScannerIntegration.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/dn/scanner/TestBackgroundContainerMetadataScannerIntegration.java @@ -17,7 +17,11 @@ package org.apache.hadoop.ozone.dn.scanner; +import static org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos.ContainerReplicaProto.State.CLOSED; +import static org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos.ContainerReplicaProto.State.UNHEALTHY; import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertNotEquals; +import static org.junit.jupiter.api.Assertions.assertTrue; import java.time.Duration; import java.util.Collection; @@ -35,11 +39,6 @@ import org.junit.jupiter.params.ParameterizedTest; import org.junit.jupiter.params.provider.MethodSource; -import static org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos.ContainerReplicaProto.State.CLOSED; -import static org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos.ContainerReplicaProto.State.UNHEALTHY; -import static org.junit.jupiter.api.Assertions.assertNotEquals; -import static org.junit.jupiter.api.Assertions.assertTrue; - /** * Integration tests for the background container metadata scanner. 
This * scanner does a quick check of container metadata to find obvious failures diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/dn/scanner/TestOnDemandContainerDataScannerIntegration.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/dn/scanner/TestOnDemandContainerDataScannerIntegration.java index dc5046833cd8..9df46a384335 100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/dn/scanner/TestOnDemandContainerDataScannerIntegration.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/dn/scanner/TestOnDemandContainerDataScannerIntegration.java @@ -17,7 +17,11 @@ package org.apache.hadoop.ozone.dn.scanner; +import static org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos.ContainerReplicaProto.State.CLOSED; +import static org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos.ContainerReplicaProto.State.UNHEALTHY; import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertNotEquals; +import static org.junit.jupiter.api.Assertions.assertTrue; import java.util.Collection; import org.apache.hadoop.hdds.conf.OzoneConfiguration; @@ -32,11 +36,6 @@ import org.junit.jupiter.params.ParameterizedTest; import org.junit.jupiter.params.provider.MethodSource; -import static org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos.ContainerReplicaProto.State.CLOSED; -import static org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos.ContainerReplicaProto.State.UNHEALTHY; -import static org.junit.jupiter.api.Assertions.assertNotEquals; -import static org.junit.jupiter.api.Assertions.assertTrue; - /** * Integration tests for the on demand container data scanner. This scanner * is triggered when there is an error while a client interacts with a From cc55527b5d30fd770fc6c5458e7560161d81fee6 Mon Sep 17 00:00:00 2001 From: Ethan Rose Date: Wed, 14 May 2025 15:27:22 -0400 Subject: [PATCH 43/62] Fix sending ICR when only checksum changes (pending test) --- .../container/common/interfaces/Handler.java | 4 +- .../container/keyvalue/KeyValueHandler.java | 126 +++++++++--------- .../ozoneimpl/ContainerController.java | 4 +- .../ozoneimpl/ContainerScanHelper.java | 6 +- 4 files changed, 75 insertions(+), 65 deletions(-) diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/interfaces/Handler.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/interfaces/Handler.java index e1db3634694b..69de6fbe4243 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/interfaces/Handler.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/interfaces/Handler.java @@ -158,8 +158,8 @@ public abstract void markContainerForClose(Container container) * * @param container The container to update * @param treeWriter The container merkle tree with the updated information about the container - * @throws IOException For errors sending an ICR. If updating the checksums on disk fails, the checksum information - * will remain unchanged with no exception thrown. + * @throws IOException For errors sending an ICR or updating the container checksum on disk. If the disk update + * fails, the checksum in memory will not be updated. 
*/ public abstract void updateContainerChecksum(Container container, ContainerMerkleTreeWriter treeWriter) throws IOException; diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/KeyValueHandler.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/KeyValueHandler.java index 86ed23e6f601..b2d8213208cb 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/KeyValueHandler.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/KeyValueHandler.java @@ -626,30 +626,6 @@ ContainerCommandResponseProto handleCloseContainer( return getSuccessResponse(request); } - /** - * Write the merkle tree for this container using the existing checksum metadata only. The data is not read or - * validated by this method, so it is expected to run quickly. - *

- * If a checksum file already exists on the disk, this method will do nothing. The existing file would have either - * been made from the metadata or data itself so there is no need to recreate it from the metadata. - *

- * - * @param container The container which will have a tree generated. - */ - private void updateContainerChecksumFromMetadata(Container container) { - if (ContainerChecksumTreeManager.checksumFileExist(container)) { - return; - } - - try { - KeyValueContainerData containerData = (KeyValueContainerData) container.getContainerData(); - updateAndGetContainerChecksumFromMetadata(containerData); - } catch (IOException ex) { - LOG.error("Cannot create container checksum for container {} , Exception: ", - container.getContainerData().getContainerID(), ex); - } - } - /** * Handle Put Block operation. Calls BlockManager to process the request. */ @@ -1399,34 +1375,85 @@ public void markContainerForClose(Container container) } finally { container.writeUnlock(); } - updateContainerChecksumFromMetadata(container); + updateContainerChecksumFromMetadataIfNeeded(container); ContainerLogger.logClosing(container.getContainerData()); sendICR(container); } @Override public void updateContainerChecksum(Container container, ContainerMerkleTreeWriter treeWriter) - throws StorageContainerException { + throws IOException { + updateAndGetContainerChecksum(container, treeWriter, true); + } + + /** + * Write the merkle tree for this container using the existing checksum metadata only. The data is not read or + * validated by this method, so it is expected to run quickly. + *

+ * If a checksum file already exists on disk, this method does nothing. The existing file was created either + from the metadata or from the data itself, so there is no need to recreate it from the metadata. This method + does not send an ICR with the updated checksum info. + *

+ * + * @param container The container which will have a tree generated. + */ + private void updateContainerChecksumFromMetadataIfNeeded(Container container) { + if (ContainerChecksumTreeManager.checksumFileExist(container)) { + return; + } + + try { + KeyValueContainer keyValueContainer = (KeyValueContainer) container; + updateAndGetContainerChecksumFromMetadata(keyValueContainer); + } catch (IOException ex) { + LOG.error("Cannot create container checksum for container {} , Exception: ", + container.getContainerData().getContainerID(), ex); + } + } + + /** + * Updates the container merkle tree based on the RocksDb's block metadata and returns the updated checksum info. + * This method does not send an ICR with the updated checksum info. + * @param container - Container for which the container merkle tree needs to be updated. + */ + private ContainerProtos.ContainerChecksumInfo updateAndGetContainerChecksumFromMetadata( + KeyValueContainer container) throws IOException { + ContainerMerkleTreeWriter merkleTree = new ContainerMerkleTreeWriter(); + try (DBHandle dbHandle = BlockUtils.getDB(container.getContainerData(), conf); + BlockIterator blockIterator = dbHandle.getStore(). + getBlockIterator(container.getContainerData().getContainerID())) { + while (blockIterator.hasNext()) { + BlockData blockData = blockIterator.nextBlock(); + merkleTree.addBlock(blockData.getLocalID()); + // Assume all chunks are healthy when building the tree from metadata. Scanner will identify corruption when + // it runs after. + List chunkInfos = blockData.getChunks(); + merkleTree.addChunks(blockData.getLocalID(), true, chunkInfos); + } + } + return updateAndGetContainerChecksum(container, merkleTree, false); + } + + private ContainerProtos.ContainerChecksumInfo updateAndGetContainerChecksum(Container container, + ContainerMerkleTreeWriter treeWriter, boolean sendICR) throws IOException { ContainerData containerData = container.getContainerData(); // Attempt to write the new data checksum to disk. If persisting this fails, keep using the original data // checksum to prevent divergence from what SCM sees in the ICR vs what datanode peers will see when pulling the // merkle tree. 
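+ // Note the ordering below: writeContainerDataTree runs before any in-memory mutation, so a failed disk
+ // write leaves the previous checksum visible everywhere and the IOException propagates to the caller.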
long originalDataChecksum = containerData.getDataChecksum(); - long updatedDataChecksum = originalDataChecksum; - try { - updatedDataChecksum = - checksumManager.writeContainerDataTree(containerData, treeWriter).getContainerMerkleTree().getDataChecksum(); - } catch (IOException ex) { - LOG.error("Failed to write container merkle tree for container {}", containerData.getContainerID(), ex); - } + ContainerProtos.ContainerChecksumInfo updateChecksumInfo = checksumManager.writeContainerDataTree(containerData, + treeWriter); + long updatedDataChecksum = updateChecksumInfo.getContainerMerkleTree().getDataChecksum(); if (updatedDataChecksum != originalDataChecksum) { containerData.setDataChecksum(updatedDataChecksum); String message = "Container data checksum updated from " + checksumToString(originalDataChecksum) + " to " + checksumToString(updatedDataChecksum); - sendICR(container); + if (sendICR) { + sendICR(container); + } if (ContainerChecksumTreeManager.hasContainerChecksumFile(containerData)) { LOG.warn(message); ContainerLogger.logChecksumUpdated(containerData, originalDataChecksum); @@ -1436,6 +1463,7 @@ public void updateContainerChecksum(Container container, ContainerMerkleTreeWrit LOG.debug(message); } } + return updateChecksumInfo; } @Override @@ -1466,7 +1494,7 @@ public void markContainerUnhealthy(Container container, ScanResult reason) } finally { container.writeUnlock(); } - updateContainerChecksumFromMetadata(container); + updateContainerChecksumFromMetadataIfNeeded(container); // Even if the container file is corrupted/missing and the unhealthy // update fails, the unhealthy state is kept in memory and sent to // SCM. Write a corresponding entry to the container log as well. @@ -1497,7 +1525,7 @@ public void quasiCloseContainer(Container container, String reason) } finally { container.writeUnlock(); } - updateContainerChecksumFromMetadata(container); + updateContainerChecksumFromMetadataIfNeeded(container); ContainerLogger.logQuasiClosed(container.getContainerData(), reason); sendICR(container); } @@ -1531,7 +1559,7 @@ public void closeContainer(Container container) } finally { container.writeUnlock(); } - updateContainerChecksumFromMetadata(container); + updateContainerChecksumFromMetadataIfNeeded(container); ContainerLogger.logClosed(container.getContainerData()); sendICR(container); } @@ -1557,7 +1585,7 @@ public void reconcileContainer(DNContainerOperationClient dnClient, Container originalChecksumInfo = optionalChecksumInfo.get(); } else { // Try creating the checksum info from RocksDB metadata if it is not present. - originalChecksumInfo = updateAndGetContainerChecksumFromMetadata(containerData); + originalChecksumInfo = updateAndGetContainerChecksumFromMetadata(kvContainer); } // This holds our current most up-to-date checksum info that we are using for the container. ContainerProtos.ContainerChecksumInfo latestChecksumInfo = originalChecksumInfo; @@ -1698,28 +1726,6 @@ containerID, peer, checksumToString(previousDataChecksum), checksumToString(late sendICR(container); } - /** - * Updates the container merkle tree based on the RocksDb's block metadata and returns the updated checksum info. - * @param containerData - Container data for which the container merkle tree needs to be updated. 
- */ - private ContainerProtos.ContainerChecksumInfo updateAndGetContainerChecksumFromMetadata( - KeyValueContainerData containerData) throws IOException { - ContainerMerkleTreeWriter merkleTree = new ContainerMerkleTreeWriter(); - try (DBHandle dbHandle = BlockUtils.getDB(containerData, conf); - BlockIterator blockIterator = dbHandle.getStore(). - getBlockIterator(containerData.getContainerID())) { - while (blockIterator.hasNext()) { - BlockData blockData = blockIterator.nextBlock(); - merkleTree.addBlock(blockData.getLocalID()); - // Assume all chunks are healthy when building the tree from metadata. Scanner will identify corruption when - // it runs after. - List chunkInfos = blockData.getChunks(); - merkleTree.addChunks(blockData.getLocalID(), true, chunkInfos); - } - } - return checksumManager.writeContainerDataTree(containerData, merkleTree); - } - /** * Read chunks from a peer datanode and use them to repair our container. * diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ozoneimpl/ContainerController.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ozoneimpl/ContainerController.java index e8f1316767e9..9f328fee4de7 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ozoneimpl/ContainerController.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ozoneimpl/ContainerController.java @@ -130,8 +130,8 @@ public boolean markContainerUnhealthy(final long containerId, ScanResult reason) * * @param containerId The ID of the container to update * @param treeWriter The container merkle tree with the updated information about the container - * @throws IOException For errors sending an ICR. If updating the checksums on disk fails, the checksum information - * will remain unchanged with no exception thrown. + * @throws IOException For errors sending an ICR or updating the container checksum on disk. If the disk update + * fails, the checksum in memory will not be updated. 
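Both of these javadocs describe the same contract, implemented by the new updateAndGetContainerChecksum helper in KeyValueHandler earlier in this commit. A condensed sketch of that flow, with logging and the checksum-file-existence branch omitted (names follow the patch):

    long original = containerData.getDataChecksum();
    // The disk write happens first; an IOException here propagates before any
    // in-memory state changes, keeping SCM and datanode peers consistent.
    ContainerProtos.ContainerChecksumInfo info =
        checksumManager.writeContainerDataTree(containerData, treeWriter);
    long updated = info.getContainerMerkleTree().getDataChecksum();
    if (updated != original) {
      containerData.setDataChecksum(updated);
      // An ICR is sent only when the checksum actually changed, and only on the
      // updateContainerChecksum path (sendICR is false when rebuilding from metadata).
      if (sendICR) {
        sendICR(container);
      }
    }
    return info;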
*/ public void updateContainerChecksum(long containerId, ContainerMerkleTreeWriter treeWriter) throws IOException { diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ozoneimpl/ContainerScanHelper.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ozoneimpl/ContainerScanHelper.java index b3aa61862d5f..e056802d716b 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ozoneimpl/ContainerScanHelper.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ozoneimpl/ContainerScanHelper.java @@ -59,7 +59,11 @@ public void scanData(Container container, DataTransferThrottler throttler, Ca if (result.isDeleted()) { log.debug("Container [{}] has been deleted during the data scan.", containerId); } else { - controller.updateContainerChecksum(containerId, result.getDataTree()); + try { + controller.updateContainerChecksum(containerId, result.getDataTree()); + } catch (IOException ex) { + log.warn("Failed to update container checksum after scan of container {}", containerId, ex); + } if (!result.isHealthy()) { handleUnhealthyScanResult(containerId, result); } From 35879b44487aef41943422d08ae92b5fd337373d Mon Sep 17 00:00:00 2001 From: Ethan Rose Date: Wed, 14 May 2025 15:54:33 -0400 Subject: [PATCH 44/62] Updates after reviewing diff --- .../ozone/container/keyvalue/KeyValueHandler.java | 12 +++++------- .../ozoneimpl/BackgroundContainerDataScanner.java | 6 +++--- .../BackgroundContainerMetadataScanner.java | 8 ++++---- .../container/ozoneimpl/ContainerScanHelper.java | 5 ++--- .../ozoneimpl/OnDemandContainerDataScanner.java | 8 ++++---- .../TestKeyValueHandlerWithUnhealthyContainer.java | 2 ++ .../dist/src/main/smoketest/admincli/container.robot | 1 - 7 files changed, 20 insertions(+), 22 deletions(-) diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/KeyValueHandler.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/KeyValueHandler.java index b2d8213208cb..58458fa4e5eb 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/KeyValueHandler.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/KeyValueHandler.java @@ -508,7 +508,7 @@ ContainerCommandResponseProto handleCreateContainer( } private void populateContainerPathFields(KeyValueContainer container, - HddsVolume hddsVolume) throws IOException { + HddsVolume hddsVolume) throws IOException { volumeSet.readLock(); try { String idDir = VersionedDatanodeFeatures.ScmHA.chooseContainerPathID( @@ -982,7 +982,7 @@ private void validateChunkChecksumData(ChunkBufferToByteString data, ChunkInfo i if (validateChunkChecksumData) { try { if (data instanceof ChunkBuffer) { - final ChunkBuffer b = (ChunkBuffer) data; + final ChunkBuffer b = (ChunkBuffer)data; Checksum.verifyChecksum(b.duplicate(b.position(), b.limit()), info.getChecksumData(), 0); } else { Checksum.verifyChecksum(data.toByteString(byteBufferToByteString), info.getChecksumData(), 0); @@ -1101,7 +1101,6 @@ public void writeChunkForClosedContainer(ChunkInfo chunkInfo, BlockID blockID, /** * Handle Put Block operation for closed container. Calls BlockManager to process the request. * This is primarily used by container reconciliation process to persist the block data for closed container. - * * @param kvContainer - Container for which block data need to be persisted. 
* @param blockData - Block Data to be persisted (BlockData should have the chunks). * @param blockCommitSequenceId - Block Commit Sequence ID for the block. @@ -1267,7 +1266,6 @@ ContainerCommandResponseProto handleUnsupportedOp( /** * Check if container is open. Throw exception otherwise. - * * @param kvContainer * @throws StorageContainerException */ @@ -1312,7 +1310,6 @@ private void checkContainerOpen(KeyValueContainer kvContainer) /** * Check if container is Closed. - * * @param kvContainer */ private boolean checkContainerClose(KeyValueContainer kvContainer) { @@ -1326,8 +1323,9 @@ private boolean checkContainerClose(KeyValueContainer kvContainer) { @Override public Container importContainer(ContainerData originalContainerData, - final InputStream rawContainerStream, - final TarContainerPacker packer) throws IOException { + final InputStream rawContainerStream, + final TarContainerPacker packer) + throws IOException { Preconditions.checkState(originalContainerData instanceof KeyValueContainerData, "Should be KeyValueContainerData instance"); diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ozoneimpl/BackgroundContainerDataScanner.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ozoneimpl/BackgroundContainerDataScanner.java index 989ffb418e9d..9941341fdde5 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ozoneimpl/BackgroundContainerDataScanner.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ozoneimpl/BackgroundContainerDataScanner.java @@ -44,7 +44,7 @@ public class BackgroundContainerDataScanner extends private final Canceler canceler; private static final String NAME_FORMAT = "ContainerDataScanner(%s)"; private final ContainerDataScannerMetrics metrics; - private final ContainerScanHelper scannerMixin; + private final ContainerScanHelper scanHelper; public BackgroundContainerDataScanner(ContainerScannerConfiguration conf, ContainerController controller, @@ -56,7 +56,7 @@ public BackgroundContainerDataScanner(ContainerScannerConfiguration conf, canceler = new Canceler(); this.metrics = ContainerDataScannerMetrics.create(volume.toString()); this.metrics.setStorageDirectory(volume.toString()); - this.scannerMixin = new ContainerScanHelper(LOG, controller, metrics, conf); + this.scanHelper = new ContainerScanHelper(LOG, controller, metrics, conf); } @Override @@ -68,7 +68,7 @@ public void scanContainer(Container c) shutdown("The volume has failed."); return; } - scannerMixin.scanData(c, throttler, canceler); + scanHelper.scanData(c, throttler, canceler); } @Override diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ozoneimpl/BackgroundContainerMetadataScanner.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ozoneimpl/BackgroundContainerMetadataScanner.java index c82d23b15359..51fbc0de6725 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ozoneimpl/BackgroundContainerMetadataScanner.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ozoneimpl/BackgroundContainerMetadataScanner.java @@ -35,14 +35,14 @@ public class BackgroundContainerMetadataScanner extends LoggerFactory.getLogger(BackgroundContainerMetadataScanner.class); private final ContainerMetadataScannerMetrics metrics; private final ContainerController controller; - private final ContainerScanHelper scannerMixin; + 
private final ContainerScanHelper scanHelper; public BackgroundContainerMetadataScanner(ContainerScannerConfiguration conf, ContainerController controller) { super("ContainerMetadataScanner", conf.getMetadataScanInterval()); this.controller = controller; this.metrics = ContainerMetadataScannerMetrics.create(); - this.scannerMixin = new ContainerScanHelper(LOG, controller, metrics, conf); + this.scanHelper = new ContainerScanHelper(LOG, controller, metrics, conf); } @Override @@ -54,7 +54,7 @@ public Iterator> getContainerIterator() { @Override public void scanContainer(Container container) throws IOException, InterruptedException { - if (!scannerMixin.shouldScanMetadata(container)) { + if (!scanHelper.shouldScanMetadata(container)) { return; } @@ -66,7 +66,7 @@ public void scanContainer(Container container) return; } if (!result.isHealthy()) { - scannerMixin.handleUnhealthyScanResult(containerID, result); + scanHelper.handleUnhealthyScanResult(containerID, result); } // Do not update the scan timestamp after the scan since this was just a diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ozoneimpl/ContainerScanHelper.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ozoneimpl/ContainerScanHelper.java index e056802d716b..c7acbd7de95c 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ozoneimpl/ContainerScanHelper.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ozoneimpl/ContainerScanHelper.java @@ -88,7 +88,7 @@ public void handleUnhealthyScanResult(long containerID, ScanResult result) throw } // Only increment the number of unhealthy containers if the container was not already unhealthy. - // TODO HDDS-11593 (to be merged in to the feature branch from master): Scanner counters will start from zero + // TODO HDDS-11593: Scanner counters will start from zero // at the beginning of each run, so this will need to be incremented for every unhealthy container seen // regardless of its previous state. 
boolean containerMarkedUnhealthy = controller.markContainerUnhealthy(containerID, result); @@ -109,8 +109,7 @@ public boolean shouldScanMetadata(Container container) { return false; } -// return !recentlyScanned(container.getContainerData()); - return true; + return !recentlyScanned(container.getContainerData()); } public boolean shouldScanData(Container container) { diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ozoneimpl/OnDemandContainerDataScanner.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ozoneimpl/OnDemandContainerDataScanner.java index dcc2f2f8bcf9..3c6f8fb2c7a4 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ozoneimpl/OnDemandContainerDataScanner.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ozoneimpl/OnDemandContainerDataScanner.java @@ -43,7 +43,7 @@ public final class OnDemandContainerDataScanner { private final ConcurrentHashMap .KeySetView containerRescheduleCheckSet; private final OnDemandScannerMetrics metrics; - private final ContainerScanHelper scannerMixin; + private final ContainerScanHelper scannerHelper; public OnDemandContainerDataScanner( ContainerScannerConfiguration conf, ContainerController controller) { @@ -53,11 +53,11 @@ public OnDemandContainerDataScanner( metrics = OnDemandScannerMetrics.create(); scanExecutor = Executors.newSingleThreadExecutor(); containerRescheduleCheckSet = ConcurrentHashMap.newKeySet(); - this.scannerMixin = new ContainerScanHelper(LOG, controller, metrics, conf); + this.scannerHelper = new ContainerScanHelper(LOG, controller, metrics, conf); } public Optional> scanContainer(Container container) { - if (!scannerMixin.shouldScanData(container)) { + if (!scannerHelper.shouldScanData(container)) { return Optional.empty(); } @@ -83,7 +83,7 @@ private void removeContainerFromScheduledContainers( private void performOnDemandScan(Container container) { try { - scannerMixin.scanData(container, throttler, canceler); + scannerHelper.scanData(container, throttler, canceler); } catch (IOException e) { LOG.warn("Unexpected exception while scanning container " + container.getContainerData().getContainerID(), e); diff --git a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/keyvalue/TestKeyValueHandlerWithUnhealthyContainer.java b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/keyvalue/TestKeyValueHandlerWithUnhealthyContainer.java index 515ed65bbc15..dd557beff3e9 100644 --- a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/keyvalue/TestKeyValueHandlerWithUnhealthyContainer.java +++ b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/keyvalue/TestKeyValueHandlerWithUnhealthyContainer.java @@ -28,6 +28,7 @@ import static org.apache.hadoop.ozone.container.ContainerTestHelper.getWriteChunkRequest; import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertTrue; import static org.mockito.Mockito.any; import static org.mockito.Mockito.atMostOnce; import static org.mockito.Mockito.mock; @@ -249,6 +250,7 @@ public void testMarkContainerUnhealthyInFailedVolume() throws IOException { // unhealthy. 
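+ // With the volume back to NORMAL, marking the container unhealthy also rebuilds the checksum file from
+ // metadata, which the new assertion below verifies.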
hddsVolume.setState(StorageVolume.VolumeState.NORMAL); handler.markContainerUnhealthy(container, ContainerTestUtils.getUnhealthyDataScanResult()); + assertTrue(ContainerChecksumTreeManager.checksumFileExist(container)); verify(mockIcrSender, atMostOnce()).send(any()); } diff --git a/hadoop-ozone/dist/src/main/smoketest/admincli/container.robot b/hadoop-ozone/dist/src/main/smoketest/admincli/container.robot index b3923af90c65..91207a7911ce 100644 --- a/hadoop-ozone/dist/src/main/smoketest/admincli/container.robot +++ b/hadoop-ozone/dist/src/main/smoketest/admincli/container.robot @@ -150,7 +150,6 @@ Close container Wait until keyword succeeds 1min 10sec Container is closed ${container} Reconcile closed container -<<<<<<< HEAD ${container} = Execute ozone admin container list --state CLOSED | jq -r 'select(.replicationConfig.replicationFactor == "THREE") | .containerID' | head -1 ${data_checksum} = Execute ozone admin container info "${container}" --json | jq -r '.replicas[].dataChecksum' | head -n1 # Once the container is closed, the data checksum should be populated From 1ab8c144d520e5025e6677b3a833da62bc18f122 Mon Sep 17 00:00:00 2001 From: Ethan Rose Date: Wed, 14 May 2025 18:25:44 -0400 Subject: [PATCH 45/62] Add unit test for KeyValueHandler#updateContainerChecksum --- .../keyvalue/TestKeyValueHandler.java | 67 +++++++++++++++++++ 1 file changed, 67 insertions(+) diff --git a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/keyvalue/TestKeyValueHandler.java b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/keyvalue/TestKeyValueHandler.java index 7530a33327a3..c0c442bb780e 100644 --- a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/keyvalue/TestKeyValueHandler.java +++ b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/keyvalue/TestKeyValueHandler.java @@ -26,9 +26,12 @@ import static org.apache.hadoop.hdds.scm.ScmConfigKeys.HDDS_DATANODE_DIR_KEY; import static org.apache.hadoop.hdds.scm.ScmConfigKeys.OZONE_SCM_CONTAINER_LAYOUT_KEY; import static org.apache.hadoop.ozone.OzoneConsts.GB; +import static org.apache.hadoop.ozone.container.checksum.ContainerMerkleTreeTestUtils.assertTreesSortedAndMatch; +import static org.apache.hadoop.ozone.container.checksum.ContainerMerkleTreeTestUtils.buildTestTree; import static org.apache.hadoop.ozone.container.common.ContainerTestUtils.createBlockMetaData; import static org.assertj.core.api.Assertions.assertThat; import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertFalse; import static org.junit.jupiter.api.Assertions.assertNotEquals; import static org.junit.jupiter.api.Assertions.assertNotNull; import static org.junit.jupiter.api.Assertions.assertNull; @@ -53,6 +56,7 @@ import java.util.EnumSet; import java.util.HashMap; import java.util.List; +import java.util.Optional; import java.util.Set; import java.util.UUID; import java.util.concurrent.atomic.AtomicInteger; @@ -71,6 +75,8 @@ import org.apache.hadoop.hdds.scm.pipeline.PipelineID; import org.apache.hadoop.hdds.security.token.TokenVerifier; import org.apache.hadoop.ozone.container.checksum.ContainerChecksumTreeManager; +import org.apache.hadoop.ozone.container.checksum.ContainerMerkleTreeTestUtils; +import org.apache.hadoop.ozone.container.checksum.ContainerMerkleTreeWriter; import org.apache.hadoop.ozone.container.checksum.DNContainerOperationClient; import org.apache.hadoop.ozone.container.common.ContainerTestUtils; 
import org.apache.hadoop.ozone.container.common.helpers.ContainerMetrics; @@ -530,6 +536,53 @@ public void testContainerChecksumInvocation(ContainerLayoutVersion layoutVersion Assertions.assertEquals(1, icrCount.get()); } + @ContainerLayoutTestInfo.ContainerTest + public void testUpdateContainerChecksum(ContainerLayoutVersion layoutVersion) throws Exception { + conf = new OzoneConfiguration(); + KeyValueContainerData data = new KeyValueContainerData(123L, layoutVersion, GB, + PipelineID.randomId().toString(), randomDatanodeDetails().getUuidString()); + data.setMetadataPath(tempDir.toString()); + data.setDbFile(dbFile.toFile()); + KeyValueContainer container = new KeyValueContainer(data, conf); + KeyValueContainerData containerData = container.getContainerData(); + ContainerSet containerSet = new ContainerSet(1000); + containerSet.addContainer(container); + + // Allows checking the invocation count of the lambda. + AtomicInteger icrCount = new AtomicInteger(0); + ContainerMerkleTreeWriter treeWriter = buildTestTree(conf); + final long updatedDataChecksum = treeWriter.toProto().getDataChecksum(); + IncrementalReportSender icrSender = c -> { + // Check that the ICR contains expected info about the container. + ContainerReplicaProto report = c.getContainerReport(); + long reportedID = report.getContainerID(); + Assertions.assertEquals(containerData.getContainerID(), reportedID); + + assertEquals(updatedDataChecksum, report.getDataChecksum()); + icrCount.incrementAndGet(); + }; + + ContainerChecksumTreeManager checksumManager = new ContainerChecksumTreeManager(conf); + KeyValueHandler keyValueHandler = new KeyValueHandler(conf, randomDatanodeDetails().getUuidString(), containerSet, + mock(MutableVolumeSet.class), mock(ContainerMetrics.class), icrSender, checksumManager); + + + // Initially, container should have no checksum information. + assertEquals(0, containerData.getDataChecksum()); + assertFalse(checksumManager.read(containerData).isPresent()); + assertEquals(0, icrCount.get()); + + // Update container with checksum information. + keyValueHandler.updateContainerChecksum(container, treeWriter); + // Check ICR sent. The ICR sender verifies that the expected checksum is present in the report. + assertEquals(1, icrCount.get()); + // Check checksum in memory. + assertEquals(updatedDataChecksum, containerData.getDataChecksum()); + // Check disk content. 
+ ContainerProtos.ContainerChecksumInfo checksumInfo = checksumManager.read(containerData).get(); + assertTreesSortedAndMatch(treeWriter.toProto(), checksumInfo.getContainerMerkleTree()); + } + @Test public void testGetContainerChecksumInfoOnInvalidContainerStates() { when(handler.handleGetContainerChecksumInfo(any(), any())).thenCallRealMethod(); @@ -632,6 +685,20 @@ private static ContainerCommandRequestProto createContainerRequest( .build(); } + private KeyValueContainer addContainer(KeyValueHandler keyValueHandler, ContainerLayoutVersion layoutVersion) { + KeyValueContainerData kvData = new KeyValueContainerData(DUMMY_CONTAINER_ID, + layoutVersion, + (long) StorageUnit.GB.toBytes(1), UUID.randomUUID().toString(), + UUID.randomUUID().toString()); + kvData.setMetadataPath(tempDir.toString()); + kvData.setDbFile(dbFile.toFile()); + KeyValueContainer container = new KeyValueContainer(kvData, conf); + ContainerCommandRequestProto createContainerRequest = + createContainerRequest(DATANODE_UUID, DUMMY_CONTAINER_ID); + keyValueHandler.handleCreateContainer(createContainerRequest, container); + return container; + } + private KeyValueHandler createKeyValueHandler(Path path) throws IOException { final ContainerSet containerSet = new ContainerSet(1000); From 6c8be07aa379e459e6cf25936966596e6bf6e8f7 Mon Sep 17 00:00:00 2001 From: Ethan Rose Date: Wed, 14 May 2025 18:58:42 -0400 Subject: [PATCH 46/62] Improve and update scanner integration tests --- ...kgroundContainerDataScannerIntegration.java | 17 ++++++++++++++++- ...nDemandContainerDataScannerIntegration.java | 18 +++++++++++++++++- 2 files changed, 33 insertions(+), 2 deletions(-) diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/dn/scanner/TestBackgroundContainerDataScannerIntegration.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/dn/scanner/TestBackgroundContainerDataScannerIntegration.java index e63dc94873fd..2306af221c43 100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/dn/scanner/TestBackgroundContainerDataScannerIntegration.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/dn/scanner/TestBackgroundContainerDataScannerIntegration.java @@ -19,12 +19,15 @@ import static org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos.ContainerReplicaProto.State.CLOSED; import static org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos.ContainerReplicaProto.State.UNHEALTHY; +import static org.apache.hadoop.ozone.container.checksum.ContainerMerkleTreeTestUtils.readChecksumFile; import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertFalse; import static org.junit.jupiter.api.Assertions.assertNotEquals; import static org.junit.jupiter.api.Assertions.assertTrue; import java.util.concurrent.TimeUnit; import org.apache.hadoop.hdds.conf.OzoneConfiguration; +import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos; import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos.ContainerDataProto.State; import org.apache.hadoop.ozone.container.common.interfaces.Container; import org.apache.hadoop.ozone.container.common.utils.ContainerLogger; @@ -96,7 +99,19 @@ void testCorruptionDetected(TestContainerCorruptions corruption) // Wait for SCM to get a report of the unhealthy replica with a different checksum than before. 
waitForScmToSeeReplicaState(containerID, UNHEALTHY); long newReportedDataChecksum = getContainerReplica(containerID).getDataChecksum(); - assertNotEquals(initialReportedDataChecksum, newReportedDataChecksum); + if (corruption == TestContainerCorruptions.MISSING_METADATA_DIR || + corruption == TestContainerCorruptions.MISSING_CONTAINER_DIR) { + // In these cases, the new tree will not be able to be written since it exists in the metadata directory. + // When the tree write fails, the in-memory checksum should remain at its original value. + assertEquals(initialReportedDataChecksum, newReportedDataChecksum); + assertFalse(containerChecksumFileExists(containerID)); + } else { + assertNotEquals(initialReportedDataChecksum, newReportedDataChecksum); + // Test that the scanner wrote updated checksum info to the disk. + assertTrue(containerChecksumFileExists(containerID)); + ContainerProtos.ContainerChecksumInfo updatedChecksumInfo = readChecksumFile(container.getContainerData()); + assertEquals(newReportedDataChecksum, updatedChecksumInfo.getContainerMerkleTree().getDataChecksum()); + } if (corruption == TestContainerCorruptions.TRUNCATED_BLOCK || corruption == TestContainerCorruptions.CORRUPT_BLOCK) { diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/dn/scanner/TestOnDemandContainerDataScannerIntegration.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/dn/scanner/TestOnDemandContainerDataScannerIntegration.java index 9df46a384335..d3b3ed46fdeb 100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/dn/scanner/TestOnDemandContainerDataScannerIntegration.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/dn/scanner/TestOnDemandContainerDataScannerIntegration.java @@ -19,12 +19,15 @@ import static org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos.ContainerReplicaProto.State.CLOSED; import static org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos.ContainerReplicaProto.State.UNHEALTHY; +import static org.apache.hadoop.ozone.container.checksum.ContainerMerkleTreeTestUtils.readChecksumFile; import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertFalse; import static org.junit.jupiter.api.Assertions.assertNotEquals; import static org.junit.jupiter.api.Assertions.assertTrue; import java.util.Collection; import org.apache.hadoop.hdds.conf.OzoneConfiguration; +import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos; import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos.ContainerDataProto.State; import org.apache.hadoop.ozone.container.common.interfaces.Container; import org.apache.hadoop.ozone.container.common.utils.ContainerLogger; @@ -119,6 +122,19 @@ void testCorruptionDetected(TestContainerCorruptions corruption) waitForScmToSeeReplicaState(containerID, UNHEALTHY); corruption.assertLogged(containerID, 1, logCapturer); long newReportedDataChecksum = getContainerReplica(containerID).getDataChecksum(); - assertNotEquals(initialReportedDataChecksum, newReportedDataChecksum); + + if (corruption == TestContainerCorruptions.MISSING_METADATA_DIR || + corruption == TestContainerCorruptions.MISSING_CONTAINER_DIR) { + // In these cases, the new tree will not be able to be written since it exists in the metadata directory. + // When the tree write fails, the in-memory checksum should remain at its original value. 
+ assertEquals(initialReportedDataChecksum, newReportedDataChecksum); + assertFalse(containerChecksumFileExists(containerID)); + } else { + assertNotEquals(initialReportedDataChecksum, newReportedDataChecksum); + // Test that the scanner wrote updated checksum info to the disk. + assertTrue(containerChecksumFileExists(containerID)); + ContainerProtos.ContainerChecksumInfo updatedChecksumInfo = readChecksumFile(container.getContainerData()); + assertEquals(newReportedDataChecksum, updatedChecksumInfo.getContainerMerkleTree().getDataChecksum()); + } } } From 60a1a6e2a4fea0e12156fdd6f7a918fd4d49383b Mon Sep 17 00:00:00 2001 From: Ethan Rose Date: Wed, 14 May 2025 19:27:14 -0400 Subject: [PATCH 47/62] Add unit tests that checksum update failure does not stop container state update --- .../TestBackgroundContainerDataScanner.java | 13 +++++++++++++ .../TestBackgroundContainerMetadataScanner.java | 17 +++++++++++++++++ .../TestContainerScannersAbstract.java | 7 +++++++ .../TestOnDemandContainerDataScanner.java | 13 +++++++++++++ 4 files changed, 50 insertions(+) diff --git a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/ozoneimpl/TestBackgroundContainerDataScanner.java b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/ozoneimpl/TestBackgroundContainerDataScanner.java index 84b001d9d79b..6707bf064267 100644 --- a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/ozoneimpl/TestBackgroundContainerDataScanner.java +++ b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/ozoneimpl/TestBackgroundContainerDataScanner.java @@ -24,16 +24,19 @@ import static org.junit.jupiter.api.Assertions.assertNotNull; import static org.junit.jupiter.api.Assertions.assertNull; import static org.junit.jupiter.api.Assertions.assertTrue; +import static org.mockito.ArgumentMatchers.anyLong; import static org.mockito.Mockito.any; import static org.mockito.Mockito.atLeastOnce; import static org.mockito.Mockito.atMost; import static org.mockito.Mockito.atMostOnce; +import static org.mockito.Mockito.doThrow; import static org.mockito.Mockito.eq; import static org.mockito.Mockito.never; import static org.mockito.Mockito.times; import static org.mockito.Mockito.verify; import static org.mockito.Mockito.when; +import java.io.IOException; import java.time.Duration; import java.util.Arrays; import java.util.Optional; @@ -193,6 +196,16 @@ public void testUnhealthyContainerRescanned() throws Exception { assertEquals(1, metrics.getNumUnHealthyContainers()); } + @Test + @Override + public void testChecksumUpdateFailure() throws Exception { + doThrow(new IOException("Checksum update error for testing")).when(controller) + .updateContainerChecksum(anyLong(), any()); + scanner.runIteration(); + verifyContainerMarkedUnhealthy(corruptData, atMostOnce()); + verify(corruptData.getContainerData(), atMostOnce()).setState(UNHEALTHY); + } + /** * A datanode will have one background data scanner per volume. When the * volume fails, the scanner thread should be terminated. 
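The tolerance these tests pin down comes from the ContainerScanHelper.scanData change in the ICR commit above: the checksum update is wrapped so an IOException is logged rather than propagated, and the unhealthy handling still runs. Condensed from that hunk (controller, log, and handleUnhealthyScanResult are the helper's own members):

    try {
      controller.updateContainerChecksum(containerId, result.getDataTree());
    } catch (IOException ex) {
      log.warn("Failed to update container checksum after scan of container {}", containerId, ex);
    }
    if (!result.isHealthy()) {
      handleUnhealthyScanResult(containerId, result);
    }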
diff --git a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/ozoneimpl/TestBackgroundContainerMetadataScanner.java b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/ozoneimpl/TestBackgroundContainerMetadataScanner.java
index b34950748895..3c741e3d9a7c 100644
--- a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/ozoneimpl/TestBackgroundContainerMetadataScanner.java
+++ b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/ozoneimpl/TestBackgroundContainerMetadataScanner.java
@@ -24,15 +24,18 @@
 import static org.junit.jupiter.api.Assertions.assertNotNull;
 import static org.junit.jupiter.api.Assertions.assertNull;
 import static org.junit.jupiter.api.Assertions.assertTrue;
+import static org.mockito.ArgumentMatchers.anyLong;
 import static org.mockito.ArgumentMatchers.eq;
 import static org.mockito.Mockito.any;
 import static org.mockito.Mockito.atLeastOnce;
 import static org.mockito.Mockito.atMost;
 import static org.mockito.Mockito.atMostOnce;
+import static org.mockito.Mockito.doThrow;
 import static org.mockito.Mockito.never;
 import static org.mockito.Mockito.verify;
 import static org.mockito.Mockito.when;

+import java.io.IOException;
 import java.time.Duration;
 import java.util.Optional;
 import java.util.concurrent.CountDownLatch;
@@ -163,6 +166,20 @@ public void testUnhealthyContainerRescanned() throws Exception {
     assertEquals(1, metrics.getNumUnHealthyContainers());
   }

+  /**
+   * The metadata scanner should not update the container checksum, so any errors injected here should have no
+   * effect.
+   */
+  @Test
+  @Override
+  public void testChecksumUpdateFailure() throws Exception {
+    doThrow(new IOException("Checksum update error for testing")).when(controller)
+        .updateContainerChecksum(anyLong(), any());
+    scanner.runIteration();
+    verifyContainerMarkedUnhealthy(openCorruptMetadata, atMostOnce());
+    verify(openCorruptMetadata.getContainerData(), atMostOnce()).setState(UNHEALTHY);
+  }
+
   /**
    * A datanode will have one metadata scanner thread for the whole process.
    * When a volume fails, any of the containers queued for scanning in that volume
diff --git a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/ozoneimpl/TestContainerScannersAbstract.java b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/ozoneimpl/TestContainerScannersAbstract.java
index abacf13b37eb..1a8cb2afa5a8 100644
--- a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/ozoneimpl/TestContainerScannersAbstract.java
+++ b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/ozoneimpl/TestContainerScannersAbstract.java
@@ -124,6 +124,13 @@ public abstract void testPreviouslyScannedContainerIsScanned()
   @Test
   public abstract void testUnhealthyContainerRescanned() throws Exception;

+  /**
+   * When the container checksum cannot be updated, the scan should still complete and move the container state without
+   * throwing an exception.
+ */ + @Test + public abstract void testChecksumUpdateFailure() throws Exception; + // HELPER METHODS protected void setScannedTimestampOld(Container container) { diff --git a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/ozoneimpl/TestOnDemandContainerDataScanner.java b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/ozoneimpl/TestOnDemandContainerDataScanner.java index bcdb7a173ccf..db364cb5c706 100644 --- a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/ozoneimpl/TestOnDemandContainerDataScanner.java +++ b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/ozoneimpl/TestOnDemandContainerDataScanner.java @@ -25,10 +25,12 @@ import static org.junit.jupiter.api.Assertions.assertNotNull; import static org.junit.jupiter.api.Assertions.assertNull; import static org.junit.jupiter.api.Assertions.assertTrue; +import static org.mockito.ArgumentMatchers.anyLong; import static org.mockito.Mockito.any; import static org.mockito.Mockito.atLeastOnce; import static org.mockito.Mockito.atMost; import static org.mockito.Mockito.atMostOnce; +import static org.mockito.Mockito.doThrow; import static org.mockito.Mockito.eq; import static org.mockito.Mockito.never; import static org.mockito.Mockito.times; @@ -36,6 +38,7 @@ import static org.mockito.Mockito.verifyNoInteractions; import static org.mockito.Mockito.when; +import java.io.IOException; import java.time.Duration; import java.util.ArrayList; import java.util.Arrays; @@ -283,6 +286,16 @@ public void testUnhealthyContainerRescanned() throws Exception { assertEquals(1, metrics.getNumUnHealthyContainers()); } + @Test + @Override + public void testChecksumUpdateFailure() throws Exception { + doThrow(new IOException("Checksum update error for testing")).when(controller) + .updateContainerChecksum(anyLong(), any()); + scanContainer(corruptData); + verifyContainerMarkedUnhealthy(corruptData, atMostOnce()); + verify(corruptData.getContainerData(), atMostOnce()).setState(UNHEALTHY); + } + @Test public void testMerkleTreeWritten() throws Exception { // Merkle trees should not be written for open or deleted containers From d035c17421bf2cf911b8a909843a0f0b470d6d45 Mon Sep 17 00:00:00 2001 From: Ethan Rose Date: Wed, 14 May 2025 19:40:51 -0400 Subject: [PATCH 48/62] Checkstyle --- .../hadoop/ozone/container/keyvalue/TestKeyValueHandler.java | 2 -- 1 file changed, 2 deletions(-) diff --git a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/keyvalue/TestKeyValueHandler.java b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/keyvalue/TestKeyValueHandler.java index c0c442bb780e..342a66d70cea 100644 --- a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/keyvalue/TestKeyValueHandler.java +++ b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/keyvalue/TestKeyValueHandler.java @@ -56,7 +56,6 @@ import java.util.EnumSet; import java.util.HashMap; import java.util.List; -import java.util.Optional; import java.util.Set; import java.util.UUID; import java.util.concurrent.atomic.AtomicInteger; @@ -75,7 +74,6 @@ import org.apache.hadoop.hdds.scm.pipeline.PipelineID; import org.apache.hadoop.hdds.security.token.TokenVerifier; import org.apache.hadoop.ozone.container.checksum.ContainerChecksumTreeManager; -import org.apache.hadoop.ozone.container.checksum.ContainerMerkleTreeTestUtils; import 
org.apache.hadoop.ozone.container.checksum.ContainerMerkleTreeWriter; import org.apache.hadoop.ozone.container.checksum.DNContainerOperationClient; import org.apache.hadoop.ozone.container.common.ContainerTestUtils; From 53336ae88505dee0502cbf66883ce7f534cd127f Mon Sep 17 00:00:00 2001 From: Ethan Rose Date: Thu, 15 May 2025 11:14:27 -0400 Subject: [PATCH 49/62] Fix scan gap for unit test --- .../container/ozoneimpl/ContainerScannerConfiguration.java | 4 ++++ .../TestContainerReconciliationWithMockDatanodes.java | 5 +++++ 2 files changed, 9 insertions(+) diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ozoneimpl/ContainerScannerConfiguration.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ozoneimpl/ContainerScannerConfiguration.java index 92ccd0d619e8..e8a9105131f1 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ozoneimpl/ContainerScannerConfiguration.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ozoneimpl/ContainerScannerConfiguration.java @@ -217,4 +217,8 @@ public long getOnDemandBandwidthPerVolume() { public long getContainerScanMinGap() { return containerScanMinGap; } + + public void setContainerScanMinGap(long scanGap) { + containerScanMinGap = scanGap; + } } diff --git a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/keyvalue/TestContainerReconciliationWithMockDatanodes.java b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/keyvalue/TestContainerReconciliationWithMockDatanodes.java index 59b9ac2628be..858f7df4c385 100644 --- a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/keyvalue/TestContainerReconciliationWithMockDatanodes.java +++ b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/keyvalue/TestContainerReconciliationWithMockDatanodes.java @@ -312,6 +312,11 @@ private static class MockDatanode { this.conf = new OzoneConfiguration(); conf.set(HDDS_DATANODE_DIR_KEY, dataVolume.toString()); conf.set(OZONE_METADATA_DIRS, metadataVolume.toString()); + // This test triggers its own on-demand scans after reconciliation to retrieve the results. Scan gap must be + // disabled so that these checks run in addition to the on-demand scans triggered from inside reconciliation. 
+ ContainerScannerConfiguration scanConf = conf.getObject(ContainerScannerConfiguration.class); + scanConf.setContainerScanMinGap(0); + conf.setFromObject(scanConf); containerSet = new ContainerSet(1000); MutableVolumeSet volumeSet = createVolumeSet(); From 250463851f93b606e8246038f0d6b30fddcd412a Mon Sep 17 00:00:00 2001 From: Ethan Rose Date: Fri, 16 May 2025 19:26:32 -0400 Subject: [PATCH 50/62] Fix metadata scan test --- .../ozone/container/ozoneimpl/OzoneContainer.java | 3 +-- ...BackgroundContainerMetadataScannerIntegration.java | 11 ++++++++++- 2 files changed, 11 insertions(+), 3 deletions(-) diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ozoneimpl/OzoneContainer.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ozoneimpl/OzoneContainer.java index eda97641e82c..a79d85e1e305 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ozoneimpl/OzoneContainer.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ozoneimpl/OzoneContainer.java @@ -383,8 +383,6 @@ private void startContainerScrub() { return; } - initOnDemandContainerScanner(c); - backgroundScanners = new LinkedList<>(); // This config is for testing the scanners in isolation. if (c.isMetadataScanEnabled()) { @@ -394,6 +392,7 @@ private void startContainerScrub() { // This config is for testing the scanners in isolation. if (c.isDataScanEnabled()) { initContainerScanner(c); + initOnDemandContainerScanner(c); } } diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/dn/scanner/TestBackgroundContainerMetadataScannerIntegration.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/dn/scanner/TestBackgroundContainerMetadataScannerIntegration.java index 1672609a74ba..93085d4980c6 100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/dn/scanner/TestBackgroundContainerMetadataScannerIntegration.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/dn/scanner/TestBackgroundContainerMetadataScannerIntegration.java @@ -19,6 +19,8 @@ import static org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos.ContainerReplicaProto.State.CLOSED; import static org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos.ContainerReplicaProto.State.UNHEALTHY; +import static org.apache.hadoop.ozone.container.keyvalue.TestContainerCorruptions.MISSING_CONTAINER_DIR; +import static org.apache.hadoop.ozone.container.keyvalue.TestContainerCorruptions.MISSING_METADATA_DIR; import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertNotEquals; import static org.junit.jupiter.api.Assertions.assertTrue; @@ -122,7 +124,14 @@ void testCorruptionDetected(TestContainerCorruptions corruption) waitForScmToSeeReplicaState(closedContainerID, UNHEALTHY); assertEquals(initialClosedChecksum, getContainerReplica(closedContainerID).getDataChecksum()); waitForScmToSeeReplicaState(openContainerID, UNHEALTHY); - assertEquals(0, getContainerReplica(openContainerID).getDataChecksum()); + if (corruption == MISSING_METADATA_DIR || corruption == MISSING_CONTAINER_DIR) { + // In these cases the tree cannot be generated when the container is marked unhealthy and the checksum should + // remain at 0. 
+ assertEquals(0, getContainerReplica(openContainerID).getDataChecksum()); + } else { + // The checksum will be generated for the first time when the container is marked unhealthy. + assertNotEquals(0, getContainerReplica(openContainerID).getDataChecksum()); + } // Once the unhealthy replica is reported, the open container's lifecycle // state in SCM should move to closed. From 4be9992f95f8c3e80cbd7820541edb8bb8ca6e61 Mon Sep 17 00:00:00 2001 From: Ethan Rose Date: Mon, 19 May 2025 18:28:01 -0400 Subject: [PATCH 51/62] Update based on review --- .../ozone/container/common/interfaces/Handler.java | 5 +++-- .../container/keyvalue/TestKeyValueHandler.java | 14 -------------- ...kgroundContainerMetadataScannerIntegration.java | 4 ++++ 3 files changed, 7 insertions(+), 16 deletions(-) diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/interfaces/Handler.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/interfaces/Handler.java index 91d6b1d3e82d..d998d0f7bd91 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/interfaces/Handler.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/interfaces/Handler.java @@ -156,12 +156,13 @@ public abstract void markContainerForClose(Container container) throws IOException; /** - * Updates the container checksum information on disk and in memory. + * Updates the container checksum information on disk and in memory and sends an ICR if the container checksum was + * changed from its previous value. * * @param container The container to update * @param treeWriter The container merkle tree with the updated information about the container * @throws IOException For errors sending an ICR or updating the container checksum on disk. If the disk update - * fails, the checksum in memory will not be updated. + * fails, the checksum in memory will not be updated and an ICR will not be sent. 
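+   *
+   * In sketch form, the intended ordering is roughly (method names below are illustrative, not the real helpers):
+   * <pre>
+   *   long newChecksum = writeTreeToDisk(container, treeWriter);  // throws IOException; nothing below runs
+   *   long oldChecksum = container.getContainerData().getDataChecksum();
+   *   container.getContainerData().setDataChecksum(newChecksum);
+   *   if (oldChecksum != newChecksum) {
+   *     sendICR(container);  // only sent when the checksum value actually changed
+   *   }
+   * </pre>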
*/ public abstract void updateContainerChecksum(Container container, ContainerMerkleTreeWriter treeWriter) throws IOException; diff --git a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/keyvalue/TestKeyValueHandler.java b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/keyvalue/TestKeyValueHandler.java index bed5ffcb5147..c2ae2fb92d8b 100644 --- a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/keyvalue/TestKeyValueHandler.java +++ b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/keyvalue/TestKeyValueHandler.java @@ -795,20 +795,6 @@ private static ContainerCommandRequestProto createContainerRequest( .build(); } - private KeyValueContainer addContainer(KeyValueHandler keyValueHandler, ContainerLayoutVersion layoutVersion) { - KeyValueContainerData kvData = new KeyValueContainerData(DUMMY_CONTAINER_ID, - layoutVersion, - (long) StorageUnit.GB.toBytes(1), UUID.randomUUID().toString(), - UUID.randomUUID().toString()); - kvData.setMetadataPath(tempDir.toString()); - kvData.setDbFile(dbFile.toFile()); - KeyValueContainer container = new KeyValueContainer(kvData, conf); - ContainerCommandRequestProto createContainerRequest = - createContainerRequest(DATANODE_UUID, DUMMY_CONTAINER_ID); - keyValueHandler.handleCreateContainer(createContainerRequest, container); - return container; - } - private KeyValueHandler createKeyValueHandler(Path path) throws IOException { final ContainerSet containerSet = newContainerSet(); final MutableVolumeSet volumeSet = mock(MutableVolumeSet.class); diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/dn/scanner/TestBackgroundContainerMetadataScannerIntegration.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/dn/scanner/TestBackgroundContainerMetadataScannerIntegration.java index 93085d4980c6..b25df7e11369 100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/dn/scanner/TestBackgroundContainerMetadataScannerIntegration.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/dn/scanner/TestBackgroundContainerMetadataScannerIntegration.java @@ -18,6 +18,7 @@ package org.apache.hadoop.ozone.dn.scanner; import static org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos.ContainerReplicaProto.State.CLOSED; +import static org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos.ContainerReplicaProto.State.OPEN; import static org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos.ContainerReplicaProto.State.UNHEALTHY; import static org.apache.hadoop.ozone.container.keyvalue.TestContainerCorruptions.MISSING_CONTAINER_DIR; import static org.apache.hadoop.ozone.container.keyvalue.TestContainerCorruptions.MISSING_METADATA_DIR; @@ -103,6 +104,7 @@ void testCorruptionDetected(TestContainerCorruptions corruption) long openContainerID = writeDataToOpenContainer(); Container openContainer = getDnContainer(openContainerID); assertEquals(State.OPEN, openContainer.getContainerState()); + waitForScmToSeeReplicaState(openContainerID, OPEN); // Open containers should not yet have a checksum generated. 
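     // A data checksum of zero is a placeholder meaning no checksum has been computed for this replica yet.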
assertEquals(0, getContainerReplica(openContainerID).getDataChecksum());
@@ -127,9 +129,11 @@ void testCorruptionDetected(TestContainerCorruptions corruption)
     if (corruption == MISSING_METADATA_DIR || corruption == MISSING_CONTAINER_DIR) {
       // In these cases the tree cannot be generated when the container is marked unhealthy and the checksum should
       // remain at 0.
+      // The tree is generated from container metadata when the container transitions to unhealthy, not by the
+      // metadata scanner itself.
       assertEquals(0, getContainerReplica(openContainerID).getDataChecksum());
     } else {
       // The checksum will be generated for the first time when the container is marked unhealthy.
+      // The tree is generated from container metadata when the container transitions to unhealthy, not by the
+      // metadata scanner itself.
       assertNotEquals(0, getContainerReplica(openContainerID).getDataChecksum());
     }

From c0b89dd6bbc0ec8b222d7f1c8fee29e959cf2228 Mon Sep 17 00:00:00 2001
From: Ethan Rose
Date: Mon, 19 May 2025 18:36:29 -0400
Subject: [PATCH 52/62] pmd

---
 .../ozone/container/ozoneimpl/ContainerScanError.java | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ozoneimpl/ContainerScanError.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ozoneimpl/ContainerScanError.java
index 08660fec48dc..a5dfe5bb8e21 100644
--- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ozoneimpl/ContainerScanError.java
+++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ozoneimpl/ContainerScanError.java
@@ -23,6 +23,11 @@
  * This class is used to identify any error that may be seen while scanning a container.
  */
 public class ContainerScanError {
+
+  private final File unhealthyFile;
+  private final FailureType failureType;
+  private final Throwable exception;
+
   /**
    * Represents the reason a container scan failed and a container should
    * be marked unhealthy.
@@ -41,10 +46,6 @@ public enum FailureType {
     WRITE_FAILURE,
   }

-  private final File unhealthyFile;
-  private final FailureType failureType;
-  private final Throwable exception;
-
   public ContainerScanError(FailureType failure, File unhealthyFile, Exception exception) {
     this.unhealthyFile = unhealthyFile;
     this.failureType = failure;

From e24a24eb445cc0eb0c0baac9ccea5a746966a805 Mon Sep 17 00:00:00 2001
From: Ethan Rose
Date: Thu, 22 May 2025 17:12:17 -0400
Subject: [PATCH 53/62] Update ContainerData checksum info after reconciling
 with each peer

Still save ICR for the end of reconciliation
---
 .../apache/hadoop/ozone/container/keyvalue/KeyValueHandler.java | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/KeyValueHandler.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/KeyValueHandler.java
index 32af85c82566..0fba5593a4fb 100644
--- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/KeyValueHandler.java
+++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/KeyValueHandler.java
@@ -1668,7 +1668,7 @@ public void reconcileContainer(DNContainerOperationClient dnClient, Container
       // Based on the repairs done with this peer, write the updated merkle tree to the container.
       // This updated tree will be used when we reconcile with the next peer.
ContainerProtos.ContainerChecksumInfo previousChecksumInfo = latestChecksumInfo;
-      latestChecksumInfo = checksumManager.writeContainerDataTree(containerData, updatedTreeWriter);
+      latestChecksumInfo = updateAndGetContainerChecksum(container, updatedTreeWriter, false);

       // Log the results of reconciliation with this peer.
       long duration = Instant.now().toEpochMilli() - start;

From dc27f74c19ce3309e1fd7b681d7a4e7c15a788a9 Mon Sep 17 00:00:00 2001
From: Ethan Rose
Date: Thu, 22 May 2025 18:47:55 -0400
Subject: [PATCH 54/62] Support bypassing scan gap (tests are failing)

---
 .../container/common/impl/ContainerSet.java   | 33 ++++++++++++-----
 .../container/keyvalue/KeyValueHandler.java   |  2 +-
 .../BackgroundContainerDataScanner.java       |  2 +-
 .../BackgroundContainerMetadataScanner.java   |  2 +-
 .../ozoneimpl/ContainerScanHelper.java        | 16 +++++++--
 .../OnDemandContainerDataScanner.java         | 24 +++++++++++--
 .../container/ozoneimpl/OzoneContainer.java   |  2 +-
 .../common/impl/TestContainerSet.java         | 35 ++++++++++++++++++-
 ...tainerReconciliationWithMockDatanodes.java | 15 +++-----
 .../keyvalue/TestKeyValueHandler.java         |  2 +-
 .../TestOnDemandContainerDataScanner.java     | 11 ++++++
 11 files changed, 115 insertions(+), 29 deletions(-)

diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/impl/ContainerSet.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/impl/ContainerSet.java
index 7e95518e2cf0..00bd28a5bc69 100644
--- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/impl/ContainerSet.java
+++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/impl/ContainerSet.java
@@ -49,6 +49,7 @@
 import org.apache.hadoop.ozone.container.common.statemachine.StateContext;
 import org.apache.hadoop.ozone.container.common.utils.ContainerLogger;
 import org.apache.hadoop.ozone.container.common.volume.HddsVolume;
+import org.apache.hadoop.ozone.container.ozoneimpl.OnDemandContainerDataScanner;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;

@@ -69,7 +70,7 @@ public class ContainerSet implements Iterable<Container<?>> {
   private long recoveringTimeout;
   private final Table containerIdsTable;
   // Handler that will be invoked when a scan of a container in this set is requested.
-  private Consumer<Container<?>> containerScanHandler;
+  private OnDemandContainerDataScanner containerScanner;

   public static ContainerSet newReadOnlyContainerSet(long recoveringTimeout) {
     return new ContainerSet(null, recoveringTimeout);
@@ -129,22 +130,38 @@ public void ensureContainerNotMissing(long containerId, State state) throws Stor

   /**
-   * @param scanner A callback that will be invoked when a scan of a container in this set is requested.
+   * @param scanner The scanner instance that will be invoked when a scan of a container in this set is requested.
    */
-  public void registerContainerScanHandler(Consumer<Container<?>> scanner) {
-    this.containerScanHandler = scanner;
+  public void registerOnDemandScanner(OnDemandContainerDataScanner scanner) {
+    this.containerScanner = scanner;
   }

   /**
-   * Triggers a scan of a container in this set using the registered scan handler. This is a no-op if no scan handler
-   * is registered or the container does not exist in the set.
+   * Triggers a scan of a container in this set. This is a no-op if no scanner is registered or the container does not
+   * exist in the set.
    * @param containerID The container in this set to scan.
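+   *
+   * A component that has just changed a container's contents can, for example, request a re-scan with
+   * {@code containerSet.scanContainer(containerID)}; whether the scan actually runs is left to the registered
+   * on-demand scanner and its minimum scan gap.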
*/ public void scanContainer(long containerID) { - if (containerScanHandler != null) { + if (containerScanner != null) { Container container = getContainer(containerID); if (container != null) { - containerScanHandler.accept(container); + containerScanner.scanContainer(container); + } else { + LOG.warn("Request to scan container {} which was not found in the container set", containerID); + } + } + } + + /** + * Triggers a scan of a container in this set regardless of whether it was recently scanned. + * This is a no-op if no scanner is registered or the container does not exist in the set. + * @param containerID The container in this set to scan. + */ + public void scanContainerWithoutGap(long containerID) { + if (containerScanner != null) { + Container container = getContainer(containerID); + if (container != null) { + containerScanner.scanContainerWithoutGap(container); } else { LOG.warn("Request to scan container {} which was not found in the container set", containerID); } diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/KeyValueHandler.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/KeyValueHandler.java index 0fba5593a4fb..b956e6f50081 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/KeyValueHandler.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/KeyValueHandler.java @@ -1715,7 +1715,7 @@ containerID, peer, checksumToString(previousDataChecksum), checksumToString(late } // Trigger on demand scanner, which will build the merkle tree based on the newly ingested data. - containerSet.scanContainer(containerID); + containerSet.scanContainerWithoutGap(containerID); sendICR(container); } diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ozoneimpl/BackgroundContainerDataScanner.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ozoneimpl/BackgroundContainerDataScanner.java index 865641586503..8b1da664159e 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ozoneimpl/BackgroundContainerDataScanner.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ozoneimpl/BackgroundContainerDataScanner.java @@ -56,7 +56,7 @@ public BackgroundContainerDataScanner(ContainerScannerConfiguration conf, canceler = new Canceler(); this.metrics = ContainerDataScannerMetrics.create(volume.toString()); this.metrics.setStorageDirectory(volume.toString()); - this.scanHelper = new ContainerScanHelper(LOG, controller, metrics, conf); + this.scanHelper = ContainerScanHelper.withScanGap(LOG, controller, metrics, conf); } @Override diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ozoneimpl/BackgroundContainerMetadataScanner.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ozoneimpl/BackgroundContainerMetadataScanner.java index d2cac0b46224..02c786fed749 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ozoneimpl/BackgroundContainerMetadataScanner.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ozoneimpl/BackgroundContainerMetadataScanner.java @@ -42,7 +42,7 @@ public BackgroundContainerMetadataScanner(ContainerScannerConfiguration conf, super("ContainerMetadataScanner", conf.getMetadataScanInterval()); this.controller = 
controller; this.metrics = ContainerMetadataScannerMetrics.create(); - this.scanHelper = new ContainerScanHelper(LOG, controller, metrics, conf); + this.scanHelper = ContainerScanHelper.withScanGap(LOG, controller, metrics, conf); } @Override diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ozoneimpl/ContainerScanHelper.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ozoneimpl/ContainerScanHelper.java index c7acbd7de95c..6bb2975c11fb 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ozoneimpl/ContainerScanHelper.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ozoneimpl/ContainerScanHelper.java @@ -38,12 +38,22 @@ public class ContainerScanHelper { private final AbstractContainerScannerMetrics metrics; private final long minScanGap; - public ContainerScanHelper(Logger log, ContainerController controller, - AbstractContainerScannerMetrics metrics, ContainerScannerConfiguration conf) { + public static ContainerScanHelper withoutScanGap(Logger log, ContainerController controller, + AbstractContainerScannerMetrics metrics) { + return new ContainerScanHelper(log, controller, metrics, 0); + } + + public static ContainerScanHelper withScanGap(Logger log, ContainerController controller, + AbstractContainerScannerMetrics metrics, ContainerScannerConfiguration conf) { + return new ContainerScanHelper(log, controller, metrics, conf.getContainerScanMinGap()); + } + + private ContainerScanHelper(Logger log, ContainerController controller, + AbstractContainerScannerMetrics metrics, long minScanGap) { this.log = log; this.controller = controller; this.metrics = metrics; - this.minScanGap = conf.getContainerScanMinGap(); + this.minScanGap = minScanGap; } public void scanData(Container container, DataTransferThrottler throttler, Canceler canceler) diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ozoneimpl/OnDemandContainerDataScanner.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ozoneimpl/OnDemandContainerDataScanner.java index bd26224b68d0..de4c6226ac4f 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ozoneimpl/OnDemandContainerDataScanner.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ozoneimpl/OnDemandContainerDataScanner.java @@ -44,6 +44,7 @@ public final class OnDemandContainerDataScanner { .KeySetView containerRescheduleCheckSet; private final OnDemandScannerMetrics metrics; private final ContainerScanHelper scannerHelper; + private final ContainerScanHelper scannerHelperWithoutGap; public OnDemandContainerDataScanner( ContainerScannerConfiguration conf, ContainerController controller) { @@ -53,11 +54,30 @@ public OnDemandContainerDataScanner( metrics = OnDemandScannerMetrics.create(); scanExecutor = Executors.newSingleThreadExecutor(); containerRescheduleCheckSet = ConcurrentHashMap.newKeySet(); - this.scannerHelper = new ContainerScanHelper(LOG, controller, metrics, conf); + this.scannerHelper = ContainerScanHelper.withScanGap(LOG, controller, metrics, conf); + this.scannerHelperWithoutGap = ContainerScanHelper.withoutScanGap(LOG, controller, metrics); } + /** + * Triggers an on-demand scan of this container. + * @return An Optional containing a Future representing the pending scan task if the task is queued. 
+ * The optional is empty if the task is not queued due to an ongoing scan. + */ public Optional> scanContainer(Container container) { - if (!scannerHelper.shouldScanData(container)) { + return scanContainer(container, scannerHelper); + } + + /** + * Triggers an on-demand scan of this container regardless of whether it was recently scanned. + * @return An Optional containing a Future representing the pending scan task if the task is queued. + * The optional is empty if the task is not queued due to an ongoing scan. + */ + public Optional> scanContainerWithoutGap(Container container) { + return scanContainer(container, scannerHelperWithoutGap); + } + + private Optional> scanContainer(Container container, ContainerScanHelper helper) { + if (!helper.shouldScanData(container)) { return Optional.empty(); } diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ozoneimpl/OzoneContainer.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ozoneimpl/OzoneContainer.java index a79d85e1e305..e8a25aae1da6 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ozoneimpl/OzoneContainer.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ozoneimpl/OzoneContainer.java @@ -440,7 +440,7 @@ private void initOnDemandContainerScanner(ContainerScannerConfiguration c) { return; } onDemandScanner = new OnDemandContainerDataScanner(c, controller); - containerSet.registerContainerScanHandler(onDemandScanner::scanContainer); + containerSet.registerOnDemandScanner(onDemandScanner); } /** diff --git a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/impl/TestContainerSet.java b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/impl/TestContainerSet.java index c5b8e41b69b4..10c897b1f54c 100644 --- a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/impl/TestContainerSet.java +++ b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/impl/TestContainerSet.java @@ -26,6 +26,7 @@ import static org.junit.jupiter.api.Assertions.assertThrows; import static org.junit.jupiter.api.Assertions.assertTrue; import static org.junit.jupiter.api.Assertions.fail; +import static org.mockito.ArgumentMatchers.any; import static org.mockito.Mockito.mock; import static org.mockito.Mockito.when; @@ -50,6 +51,7 @@ import org.apache.hadoop.ozone.container.keyvalue.ContainerLayoutTestInfo; import org.apache.hadoop.ozone.container.keyvalue.KeyValueContainer; import org.apache.hadoop.ozone.container.keyvalue.KeyValueContainerData; +import org.apache.hadoop.ozone.container.ozoneimpl.OnDemandContainerDataScanner; /** * Class used to test ContainerSet operations. @@ -294,11 +296,15 @@ public void testContainerScanHandler(ContainerLayoutVersion layout) throws Excep containerSet.scanContainer(FIRST_ID); AtomicLong invocationCount = new AtomicLong(); - containerSet.registerContainerScanHandler(c -> { + OnDemandContainerDataScanner mockScanner = mock(OnDemandContainerDataScanner.class); + when(mockScanner.scanContainer(any())).then(inv -> { + KeyValueContainer c = inv.getArgument(0); // If the handler was incorrectly triggered for a non-existent container, this assert would fail. 
assertEquals(FIRST_ID, c.getContainerData().getContainerID()); invocationCount.getAndIncrement(); + return null; }); + containerSet.registerOnDemandScanner(mockScanner); // Scan of an existing container when a handler is registered should trigger a scan. containerSet.scanContainer(FIRST_ID); @@ -309,6 +315,33 @@ public void testContainerScanHandler(ContainerLayoutVersion layout) throws Excep assertEquals(1, invocationCount.get()); } + @ContainerLayoutTestInfo.ContainerTest + public void testContainerScanHandlerWithoutGap(ContainerLayoutVersion layout) throws Exception { + setLayoutVersion(layout); + ContainerSet containerSet = createContainerSet(); + // Scan when no handler is registered should not throw an exception. + containerSet.scanContainer(FIRST_ID); + + AtomicLong invocationCount = new AtomicLong(); + OnDemandContainerDataScanner mockScanner = mock(OnDemandContainerDataScanner.class); + when(mockScanner.scanContainerWithoutGap(any())).then(inv -> { + KeyValueContainer c = inv.getArgument(0); + // If the handler was incorrectly triggered for a non-existent container, this assert would fail. + assertEquals(FIRST_ID, c.getContainerData().getContainerID()); + invocationCount.getAndIncrement(); + return null; + }); + containerSet.registerOnDemandScanner(mockScanner); + + // Scan of an existing container when a handler is registered should trigger a scan. + containerSet.scanContainerWithoutGap(FIRST_ID); + assertEquals(1, invocationCount.get()); + + // Scan of non-existent container should not throw exception or trigger an additional invocation. + containerSet.scanContainerWithoutGap(FIRST_ID - 1); + assertEquals(1, invocationCount.get()); + } + /** * Verify that {@code result} contains {@code count} containers * with IDs in increasing order starting at {@code startId}. diff --git a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/keyvalue/TestContainerReconciliationWithMockDatanodes.java b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/keyvalue/TestContainerReconciliationWithMockDatanodes.java index 2566f8c71c93..e95fc3ab3ec3 100644 --- a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/keyvalue/TestContainerReconciliationWithMockDatanodes.java +++ b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/keyvalue/TestContainerReconciliationWithMockDatanodes.java @@ -225,13 +225,13 @@ public void testContainerReconciliation(int numBlocksToDelete, int numChunksToCo * Uses the on-demand container scanner metrics to wait for the expected number of on-demand scans to complete on * every datanode. 
*/ - private void waitForExpectedScanCount(int expectedCount) throws Exception { + private void waitForExpectedScanCount(int expectedCountPerDatanode) throws Exception { for (MockDatanode datanode: datanodes) { try { - GenericTestUtils.waitFor(() -> datanode.getOnDemandScanCount() == expectedCount, 100, 10_000); + GenericTestUtils.waitFor(() -> datanode.getOnDemandScanCount() == expectedCountPerDatanode, 100, 10_000); } catch (TimeoutException ex) { LOG.error("Timed out waiting for on-demand scan count {} to reach expected count {} on datanode {}", - datanode.getOnDemandScanCount(), expectedCount, datanode); + datanode.getOnDemandScanCount(), expectedCountPerDatanode, datanode); throw ex; } } @@ -314,11 +314,6 @@ private static class MockDatanode { this.conf = new OzoneConfiguration(); conf.set(HDDS_DATANODE_DIR_KEY, dataVolume.toString()); conf.set(OZONE_METADATA_DIRS, metadataVolume.toString()); - // This test triggers its own on-demand scans after reconciliation to retrieve the results. Scan gap must be - // disabled so that these checks run in addition to the on-demand scans triggered from inside reconciliation. - ContainerScannerConfiguration scanConf = conf.getObject(ContainerScannerConfiguration.class); - scanConf.setContainerScanMinGap(0); - conf.setFromObject(scanConf); containerSet = newContainerSet(); MutableVolumeSet volumeSet = createVolumeSet(); @@ -330,7 +325,7 @@ private static class MockDatanode { onDemandScanner = new OnDemandContainerDataScanner( conf.getObject(ContainerScannerConfiguration.class), controller); // Register the on-demand container scanner with the container set used by the KeyValueHandler. - containerSet.registerContainerScanHandler(onDemandScanner::scanContainer); + containerSet.registerOnDemandScanner(onDemandScanner); } public DatanodeDetails getDnDetails() { @@ -425,7 +420,7 @@ public KeyValueContainer getContainer(long containerID) { * Triggers a synchronous scan of the container. This method will block until the scan completes. 
*/ public void scanContainer(long containerID) { - Optional> scanFuture = onDemandScanner.scanContainer(containerSet.getContainer(containerID)); + Optional> scanFuture = onDemandScanner.scanContainerWithoutGap(containerSet.getContainer(containerID)); assertTrue(scanFuture.isPresent()); try { diff --git a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/keyvalue/TestKeyValueHandler.java b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/keyvalue/TestKeyValueHandler.java index c2ae2fb92d8b..503d8c0855d8 100644 --- a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/keyvalue/TestKeyValueHandler.java +++ b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/keyvalue/TestKeyValueHandler.java @@ -820,7 +820,7 @@ private KeyValueHandler createKeyValueHandler(Path path) throws IOException { Collections.singletonMap(ContainerType.KeyValueContainer, kvHandler)); OnDemandContainerDataScanner onDemandScanner = new OnDemandContainerDataScanner( conf.getObject(ContainerScannerConfiguration.class), controller); - containerSet.registerContainerScanHandler(onDemandScanner::scanContainer); + containerSet.registerOnDemandScanner(onDemandScanner); return kvHandler; } diff --git a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/ozoneimpl/TestOnDemandContainerDataScanner.java b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/ozoneimpl/TestOnDemandContainerDataScanner.java index 12812583e6f1..ec5ebfc32497 100644 --- a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/ozoneimpl/TestOnDemandContainerDataScanner.java +++ b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/ozoneimpl/TestOnDemandContainerDataScanner.java @@ -84,6 +84,17 @@ public void testRecentlyScannedContainerIsSkipped() throws Exception { verify(healthy, never()).scanData(any(), any()); } + @Test + public void testBypassScanGap() throws Exception { + setScannedTimestampRecent(healthy); + + Optional> scanFuture = onDemandScanner.scanContainerWithoutGap(healthy); + if (scanFuture.isPresent()) { + scanFuture.get().get(); + } + verify(healthy, times(1)).scanData(any(), any()); + } + @Test @Override public void testPreviouslyScannedContainerIsScanned() throws Exception { From e2974b40a448e4c194830f6b1c61959ce0f7c72e Mon Sep 17 00:00:00 2001 From: Ethan Rose Date: Tue, 27 May 2025 11:40:29 -0400 Subject: [PATCH 55/62] Checkstyle --- .../apache/hadoop/ozone/container/common/impl/ContainerSet.java | 1 - .../hadoop/ozone/container/ozoneimpl/ContainerScanHelper.java | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/impl/ContainerSet.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/impl/ContainerSet.java index 00bd28a5bc69..e03e61605eb1 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/impl/ContainerSet.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/impl/ContainerSet.java @@ -37,7 +37,6 @@ import java.util.concurrent.ConcurrentSkipListSet; import java.util.concurrent.atomic.AtomicBoolean; import java.util.concurrent.atomic.AtomicInteger; -import java.util.function.Consumer; import java.util.function.ToLongFunction; import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos; import 
org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos.ContainerDataProto.State; diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ozoneimpl/ContainerScanHelper.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ozoneimpl/ContainerScanHelper.java index 6bb2975c11fb..9e04b7df157d 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ozoneimpl/ContainerScanHelper.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ozoneimpl/ContainerScanHelper.java @@ -32,7 +32,7 @@ /** * Mixin to handle common data and metadata scan operations among background and on-demand scanners. */ -public class ContainerScanHelper { +public final class ContainerScanHelper { private final Logger log; private final ContainerController controller; private final AbstractContainerScannerMetrics metrics; From 34b4b9a1b04e0d456574a2a56a34613f6af14202 Mon Sep 17 00:00:00 2001 From: Ethan Rose Date: Tue, 27 May 2025 12:01:07 -0400 Subject: [PATCH 56/62] Fix scan gap bug. All tests expected to pass --- .../container/ozoneimpl/OnDemandContainerDataScanner.java | 6 +++--- .../ozoneimpl/TestOnDemandContainerDataScanner.java | 8 ++++---- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ozoneimpl/OnDemandContainerDataScanner.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ozoneimpl/OnDemandContainerDataScanner.java index de4c6226ac4f..a85358406bd1 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ozoneimpl/OnDemandContainerDataScanner.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ozoneimpl/OnDemandContainerDataScanner.java @@ -85,7 +85,7 @@ private Optional> scanContainer(Container container, ContainerScanH long containerId = container.getContainerData().getContainerID(); if (addContainerToScheduledContainers(containerId)) { resultFuture = scanExecutor.submit(() -> { - performOnDemandScan(container); + performOnDemandScan(container, helper); removeContainerFromScheduledContainers(containerId); }); } @@ -101,9 +101,9 @@ private void removeContainerFromScheduledContainers( containerRescheduleCheckSet.remove(containerId); } - private void performOnDemandScan(Container container) { + private void performOnDemandScan(Container container, ContainerScanHelper helper) { try { - scannerHelper.scanData(container, throttler, canceler); + helper.scanData(container, throttler, canceler); } catch (IOException e) { LOG.warn("Unexpected exception while scanning container " + container.getContainerData().getContainerID(), e); diff --git a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/ozoneimpl/TestOnDemandContainerDataScanner.java b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/ozoneimpl/TestOnDemandContainerDataScanner.java index ec5ebfc32497..42b3da8050d6 100644 --- a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/ozoneimpl/TestOnDemandContainerDataScanner.java +++ b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/ozoneimpl/TestOnDemandContainerDataScanner.java @@ -88,10 +88,10 @@ public void testRecentlyScannedContainerIsSkipped() throws Exception { public void testBypassScanGap() throws Exception { setScannedTimestampRecent(healthy); - Optional> scanFuture = 
onDemandScanner.scanContainerWithoutGap(healthy); - if (scanFuture.isPresent()) { - scanFuture.get().get(); - } + Optional> scanFutureOptional = onDemandScanner.scanContainerWithoutGap(healthy); + assertTrue(scanFutureOptional.isPresent()); + Future scanFuture = scanFutureOptional.get(); + scanFuture.get(); verify(healthy, times(1)).scanData(any(), any()); } From 5fda700d6c88374d4a91c980d7bfc51f9b5e487f Mon Sep 17 00:00:00 2001 From: Ethan Rose Date: Mon, 2 Jun 2025 16:25:34 -0700 Subject: [PATCH 57/62] Fix scan gap call --- .../dn/checksum/TestContainerCommandReconciliation.java | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/dn/checksum/TestContainerCommandReconciliation.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/dn/checksum/TestContainerCommandReconciliation.java index 633d4dcfaa31..74afdf68ed39 100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/dn/checksum/TestContainerCommandReconciliation.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/dn/checksum/TestContainerCommandReconciliation.java @@ -368,7 +368,7 @@ public void testContainerChecksumWithBlockMissing() throws Exception { db.getStore().flushDB(); } - datanodeStateMachine.getContainer().getContainerSet().scanContainer(containerID); + datanodeStateMachine.getContainer().getContainerSet().scanContainerWithoutGap(containerID); waitForDataChecksumsAtSCM(containerID, 2); ContainerProtos.ContainerChecksumInfo containerChecksumAfterBlockDelete = readChecksumFile(container.getContainerData()); @@ -445,7 +445,7 @@ public void testContainerChecksumChunkCorruption() throws Exception { db.getStore().flushDB(); } - datanodeStateMachine.getContainer().getContainerSet().scanContainer(containerID); + datanodeStateMachine.getContainer().getContainerSet().scanContainerWithoutGap(containerID); waitForDataChecksumsAtSCM(containerID, 2); ContainerProtos.ContainerChecksumInfo containerChecksumAfterChunkCorruption = readChecksumFile(container.getContainerData()); @@ -515,7 +515,7 @@ public void testDataChecksumReportedAtSCM() throws Exception { db.getStore().flushDB(); } - datanodeStateMachine.getContainer().getContainerSet().scanContainer(containerID); + datanodeStateMachine.getContainer().getContainerSet().scanContainerWithoutGap(containerID); waitForDataChecksumsAtSCM(containerID, 2); ContainerProtos.ContainerChecksumInfo containerChecksumAfterBlockDelete = readChecksumFile(container.getContainerData()); From de6a757a30aa0cc5ef5ba2da4cf2afa6f3475a69 Mon Sep 17 00:00:00 2001 From: Ethan Rose Date: Tue, 3 Jun 2025 10:14:15 -0700 Subject: [PATCH 58/62] Use temp dir for test, fix space overflow in CI --- .../keyvalue/impl/TestFilePerBlockStrategy.java | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/keyvalue/impl/TestFilePerBlockStrategy.java b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/keyvalue/impl/TestFilePerBlockStrategy.java index 6265ac7d3be9..243fe218c5e8 100644 --- a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/keyvalue/impl/TestFilePerBlockStrategy.java +++ b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/keyvalue/impl/TestFilePerBlockStrategy.java @@ -18,6 +18,8 @@ package org.apache.hadoop.ozone.container.keyvalue.impl; import static 
java.nio.charset.StandardCharsets.UTF_8; +import static org.apache.hadoop.hdds.HddsConfigKeys.OZONE_METADATA_DIRS; +import static org.apache.hadoop.hdds.scm.ScmConfigKeys.HDDS_DATANODE_DIR_KEY; import static org.apache.hadoop.ozone.container.ContainerTestHelper.getChunk; import static org.apache.hadoop.ozone.container.ContainerTestHelper.setDataChecksum; import static org.apache.hadoop.ozone.container.common.ContainerTestUtils.WRITE_STAGE; @@ -25,8 +27,11 @@ import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertThrows; +import java.io.File; import java.io.IOException; import java.nio.ByteBuffer; +import java.nio.file.Path; +import java.nio.file.Paths; import java.security.MessageDigest; import java.util.ArrayList; import java.util.List; @@ -57,6 +62,7 @@ import org.apache.hadoop.ozone.container.keyvalue.interfaces.ChunkManager; import org.junit.jupiter.api.Assertions; import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.io.TempDir; import org.junit.jupiter.params.ParameterizedTest; import org.junit.jupiter.params.provider.Arguments; import org.junit.jupiter.params.provider.MethodSource; @@ -66,6 +72,9 @@ */ public class TestFilePerBlockStrategy extends CommonChunkManagerTestCases { + @TempDir + private File tempDir; + @Test public void testDeletePartialChunkWithOffsetUnsupportedRequest() { // GIVEN @@ -313,6 +322,10 @@ public KeyValueHandler createKeyValueHandler(ContainerSet containerSet) throws IOException { OzoneConfiguration conf = new OzoneConfiguration(); String dnUuid = UUID.randomUUID().toString(); + Path dataVolume = Paths.get(tempDir.toString(), "data"); + Path metadataVolume = Paths.get(tempDir.toString(), "metadata"); + conf.set(HDDS_DATANODE_DIR_KEY, dataVolume.toString()); + conf.set(OZONE_METADATA_DIRS, metadataVolume.toString()); MutableVolumeSet volumeSet = new MutableVolumeSet(dnUuid, conf, null, StorageVolume.VolumeType.DATA_VOLUME, null); return ContainerTestUtils.getKeyValueHandler(conf, dnUuid, containerSet, volumeSet); From 1574cddb86e08d1a574228d9ac3c0b469d8472dc Mon Sep 17 00:00:00 2001 From: Ethan Rose Date: Tue, 3 Jun 2025 16:04:09 -0700 Subject: [PATCH 59/62] Add configs in test to support restarting DN and SCM quickly --- .../TestContainerCommandReconciliation.java | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/dn/checksum/TestContainerCommandReconciliation.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/dn/checksum/TestContainerCommandReconciliation.java index 74afdf68ed39..0c246e8ab278 100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/dn/checksum/TestContainerCommandReconciliation.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/dn/checksum/TestContainerCommandReconciliation.java @@ -21,9 +21,12 @@ import static org.apache.commons.lang3.RandomStringUtils.randomAlphabetic; import static org.apache.hadoop.fs.CommonConfigurationKeysPublic.HADOOP_SECURITY_AUTHENTICATION; import static org.apache.hadoop.hdds.HddsConfigKeys.HDDS_BLOCK_TOKEN_ENABLED; +import static org.apache.hadoop.hdds.HddsConfigKeys.HDDS_CONTAINER_REPORT_INTERVAL; import static org.apache.hadoop.hdds.HddsConfigKeys.HDDS_CONTAINER_TOKEN_ENABLED; import static org.apache.hadoop.hdds.HddsConfigKeys.HDDS_DATANODE_KERBEROS_KEYTAB_FILE_KEY; import static org.apache.hadoop.hdds.HddsConfigKeys.HDDS_DATANODE_KERBEROS_PRINCIPAL_KEY; +import static 
org.apache.hadoop.hdds.HddsConfigKeys.HDDS_HEARTBEAT_INTERVAL; +import static org.apache.hadoop.hdds.HddsConfigKeys.HDDS_NODE_REPORT_INTERVAL; import static org.apache.hadoop.hdds.HddsConfigKeys.HDDS_SECRET_KEY_EXPIRY_DURATION; import static org.apache.hadoop.hdds.HddsConfigKeys.HDDS_SECRET_KEY_ROTATE_CHECK_DURATION; import static org.apache.hadoop.hdds.HddsConfigKeys.HDDS_SECRET_KEY_ROTATE_DURATION; @@ -34,6 +37,9 @@ import static org.apache.hadoop.hdds.scm.ScmConfig.ConfigStrings.HDDS_SCM_KERBEROS_PRINCIPAL_KEY; import static org.apache.hadoop.hdds.scm.ScmConfigKeys.OZONE_SCM_CHUNK_SIZE_KEY; import static org.apache.hadoop.hdds.scm.ScmConfigKeys.OZONE_SCM_CLIENT_ADDRESS_KEY; +import static org.apache.hadoop.hdds.scm.ScmConfigKeys.OZONE_SCM_DEADNODE_INTERVAL; +import static org.apache.hadoop.hdds.scm.ScmConfigKeys.OZONE_SCM_HEARTBEAT_PROCESS_INTERVAL; +import static org.apache.hadoop.hdds.scm.ScmConfigKeys.OZONE_SCM_STALENODE_INTERVAL; import static org.apache.hadoop.hdds.scm.server.SCMHTTPServerConfig.ConfigStrings.HDDS_SCM_HTTP_KERBEROS_KEYTAB_FILE_KEY; import static org.apache.hadoop.hdds.scm.server.SCMHTTPServerConfig.ConfigStrings.HDDS_SCM_HTTP_KERBEROS_PRINCIPAL_KEY; import static org.apache.hadoop.ozone.OzoneConfigKeys.OZONE_ADMINISTRATORS; @@ -144,6 +150,15 @@ public static void init() throws Exception { conf.set(OZONE_METADATA_DIRS, testDir.getAbsolutePath()); conf.setStorageSize(OZONE_SCM_CHUNK_SIZE_KEY, 128 * 1024, StorageUnit.BYTES); conf.setStorageSize(OZONE_SCM_BLOCK_SIZE, 512 * 1024, StorageUnit.BYTES); + // Support restarting datanodes and SCM in a rolling fashion to test checksum reporting after restart. + // Datanodes need to heartbeat more frequently, because they will not know that SCM was restarted until they + // heartbeat and SCM indicates they need to re-register. + conf.set(OZONE_SCM_HEARTBEAT_PROCESS_INTERVAL, "200ms"); + conf.set(HDDS_HEARTBEAT_INTERVAL, "1s"); + conf.set(OZONE_SCM_STALENODE_INTERVAL, "3s"); + conf.set(OZONE_SCM_DEADNODE_INTERVAL, "6s"); + conf.set(HDDS_NODE_REPORT_INTERVAL, "5s"); + conf.set(HDDS_CONTAINER_REPORT_INTERVAL, "5s"); startMiniKdc(); setSecureConfig(); @@ -291,7 +306,6 @@ public void testGetEmptyChecksumInfo() throws Exception { Container container = targetDN.getDatanodeStateMachine().getContainer() .getContainerSet().getContainer(containerID); File treeFile = getContainerChecksumFile(container.getContainerData()); - // TODO After HDDS-10379 the file will already exist and need to be overwritten. 
assertTrue(treeFile.exists()); Files.write(treeFile.toPath(), new byte[]{}, StandardOpenOption.TRUNCATE_EXISTING, StandardOpenOption.SYNC); From 7ff972c70046b7491651947cf67b48f88a30a26b Mon Sep 17 00:00:00 2001 From: Ethan Rose Date: Tue, 3 Jun 2025 16:26:40 -0700 Subject: [PATCH 60/62] Use standard corruption injection in failing test --- .../TestContainerCommandReconciliation.java | 44 +++++-------------- 1 file changed, 12 insertions(+), 32 deletions(-) diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/dn/checksum/TestContainerCommandReconciliation.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/dn/checksum/TestContainerCommandReconciliation.java index 0c246e8ab278..4ae471fb885c 100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/dn/checksum/TestContainerCommandReconciliation.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/dn/checksum/TestContainerCommandReconciliation.java @@ -105,13 +105,17 @@ import org.apache.hadoop.ozone.container.common.interfaces.Container; import org.apache.hadoop.ozone.container.common.interfaces.DBHandle; import org.apache.hadoop.ozone.container.common.statemachine.DatanodeStateMachine; +import org.apache.hadoop.ozone.container.common.volume.MutableVolumeSet; import org.apache.hadoop.ozone.container.keyvalue.KeyValueContainer; import org.apache.hadoop.ozone.container.keyvalue.KeyValueContainerData; import org.apache.hadoop.ozone.container.keyvalue.KeyValueHandler; +import org.apache.hadoop.ozone.container.keyvalue.TestContainerCorruptions; import org.apache.hadoop.ozone.container.keyvalue.helpers.BlockUtils; import org.apache.hadoop.ozone.container.keyvalue.interfaces.BlockManager; import org.apache.hadoop.ozone.om.OzoneManager; import org.apache.hadoop.security.UserGroupInformation; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.apache.ozone.test.GenericTestUtils; import org.apache.ratis.thirdparty.com.google.protobuf.ByteString; import org.apache.ratis.thirdparty.com.google.protobuf.InvalidProtocolBufferException; @@ -131,6 +135,7 @@ public class TestContainerCommandReconciliation { private static OzoneConfiguration conf; private static DNContainerOperationClient dnClient; private static final String KEY_NAME = "testkey"; + private static final Logger LOG = LoggerFactory.getLogger(TestContainerCommandReconciliation.class); @TempDir private static File testDir; @@ -426,37 +431,10 @@ public void testContainerChecksumChunkCorruption() throws Exception { List blockDatas = blockManager.listBlock(container, -1, 100); long oldDataChecksum = oldContainerChecksumInfo.getContainerMerkleTree().getDataChecksum(); - // 2. Corrupt first chunk for all the blocks - try (DBHandle db = BlockUtils.getDB(containerData, conf); - BatchOperation op = db.getStore().getBatchHandler().initBatchOperation()) { - for (BlockData blockData : blockDatas) { - // Modify the block metadata to simulate chunk corruption. 
-        ContainerProtos.BlockData.Builder blockDataBuilder = blockData.getProtoBufMessage().toBuilder();
-        blockDataBuilder.clearChunks();
-
-        ContainerProtos.ChunkInfo chunkInfo = blockData.getChunks().get(0);
-        ContainerProtos.ChecksumData.Builder checksumDataBuilder = ContainerProtos.ChecksumData.newBuilder()
-            .setBytesPerChecksum(chunkInfo.getChecksumData().getBytesPerChecksum())
-            .setType(chunkInfo.getChecksumData().getType());
-
-        for (ByteString checksum : chunkInfo.getChecksumData().getChecksumsList()) {
-          byte[] checksumBytes = checksum.toByteArray();
-          // Modify the checksum bytes to simulate corruption.
-          checksumBytes[0] = (byte) (checksumBytes[0] - 1);
-          checksumDataBuilder.addChecksums(ByteString.copyFrom(checksumBytes)).build();
-        }
-        chunkInfo = chunkInfo.toBuilder().setChecksumData(checksumDataBuilder.build()).build();
-        blockDataBuilder.addChunks(chunkInfo);
-        for (int i = 1; i < blockData.getChunks().size(); i++) {
-          blockDataBuilder.addChunks(blockData.getChunks().get(i));
-        }
-
-        // Modify the block metadata from the container db to simulate chunk corruption.
-        db.getStore().getBlockDataTable().putWithBatch(op, containerData.getBlockKey(blockData.getLocalID()),
-            BlockData.getFromProtoBuf(blockDataBuilder.build()));
-      }
-      db.getStore().getBatchHandler().commitBatchOperation(op);
-      db.getStore().flushDB();
+    // 2. Corrupt every block in one replica.
+    for (BlockData blockData : blockDatas) {
+      long blockID = blockData.getLocalID();
+      TestContainerCorruptions.CORRUPT_BLOCK.applyTo(container, blockID);
     }
 
     datanodeStateMachine.getContainer().getContainerSet().scanContainerWithoutGap(containerID);
@@ -569,11 +547,13 @@ private void waitForDataChecksumsAtSCM(long containerID, int expectedSize) throw
             ClientVersion.CURRENT_VERSION).stream()
             .map(HddsProtos.SCMContainerReplicaProto::getDataChecksum)
             .collect(Collectors.toSet());
+        LOG.info("Waiting for {} total unique checksums from container {} to be reported to SCM. Currently {} unique " +
+            "checksums are reported.", expectedSize, containerID, dataChecksums.size());
         return dataChecksums.size() == expectedSize;
      } catch (Exception ex) {
         return false;
      }
-    }, 500, 20000);
+    }, 1000, 20000);
   }
 
   private Pair getDataAndContainer(boolean close, int dataLen, String volumeName, String bucketName)
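The shared TestContainerCorruptions helper that patch 60 switches to lives outside this series, so its exact behavior is not shown here. As a rough sketch of what a block-level corruption injector does, the fragment below flips the first byte of a block file so that its stored checksums no longer match; the class and method names, and the one-file-per-block layout assumption, are illustrative rather than the real helper.

    import java.io.File;
    import java.io.IOException;
    import java.io.RandomAccessFile;

    // Sketch: flip the first byte of a block file so its stored checksums no
    // longer match, which the container data scanner will flag as corruption.
    final class BlockCorruptor {
      private BlockCorruptor() { }

      static void corruptFirstByte(File blockFile) throws IOException {
        try (RandomAccessFile raf = new RandomAccessFile(blockFile, "rw")) {
          int original = raf.read();   // read the current first byte
          if (original < 0) {
            throw new IOException("Cannot corrupt empty file " + blockFile);
          }
          raf.seek(0);
          raf.write(original ^ 0xFF);  // write back its bitwise complement
        }
      }
    }

Routing corruption through one shared helper keeps this test consistent with the data scanner tests and avoids duplicating container layout details in each test.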
From 02a3ac64806b9ccdc5586f897ff19f7ee8917cfc Mon Sep 17 00:00:00 2001
From: Ethan Rose
Date: Tue, 3 Jun 2025 16:30:11 -0700
Subject: [PATCH 61/62] Checkstyle

---
 .../dn/checksum/TestContainerCommandReconciliation.java  | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/dn/checksum/TestContainerCommandReconciliation.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/dn/checksum/TestContainerCommandReconciliation.java
index 4ae471fb885c..16a0697299ab 100644
--- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/dn/checksum/TestContainerCommandReconciliation.java
+++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/dn/checksum/TestContainerCommandReconciliation.java
@@ -105,7 +105,6 @@
 import org.apache.hadoop.ozone.container.common.interfaces.Container;
 import org.apache.hadoop.ozone.container.common.interfaces.DBHandle;
 import org.apache.hadoop.ozone.container.common.statemachine.DatanodeStateMachine;
-import org.apache.hadoop.ozone.container.common.volume.MutableVolumeSet;
 import org.apache.hadoop.ozone.container.keyvalue.KeyValueContainer;
 import org.apache.hadoop.ozone.container.keyvalue.KeyValueContainerData;
 import org.apache.hadoop.ozone.container.keyvalue.KeyValueHandler;
@@ -114,15 +113,14 @@
 import org.apache.hadoop.ozone.container.keyvalue.interfaces.BlockManager;
 import org.apache.hadoop.ozone.om.OzoneManager;
 import org.apache.hadoop.security.UserGroupInformation;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
 import org.apache.ozone.test.GenericTestUtils;
-import org.apache.ratis.thirdparty.com.google.protobuf.ByteString;
 import org.apache.ratis.thirdparty.com.google.protobuf.InvalidProtocolBufferException;
 import org.junit.jupiter.api.AfterAll;
 import org.junit.jupiter.api.BeforeAll;
 import org.junit.jupiter.api.Test;
 import org.junit.jupiter.api.io.TempDir;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
 
 /**
  * This class tests container commands for reconciliation.
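The retry loop that patch 60 tunes (and patch 61 merely re-sorts imports around) follows a standard integration-test polling shape: check, log progress, treat exceptions as not-ready, and let the timeout fail the test. A condensed sketch under those assumptions follows; fetchReplicaChecksums is a hypothetical stand-in for the SCM container-replica query, while the 1000 ms interval and 20000 ms timeout mirror the patch.

    // Polling sketch: wait until SCM reports the expected number of unique
    // replica checksums for a container. LOG, containerID, and expectedSize
    // come from the surrounding test, as in the patch.
    static void waitForUniqueChecksums(long containerID, int expectedSize) throws Exception {
      GenericTestUtils.waitFor(() -> {
        try {
          // fetchReplicaChecksums (hypothetical) queries SCM's container
          // replicas and collects their data checksums into a set.
          Set<Long> checksums = fetchReplicaChecksums(containerID);
          LOG.info("Container {}: {} of {} expected unique checksums reported.",
              containerID, checksums.size(), expectedSize);
          return checksums.size() == expectedSize;
        } catch (Exception ex) {
          return false; // not ready yet; try again on the next interval
        }
      }, 1000, 20000);
    }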
From 54cbf92ef69427d7f95dadae71c3e0a3717b8867 Mon Sep 17 00:00:00 2001
From: Ethan Rose
Date: Tue, 3 Jun 2025 16:59:01 -0700
Subject: [PATCH 62/62] Findbugs

---
 .../ozone/dn/checksum/TestContainerCommandReconciliation.java | 1 -
 1 file changed, 1 deletion(-)

diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/dn/checksum/TestContainerCommandReconciliation.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/dn/checksum/TestContainerCommandReconciliation.java
index 16a0697299ab..fa4c66a2e028 100644
--- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/dn/checksum/TestContainerCommandReconciliation.java
+++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/dn/checksum/TestContainerCommandReconciliation.java
@@ -420,7 +420,6 @@ public void testContainerChecksumChunkCorruption() throws Exception {
     HddsDatanodeService hddsDatanodeService = cluster.getHddsDatanode(dataNodeDetails.get(0));
     DatanodeStateMachine datanodeStateMachine = hddsDatanodeService.getDatanodeStateMachine();
     Container container = datanodeStateMachine.getContainer().getContainerSet().getContainer(containerID);
-    KeyValueContainerData containerData = (KeyValueContainerData) container.getContainerData();
     ContainerProtos.ContainerChecksumInfo oldContainerChecksumInfo = readChecksumFile(container.getContainerData());
     KeyValueHandler kvHandler = (KeyValueHandler) datanodeStateMachine.getContainer().getDispatcher()
         .getHandler(ContainerProtos.ContainerType.KeyValueContainer);