From a90b32c406f33eba4c67d3b4686f5b3835d3cdbc Mon Sep 17 00:00:00 2001 From: Gargi Jaiswal Date: Mon, 14 Apr 2025 15:34:26 +0530 Subject: [PATCH 1/6] Exclude UNHEALTHY DNs from status subcommand output --- .../scm/node/DiskBalancerDeadNodeHandler.java | 48 ++++++++++++ .../hdds/scm/node/DiskBalancerManager.java | 21 +++++- .../scm/server/StorageContainerManager.java | 4 + .../node/TestDiskBalancerDeadNodeHandler.java | 75 +++++++++++++++++++ 4 files changed, 146 insertions(+), 2 deletions(-) create mode 100644 hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/DiskBalancerDeadNodeHandler.java create mode 100644 hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/node/TestDiskBalancerDeadNodeHandler.java diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/DiskBalancerDeadNodeHandler.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/DiskBalancerDeadNodeHandler.java new file mode 100644 index 000000000000..f499d6aa6ab0 --- /dev/null +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/DiskBalancerDeadNodeHandler.java @@ -0,0 +1,48 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hdds.scm.node; + +import org.apache.hadoop.hdds.protocol.DatanodeDetails; +import org.apache.hadoop.hdds.server.events.EventHandler; +import org.apache.hadoop.hdds.server.events.EventPublisher; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * Updates the DiskBalancer status of a datanode to UNKNOWN when the datanode is marked dead. + * + * This handler listens for DEAD_NODE events and ensures that DiskBalancer + * does not retain outdated state for unreachable datanodes. + */ + + +public class DiskBalancerDeadNodeHandler implements EventHandler { + + private static final Logger LOG = LoggerFactory.getLogger(DiskBalancerDeadNodeHandler.class); + private final DiskBalancerManager diskBalancerManager; + + public DiskBalancerDeadNodeHandler(DiskBalancerManager diskBalancerManager) { + this.diskBalancerManager = diskBalancerManager; + } + + @Override + public void onMessage(DatanodeDetails datanodeDetails, EventPublisher publisher) { + LOG.info("Marking DiskBalancer status UNKNOWN for dead DN {}", datanodeDetails.getUuidString()); + diskBalancerManager.markStatusUnknown(datanodeDetails); + } +} diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/DiskBalancerManager.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/DiskBalancerManager.java index 74c93bab70a4..d22f7ac66da2 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/DiskBalancerManager.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/DiskBalancerManager.java @@ -120,7 +120,8 @@ public List getDiskBalancerStatus( .map(dn -> getInfoProto(dn, clientVersion)) .collect(Collectors.toList()); } else { - return nodeManager.getAllNodes().stream() + return nodeManager.getNodes(IN_SERVICE, + HddsProtos.NodeState.HEALTHY).stream() .filter(dn -> shouldReturnDatanode(filterStatus, dn)) .map(dn -> getInfoProto((DatanodeInfo)dn, clientVersion)) .collect(Collectors.toList()); @@ -302,7 +303,7 @@ private double getVolumeDataDensitySumForDatanodeDetails( return volumeDensitySum; } - private DiskBalancerStatus getStatus(DatanodeDetails datanodeDetails) { + public DiskBalancerStatus getStatus(DatanodeDetails datanodeDetails) { return statusMap.computeIfAbsent(datanodeDetails, dn -> new DiskBalancerStatus(DiskBalancerRunningStatus.UNKNOWN, new DiskBalancerConfiguration(), 0, 0, 0, 0)); } @@ -333,6 +334,22 @@ public void processDiskBalancerReport(DiskBalancerReportProto reportProto, } } + public void markStatusUnknown(DatanodeDetails dn) { + DiskBalancerStatus currentStatus = statusMap.get(dn); + if (currentStatus != null && + currentStatus.getRunningStatus() != DiskBalancerRunningStatus.UNKNOWN) { + DiskBalancerStatus unknownStatus = new DiskBalancerStatus( + DiskBalancerRunningStatus.UNKNOWN, + currentStatus.getDiskBalancerConfiguration(), + currentStatus.getSuccessMoveCount(), + currentStatus.getFailureMoveCount(), + currentStatus.getBytesToMove(), + currentStatus.getBalancedBytes() + ); + statusMap.put(dn, unknownStatus); + } + } + private DiskBalancerConfiguration attachDiskBalancerConf( DatanodeDetails dn, Optional threshold, Optional bandwidthInMB, Optional parallelThread) { diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/StorageContainerManager.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/StorageContainerManager.java index 5e5680c8b8ef..783aa0275e82 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/StorageContainerManager.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/StorageContainerManager.java @@ -120,6 +120,7 @@ import org.apache.hadoop.hdds.scm.net.NetworkTopology; import org.apache.hadoop.hdds.scm.net.NetworkTopologyImpl; import org.apache.hadoop.hdds.scm.node.DeadNodeHandler; +import org.apache.hadoop.hdds.scm.node.DiskBalancerDeadNodeHandler; import org.apache.hadoop.hdds.scm.node.DiskBalancerManager; import org.apache.hadoop.hdds.scm.node.DiskBalancerReportHandler; import org.apache.hadoop.hdds.scm.node.HealthyReadOnlyNodeHandler; @@ -499,6 +500,8 @@ private void initializeEventHandlers() { new PipelineActionHandler(pipelineManager, scmContext); DiskBalancerReportHandler diskBalancerReportHandler = new DiskBalancerReportHandler(diskBalancerManager); + DiskBalancerDeadNodeHandler diskBalancerDeadNodeHandler = + new DiskBalancerDeadNodeHandler(diskBalancerManager); eventQueue.addHandler(SCMEvents.DATANODE_COMMAND, scmNodeManager); eventQueue.addHandler(SCMEvents.RETRIABLE_DATANODE_COMMAND, scmNodeManager); @@ -577,6 +580,7 @@ private void initializeEventHandlers() { scmBlockManager.getDeletedBlockLog()::onSent); eventQueue.addHandler(SCMEvents.DISK_BALANCER_REPORT, diskBalancerReportHandler); + eventQueue.addHandler(SCMEvents.DEAD_NODE, diskBalancerDeadNodeHandler); } private void initializeCertificateClient() throws IOException { diff --git a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/node/TestDiskBalancerDeadNodeHandler.java b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/node/TestDiskBalancerDeadNodeHandler.java new file mode 100644 index 000000000000..d4f8104c5898 --- /dev/null +++ b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/node/TestDiskBalancerDeadNodeHandler.java @@ -0,0 +1,75 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hdds.scm.node; + +import static org.junit.jupiter.api.Assertions.assertEquals; + +import java.util.UUID; +import org.apache.hadoop.hdds.conf.OzoneConfiguration; +import org.apache.hadoop.hdds.protocol.DatanodeDetails; +import org.apache.hadoop.hdds.protocol.proto.HddsProtos.DiskBalancerRunningStatus; +import org.apache.hadoop.hdds.scm.ha.SCMContext; +import org.apache.hadoop.hdds.server.events.EventQueue; +import org.apache.ozone.test.GenericTestUtils; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; + +/** + * Unit tests for DiskBalancerDeadNodeHandler. + */ +public class TestDiskBalancerDeadNodeHandler { + + private DiskBalancerManager diskBalancerManager; + private DiskBalancerDeadNodeHandler deadNodeHandler; + private DatanodeDetails healthyDn; + private DatanodeDetails deadDn; + + @BeforeEach + public void setup() throws Exception { + OzoneConfiguration conf = new OzoneConfiguration(); + String storageDir = GenericTestUtils.getTempPath( + TestDiskBalancerDeadNodeHandler.class.getSimpleName() + UUID.randomUUID()); + conf.set("ozone.metadata.dirs", storageDir); + + diskBalancerManager = new DiskBalancerManager(conf, new EventQueue(), + SCMContext.emptyContext(), null); + deadNodeHandler = new DiskBalancerDeadNodeHandler(diskBalancerManager); + + // Create two DNs: one healthy and one dead + healthyDn = DatanodeDetails.newBuilder().setUuid(UUID.randomUUID()).build(); + deadDn = DatanodeDetails.newBuilder().setUuid(UUID.randomUUID()).build(); + + diskBalancerManager.addRunningDatanode(healthyDn); + diskBalancerManager.addRunningDatanode(deadDn); + } + + @Test + public void testDeadNodeHandlerUpdatesStatusToUnknown() { + // Verify initial status of both datanodes + assertEquals(DiskBalancerRunningStatus.RUNNING, diskBalancerManager.getStatus(healthyDn).getRunningStatus()); + assertEquals(DiskBalancerRunningStatus.RUNNING, diskBalancerManager.getStatus(deadDn).getRunningStatus()); + + deadNodeHandler.onMessage(deadDn, null); + + // Verify that the status of the dead datanode is updated to UNKNOWN + assertEquals(DiskBalancerRunningStatus.UNKNOWN, diskBalancerManager.getStatus(deadDn).getRunningStatus()); + + // Verify that the status of the healthy datanode remains unchanged + assertEquals(DiskBalancerRunningStatus.RUNNING, diskBalancerManager.getStatus(healthyDn).getRunningStatus()); + } +} From 39390af6d43ce11dd4a598f29fc978cd12253fae Mon Sep 17 00:00:00 2001 From: Gargi Jaiswal Date: Mon, 14 Apr 2025 15:35:24 +0530 Subject: [PATCH 2/6] fixed checkstyle --- .../apache/hadoop/hdds/scm/node/DiskBalancerDeadNodeHandler.java | 1 - 1 file changed, 1 deletion(-) diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/DiskBalancerDeadNodeHandler.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/DiskBalancerDeadNodeHandler.java index f499d6aa6ab0..dc400c871be9 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/DiskBalancerDeadNodeHandler.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/DiskBalancerDeadNodeHandler.java @@ -30,7 +30,6 @@ * does not retain outdated state for unreachable datanodes. */ - public class DiskBalancerDeadNodeHandler implements EventHandler { private static final Logger LOG = LoggerFactory.getLogger(DiskBalancerDeadNodeHandler.class); From 246e15d07f400207568f576b6b32e5fa0cfb41f3 Mon Sep 17 00:00:00 2001 From: Gargi Jaiswal Date: Wed, 16 Apr 2025 11:12:36 +0530 Subject: [PATCH 3/6] reused deadNodeHandler --- .../hadoop/hdds/scm/node/DeadNodeHandler.java | 16 +++- .../scm/node/DiskBalancerDeadNodeHandler.java | 47 ------------ .../hdds/scm/node/DiskBalancerManager.java | 10 +-- .../scm/server/StorageContainerManager.java | 6 +- .../hdds/scm/node/TestDeadNodeHandler.java | 39 +++++++++- .../node/TestDiskBalancerDeadNodeHandler.java | 75 ------------------- 6 files changed, 56 insertions(+), 137 deletions(-) delete mode 100644 hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/DiskBalancerDeadNodeHandler.java delete mode 100644 hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/node/TestDiskBalancerDeadNodeHandler.java diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/DeadNodeHandler.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/DeadNodeHandler.java index 69de282e81e1..875e073d5579 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/DeadNodeHandler.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/DeadNodeHandler.java @@ -51,6 +51,7 @@ public class DeadNodeHandler implements EventHandler { private final ContainerManager containerManager; @Nullable private final DeletedBlockLog deletedBlockLog; + private final DiskBalancerManager diskBalancerManager; private static final Logger LOG = LoggerFactory.getLogger(DeadNodeHandler.class); @@ -58,16 +59,25 @@ public class DeadNodeHandler implements EventHandler { public DeadNodeHandler(final NodeManager nodeManager, final PipelineManager pipelineManager, final ContainerManager containerManager) { - this(nodeManager, pipelineManager, containerManager, null); + this(nodeManager, pipelineManager, containerManager, null, null); } public DeadNodeHandler(final NodeManager nodeManager, final PipelineManager pipelineManager, final ContainerManager containerManager, + final DiskBalancerManager diskBalancerManager) { + this(nodeManager, pipelineManager, containerManager, diskBalancerManager, null); + } + + public DeadNodeHandler(final NodeManager nodeManager, + final PipelineManager pipelineManager, + final ContainerManager containerManager, + final DiskBalancerManager diskBalancerManager, @Nullable final DeletedBlockLog deletedBlockLog) { this.nodeManager = nodeManager; this.pipelineManager = pipelineManager; this.containerManager = containerManager; + this.diskBalancerManager = diskBalancerManager; this.deletedBlockLog = deletedBlockLog; } @@ -120,6 +130,10 @@ public void onMessage(final DatanodeDetails datanodeDetails, nodeManager.getNode(datanodeDetails.getID()) .getParent() == null); } + + // Mark DiskBalancer status as UNKNOWN for the dead datanode + LOG.info("Marking DiskBalancer status UNKNOWN for dead DN {}", datanodeDetails.getUuidString()); + diskBalancerManager.markStatusUnknown(datanodeDetails); } catch (NodeNotFoundException ex) { // This should not happen, we cannot get a dead node event for an // unregistered datanode! diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/DiskBalancerDeadNodeHandler.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/DiskBalancerDeadNodeHandler.java deleted file mode 100644 index dc400c871be9..000000000000 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/DiskBalancerDeadNodeHandler.java +++ /dev/null @@ -1,47 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hadoop.hdds.scm.node; - -import org.apache.hadoop.hdds.protocol.DatanodeDetails; -import org.apache.hadoop.hdds.server.events.EventHandler; -import org.apache.hadoop.hdds.server.events.EventPublisher; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -/** - * Updates the DiskBalancer status of a datanode to UNKNOWN when the datanode is marked dead. - * - * This handler listens for DEAD_NODE events and ensures that DiskBalancer - * does not retain outdated state for unreachable datanodes. - */ - -public class DiskBalancerDeadNodeHandler implements EventHandler { - - private static final Logger LOG = LoggerFactory.getLogger(DiskBalancerDeadNodeHandler.class); - private final DiskBalancerManager diskBalancerManager; - - public DiskBalancerDeadNodeHandler(DiskBalancerManager diskBalancerManager) { - this.diskBalancerManager = diskBalancerManager; - } - - @Override - public void onMessage(DatanodeDetails datanodeDetails, EventPublisher publisher) { - LOG.info("Marking DiskBalancer status UNKNOWN for dead DN {}", datanodeDetails.getUuidString()); - diskBalancerManager.markStatusUnknown(datanodeDetails); - } -} diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/DiskBalancerManager.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/DiskBalancerManager.java index d22f7ac66da2..a22fa43304fd 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/DiskBalancerManager.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/DiskBalancerManager.java @@ -338,14 +338,8 @@ public void markStatusUnknown(DatanodeDetails dn) { DiskBalancerStatus currentStatus = statusMap.get(dn); if (currentStatus != null && currentStatus.getRunningStatus() != DiskBalancerRunningStatus.UNKNOWN) { - DiskBalancerStatus unknownStatus = new DiskBalancerStatus( - DiskBalancerRunningStatus.UNKNOWN, - currentStatus.getDiskBalancerConfiguration(), - currentStatus.getSuccessMoveCount(), - currentStatus.getFailureMoveCount(), - currentStatus.getBytesToMove(), - currentStatus.getBalancedBytes() - ); + DiskBalancerStatus unknownStatus = new DiskBalancerStatus(DiskBalancerRunningStatus.UNKNOWN, + new DiskBalancerConfiguration(), 0, 0, 0, 0); statusMap.put(dn, unknownStatus); } } diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/StorageContainerManager.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/StorageContainerManager.java index 783aa0275e82..627c080a26fe 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/StorageContainerManager.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/StorageContainerManager.java @@ -120,7 +120,6 @@ import org.apache.hadoop.hdds.scm.net.NetworkTopology; import org.apache.hadoop.hdds.scm.net.NetworkTopologyImpl; import org.apache.hadoop.hdds.scm.node.DeadNodeHandler; -import org.apache.hadoop.hdds.scm.node.DiskBalancerDeadNodeHandler; import org.apache.hadoop.hdds.scm.node.DiskBalancerManager; import org.apache.hadoop.hdds.scm.node.DiskBalancerReportHandler; import org.apache.hadoop.hdds.scm.node.HealthyReadOnlyNodeHandler; @@ -478,7 +477,7 @@ private void initializeEventHandlers() { StaleNodeHandler staleNodeHandler = new StaleNodeHandler(scmNodeManager, pipelineManager); DeadNodeHandler deadNodeHandler = new DeadNodeHandler(scmNodeManager, - pipelineManager, containerManager); + pipelineManager, containerManager, diskBalancerManager); StartDatanodeAdminHandler datanodeStartAdminHandler = new StartDatanodeAdminHandler(scmNodeManager, pipelineManager); ReadOnlyHealthyToHealthyNodeHandler readOnlyHealthyToHealthyNodeHandler = @@ -500,8 +499,6 @@ private void initializeEventHandlers() { new PipelineActionHandler(pipelineManager, scmContext); DiskBalancerReportHandler diskBalancerReportHandler = new DiskBalancerReportHandler(diskBalancerManager); - DiskBalancerDeadNodeHandler diskBalancerDeadNodeHandler = - new DiskBalancerDeadNodeHandler(diskBalancerManager); eventQueue.addHandler(SCMEvents.DATANODE_COMMAND, scmNodeManager); eventQueue.addHandler(SCMEvents.RETRIABLE_DATANODE_COMMAND, scmNodeManager); @@ -580,7 +577,6 @@ private void initializeEventHandlers() { scmBlockManager.getDeletedBlockLog()::onSent); eventQueue.addHandler(SCMEvents.DISK_BALANCER_REPORT, diskBalancerReportHandler); - eventQueue.addHandler(SCMEvents.DEAD_NODE, diskBalancerDeadNodeHandler); } private void initializeCertificateClient() throws IOException { diff --git a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/node/TestDeadNodeHandler.java b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/node/TestDeadNodeHandler.java index aedf64f926de..fd0b71d77938 100644 --- a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/node/TestDeadNodeHandler.java +++ b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/node/TestDeadNodeHandler.java @@ -88,6 +88,7 @@ public class TestDeadNodeHandler { private File storageDir; private SCMContext scmContext; private DeletedBlockLog deletedBlockLog; + private DiskBalancerManager diskBalancerManager; @BeforeEach public void setup() throws IOException, AuthenticationException { @@ -114,8 +115,9 @@ public void setup() throws IOException, AuthenticationException { mockRatisProvider); containerManager = scm.getContainerManager(); deletedBlockLog = mock(DeletedBlockLog.class); + diskBalancerManager = mock(DiskBalancerManager.class); deadNodeHandler = new DeadNodeHandler(nodeManager, - mock(PipelineManager.class), containerManager, deletedBlockLog); + mock(PipelineManager.class), containerManager, diskBalancerManager, deletedBlockLog); healthyReadOnlyNodeHandler = new HealthyReadOnlyNodeHandler(nodeManager, pipelineManager); @@ -284,6 +286,41 @@ public void testOnMessage(@TempDir File tempDir) throws Exception { } + @Test + public void testDiskBalancerStatusUnknownForDeadNode() throws Exception { + DatanodeDetails healthyDn = MockDatanodeDetails.randomDatanodeDetails(); + DatanodeDetails deadDn = MockDatanodeDetails.randomDatanodeDetails(); + + // Register healthy datanode + nodeManager.register(healthyDn, + HddsTestUtils.createNodeReport(Collections.emptyList(), + Collections.emptyList()), null); + + // Register dead datanode + nodeManager.register(deadDn, + HddsTestUtils.createNodeReport(Collections.emptyList(), + Collections.emptyList()), null); + + diskBalancerManager.addRunningDatanode(healthyDn); + diskBalancerManager.addRunningDatanode(deadDn); + + // Verify initial status of both datanodes + assertEquals(HddsProtos.DiskBalancerRunningStatus.RUNNING, + diskBalancerManager.getStatus(healthyDn).getRunningStatus()); + assertEquals(HddsProtos.DiskBalancerRunningStatus.RUNNING, + diskBalancerManager.getStatus(deadDn).getRunningStatus()); + + deadNodeHandler.onMessage(deadDn, publisher); + + // Verify that the status of the dead datanode is updated to UNKNOWN + assertEquals(HddsProtos.DiskBalancerRunningStatus.UNKNOWN, + diskBalancerManager.getStatus(deadDn).getRunningStatus()); + + // Verify that the status of the healthy datanode remains unchanged + assertEquals(HddsProtos.DiskBalancerRunningStatus.RUNNING, + diskBalancerManager.getStatus(healthyDn).getRunningStatus()); + } + private void registerReplicas(ContainerManager contManager, ContainerInfo container, DatanodeDetails... datanodes) throws ContainerNotFoundException { diff --git a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/node/TestDiskBalancerDeadNodeHandler.java b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/node/TestDiskBalancerDeadNodeHandler.java deleted file mode 100644 index d4f8104c5898..000000000000 --- a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/node/TestDiskBalancerDeadNodeHandler.java +++ /dev/null @@ -1,75 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hadoop.hdds.scm.node; - -import static org.junit.jupiter.api.Assertions.assertEquals; - -import java.util.UUID; -import org.apache.hadoop.hdds.conf.OzoneConfiguration; -import org.apache.hadoop.hdds.protocol.DatanodeDetails; -import org.apache.hadoop.hdds.protocol.proto.HddsProtos.DiskBalancerRunningStatus; -import org.apache.hadoop.hdds.scm.ha.SCMContext; -import org.apache.hadoop.hdds.server.events.EventQueue; -import org.apache.ozone.test.GenericTestUtils; -import org.junit.jupiter.api.BeforeEach; -import org.junit.jupiter.api.Test; - -/** - * Unit tests for DiskBalancerDeadNodeHandler. - */ -public class TestDiskBalancerDeadNodeHandler { - - private DiskBalancerManager diskBalancerManager; - private DiskBalancerDeadNodeHandler deadNodeHandler; - private DatanodeDetails healthyDn; - private DatanodeDetails deadDn; - - @BeforeEach - public void setup() throws Exception { - OzoneConfiguration conf = new OzoneConfiguration(); - String storageDir = GenericTestUtils.getTempPath( - TestDiskBalancerDeadNodeHandler.class.getSimpleName() + UUID.randomUUID()); - conf.set("ozone.metadata.dirs", storageDir); - - diskBalancerManager = new DiskBalancerManager(conf, new EventQueue(), - SCMContext.emptyContext(), null); - deadNodeHandler = new DiskBalancerDeadNodeHandler(diskBalancerManager); - - // Create two DNs: one healthy and one dead - healthyDn = DatanodeDetails.newBuilder().setUuid(UUID.randomUUID()).build(); - deadDn = DatanodeDetails.newBuilder().setUuid(UUID.randomUUID()).build(); - - diskBalancerManager.addRunningDatanode(healthyDn); - diskBalancerManager.addRunningDatanode(deadDn); - } - - @Test - public void testDeadNodeHandlerUpdatesStatusToUnknown() { - // Verify initial status of both datanodes - assertEquals(DiskBalancerRunningStatus.RUNNING, diskBalancerManager.getStatus(healthyDn).getRunningStatus()); - assertEquals(DiskBalancerRunningStatus.RUNNING, diskBalancerManager.getStatus(deadDn).getRunningStatus()); - - deadNodeHandler.onMessage(deadDn, null); - - // Verify that the status of the dead datanode is updated to UNKNOWN - assertEquals(DiskBalancerRunningStatus.UNKNOWN, diskBalancerManager.getStatus(deadDn).getRunningStatus()); - - // Verify that the status of the healthy datanode remains unchanged - assertEquals(DiskBalancerRunningStatus.RUNNING, diskBalancerManager.getStatus(healthyDn).getRunningStatus()); - } -} From 1a04514e298e8e06268d5fed3ff5026353bc4ea1 Mon Sep 17 00:00:00 2001 From: Gargi Jaiswal Date: Wed, 16 Apr 2025 17:29:25 +0530 Subject: [PATCH 4/6] fixed unit test --- .../hdds/scm/node/TestDeadNodeHandler.java | 52 ++++++------------- 1 file changed, 16 insertions(+), 36 deletions(-) diff --git a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/node/TestDeadNodeHandler.java b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/node/TestDeadNodeHandler.java index fd0b71d77938..f655c921848b 100644 --- a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/node/TestDeadNodeHandler.java +++ b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/node/TestDeadNodeHandler.java @@ -115,7 +115,8 @@ public void setup() throws IOException, AuthenticationException { mockRatisProvider); containerManager = scm.getContainerManager(); deletedBlockLog = mock(DeletedBlockLog.class); - diskBalancerManager = mock(DiskBalancerManager.class); + diskBalancerManager = new DiskBalancerManager(conf, new EventQueue(), + SCMContext.emptyContext(), null);; deadNodeHandler = new DeadNodeHandler(nodeManager, mock(PipelineManager.class), containerManager, diskBalancerManager, deletedBlockLog); healthyReadOnlyNodeHandler = @@ -219,6 +220,11 @@ public void testOnMessage(@TempDir File tempDir) throws Exception { HddsTestUtils.quasiCloseContainer(containerManager, container3.containerID()); + //starting diskBalancer on all Datanodes + diskBalancerManager.addRunningDatanode(datanode1); + diskBalancerManager.addRunningDatanode(datanode2); + diskBalancerManager.addRunningDatanode(datanode3); + // First set the node to IN_MAINTENANCE and ensure the container replicas // are not removed on the dead event datanode1 = nodeManager.getNode(datanode1.getID()); @@ -235,6 +241,15 @@ public void testOnMessage(@TempDir File tempDir) throws Exception { verify(deletedBlockLog, times(0)) .onDatanodeDead(datanode1.getUuid()); + // Verify DiskBalancer status is marked UNKNOWN for the dead datanode + assertEquals(HddsProtos.DiskBalancerRunningStatus.UNKNOWN, + diskBalancerManager.getStatus(datanode1).getRunningStatus()); + // Verify DiskBalancer status remains unchanged for other datanodes + assertEquals(HddsProtos.DiskBalancerRunningStatus.RUNNING, + diskBalancerManager.getStatus(datanode2).getRunningStatus()); + assertEquals(HddsProtos.DiskBalancerRunningStatus.RUNNING, + diskBalancerManager.getStatus(datanode3).getRunningStatus()); + Set container1Replicas = containerManager .getContainerReplicas(ContainerID.valueOf(container1.getContainerID())); assertEquals(2, container1Replicas.size()); @@ -286,41 +301,6 @@ public void testOnMessage(@TempDir File tempDir) throws Exception { } - @Test - public void testDiskBalancerStatusUnknownForDeadNode() throws Exception { - DatanodeDetails healthyDn = MockDatanodeDetails.randomDatanodeDetails(); - DatanodeDetails deadDn = MockDatanodeDetails.randomDatanodeDetails(); - - // Register healthy datanode - nodeManager.register(healthyDn, - HddsTestUtils.createNodeReport(Collections.emptyList(), - Collections.emptyList()), null); - - // Register dead datanode - nodeManager.register(deadDn, - HddsTestUtils.createNodeReport(Collections.emptyList(), - Collections.emptyList()), null); - - diskBalancerManager.addRunningDatanode(healthyDn); - diskBalancerManager.addRunningDatanode(deadDn); - - // Verify initial status of both datanodes - assertEquals(HddsProtos.DiskBalancerRunningStatus.RUNNING, - diskBalancerManager.getStatus(healthyDn).getRunningStatus()); - assertEquals(HddsProtos.DiskBalancerRunningStatus.RUNNING, - diskBalancerManager.getStatus(deadDn).getRunningStatus()); - - deadNodeHandler.onMessage(deadDn, publisher); - - // Verify that the status of the dead datanode is updated to UNKNOWN - assertEquals(HddsProtos.DiskBalancerRunningStatus.UNKNOWN, - diskBalancerManager.getStatus(deadDn).getRunningStatus()); - - // Verify that the status of the healthy datanode remains unchanged - assertEquals(HddsProtos.DiskBalancerRunningStatus.RUNNING, - diskBalancerManager.getStatus(healthyDn).getRunningStatus()); - } - private void registerReplicas(ContainerManager contManager, ContainerInfo container, DatanodeDetails... datanodes) throws ContainerNotFoundException { From 40cc48b42a032be727b55c7b4f67908fbeed3bf6 Mon Sep 17 00:00:00 2001 From: Gargi Jaiswal Date: Wed, 16 Apr 2025 17:35:33 +0530 Subject: [PATCH 5/6] fixed checkstyle --- .../org/apache/hadoop/hdds/scm/node/TestDeadNodeHandler.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/node/TestDeadNodeHandler.java b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/node/TestDeadNodeHandler.java index f655c921848b..5930d7cada67 100644 --- a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/node/TestDeadNodeHandler.java +++ b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/node/TestDeadNodeHandler.java @@ -116,7 +116,7 @@ public void setup() throws IOException, AuthenticationException { containerManager = scm.getContainerManager(); deletedBlockLog = mock(DeletedBlockLog.class); diskBalancerManager = new DiskBalancerManager(conf, new EventQueue(), - SCMContext.emptyContext(), null);; + SCMContext.emptyContext(), null); deadNodeHandler = new DeadNodeHandler(nodeManager, mock(PipelineManager.class), containerManager, diskBalancerManager, deletedBlockLog); healthyReadOnlyNodeHandler = From a256cdf8960e26ede471cbdc2f1915f7a32f8369 Mon Sep 17 00:00:00 2001 From: Gargi Jaiswal Date: Mon, 21 Apr 2025 14:17:02 +0530 Subject: [PATCH 6/6] added nullable for diskbalancermanager --- .../org/apache/hadoop/hdds/scm/node/DeadNodeHandler.java | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/DeadNodeHandler.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/DeadNodeHandler.java index 875e073d5579..256ac3f21aa5 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/DeadNodeHandler.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/DeadNodeHandler.java @@ -51,6 +51,7 @@ public class DeadNodeHandler implements EventHandler { private final ContainerManager containerManager; @Nullable private final DeletedBlockLog deletedBlockLog; + @Nullable private final DiskBalancerManager diskBalancerManager; private static final Logger LOG = @@ -132,8 +133,10 @@ public void onMessage(final DatanodeDetails datanodeDetails, } // Mark DiskBalancer status as UNKNOWN for the dead datanode - LOG.info("Marking DiskBalancer status UNKNOWN for dead DN {}", datanodeDetails.getUuidString()); - diskBalancerManager.markStatusUnknown(datanodeDetails); + if (diskBalancerManager != null) { + LOG.info("Marking DiskBalancer status UNKNOWN for dead DN {}", datanodeDetails.getUuidString()); + diskBalancerManager.markStatusUnknown(datanodeDetails); + } } catch (NodeNotFoundException ex) { // This should not happen, we cannot get a dead node event for an // unregistered datanode!