From 3b5a6d484be83270d6a0bf7bfac84410395550ab Mon Sep 17 00:00:00 2001 From: S O'Donnell Date: Tue, 6 Feb 2024 14:13:55 +0000 Subject: [PATCH] HDDS-10304. [DiskBalancer] Start command - Fix nodes not being processed and incorrect config values --- .../commands/DiskBalancerCommand.java | 5 +++++ .../hdds/scm/node/DiskBalancerManager.java | 4 +++- ...ocationProtocolServerSideTranslatorPB.java | 21 +++++++++++-------- 3 files changed, 20 insertions(+), 10 deletions(-) diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/protocol/commands/DiskBalancerCommand.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/protocol/commands/DiskBalancerCommand.java index 1780877787f1..b8c047d1b150 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/protocol/commands/DiskBalancerCommand.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/protocol/commands/DiskBalancerCommand.java @@ -76,4 +76,9 @@ public HddsProtos.DiskBalancerOpType getOpType() { public DiskBalancerConfiguration getDiskBalancerConfiguration() { return diskBalancerConfiguration; } + + @Override + public String toString() { + return getType() + ": opType=" + opType + ", configuration=" + diskBalancerConfiguration; + } } diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/DiskBalancerManager.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/DiskBalancerManager.java index 1af9bfde4c5a..a45e81e15fb1 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/DiskBalancerManager.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/DiskBalancerManager.java @@ -156,7 +156,7 @@ public List startDiskBalancer( List errors = new ArrayList<>(); for (DatanodeDetails dn : dns) { try { - if (nodeManager.getNodeStatus(dn).isHealthy()) { + if (!nodeManager.getNodeStatus(dn).isHealthy()) { errors.add(new DatanodeAdminError(dn.getHostName(), "Datanode not in healthy state")); continue; @@ -169,6 +169,7 @@ public List startDiskBalancer( HddsProtos.DiskBalancerOpType.START, updateConf); sendCommand(dn, command); } catch (Exception e) { + LOG.info("Caught an error for {}", dn); errors.add(new DatanodeAdminError(dn.getHostName(), e.getMessage())); } } @@ -355,6 +356,7 @@ private void sendCommand(DatanodeDetails dn, DiskBalancerCommand command) { " since not leader SCM.", dn.getUuidString()); return; } + LOG.info("Sending {} to Datanode {}", command, dn); scmNodeEventPublisher.fireEvent(SCMEvents.DATANODE_COMMAND, new CommandForDatanode<>(dn.getUuid(), command)); } diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/protocol/StorageContainerLocationProtocolServerSideTranslatorPB.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/protocol/StorageContainerLocationProtocolServerSideTranslatorPB.java index 4773aa123fee..011599d58aab 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/protocol/StorageContainerLocationProtocolServerSideTranslatorPB.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/protocol/StorageContainerLocationProtocolServerSideTranslatorPB.java @@ -1336,23 +1336,26 @@ public DatanodeDiskBalancerOpResponseProto getDatanodeDiskBalancerOp( DatanodeDiskBalancerOpRequestProto request) throws IOException { List errors; + HddsProtos.DiskBalancerConfigurationProto conf = request.getConf(); switch (request.getOpType()) { case START: errors = impl.startDiskBalancer( - Optional.of(request.getConf().getThreshold()), - Optional.of(request.getConf().getDiskBandwidthInMB()), - Optional.of(request.getConf().getParallelThread()), - Optional.of(request.getHostsList())); + conf.hasThreshold() ? Optional.of(conf.getThreshold()) : Optional.empty(), + conf.hasDiskBandwidthInMB() ? Optional.of(conf.getDiskBandwidthInMB()) : Optional.empty(), + conf.hasParallelThread() ? Optional.of(conf.getParallelThread()) : Optional.empty(), + request.getHostsList().isEmpty() ? Optional.empty() : Optional.of(request.getHostsList())); break; case UPDATE: + errors = impl.updateDiskBalancerConfiguration( - Optional.of(request.getConf().getThreshold()), - Optional.of(request.getConf().getDiskBandwidthInMB()), - Optional.of(request.getConf().getParallelThread()), - Optional.of(request.getHostsList())); + conf.hasThreshold() ? Optional.of(conf.getThreshold()) : Optional.empty(), + conf.hasDiskBandwidthInMB() ? Optional.of(conf.getDiskBandwidthInMB()) : Optional.empty(), + conf.hasParallelThread() ? Optional.of(conf.getParallelThread()) : Optional.empty(), + request.getHostsList().isEmpty() ? Optional.empty() : Optional.of(request.getHostsList())); break; case STOP: - errors = impl.stopDiskBalancer(Optional.of(request.getHostsList())); + errors = impl.stopDiskBalancer( + request.getHostsList().isEmpty() ? Optional.empty() : Optional.of(request.getHostsList())); break; default: errors = new ArrayList<>();