From 168259d3cc86fb73a9b50bcf2433db16a4254cf1 Mon Sep 17 00:00:00 2001 From: Gargi Jaiswal Date: Thu, 18 Dec 2025 15:13:39 +0530 Subject: [PATCH 1/4] display hostname of dns instead of ipAddress in cli output --- .../AbstractDiskBalancerSubCommand.java | 12 +++++--- .../datanode/DiskBalancerStartSubcommand.java | 7 +++-- .../datanode/DiskBalancerStopSubcommand.java | 9 ++++-- .../datanode/DiskBalancerSubCommandUtil.java | 28 +++++++++++++++++++ .../DiskBalancerUpdateSubcommand.java | 5 +++- .../scm/node/TestVolumeChoosingPolicy.java | 2 +- 6 files changed, 53 insertions(+), 10 deletions(-) diff --git a/hadoop-ozone/cli-admin/src/main/java/org/apache/hadoop/hdds/scm/cli/datanode/AbstractDiskBalancerSubCommand.java b/hadoop-ozone/cli-admin/src/main/java/org/apache/hadoop/hdds/scm/cli/datanode/AbstractDiskBalancerSubCommand.java index 1c92bd831cd8..56c0caac1061 100644 --- a/hadoop-ozone/cli-admin/src/main/java/org/apache/hadoop/hdds/scm/cli/datanode/AbstractDiskBalancerSubCommand.java +++ b/hadoop-ozone/cli-admin/src/main/java/org/apache/hadoop/hdds/scm/cli/datanode/AbstractDiskBalancerSubCommand.java @@ -86,12 +86,16 @@ public Void call() throws Exception { for (String dn : targetDatanodes) { try { Object result = executeCommand(dn); - successNodes.add(dn); + // Get hostname for display (fallback to original address if it fails) + String hostname = DiskBalancerSubCommandUtil.getDatanodeHostname(dn); + successNodes.add(hostname); if (options.isJson()) { jsonResults.add(result); } } catch (Exception e) { - failedNodes.add(dn); + // Get hostname for error display (fallback to original address if it fails) + String hostname = DiskBalancerSubCommandUtil.getDatanodeHostname(dn); + failedNodes.add(hostname); String errorMsg = e.getMessage(); if (errorMsg != null && errorMsg.contains("\n")) { errorMsg = errorMsg.split("\n", 2)[0]; @@ -101,11 +105,11 @@ public Void call() throws Exception { } if (options.isJson()) { // Create error result object in JSON format - Map 
errorResult = createErrorResult(dn, errorMsg); + Map errorResult = createErrorResult(hostname, errorMsg); jsonResults.add(errorResult); } else { // Print error messages in non-JSON mode - System.err.printf("Error on node [%s]: %s%n", dn, errorMsg); + System.err.printf("Error on node [%s]: %s%n", hostname, errorMsg); } } } diff --git a/hadoop-ozone/cli-admin/src/main/java/org/apache/hadoop/hdds/scm/cli/datanode/DiskBalancerStartSubcommand.java b/hadoop-ozone/cli-admin/src/main/java/org/apache/hadoop/hdds/scm/cli/datanode/DiskBalancerStartSubcommand.java index 364065f99d09..27a72513811c 100644 --- a/hadoop-ozone/cli-admin/src/main/java/org/apache/hadoop/hdds/scm/cli/datanode/DiskBalancerStartSubcommand.java +++ b/hadoop-ozone/cli-admin/src/main/java/org/apache/hadoop/hdds/scm/cli/datanode/DiskBalancerStartSubcommand.java @@ -63,9 +63,12 @@ protected Object executeCommand(String hostName) throws IOException { try { DiskBalancerConfigurationProto config = buildConfigProto(); diskBalancerProxy.startDiskBalancer(config); - + + // Get hostname for consistent JSON output + String dnHostname = DiskBalancerSubCommandUtil.getDatanodeHostname(hostName); + Map result = new LinkedHashMap<>(); - result.put("datanode", hostName); + result.put("datanode", dnHostname); result.put("action", "start"); result.put("status", "success"); Map configMap = getConfigurationMap(); diff --git a/hadoop-ozone/cli-admin/src/main/java/org/apache/hadoop/hdds/scm/cli/datanode/DiskBalancerStopSubcommand.java b/hadoop-ozone/cli-admin/src/main/java/org/apache/hadoop/hdds/scm/cli/datanode/DiskBalancerStopSubcommand.java index ba3355cb9d5f..4fa84f3792b0 100644 --- a/hadoop-ozone/cli-admin/src/main/java/org/apache/hadoop/hdds/scm/cli/datanode/DiskBalancerStopSubcommand.java +++ b/hadoop-ozone/cli-admin/src/main/java/org/apache/hadoop/hdds/scm/cli/datanode/DiskBalancerStopSubcommand.java @@ -18,6 +18,7 @@ package org.apache.hadoop.hdds.scm.cli.datanode; import java.io.IOException; +import 
java.util.LinkedHashMap; import java.util.List; import java.util.Map; import org.apache.hadoop.hdds.cli.HddsVersionProvider; @@ -40,8 +41,12 @@ protected Object executeCommand(String hostName) throws IOException { .getSingleNodeDiskBalancerProxy(hostName); try { diskBalancerProxy.stopDiskBalancer(); - Map result = new java.util.LinkedHashMap<>(); - result.put("datanode", hostName); + + // Get hostname for consistent JSON output + String dnHostname = DiskBalancerSubCommandUtil.getDatanodeHostname(hostName); + + Map result = new LinkedHashMap<>(); + result.put("datanode", dnHostname); result.put("action", "stop"); result.put("status", "success"); return result; diff --git a/hadoop-ozone/cli-admin/src/main/java/org/apache/hadoop/hdds/scm/cli/datanode/DiskBalancerSubCommandUtil.java b/hadoop-ozone/cli-admin/src/main/java/org/apache/hadoop/hdds/scm/cli/datanode/DiskBalancerSubCommandUtil.java index b5e0b4e57dde..be30f9ef2377 100644 --- a/hadoop-ozone/cli-admin/src/main/java/org/apache/hadoop/hdds/scm/cli/datanode/DiskBalancerSubCommandUtil.java +++ b/hadoop-ozone/cli-admin/src/main/java/org/apache/hadoop/hdds/scm/cli/datanode/DiskBalancerSubCommandUtil.java @@ -29,6 +29,7 @@ import org.apache.hadoop.hdds.protocol.DatanodeDetails.Port; import org.apache.hadoop.hdds.protocol.DiskBalancerProtocol; import org.apache.hadoop.hdds.protocol.proto.HddsProtos; +import org.apache.hadoop.hdds.protocol.proto.HddsProtos.DatanodeDiskBalancerInfoProto; import org.apache.hadoop.hdds.protocolPB.DiskBalancerProtocolClientSideTranslatorPB; import org.apache.hadoop.hdds.scm.client.ScmClient; import org.apache.hadoop.net.NetUtils; @@ -119,5 +120,32 @@ public static List getAllOperableNodesClientRpcAddress( return addresses; } + + /** + * Gets the hostname of a datanode by querying its DiskBalancer info. + * This ensures consistent hostname display in output, even when connecting via IP address. 
+ * + * @param address the datanode address (can be IP:port or hostname:port) + * @return the hostname of the datanode, or the original address if hostname cannot be retrieved + */ + public static String getDatanodeHostname(String address) { + DiskBalancerProtocol diskBalancerProxy = null; + try { + diskBalancerProxy = getSingleNodeDiskBalancerProxy(address); + DatanodeDiskBalancerInfoProto status = diskBalancerProxy.getDiskBalancerInfo(); + return status.getNode().getHostName(); + } catch (IOException e) { + // If we can't get the hostname, fall back to the original address + return address; + } finally { + if (diskBalancerProxy != null) { + try { + diskBalancerProxy.close(); + } catch (IOException e) { + // Ignore close errors + } + } + } + } } diff --git a/hadoop-ozone/cli-admin/src/main/java/org/apache/hadoop/hdds/scm/cli/datanode/DiskBalancerUpdateSubcommand.java b/hadoop-ozone/cli-admin/src/main/java/org/apache/hadoop/hdds/scm/cli/datanode/DiskBalancerUpdateSubcommand.java index 4ba554df82ea..d417bc421bbd 100644 --- a/hadoop-ozone/cli-admin/src/main/java/org/apache/hadoop/hdds/scm/cli/datanode/DiskBalancerUpdateSubcommand.java +++ b/hadoop-ozone/cli-admin/src/main/java/org/apache/hadoop/hdds/scm/cli/datanode/DiskBalancerUpdateSubcommand.java @@ -73,8 +73,11 @@ protected Object executeCommand(String hostName) throws IOException { HddsProtos.DiskBalancerConfigurationProto config = buildConfigProto(); diskBalancerProxy.updateDiskBalancerConfiguration(config); + // Get hostname for consistent JSON output + String dnHostname = DiskBalancerSubCommandUtil.getDatanodeHostname(hostName); + Map result = new LinkedHashMap<>(); - result.put("datanode", hostName); + result.put("datanode", dnHostname); result.put("action", "update"); result.put("status", "success"); Map configMap = getConfigurationMap(); diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/scm/node/TestVolumeChoosingPolicy.java 
b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/scm/node/TestVolumeChoosingPolicy.java index b65cfa637c83..1bb20c1ce716 100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/scm/node/TestVolumeChoosingPolicy.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/scm/node/TestVolumeChoosingPolicy.java @@ -262,7 +262,7 @@ private void createVolumes() throws IOException { } // Initialize the volumeSet with the new volume map - volumeSet.setVolumeMap(newVolumeMap); + volumeSet.setVolumeMapForTesting(newVolumeMap); System.out.println("Created " + NUM_VOLUMES + " volumes in " + (System.currentTimeMillis() - startTime) + " ms"); } From dff28b24b392ef52a03d7ce67803d995f8c99289 Mon Sep 17 00:00:00 2001 From: Gargi Jaiswal Date: Fri, 19 Dec 2025 10:36:27 +0530 Subject: [PATCH 2/4] avoid sending another start op on already active dn with diskbalancer --- .../TestDiskBalancerProtocolServer.java | 19 +++ .../datanode/DiskBalancerStartSubcommand.java | 109 +++++++++++++----- 2 files changed, 102 insertions(+), 26 deletions(-) diff --git a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/diskbalancer/TestDiskBalancerProtocolServer.java b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/diskbalancer/TestDiskBalancerProtocolServer.java index 361f5cce051b..c7b1858ef72d 100644 --- a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/diskbalancer/TestDiskBalancerProtocolServer.java +++ b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/diskbalancer/TestDiskBalancerProtocolServer.java @@ -259,5 +259,24 @@ void testUpdateRequiresAdmin() { assertEquals("Access denied for operation: updateDiskBalancerConfiguration", exception.getMessage()); } + + @Test + void testStartDiskBalancerWhenAlreadyRunning() throws IOException { + // Start DiskBalancer first + server.startDiskBalancer(null); + 
assertEquals(DiskBalancerRunningStatus.RUNNING, diskBalancerInfo.getOperationalState()); + + // Verify service was refreshed once (from the first start) + verify(diskBalancerService, times(1)).refresh(diskBalancerInfo); + + // Try to start again - should log warning and return early without exception + server.startDiskBalancer(null); + + // Verify state is still RUNNING (unchanged) + assertEquals(DiskBalancerRunningStatus.RUNNING, diskBalancerInfo.getOperationalState()); + + // Verify service was not refreshed again (still only once) + verify(diskBalancerService, times(1)).refresh(diskBalancerInfo); + } } diff --git a/hadoop-ozone/cli-admin/src/main/java/org/apache/hadoop/hdds/scm/cli/datanode/DiskBalancerStartSubcommand.java b/hadoop-ozone/cli-admin/src/main/java/org/apache/hadoop/hdds/scm/cli/datanode/DiskBalancerStartSubcommand.java index 27a72513811c..d6b262490b20 100644 --- a/hadoop-ozone/cli-admin/src/main/java/org/apache/hadoop/hdds/scm/cli/datanode/DiskBalancerStartSubcommand.java +++ b/hadoop-ozone/cli-admin/src/main/java/org/apache/hadoop/hdds/scm/cli/datanode/DiskBalancerStartSubcommand.java @@ -18,12 +18,17 @@ package org.apache.hadoop.hdds.scm.cli.datanode; import java.io.IOException; +import java.util.ArrayList; import java.util.LinkedHashMap; +import java.util.LinkedHashSet; import java.util.List; import java.util.Map; +import java.util.Set; import org.apache.hadoop.hdds.cli.HddsVersionProvider; import org.apache.hadoop.hdds.protocol.DiskBalancerProtocol; +import org.apache.hadoop.hdds.protocol.proto.HddsProtos.DatanodeDiskBalancerInfoProto; import org.apache.hadoop.hdds.protocol.proto.HddsProtos.DiskBalancerConfigurationProto; +import org.apache.hadoop.hdds.protocol.proto.HddsProtos.DiskBalancerRunningStatus; import picocli.CommandLine.Command; import picocli.CommandLine.Option; @@ -56,31 +61,61 @@ public class DiskBalancerStartSubcommand extends AbstractDiskBalancerSubCommand arity = "1") private Boolean stopAfterDiskEven; + // Track nodes that are 
already running for display purposes (using Set to avoid duplicates) + private final Set alreadyRunningNodes = new LinkedHashSet<>(); + @Override protected Object executeCommand(String hostName) throws IOException { DiskBalancerProtocol diskBalancerProxy = DiskBalancerSubCommandUtil .getSingleNodeDiskBalancerProxy(hostName); try { + // Check if DiskBalancer is already running before starting + DatanodeDiskBalancerInfoProto status = diskBalancerProxy.getDiskBalancerInfo(); + String dnHostname = DiskBalancerSubCommandUtil.getDatanodeHostname(hostName); + + if (status.getRunningStatus() == DiskBalancerRunningStatus.RUNNING) { + // Track this node as already running + alreadyRunningNodes.add(dnHostname); + + // Return a skipped result + return createJsonResult(dnHostname, "skipped", + "DiskBalancer operation is already running."); + } + + // Not running, proceed with start DiskBalancerConfigurationProto config = buildConfigProto(); diskBalancerProxy.startDiskBalancer(config); - // Get hostname for consistent JSON output - String dnHostname = DiskBalancerSubCommandUtil.getDatanodeHostname(hostName); - - Map result = new LinkedHashMap<>(); - result.put("datanode", dnHostname); - result.put("action", "start"); - result.put("status", "success"); - Map configMap = getConfigurationMap(); - if (configMap != null && !configMap.isEmpty()) { - result.put("configuration", configMap); - } - return result; + // Return a success result + return createJsonResult(dnHostname, "success", null); } finally { diskBalancerProxy.close(); } } + /** + * Create a JSON result map for the start command. 
+ * + * @param hostname the datanode hostname + * @param status the status ("success" or "skipped") + * @param message optional message (for skipped status) + * @return result map + */ + private Map createJsonResult(String hostname, String status, String message) { + Map result = new LinkedHashMap<>(); + result.put("datanode", hostname); + result.put("action", "start"); + result.put("status", status); + if (message != null) { + result.put("message", message); + } + Map configMap = getConfigurationMap(); + if (configMap != null && !configMap.isEmpty()) { + result.put("configuration", configMap); + } + return result; + } + private DiskBalancerConfigurationProto buildConfigProto() { DiskBalancerConfigurationProto.Builder builder = DiskBalancerConfigurationProto.newBuilder(); @@ -107,22 +142,44 @@ protected void displayResults(List successNodes, return; } - if (isBatchMode()) { - if (!failedNodes.isEmpty()) { - System.err.printf("Failed to start DiskBalancer on nodes: [%s]%n", - String.join(", ", failedNodes)); - } else { - System.out.println("Started DiskBalancer on all IN_SERVICE nodes."); - } + // avoid showing duplicate nodes in output + List uniqueSuccessNodes = new ArrayList<>(new LinkedHashSet<>(successNodes)); + List uniqueFailedNodes = new ArrayList<>(new LinkedHashSet<>(failedNodes)); + + // Filter out skipped nodes from successNodes + List actualSuccessNodes = new ArrayList<>(uniqueSuccessNodes); + actualSuccessNodes.removeAll(alreadyRunningNodes); + + // Check if all nodes are already running (batch mode only) + boolean allNodesAlreadyRunning = isBatchMode() && actualSuccessNodes.isEmpty() + && uniqueFailedNodes.isEmpty() && !alreadyRunningNodes.isEmpty(); + + if (allNodesAlreadyRunning) { + System.out.println("DiskBalancer operation is already running on all IN_SERVICE and HEALTHY nodes."); } else { - // Detailed message for specific nodes - if (!successNodes.isEmpty()) { - System.out.printf("Started DiskBalancer on nodes: [%s]%n", - String.join(", ", 
successNodes)); + // Display warning for nodes that are already running (if not all) + if (!alreadyRunningNodes.isEmpty()) { + System.out.printf("DiskBalancer operation is already running on : [%s]%n", + String.join(", ", alreadyRunningNodes)); } - if (!failedNodes.isEmpty()) { - System.err.printf("Failed to start DiskBalancer on nodes: [%s]%n", - String.join(", ", failedNodes)); + + if (isBatchMode()) { + if (!uniqueFailedNodes.isEmpty()) { + System.err.printf("Failed to start DiskBalancer on nodes: [%s]%n", + String.join(", ", uniqueFailedNodes)); + } + if (!actualSuccessNodes.isEmpty()) { + System.out.println("Started DiskBalancer operation on all other IN_SERVICE and HEALTHY DNs."); + } + } else { + if (!actualSuccessNodes.isEmpty()) { + System.out.printf("Started DiskBalancer on nodes: [%s]%n", + String.join(", ", actualSuccessNodes)); + } + if (!uniqueFailedNodes.isEmpty()) { + System.err.printf("Failed to start DiskBalancer on nodes: [%s]%n", + String.join(", ", uniqueFailedNodes)); + } } } } From 8f8970eb9624499297b7849635f747615fecec6e Mon Sep 17 00:00:00 2001 From: Gargi Jaiswal Date: Fri, 19 Dec 2025 11:26:31 +0530 Subject: [PATCH 3/4] use set instead of list for storing dn hostnames for cli output --- .../AbstractDiskBalancerSubCommand.java | 36 ++++++++++--------- .../DiskBalancerReportSubcommand.java | 3 +- .../datanode/DiskBalancerStartSubcommand.java | 22 +++++------- .../DiskBalancerStatusSubcommand.java | 3 +- .../datanode/DiskBalancerStopSubcommand.java | 4 +-- .../DiskBalancerUpdateSubcommand.java | 6 ++-- 6 files changed, 37 insertions(+), 37 deletions(-) diff --git a/hadoop-ozone/cli-admin/src/main/java/org/apache/hadoop/hdds/scm/cli/datanode/AbstractDiskBalancerSubCommand.java b/hadoop-ozone/cli-admin/src/main/java/org/apache/hadoop/hdds/scm/cli/datanode/AbstractDiskBalancerSubCommand.java index 56c0caac1061..b3e3b0728781 100644 --- 
a/hadoop-ozone/cli-admin/src/main/java/org/apache/hadoop/hdds/scm/cli/datanode/AbstractDiskBalancerSubCommand.java +++ b/hadoop-ozone/cli-admin/src/main/java/org/apache/hadoop/hdds/scm/cli/datanode/AbstractDiskBalancerSubCommand.java @@ -20,8 +20,10 @@ import java.io.IOException; import java.util.ArrayList; import java.util.LinkedHashMap; +import java.util.LinkedHashSet; import java.util.List; import java.util.Map; +import java.util.Set; import java.util.concurrent.Callable; import org.apache.hadoop.hdds.HddsConfigKeys; import org.apache.hadoop.hdds.conf.OzoneConfiguration; @@ -74,27 +76,29 @@ public Void call() throws Exception { return null; } + // Deduplicate target datanodes by address + Set uniqueTargetDatanodes = new LinkedHashSet<>(targetDatanodes); + targetDatanodes = new ArrayList<>(uniqueTargetDatanodes); + // Track if we're using batch mode for display isBatchMode = options.isInServiceDatanodes(); // Execute on all target datanodes and collect results - List successNodes = new ArrayList<>(); - List failedNodes = new ArrayList<>(); - List jsonResults = new ArrayList<>(); - + Set successNodes = new LinkedHashSet<>(); + Set failedNodes = new LinkedHashSet<>(); + Map jsonResults = new LinkedHashMap<>(); + // Execute commands and collect results for (String dn : targetDatanodes) { + String hostname = DiskBalancerSubCommandUtil.getDatanodeHostname(dn); + try { Object result = executeCommand(dn); - // Get hostname for display (fallback to original address if it fails) - String hostname = DiskBalancerSubCommandUtil.getDatanodeHostname(dn); successNodes.add(hostname); if (options.isJson()) { - jsonResults.add(result); + jsonResults.put(hostname, result); } } catch (Exception e) { - // Get hostname for error display (fallback to original address if it fails) - String hostname = DiskBalancerSubCommandUtil.getDatanodeHostname(dn); failedNodes.add(hostname); String errorMsg = e.getMessage(); if (errorMsg != null && errorMsg.contains("\n")) { @@ -104,20 +108,20 @@ 
public Void call() throws Exception { errorMsg = e.getClass().getSimpleName(); } if (options.isJson()) { - // Create error result object in JSON format Map errorResult = createErrorResult(hostname, errorMsg); - jsonResults.add(errorResult); + jsonResults.put(hostname, errorResult); } else { // Print error messages in non-JSON mode System.err.printf("Error on node [%s]: %s%n", hostname, errorMsg); } } } - + // Output results if (options.isJson()) { if (!jsonResults.isEmpty()) { - System.out.println(JsonUtils.toJsonStringWithDefaultPrettyPrinter(jsonResults)); + // Convert Map values to List for JSON serialization + System.out.println(JsonUtils.toJsonStringWithDefaultPrettyPrinter(new ArrayList<>(jsonResults.values()))); } } else { displayResults(successNodes, failedNodes); @@ -190,10 +194,10 @@ protected String validateParameters() { * Display consolidated results after executing on all datanodes. * For JSON mode, this may be called for summary purposes only. * - * @param successNodes list of nodes where command succeeded - * @param failedNodes list of nodes where command failed + * @param successNodes set of nodes where command succeeded + * @param failedNodes set of nodes where command failed */ - protected abstract void displayResults(List successNodes, List failedNodes); + protected abstract void displayResults(Set successNodes, Set failedNodes); /** * Get the action name for this command (e.g., "start", "stop", "update", "status", "report"). 
diff --git a/hadoop-ozone/cli-admin/src/main/java/org/apache/hadoop/hdds/scm/cli/datanode/DiskBalancerReportSubcommand.java b/hadoop-ozone/cli-admin/src/main/java/org/apache/hadoop/hdds/scm/cli/datanode/DiskBalancerReportSubcommand.java index 9f1d5347860c..9f3f35a0f034 100644 --- a/hadoop-ozone/cli-admin/src/main/java/org/apache/hadoop/hdds/scm/cli/datanode/DiskBalancerReportSubcommand.java +++ b/hadoop-ozone/cli-admin/src/main/java/org/apache/hadoop/hdds/scm/cli/datanode/DiskBalancerReportSubcommand.java @@ -22,6 +22,7 @@ import java.util.LinkedHashMap; import java.util.List; import java.util.Map; +import java.util.Set; import java.util.concurrent.ConcurrentHashMap; import org.apache.hadoop.hdds.cli.HddsVersionProvider; import org.apache.hadoop.hdds.protocol.DiskBalancerProtocol; @@ -64,7 +65,7 @@ protected Object executeCommand(String hostName) throws IOException { } @Override - protected void displayResults(List successNodes, List failedNodes) { + protected void displayResults(Set successNodes, Set failedNodes) { // In JSON mode, results are already written if (getOptions().isJson()) { return; diff --git a/hadoop-ozone/cli-admin/src/main/java/org/apache/hadoop/hdds/scm/cli/datanode/DiskBalancerStartSubcommand.java b/hadoop-ozone/cli-admin/src/main/java/org/apache/hadoop/hdds/scm/cli/datanode/DiskBalancerStartSubcommand.java index d6b262490b20..535d7796de93 100644 --- a/hadoop-ozone/cli-admin/src/main/java/org/apache/hadoop/hdds/scm/cli/datanode/DiskBalancerStartSubcommand.java +++ b/hadoop-ozone/cli-admin/src/main/java/org/apache/hadoop/hdds/scm/cli/datanode/DiskBalancerStartSubcommand.java @@ -18,10 +18,8 @@ package org.apache.hadoop.hdds.scm.cli.datanode; import java.io.IOException; -import java.util.ArrayList; import java.util.LinkedHashMap; import java.util.LinkedHashSet; -import java.util.List; import java.util.Map; import java.util.Set; import org.apache.hadoop.hdds.cli.HddsVersionProvider; @@ -135,24 +133,20 @@ private DiskBalancerConfigurationProto 
buildConfigProto() { } @Override - protected void displayResults(List successNodes, - List failedNodes) { + protected void displayResults(Set successNodes, + Set failedNodes) { // In JSON mode, results are already written, only show summary if needed if (getOptions().isJson()) { return; } - // avoid showing duplicate nodes in output - List uniqueSuccessNodes = new ArrayList<>(new LinkedHashSet<>(successNodes)); - List uniqueFailedNodes = new ArrayList<>(new LinkedHashSet<>(failedNodes)); - // Filter out skipped nodes from successNodes - List actualSuccessNodes = new ArrayList<>(uniqueSuccessNodes); + Set actualSuccessNodes = new LinkedHashSet<>(successNodes); actualSuccessNodes.removeAll(alreadyRunningNodes); // Check if all nodes are already running (batch mode only) boolean allNodesAlreadyRunning = isBatchMode() && actualSuccessNodes.isEmpty() - && uniqueFailedNodes.isEmpty() && !alreadyRunningNodes.isEmpty(); + && failedNodes.isEmpty() && !alreadyRunningNodes.isEmpty(); if (allNodesAlreadyRunning) { System.out.println("DiskBalancer operation is already running on all IN_SERVICE and HEALTHY nodes."); @@ -164,9 +158,9 @@ protected void displayResults(List successNodes, } if (isBatchMode()) { - if (!uniqueFailedNodes.isEmpty()) { + if (!failedNodes.isEmpty()) { System.err.printf("Failed to start DiskBalancer on nodes: [%s]%n", - String.join(", ", uniqueFailedNodes)); + String.join(", ", failedNodes)); } if (!actualSuccessNodes.isEmpty()) { System.out.println("Started DiskBalancer operation on all other IN_SERVICE and HEALTHY DNs."); @@ -176,9 +170,9 @@ protected void displayResults(List successNodes, System.out.printf("Started DiskBalancer on nodes: [%s]%n", String.join(", ", actualSuccessNodes)); } - if (!uniqueFailedNodes.isEmpty()) { + if (!failedNodes.isEmpty()) { System.err.printf("Failed to start DiskBalancer on nodes: [%s]%n", - String.join(", ", uniqueFailedNodes)); + String.join(", ", failedNodes)); } } } diff --git 
a/hadoop-ozone/cli-admin/src/main/java/org/apache/hadoop/hdds/scm/cli/datanode/DiskBalancerStatusSubcommand.java b/hadoop-ozone/cli-admin/src/main/java/org/apache/hadoop/hdds/scm/cli/datanode/DiskBalancerStatusSubcommand.java index 2c097da4d757..47b90b7a5bea 100644 --- a/hadoop-ozone/cli-admin/src/main/java/org/apache/hadoop/hdds/scm/cli/datanode/DiskBalancerStatusSubcommand.java +++ b/hadoop-ozone/cli-admin/src/main/java/org/apache/hadoop/hdds/scm/cli/datanode/DiskBalancerStatusSubcommand.java @@ -22,6 +22,7 @@ import java.util.LinkedHashMap; import java.util.List; import java.util.Map; +import java.util.Set; import java.util.concurrent.ConcurrentHashMap; import org.apache.hadoop.hdds.cli.HddsVersionProvider; import org.apache.hadoop.hdds.protocol.DiskBalancerProtocol; @@ -64,7 +65,7 @@ protected Object executeCommand(String hostName) throws IOException { } @Override - protected void displayResults(List successNodes, List failedNodes) { + protected void displayResults(Set successNodes, Set failedNodes) { // In JSON mode, results are already written if (getOptions().isJson()) { return; diff --git a/hadoop-ozone/cli-admin/src/main/java/org/apache/hadoop/hdds/scm/cli/datanode/DiskBalancerStopSubcommand.java b/hadoop-ozone/cli-admin/src/main/java/org/apache/hadoop/hdds/scm/cli/datanode/DiskBalancerStopSubcommand.java index 4fa84f3792b0..25ff7a729434 100644 --- a/hadoop-ozone/cli-admin/src/main/java/org/apache/hadoop/hdds/scm/cli/datanode/DiskBalancerStopSubcommand.java +++ b/hadoop-ozone/cli-admin/src/main/java/org/apache/hadoop/hdds/scm/cli/datanode/DiskBalancerStopSubcommand.java @@ -19,8 +19,8 @@ import java.io.IOException; import java.util.LinkedHashMap; -import java.util.List; import java.util.Map; +import java.util.Set; import org.apache.hadoop.hdds.cli.HddsVersionProvider; import org.apache.hadoop.hdds.protocol.DiskBalancerProtocol; import picocli.CommandLine.Command; @@ -56,7 +56,7 @@ protected Object executeCommand(String hostName) throws IOException { } 
@Override - protected void displayResults(List successNodes, List failedNodes) { + protected void displayResults(Set successNodes, Set failedNodes) { // In JSON mode, results are already written, only show summary if needed if (getOptions().isJson()) { return; diff --git a/hadoop-ozone/cli-admin/src/main/java/org/apache/hadoop/hdds/scm/cli/datanode/DiskBalancerUpdateSubcommand.java b/hadoop-ozone/cli-admin/src/main/java/org/apache/hadoop/hdds/scm/cli/datanode/DiskBalancerUpdateSubcommand.java index d417bc421bbd..b116d0c238db 100644 --- a/hadoop-ozone/cli-admin/src/main/java/org/apache/hadoop/hdds/scm/cli/datanode/DiskBalancerUpdateSubcommand.java +++ b/hadoop-ozone/cli-admin/src/main/java/org/apache/hadoop/hdds/scm/cli/datanode/DiskBalancerUpdateSubcommand.java @@ -19,8 +19,8 @@ import java.io.IOException; import java.util.LinkedHashMap; -import java.util.List; import java.util.Map; +import java.util.Set; import org.apache.hadoop.hdds.cli.HddsVersionProvider; import org.apache.hadoop.hdds.protocol.DiskBalancerProtocol; import org.apache.hadoop.hdds.protocol.proto.HddsProtos; @@ -109,8 +109,8 @@ private HddsProtos.DiskBalancerConfigurationProto buildConfigProto() { } @Override - protected void displayResults(List successNodes, - List failedNodes) { + protected void displayResults(Set successNodes, + Set failedNodes) { // In JSON mode, results are already written, only show summary if needed if (getOptions().isJson()) { return; From 50c36a864fd7896bc61e9ce244005e6d5cb51348 Mon Sep 17 00:00:00 2001 From: Gargi Jaiswal Date: Fri, 19 Dec 2025 11:52:14 +0530 Subject: [PATCH 4/4] updated unit tests --- .../DiskBalancerProtocolServer.java | 9 +++ .../datanode/DiskBalancerStartSubcommand.java | 10 ++- .../datanode/TestDiskBalancerSubCommands.java | 61 ++++++++++++++++--- .../ozone/scm/node/TestDiskBalancer.java | 28 +++++++++ 4 files changed, 98 insertions(+), 10 deletions(-) diff --git 
a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/diskbalancer/DiskBalancerProtocolServer.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/diskbalancer/DiskBalancerProtocolServer.java index b92391dc8b9d..4040b9a88b4a 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/diskbalancer/DiskBalancerProtocolServer.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/diskbalancer/DiskBalancerProtocolServer.java @@ -91,6 +91,15 @@ public void startDiskBalancer(DiskBalancerConfigurationProto configProto) adminChecker.check("startDiskBalancer"); final DiskBalancerInfo info = getDiskBalancerInfoImpl(); + // Check if DiskBalancer is already running or paused + DiskBalancerRunningStatus currentStatus = info.getOperationalState(); + if (currentStatus == DiskBalancerRunningStatus.RUNNING + || currentStatus == DiskBalancerRunningStatus.PAUSED) { + // Already running/paused: log a warning and return early; any newly supplied configuration is ignored + LOG.warn("DiskBalancer is already in {} state.", currentStatus); + return; + } + // Check node operational state before starting DiskBalancer // Only IN_SERVICE nodes should actively balance disks NodeOperationalState nodeState = diff --git a/hadoop-ozone/cli-admin/src/main/java/org/apache/hadoop/hdds/scm/cli/datanode/DiskBalancerStartSubcommand.java b/hadoop-ozone/cli-admin/src/main/java/org/apache/hadoop/hdds/scm/cli/datanode/DiskBalancerStartSubcommand.java index 535d7796de93..6fee7909f665 100644 --- a/hadoop-ozone/cli-admin/src/main/java/org/apache/hadoop/hdds/scm/cli/datanode/DiskBalancerStartSubcommand.java +++ b/hadoop-ozone/cli-admin/src/main/java/org/apache/hadoop/hdds/scm/cli/datanode/DiskBalancerStartSubcommand.java @@ -71,7 +71,8 @@ protected Object executeCommand(String hostName) throws IOException { DatanodeDiskBalancerInfoProto status = diskBalancerProxy.getDiskBalancerInfo(); String 
dnHostname = DiskBalancerSubCommandUtil.getDatanodeHostname(hostName); - if (status.getRunningStatus() == DiskBalancerRunningStatus.RUNNING) { + if (status.getRunningStatus() == DiskBalancerRunningStatus.RUNNING || + status.getRunningStatus() == DiskBalancerRunningStatus.PAUSED) { // Track this node as already running alreadyRunningNodes.add(dnHostname); @@ -163,7 +164,12 @@ protected void displayResults(Set successNodes, String.join(", ", failedNodes)); } if (!actualSuccessNodes.isEmpty()) { - System.out.println("Started DiskBalancer operation on all other IN_SERVICE and HEALTHY DNs."); + // If no nodes were skipped, use simpler message + if (alreadyRunningNodes.isEmpty()) { + System.out.println("Started DiskBalancer on all IN_SERVICE nodes."); + } else { + System.out.println("Started DiskBalancer operation on all other IN_SERVICE and HEALTHY DNs."); + } } } else { if (!actualSuccessNodes.isEmpty()) { diff --git a/hadoop-ozone/cli-admin/src/test/java/org/apache/hadoop/hdds/scm/cli/datanode/TestDiskBalancerSubCommands.java b/hadoop-ozone/cli-admin/src/test/java/org/apache/hadoop/hdds/scm/cli/datanode/TestDiskBalancerSubCommands.java index 105cb5d8566b..8a4efb4ddaaf 100644 --- a/hadoop-ozone/cli-admin/src/test/java/org/apache/hadoop/hdds/scm/cli/datanode/TestDiskBalancerSubCommands.java +++ b/hadoop-ozone/cli-admin/src/test/java/org/apache/hadoop/hdds/scm/cli/datanode/TestDiskBalancerSubCommands.java @@ -136,6 +136,14 @@ private DiskBalancerMocks setupAllMocks() { mockedUtil.when(() -> DiskBalancerSubCommandUtil .getSingleNodeDiskBalancerProxy(anyString())) .thenReturn(mockProtocol); + // Mock getDatanodeHostname to extract hostname from address (e.g., "host-1:19864" -> "host-1") + mockedUtil.when(() -> DiskBalancerSubCommandUtil.getDatanodeHostname(anyString())) + .thenAnswer(invocation -> { + String address = invocation.getArgument(0); + // Extract hostname from "hostname:port" or just return "hostname" + int colonIndex = address.indexOf(':'); + return colonIndex 
> 0 ? address.substring(0, colonIndex) : address; + }); return new DiskBalancerMocks(mockedConf, mockedClient, mockedUtil); } @@ -151,10 +159,17 @@ public void tearDown() { @Test public void testStartDiskBalancerWithInServiceDatanodes() throws Exception { DiskBalancerStartSubcommand cmd = new DiskBalancerStartSubcommand(); - doNothing().when(mockProtocol).startDiskBalancer(any(DiskBalancerConfigurationProto.class)); - // Set up all required mocks + // Set up all required mocks first try (DiskBalancerMocks mocks = setupAllMocks()) { + // Now set up protocol mocks after static mocks are active + doNothing().when(mockProtocol).startDiskBalancer(any(DiskBalancerConfigurationProto.class)); + + // Mock getDiskBalancerInfo to return STOPPED status for all nodes + when(mockProtocol.getDiskBalancerInfo()) + .thenReturn(createStoppedStatusProto("host-1"), + createStoppedStatusProto("host-2"), + createStoppedStatusProto("host-3")); CommandLine c = new CommandLine(cmd); c.parseArgs("--in-service-datanodes"); @@ -171,9 +186,12 @@ public void testStartDiskBalancerWithInServiceDatanodes() throws Exception { @Test public void testStartDiskBalancerWithConfiguration() throws Exception { DiskBalancerStartSubcommand cmd = new DiskBalancerStartSubcommand(); - doNothing().when(mockProtocol).startDiskBalancer(any(DiskBalancerConfigurationProto.class)); try (DiskBalancerMocks mocks = setupAllMocks()) { + // Set up protocol mocks after static mocks are active + doNothing().when(mockProtocol).startDiskBalancer(any(DiskBalancerConfigurationProto.class)); + when(mockProtocol.getDiskBalancerInfo()) + .thenReturn(createStoppedStatusProto("host-1")); CommandLine c = new CommandLine(cmd); c.parseArgs("-t", "0.005", "-b", "100", "-p", "5", "-s", "false", "host-1"); @@ -188,9 +206,14 @@ public void testStartDiskBalancerWithConfiguration() throws Exception { @Test public void testStartDiskBalancerWithMultipleNodes() throws Exception { DiskBalancerStartSubcommand cmd = new 
DiskBalancerStartSubcommand(); - doNothing().when(mockProtocol).startDiskBalancer(any(DiskBalancerConfigurationProto.class)); try (DiskBalancerMocks mocks = setupAllMocks()) { + // Set up protocol mocks after static mocks are active + doNothing().when(mockProtocol).startDiskBalancer(any(DiskBalancerConfigurationProto.class)); + when(mockProtocol.getDiskBalancerInfo()) + .thenReturn(createStoppedStatusProto("host-1"), + createStoppedStatusProto("host-2"), + createStoppedStatusProto("host-3")); CommandLine c = new CommandLine(cmd); c.parseArgs("host-1", "host-2", "host-3"); @@ -205,12 +228,17 @@ public void testStartDiskBalancerWithMultipleNodes() throws Exception { @Test public void testStartDiskBalancerWithStdin() throws Exception { DiskBalancerStartSubcommand cmd = new DiskBalancerStartSubcommand(); - doNothing().when(mockProtocol).startDiskBalancer(any(DiskBalancerConfigurationProto.class)); String input = "host-1\nhost-2\nhost-3\n"; System.setIn(new ByteArrayInputStream(input.getBytes(DEFAULT_ENCODING))); try (DiskBalancerMocks mocks = setupAllMocks()) { + // Set up protocol mocks after static mocks are active + doNothing().when(mockProtocol).startDiskBalancer(any(DiskBalancerConfigurationProto.class)); + when(mockProtocol.getDiskBalancerInfo()) + .thenReturn(createStoppedStatusProto("host-1"), + createStoppedStatusProto("host-2"), + createStoppedStatusProto("host-3")); CommandLine c = new CommandLine(cmd); c.parseArgs("-"); @@ -225,9 +253,12 @@ public void testStartDiskBalancerWithStdin() throws Exception { @Test public void testStartDiskBalancerWithJson() throws Exception { DiskBalancerStartSubcommand cmd = new DiskBalancerStartSubcommand(); - doNothing().when(mockProtocol).startDiskBalancer(any(DiskBalancerConfigurationProto.class)); try (DiskBalancerMocks mocks = setupAllMocks()) { + // Set up protocol mocks after static mocks are active + doNothing().when(mockProtocol).startDiskBalancer(any(DiskBalancerConfigurationProto.class)); + 
when(mockProtocol.getDiskBalancerInfo()) + .thenReturn(createStoppedStatusProto("host-1")); CommandLine c = new CommandLine(cmd); c.parseArgs("--json", "-t", "0.005", "-b", "100", "host-1"); @@ -246,10 +277,13 @@ public void testStartDiskBalancerWithJson() throws Exception { @Test public void testStartDiskBalancerFailure() throws Exception { DiskBalancerStartSubcommand cmd = new DiskBalancerStartSubcommand(); - doThrow(new IOException("Connection failed")).when(mockProtocol) - .startDiskBalancer(any(DiskBalancerConfigurationProto.class)); try (DiskBalancerMocks mocks = setupAllMocks()) { + // Set up protocol mocks after static mocks are active + when(mockProtocol.getDiskBalancerInfo()) + .thenReturn(createStoppedStatusProto("host-1")); + doThrow(new IOException("Connection failed")).when(mockProtocol) + .startDiskBalancer(any(DiskBalancerConfigurationProto.class)); CommandLine c = new CommandLine(cmd); c.parseArgs("host-1"); @@ -625,6 +659,17 @@ public void testReportDiskBalancerFailure() throws Exception { } } + /** + * Creates a STOPPED status proto for a given hostname. + * Used in start tests to ensure DiskBalancer is not already running. 
+ * @param hostname the hostname + * @return DatanodeDiskBalancerInfoProto with STOPPED status + */ + private DatanodeDiskBalancerInfoProto createStoppedStatusProto(String hostname) { + return createStatusProto(hostname, DiskBalancerRunningStatus.STOPPED, + 10.0, 20L, 5, 0L, 0L, 0L, 0L); + } + @SuppressWarnings("checkstyle:ParameterNumber") private DatanodeDiskBalancerInfoProto createStatusProto(String hostname, DiskBalancerRunningStatus status, double threshold, long bandwidthInMB, diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/scm/node/TestDiskBalancer.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/scm/node/TestDiskBalancer.java index 6df761e45f5a..07cfece619f2 100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/scm/node/TestDiskBalancer.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/scm/node/TestDiskBalancer.java @@ -54,6 +54,7 @@ import org.apache.hadoop.security.UserGroupInformation; import org.apache.ozone.test.GenericTestUtils; import org.junit.jupiter.api.AfterAll; +import org.junit.jupiter.api.AfterEach; import org.junit.jupiter.api.BeforeAll; import org.junit.jupiter.api.Test; import org.junit.jupiter.api.Timeout; @@ -93,6 +94,33 @@ public static void cleanup() throws Exception { } } + @AfterEach + public void stopDiskBalancer() throws IOException, InterruptedException, TimeoutException { + // Stop disk balancer on all DNs after each test to ensure clean state + NodeManager nm = cluster.getStorageContainerManager().getScmNodeManager(); + List allDatanodes = new ArrayList<>(nm.getAllNodes()); + + for (DatanodeDetails dn : allDatanodes) { + try (DiskBalancerProtocol proxy = getDiskBalancerProxy(dn)) { + proxy.stopDiskBalancer(); + } catch (IOException e) { + // Ignore errors when stopping - node might already be stopped + } + } + + // Verify that all DNs have stopped DiskBalancerService + for (DatanodeDetails dn : allDatanodes) { + 
GenericTestUtils.waitFor(() -> { + try (DiskBalancerProtocol proxy = getDiskBalancerProxy(dn)) { + DatanodeDiskBalancerInfoProto status = proxy.getDiskBalancerInfo(); + return status.getRunningStatus() == DiskBalancerRunningStatus.STOPPED; + } catch (IOException e) { + return false; + } + }, 100, 5000); // Check every 100ms, timeout after 5s + } + } + /** * Helper method to create a DiskBalancerProtocol proxy for a datanode. */