From 894c8d4b31a34ab3a0d87521f01a9aecb3a4dc1e Mon Sep 17 00:00:00 2001
From: Terry Kong
Date: Sat, 19 Apr 2025 15:19:32 -0700
Subject: [PATCH] fix: Move ray worker port range start from 20001 to 53001

Signed-off-by: Terry Kong
---
Review notes (this section is not part of the commit message):
* tools/find_available_port_ranges.py probes each port with bind() instead
  of connect_ex(), and only scans when run as a script.
* The blob id of the new file is omitted because its content was revised;
  regenerate the patch with git format-patch to restore it.

 ray.sub                             |  8 +++--
 tools/copyright.sh                  |  2 +-
 tools/find_available_port_ranges.py | 79 +++++++++++++++++++++++++++++
 3 files changed, 85 insertions(+), 4 deletions(-)
 mode change 100644 => 100755 tools/copyright.sh
 create mode 100644 tools/find_available_port_ranges.py

diff --git a/ray.sub b/ray.sub
index cc72295f2a..047d5427b7 100644
--- a/ray.sub
+++ b/ray.sub
@@ -26,9 +26,11 @@ NODE_MANAGER_PORT=${NODE_MANAGER_PORT:-8077}
 DASHBOARD_AGENT_PORT=${DASHBOARD_AGENT_PORT:-52365}
 DASHBOARD_AGENT_GRPC_PORT=${DASHBOARD_AGENT_GRPC_PORT:-52366}
 METRICS_PORT=${METRICS_PORT:-9002}
-# NOTE: Ports start above 20000 since 10001-10257 frequently ran into conflicts
-MIN_WORKER_PORT=${MIN_WORKER_PORT:-20001}
-MAX_WORKER_PORT=${MAX_WORKER_PORT:-20257}
+# On our clusters, the largest port range on an idle worker appeared between 52367-64607
+# (not including the other ports set by this script). So this range is chosen to be
+# somewhere in the middle
+MIN_WORKER_PORT=${MIN_WORKER_PORT:-53001}
+MAX_WORKER_PORT=${MAX_WORKER_PORT:-53257}
 
 ########################################################
 # Defaults to placing uv cache inside the SLURM_SUBMIT_DIR
diff --git a/tools/copyright.sh b/tools/copyright.sh
old mode 100644
new mode 100755
index c08f410b84..bebab2d45f
--- a/tools/copyright.sh
+++ b/tools/copyright.sh
@@ -19,7 +19,7 @@ SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )
 # Move to the project root
 cd $SCRIPT_DIR/..
 find_files_with_missing_copyright() {
-find ./nemo_reinforcer/ ./docs/*.py ./examples/ ./tests/ -type f -name '*.py' | while read path; do
+find ./nemo_reinforcer/ ./docs/*.py ./examples/ ./tests/ ./tools/ -type f -name '*.py' | while read path; do
     echo -en $path"\t"
     head -2 $path | grep -iv 'coding=' | head -1
 done \
diff --git a/tools/find_available_port_ranges.py b/tools/find_available_port_ranges.py
new file mode 100644
--- /dev/null
+++ b/tools/find_available_port_ranges.py
@@ -0,0 +1,79 @@
+# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Report contiguous ranges of free TCP ports on this host.
+
+Every port from 1024 to 65535 is probed, and each maximal run of free ports
+is printed as its size, a tab, and the range (a single port number when the
+run has length one).
+"""
+
+import socket
+
+# Scan bounds, both inclusive: 65535 is the highest valid TCP port number.
+FIRST_PORT = 1024
+LAST_PORT = 65535
+
+
+def is_free(port):
+    """Return True if TCP ``port`` can be bound on every local IPv4 address.
+
+    A bind probe is used instead of connecting to localhost: it also sees
+    ports held by sockets bound to a non-loopback address or in use as the
+    source port of an outgoing connection, and it never opens a connection
+    to whatever service is listening on the port.
+    """
+    with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
+        try:
+            s.bind(("", port))
+        except OSError:
+            return False
+        return True
+
+
+def free_ranges(first=FIRST_PORT, last=LAST_PORT, probe=is_free):
+    """Yield ``(start, end)`` for each maximal run of free ports.
+
+    ``first`` and ``last`` are inclusive; ``probe`` is called once per port.
+    """
+    start = None
+    for port in range(first, last + 1):
+        if probe(port):
+            if start is None:
+                start = port
+        elif start is not None:
+            yield start, port - 1
+            start = None
+    # The scan can end while still inside a free run.
+    if start is not None:
+        yield start, last
+
+
+def format_range(start, end):
+    """Return the table row for the inclusive port range ``start``-``end``."""
+    size = end - start + 1
+    span = str(start) if start == end else f"{start}-{end}"
+    return f"{size:4d}\t{span}"
+
+
+def main():
+    """Print a table of all free port ranges within the scan bounds."""
+    print("Size\tRange")
+    print("-" * 20)
+    for start, end in free_ranges():
+        print(format_range(start, end))
+
+
+if __name__ == "__main__":
+    main()