From 6477502ca963a78f87727f0215716b7e90a69eda Mon Sep 17 00:00:00 2001 From: Charles Antonelli Date: Mon, 2 Mar 2020 15:22:50 -0500 Subject: [PATCH] make tskserver port selection dynamic If multiple tskservers are started concurrently on the same host, only one of them will be able to bind to the (hardcoded) tskserver port. This patch makes port selection dynamic: each start attempt uses port 9471 plus the retry count, so a tskserver instance that fails to start on one port retries on the next higher one. This is highly desirable for installations that do not have whole-node allocation policies. --- paramrun | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/paramrun b/paramrun index 6f1c61a..d623b30 100755 --- a/paramrun +++ b/paramrun @@ -174,13 +174,14 @@ if [[ "$LAUNCHER_SCHED" == "dynamic" ]]; then RETRY=0 while [[ "$RUNNING" == "false" ]] do + let TSKPORT=9471+RETRY if [[ "$windowsP" == "true" ]]; then - $LAUNCHER_DIR/tskserver $LAUNCHER_NJOBS localhost 9471 2>/dev/null & + $LAUNCHER_DIR/tskserver $LAUNCHER_NJOBS localhost $TSKPORT 2>/dev/null & LAUNCHER_DYN_PID=$! disown $LAUNCHER_DYN_PID sleep 1s else - $LAUNCHER_DIR/tskserver $LAUNCHER_NJOBS $HOSTNAME 9471 2>/dev/null & + $LAUNCHER_DIR/tskserver $LAUNCHER_NJOBS $HOSTNAME $TSKPORT 2>/dev/null & LAUNCHER_DYN_PID=$! disown $LAUNCHER_DYN_PID sleep 1s @@ -190,7 +191,7 @@ if [[ "$LAUNCHER_SCHED" == "dynamic" ]]; then then if [[ $RETRY -ne 10 ]] then - lwarn WARNING "Unable to start dynamic task service. Retrying..." + lwarn WARNING "Unable to start dynamic task service on port $TSKPORT. Retrying..." RETRY=`expr $RETRY + 1` sleep 10s else @@ -198,6 +199,7 @@ if [[ "$LAUNCHER_SCHED" == "dynamic" ]]; then lexit fi else + lwarn NOTE "Started dynamic task service on port $TSKPORT." 
RUNNING="true" fi done @@ -207,23 +209,24 @@ if [[ "$LAUNCHER_SCHED" == "dynamic" ]]; then else export LAUNCHER_DYN_COUNT="$HOSTNAME" fi - export LAUNCHER_DYN_COUNT_PORT=9471 + export LAUNCHER_DYN_COUNT_PORT=$TSKPORT if [[ $LAUNCHER_USE_PHI -ne "0" ]]; then + let TSKPORT=TSKPORT+1 if [[ $windowsP==true ]]; then #Start another tskserver for the Intel Xeon Phi cards - $LAUNCHER_DIR/tskserver `wc -l $PHI_WORKDIR/$PHI_CONTROL_FILE` localhost 9472 2>/dev/null & + $LAUNCHER_DIR/tskserver `wc -l $PHI_WORKDIR/$PHI_CONTROL_FILE` localhost $TSKPORT 2>/dev/null & LAUNCHER_PHI_DYN_PID=$1 disown $LAUNCHER_PHI_DYN_PID export LAUNCHER_PHI_DYN_COUNT=localhost - export LAUNCHER_PHI_DYN_COUNT_PORT=9472 + export LAUNCHER_PHI_DYN_COUNT_PORT=$TSKPORT else #Start another tskserver for the Intel Xeon Phi cards - $LAUNCHER_DIR/tskserver `wc -l $PHI_WORKDIR/$PHI_CONTROL_FILE` $HOSTNAME 9472 2>/dev/null & + $LAUNCHER_DIR/tskserver `wc -l $PHI_WORKDIR/$PHI_CONTROL_FILE` $HOSTNAME $TSKPORT 2>/dev/null & LAUNCHER_PHI_DYN_PID=$1 disown $LAUNCHER_PHI_DYN_PID export LAUNCHER_PHI_DYN_COUNT="$HOSTNAME" - export LAUNCHER_PHI_DYN_COUNT_PORT=9472 + export LAUNCHER_PHI_DYN_COUNT_PORT=$TSKPORT fi fi fi