From e897a9377a86a6dff555df3a39fc328bd681dccc Mon Sep 17 00:00:00 2001 From: FrankLeeeee Date: Wed, 11 Jan 2023 13:47:10 +0800 Subject: [PATCH] [cli] provided more details if colossalai run fail --- colossalai/cli/launcher/multinode_runner.py | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/colossalai/cli/launcher/multinode_runner.py b/colossalai/cli/launcher/multinode_runner.py index c45ad5e5a082..a51e1e371f13 100644 --- a/colossalai/cli/launcher/multinode_runner.py +++ b/colossalai/cli/launcher/multinode_runner.py @@ -1,8 +1,10 @@ -import fabric -from .hostinfo import HostInfo, HostInfoList from multiprocessing import Pipe, Process from multiprocessing import connection as mp_connection + import click +import fabric + +from .hostinfo import HostInfo, HostInfoList def run_on_host(hostinfo: HostInfo, workdir: str, recv_conn: mp_connection.Connection, @@ -45,8 +47,10 @@ def run_on_host(hostinfo: HostInfo, workdir: str, recv_conn: mp_connection.Conne # execute on the remote machine fab_conn.run(cmds, hide=False) send_conn.send('success') - except: - click.echo(f"Error: failed to run {cmds} on {hostinfo.hostname}") + except Exception as e: + click.echo( + f"Error: failed to run {cmds} on {hostinfo.hostname}, is localhost: {hostinfo.is_local_host}, exception: {e}" + ) send_conn.send('failure') # shutdown