From 3d92522c8466b8e1d553ed38291d7630d0bade6c Mon Sep 17 00:00:00 2001 From: ver217 Date: Wed, 17 May 2023 17:25:47 +0800 Subject: [PATCH 1/3] [doc] add en cluster utils doc --- docs/source/en/features/cluster_utils.md | 32 ++++++++++++++++++++++++ 1 file changed, 32 insertions(+) create mode 100644 docs/source/en/features/cluster_utils.md diff --git a/docs/source/en/features/cluster_utils.md b/docs/source/en/features/cluster_utils.md new file mode 100644 index 000000000000..0cd06f3027ff --- /dev/null +++ b/docs/source/en/features/cluster_utils.md @@ -0,0 +1,32 @@ +# Cluster Utilities + +Author: [Hongxin Liu](https://github.com/ver217) + +**Prerequisite:** +- [Distributed Training](../concepts/distributed_training.md) + +## Introduction + +We provide a utility class `colossalai.cluster.DistCoordinator` to coordinate distributed training. It's useful to get various information about the cluster, such as the number of nodes, the number of gpus per node, etc. + +## API Reference + +{{ autodoc:colossalai.cluster.DistCoordinator }} + +{{ autodoc:colossalai.cluster.DistCoordinator.is_master }} + +{{ autodoc:colossalai.cluster.DistCoordinator.is_node_master }} + +{{ autodoc:colossalai.cluster.DistCoordinator.is_last_process }} + +{{ autodoc:colossalai.cluster.DistCoordinator.print_on_master }} + +{{ autodoc:colossalai.cluster.DistCoordinator.print_on_node_master }} + +{{ autodoc:colossalai.cluster.DistCoordinator.priority_execution }} + +{{ autodoc:colossalai.cluster.DistCoordinator.destroy }} + +{{ autodoc:colossalai.cluster.DistCoordinator.block_all }} + +{{ autodoc:colossalai.cluster.DistCoordinator.on_master_only }} From 714c56a0fb47b3b2e065f1995319a3b49f921788 Mon Sep 17 00:00:00 2001 From: ver217 Date: Wed, 17 May 2023 17:28:37 +0800 Subject: [PATCH 2/3] [doc] add zh cluster utils doc --- docs/source/en/features/cluster_utils.md | 2 +- docs/source/zh-Hans/features/cluster_utils.md | 32 +++++++++++++++++++ 2 files changed, 33 insertions(+), 1 deletion(-) create mode 100644 docs/source/zh-Hans/features/cluster_utils.md diff --git a/docs/source/en/features/cluster_utils.md b/docs/source/en/features/cluster_utils.md index 0cd06f3027ff..1903d64d2563 100644 --- a/docs/source/en/features/cluster_utils.md +++ b/docs/source/en/features/cluster_utils.md @@ -7,7 +7,7 @@ Author: [Hongxin Liu](https://github.com/ver217) ## Introduction -We provide a utility class `colossalai.cluster.DistCoordinator` to coordinate distributed training. It's useful to get various information about the cluster, such as the number of nodes, the number of gpus per node, etc. +We provide a utility class `colossalai.cluster.DistCoordinator` to coordinate distributed training. It's useful to get various information about the cluster, such as the number of nodes, the number of processes per node, etc. ## API Reference diff --git a/docs/source/zh-Hans/features/cluster_utils.md b/docs/source/zh-Hans/features/cluster_utils.md new file mode 100644 index 000000000000..ca787a869041 --- /dev/null +++ b/docs/source/zh-Hans/features/cluster_utils.md @@ -0,0 +1,32 @@ +# 集群实用程序 + +作者: [Hongxin Liu](https://github.com/ver217) + +**前置教程:** +- [分布式训练](../concepts/distributed_training.md) + +## 引言 + +我们提供了一个实用程序类 `colossalai.cluster.DistCoordinator` 来协调分布式训练。它对于获取有关集群的各种信息很有用,例如节点数、每个节点的进程数等。 + +## API 参考 + +{{ autodoc:colossalai.cluster.DistCoordinator }} + +{{ autodoc:colossalai.cluster.DistCoordinator.is_master }} + +{{ autodoc:colossalai.cluster.DistCoordinator.is_node_master }} + +{{ autodoc:colossalai.cluster.DistCoordinator.is_last_process }} + +{{ autodoc:colossalai.cluster.DistCoordinator.print_on_master }} + +{{ autodoc:colossalai.cluster.DistCoordinator.print_on_node_master }} + +{{ autodoc:colossalai.cluster.DistCoordinator.priority_execution }} + +{{ autodoc:colossalai.cluster.DistCoordinator.destroy }} + +{{ autodoc:colossalai.cluster.DistCoordinator.block_all }} + +{{ autodoc:colossalai.cluster.DistCoordinator.on_master_only }} From 57b66ff8f176232236bf53aabf373eb57388f0a2 Mon Sep 17 00:00:00 2001 From: ver217 Date: Wed, 17 May 2023 17:30:05 +0800 Subject: [PATCH 3/3] [doc] add cluster utils doc in sidebar --- docs/sidebars.json | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/docs/sidebars.json b/docs/sidebars.json index 44287c17eadf..6dacb9dd2062 100644 --- a/docs/sidebars.json +++ b/docs/sidebars.json @@ -57,7 +57,8 @@ ] }, "features/pipeline_parallel", - "features/nvme_offload" + "features/nvme_offload", + "features/cluster_utils" ] }, {