Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
name: Test Example
on:
pull_request:
# So only the changes in examples folder will trigger jobs below.
# any change in the examples folder will trigger check for the corresponding example.
paths:
- 'examples/**'
# Run at 00:00 every Sunday (Singapore time), which is Saturday 16:00 in UTC.
Expand All @@ -17,80 +17,89 @@ jobs:
github.event.pull_request.base.repo.full_name == 'hpcaitech/ColossalAI' && github.event_name == 'pull_request'
runs-on: ubuntu-latest
outputs:
matrix: ${{ steps.set-matrix.outputs.matrix }}
name: Check out all files
matrix: ${{ steps.setup-matrix.outputs.matrix }}
anyChanged: ${{ steps.setup-matrix.outputs.anyChanged }}
name: Detect changed example files
steps:
- uses: actions/checkout@v3
with:
fetch-depth: 2
fetch-depth: 0
ref: ${{ github.event.pull_request.head.sha }}
- name: Get all changed example files
id: changed-files
uses: tj-actions/changed-files@v35
# Using this can trigger action each time a PR is submitted.
with:
since_last_remote_commit: true
- name: setup matrix
id: set-matrix
id: setup-matrix
run: |
changedFileName=""
for file in ${{ steps.changed-files.outputs.all_changed_files }}; do
changedFileName="${file}:${changedFileName}"
done
echo "$changedFileName was changed"
res=`python .github/workflows/scripts/changed_example.py --fileNameList $changedFileName`
echo "All changed files are $res"
loc=$( IFS=',' ; echo "${res[*]}" )
echo "$loc"
echo "::set-output name=matrix::{\"loc\":$(echo "$loc")}"
res=`python .github/workflows/scripts/example_checks/detect_changed_example.py --fileNameList $changedFileName`
echo "All changed examples are $res"

# $res holds the Python list printed by detect_changed_example.py.
# "[]" means no example directory was touched by this pull request.
if [ "$res" = "[]" ]; then
  echo "anyChanged=false" >> "$GITHUB_OUTPUT"
  echo "matrix=null" >> "$GITHUB_OUTPUT"
else
  dirs=$( IFS=',' ; echo "${res[*]}" )
  echo "anyChanged=true" >> "$GITHUB_OUTPUT"
  # Expose the directories as the "directory" axis of the test matrix.
  echo "matrix={\"directory\":$(echo "$dirs")}" >> "$GITHUB_OUTPUT"
fi

# If no file is changed, the job reports an error saying that the matrix does not have a value.
check-all-changed-files:
check-changed-example:
# Add this condition to avoid executing this job if the trigger event is workflow_dispatch.
# Also skip this job when the detection job reports that no example changed:
# its matrix output is then "null" and cannot be expanded by fromJson.
if: |
  github.event.pull_request.draft == false &&
  github.base_ref == 'main' &&
  github.event.pull_request.base.repo.full_name == 'hpcaitech/ColossalAI' && github.event_name == 'pull_request' &&
  needs.detect-changed-example.outputs.anyChanged == 'true'
name: Test each changed example files
name: Test the changed example
needs: detect-changed-example
runs-on: [self-hosted, gpu]
strategy:
matrix: ${{fromJson(needs.detect-changed-example.outputs.matrix)}}
container:
image: hpcaitech/pytorch-cuda:1.12.0-11.3.0
options: --gpus all --rm -v /data/scratch/examples-data:/data/
timeout-minutes: 10
steps:
- uses: actions/checkout@v3
with:
fetch-depth: 2
- name: Install dependancies
- name: Install Colossal-AI
run: |
pip install -r ./requirements/requirements.txt
pip install colossalai
- name: List all changed example files
pip install -v .
- name: Test the example
run: |
res=${{ matrix.loc }}
cd "${PWD}/examples/${res}"
example_dir=${{ matrix.directory }}
cd "${PWD}/examples/${example_dir}"
bash test_ci.sh
env:
NCCL_SHM_DISABLE: 1

# This is for all files' weekly check. Specifically, this job is to find all the directories.
matrix_preparation:
if: |
github.event.pull_request.draft == false &&
github.base_ref == 'main' &&
github.event.pull_request.base.repo.full_name == 'hpcaitech/ColossalAI' && github.event_name == 'schedule'
name: Prepare Directory List for All files
name: Prepare matrix for weekly check
runs-on: ubuntu-latest
outputs:
matrix: ${{ steps.set-matrix.outputs.matrix }}
matrix: ${{ steps.setup-matrix.outputs.matrix }}
steps:
- name: 📚 Checkout
uses: actions/checkout@v3
- name: setup matrix
id: set-matrix
id: setup-matrix
run: |
res=`python .github/workflows/scripts/weekly_check_example.py`
res=`python .github/workflows/scripts/example_checks/check_example_weekly.py`
all_loc=$( IFS=',' ; echo "${res[*]}" )
echo "$all_loc"
echo "::set-output name=matrix::{\"all_loc\":$(echo "$all_loc")}"
echo "Found the examples: $all_loc"
echo "matrix={\"directory\":$(echo "$all_loc")}" >> $GITHUB_OUTPUT

weekly_check:
if: |
Expand All @@ -104,16 +113,18 @@ jobs:
matrix: ${{fromJson(needs.matrix_preparation.outputs.matrix)}}
container:
image: hpcaitech/pytorch-cuda:1.12.0-11.3.0
timeout-minutes: 10
steps:
- name: 📚 Checkout
uses: actions/checkout@v3
- name: Install the requirements
- name: Install Colossal-AI
run: |
pip install -r ./requirements/requirements.txt
pip install colossalai
pip install -v .
- name: Traverse all files
run: |
dir=${{ matrix.all_loc }}
echo "${dir} is current directory"
cd "${PWD}/examples/${dir}"
# The matrix produced by the preparation job uses the key "directory".
example_dir=${{ matrix.directory }}
echo "Testing ${example_dir} now"
cd "${PWD}/examples/${example_dir}"
bash test_ci.sh
env:
NCCL_SHM_DISABLE: 1
Original file line number Diff line number Diff line change
Expand Up @@ -8,39 +8,32 @@ on:
required: true

jobs:
manual_check_matrix_preparation:
matrix_preparation:
if: |
github.event.pull_request.draft == false &&
github.base_ref == 'main' &&
github.event.pull_request.base.repo.full_name == 'hpcaitech/ColossalAI'
name: Check the examples user want
runs-on: ubuntu-latest
outputs:
matrix: ${{ steps.set-matrix-1.outputs.matrix }}
matrix: ${{ steps.set-matrix.outputs.matrix }}
steps:
- name: 📚 Checkout
uses: actions/checkout@v3
- name: Get manual directories
id: set-matrix-1
- name: Set up matrix
id: set-matrix
env:
check_dir: ${{ inputs.example_directory }}
run: |
all_mannual_check_dir=()
for cdi in $check_dir
do
all_mannual_check_dir+=("\"${cdi}\"")
done
man_loc=$( IFS=',' ; echo "${all_mannual_check_dir[*]}" )
res=`python .github/workflows/scripts/input_check_example.py --fileNameList $man_loc`
echo "${res} is file existance. 1 for all exist, -1 for at least one file not exist."
if [ res == -1 ];then
exit(1)
# check_dispatch_inputs.py prints "success" or "failure" on stdout.
res=$(python .github/workflows/scripts/example_checks/check_dispatch_inputs.py --fileNameList "$check_dir")
# Compare the captured output (not the literal word "res") and fail the step
# with a valid non-zero exit status when a requested directory is missing.
if [ "$res" = "failure" ]; then
  echo "At least one requested directory does not exist under examples/"
  exit 1
fi
man_loc="[${man_loc}]"
echo "$man_loc"
echo "::set-output name=matrix::{\"man_loc\":$(echo "$man_loc")}"
dirs="[${check_dir}]"
echo "Testing examples in $dirs"
echo "matrix={\"directory\":$(echo "$dirs")}" >> $GITHUB_OUTPUT

manual_check:
test_example:
if: |
github.event.pull_request.draft == false &&
github.base_ref == 'main' &&
Expand All @@ -52,16 +45,19 @@ jobs:
# The preparation job id is matrix_preparation, so its outputs are read under that name.
# NOTE(review): the `needs:` entry of this job must list the same job id — confirm.
matrix: ${{fromJson(needs.matrix_preparation.outputs.matrix)}}
container:
image: hpcaitech/pytorch-cuda:1.12.0-11.3.0
options: --gpus all --rm -v /data/scratch/examples-data:/data/
timeout-minutes: 10
steps:
- name: 📚 Checkout
uses: actions/checkout@v3
- name: Install the requirements
- name: Install Colossal-AI
run: |
pip install -r ./requirements/requirements.txt
pip install colossalai
- name: Traverse all files
pip install -v .
- name: Test the example
run: |
dir=${{ matrix.man_loc }}
echo "${dir} is current directory"
dir=${{ matrix.directory }}
echo "Testing ${dir} now"
cd "${PWD}/examples/${dir}"
bash test_ci.sh
env:
NCCL_SHM_DISABLE: 1
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
import argparse
import os


def check_inputs(input_list):
    """Check that every requested example path exists under ``examples``.

    Args:
        input_list: iterable of paths relative to the ``examples`` folder,
            e.g. ``['images/vit', 'language/gpt']``.

    Returns:
        bool: ``True`` when every entry exists on disk, ``False`` as soon as
        one entry is blank or missing. An empty iterable yields ``True``.
    """
    for path in input_list:
        # A blank entry would resolve to the 'examples' folder itself and be
        # accepted by the existence check, so reject it explicitly.
        if not path or not path.strip():
            return False
        real_path = os.path.join('examples', path)
        if not os.path.exists(real_path):
            return False
    return True


def main():
    """Validate the example directories passed on the command line.

    Reads a comma-separated list from ``-f/--fileNameList`` and prints
    ``success`` when every entry passes ``check_inputs``; otherwise prints
    ``failure``. The result is reported on stdout only.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('-f', '--fileNameList', type=str, help="List of file names")
    args = parser.parse_args()

    # A missing or empty argument cannot name any example: report it as a
    # failure instead of crashing on ``None.split``.
    if not args.fileNameList:
        print('failure')
        return

    name_list = args.fileNameList.split(",")
    is_correct = check_inputs(name_list)

    if is_correct:
        print('success')
    else:
        print('failure')


if __name__ == '__main__':
main()
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,9 @@ def show_files(path, all_files):
# Traverse all the folder/file in current directory
file_list = os.listdir(path)
# Determine the element is folder or file. If file, pass it into list, if folder, recurse.
for file in file_list:
for file_name in file_list:
# Get the abs directory using os.path.join() and store into cur_path.
cur_path = os.path.join(path, file)
cur_path = os.path.join(path, file_name)
# Determine whether folder
if os.path.isdir(cur_path):
show_files(cur_path, all_files)
Expand All @@ -26,9 +26,8 @@ def main():
for file_loc in contents:
split_loc = file_loc.split('/')
# must have two sub-folder levels after examples folder, such as examples/images/vit is acceptable, examples/images/README.md is not, examples/requirements.txt is not.
if len(split_loc) - split_loc.index('examples') >= 3:
tmp_loc = split_loc[(split_loc.index('examples') + 1):(split_loc.index('examples') + 3)]
re_loc = join(tmp_loc, '/')
if len(split_loc) >= 4:
re_loc = '/'.join(split_loc[1:3])
if re_loc not in all_loc:
all_loc.append(re_loc)
print(all_loc)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,14 +3,19 @@

def main():
parser = argparse.ArgumentParser()
parser.add_argument('--fileNameList', type=str)
parser.add_argument('-f', '--fileNameList', type=str, help="The list of changed files")
args = parser.parse_args()
name_list = args.fileNameList.split(":")
folder_need_check = set()
for loc in name_list:
# Find only the sub-folder of 'example' folder
# Find only the sub-sub-folder of 'example' folder
# the examples folder structure is like
# - examples
# - area
# - application
# - file
if loc.split("/")[0] == "examples" and len(loc.split("/")) >= 4:
folder_need_check.add(loc.split("/")[1] + "/" + loc.split("/")[2])
folder_need_check.add('/'.join(loc.split("/")[1:3]))
# Output the result using print. Then the shell can get the values.
print(list(folder_need_check))

Expand Down
23 changes: 0 additions & 23 deletions .github/workflows/scripts/input_check_example.py

This file was deleted.

4 changes: 2 additions & 2 deletions examples/tutorial/hybrid_parallel/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,8 @@
BATCH_SIZE = 256
LEARNING_RATE = 3e-3
WEIGHT_DECAY = 0.3
NUM_EPOCHS = 10
WARMUP_EPOCHS = 3
NUM_EPOCHS = 2
WARMUP_EPOCHS = 1

# model config
IMG_SIZE = 224
Expand Down
1 change: 1 addition & 0 deletions examples/tutorial/hybrid_parallel/requirements.txt
Original file line number Diff line number Diff line change
@@ -1,2 +1,3 @@
colossalai >= 0.1.12
torch >= 1.8.1
titans
5 changes: 5 additions & 0 deletions examples/tutorial/hybrid_parallel/test_ci.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
#!/bin/bash
# CI entry point for this example: install its requirements, then launch
# train.py on 4 processes with config.py and the -s flag.

# Abort on errors, unset variables and failed pipeline stages; trace commands.
set -o errexit -o nounset -o xtrace -o pipefail

pip install -r requirements.txt
torchrun --standalone --nproc_per_node 4 train.py --config config.py -s
6 changes: 3 additions & 3 deletions examples/tutorial/hybrid_parallel/train.py
Original file line number Diff line number Diff line change
Expand Up @@ -98,9 +98,9 @@ def main():
root = os.environ.get('DATA', '../data')
if args.synthetic:
# if we use synthetic dataset
# we train for 30 steps and eval for 10 steps per epoch
train_dataloader = DummyDataloader(length=30, batch_size=gpc.config.BATCH_SIZE)
test_dataloader = DummyDataloader(length=10, batch_size=gpc.config.BATCH_SIZE)
# we train for 10 steps and eval for 5 steps per epoch
train_dataloader = DummyDataloader(length=10, batch_size=gpc.config.BATCH_SIZE)
test_dataloader = DummyDataloader(length=5, batch_size=gpc.config.BATCH_SIZE)
else:
train_dataloader, test_dataloader = build_cifar(gpc.config.BATCH_SIZE, root, pad_if_needed=True)

Expand Down