From dde76e480c8f811cd5989d4719db9db9dc3f767e Mon Sep 17 00:00:00 2001 From: lambda7xx Date: Fri, 1 Sep 2023 02:47:46 +0000 Subject: [PATCH 01/31] update the ci and currently we ignore the hip_rocm --- .github/workflows/build-skip.yml | 2 +- .github/workflows/build.yml | 2 +- .github/workflows/docker-build-skip.yml | 2 +- .github/workflows/docker-build.yml | 2 +- .github/workflows/gpt-ci.yml-old | 162 +++++++++++++++++++++++ .github/workflows/gpu-ci.yml | 163 ------------------------ .github/workflows/per-lib-check.yml | 2 +- 7 files changed, 167 insertions(+), 168 deletions(-) create mode 100644 .github/workflows/gpt-ci.yml-old delete mode 100644 .github/workflows/gpu-ci.yml diff --git a/.github/workflows/build-skip.yml b/.github/workflows/build-skip.yml index a983d6dda4..e3e23f13f8 100644 --- a/.github/workflows/build-skip.yml +++ b/.github/workflows/build-skip.yml @@ -20,7 +20,7 @@ jobs: runs-on: ubuntu-20.04 strategy: matrix: - gpu_backend: ["cuda", "hip_rocm"] + gpu_backend: ["cuda"] fail-fast: false steps: - run: 'echo "No build required"' diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 9e44a59720..1a9c486f55 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -34,7 +34,7 @@ jobs: shell: bash -l {0} # required to use an activated conda environment strategy: matrix: - gpu_backend: ["cuda", "hip_rocm"] + gpu_backend: ["cuda"] fail-fast: false steps: - name: Checkout Git Repository diff --git a/.github/workflows/docker-build-skip.yml b/.github/workflows/docker-build-skip.yml index a09979283f..9fb3f71f48 100644 --- a/.github/workflows/docker-build-skip.yml +++ b/.github/workflows/docker-build-skip.yml @@ -18,7 +18,7 @@ jobs: runs-on: ubuntu-20.04 strategy: matrix: - gpu_backend: ["cuda", "hip_rocm"] + gpu_backend: ["cuda"] fail-fast: false steps: - run: 'echo "No docker-build required"' diff --git a/.github/workflows/docker-build.yml b/.github/workflows/docker-build.yml index d54750c9d4..e8bdc69d59 100644 --- a/.github/workflows/docker-build.yml +++ b/.github/workflows/docker-build.yml @@ -24,7 +24,7 @@ jobs: runs-on: ubuntu-20.04 strategy: matrix: - gpu_backend: ["cuda", "hip_rocm"] + gpu_backend: ["cuda"] fail-fast: false steps: - name: Checkout Git Repository diff --git a/.github/workflows/gpt-ci.yml-old b/.github/workflows/gpt-ci.yml-old new file mode 100644 index 0000000000..6132f90cba --- /dev/null +++ b/.github/workflows/gpt-ci.yml-old @@ -0,0 +1,162 @@ +# name: "gpu-ci" +# on: +# pull_request: +# paths: +# - "cmake/**" +# - "config/**" +# - "python/**" +# - "setup.py" +# - "include/**" +# - "src/**" +# - ".github/workflows/gpu-ci.yml" +# - "tests/cpp_gpu_tests.sh" +# - "tests/multi_gpu_tests.sh" +# - "tests/python_interface_test.sh" +# push: +# branches: +# - "master" +# paths: +# - "cmake/**" +# - "config/**" +# - "python/**" +# - "setup.py" +# - "include/**" +# - "src/**" +# - ".github/workflows/gpu-ci.yml" +# - "tests/cpp_gpu_tests.sh" +# - "tests/multi_gpu_tests.sh" +# - "tests/python_interface_test.sh" +# workflow_dispatch: + +# concurrency: +# group: gpu-ci-${{ github.head_ref || github.run_id }} +# cancel-in-progress: true + +# jobs: +# gpu-ci-concierge: +# name: GPU CI Concierge +# runs-on: ubuntu-20.04 +# env: +# FLEXFLOW_TOKEN: ${{ secrets.GITHUB_TOKEN }} +# steps: +# - name: Checkout Git Repository +# uses: actions/checkout@v3 + +# - name: Wait for daemon to be done +# run: | +# pip3 install pip --upgrade +# pip3 install pyopenssl --upgrade +# pip3 install pygithub +# python3 .github/workflows/helpers/gpu_ci_helper.py + +# python-interface-check: +# name: Check Python Interface +# runs-on: self-hosted +# defaults: +# run: +# shell: bash -l {0} # required to use an activated conda environment +# env: +# CONDA: "3" +# needs: gpu-ci-concierge +# container: +# image: ghcr.io/flexflow/flexflow-environment-cuda:latest +# options: --gpus all --shm-size=8192m +# steps: +# - name: Install updated git version +# run: sudo add-apt-repository ppa:git-core/ppa -y && sudo apt update -y && sudo apt install -y --no-install-recommends git + +# - name: Checkout Git Repository +# uses: actions/checkout@v3 +# with: +# submodules: recursive + +# - name: Install conda and FlexFlow dependencies +# uses: conda-incubator/setup-miniconda@v2 +# with: +# miniconda-version: "latest" +# activate-environment: flexflow +# environment-file: conda/flexflow-cpu.yml +# auto-activate-base: false + +# - name: Install conda and Pytorch dependencies for pytorch alignment test +# run: | +# conda env create -f conda/pytorch-gpu.yml + +# - name: Build FlexFlow +# run: | +# export PATH=$CONDA_PREFIX/bin:$PATH +# export FF_HOME=$(pwd) +# mkdir build +# cd build +# ../config/config.linux +# make -j + +# - name: Check FlexFlow Python interface (before installation) +# run: | +# export PATH=$CONDA_PREFIX/bin:$PATH +# export FF_HOME=$(pwd) +# export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$CONDA_PREFIX/lib +# ./tests/python_interface_test.sh before-installation + +# - name: Install FlexFlow +# run: | +# export PATH=$CONDA_PREFIX/bin:$PATH +# export FF_HOME=$(pwd) +# cd build +# ../config/config.linux +# make install +# ldconfig + +# - name: Check FlexFlow Python interface (after installation) +# run: | +# export PATH=$CONDA_PREFIX/bin:$PATH +# export FF_HOME=$(pwd) +# export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$CONDA_PREFIX/lib +# ./tests/python_interface_test.sh after-installation + +# - name: Run flexflow alignment with pytorch +# run: | +# # run alingment tests +# export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$CONDA_PREFIX/lib +# ./tests/align/test_all_operators.sh + +# gpu-ci-flexflow: +# name: Single Machine, Multiple GPUs Tests +# runs-on: self-hosted +# needs: gpu-ci-concierge +# container: +# image: ghcr.io/flexflow/flexflow-environment-cuda:latest +# options: --gpus all --shm-size=8192m +# steps: +# - name: Install updated git version +# run: sudo add-apt-repository ppa:git-core/ppa -y && sudo apt update -y && sudo apt install -y --no-install-recommends git +# - name: Checkout Git Repository +# uses: actions/checkout@v3 +# with: +# submodules: recursive + +# - name: Build and Install FlexFlow +# run: | +# export PATH=/opt/conda/bin:$PATH +# export FF_HOME=$(pwd) +# export FF_BUILD_ALL_EXAMPLES=ON +# pip install . --verbose + +# - name: Check FlexFlow Python interface (pip) +# run: | +# export PATH=/opt/conda/bin:$PATH +# export FF_HOME=$(pwd) +# export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/opt/conda/lib +# ./tests/python_interface_test.sh after-installation + +# - name: Run multi-gpu tests +# run: | +# export PATH=/opt/conda/bin:$PATH +# export CUDNN_DIR=/usr/local/cuda +# export CUDA_DIR=/usr/local/cuda +# export FF_HOME=$(pwd) +# export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/opt/conda/lib +# # C++ tests +# ./tests/cpp_gpu_tests.sh 4 +# # Python tests +# ./tests/multi_gpu_tests.sh 4 \ No newline at end of file diff --git a/.github/workflows/gpu-ci.yml b/.github/workflows/gpu-ci.yml deleted file mode 100644 index d524ee4115..0000000000 --- a/.github/workflows/gpu-ci.yml +++ /dev/null @@ -1,163 +0,0 @@ -name: "gpu-ci" -on: - pull_request: - paths: - - "cmake/**" - - "config/**" - - "python/**" - - "setup.py" - - "include/**" - - "src/**" - - ".github/workflows/gpu-ci.yml" - - "tests/cpp_gpu_tests.sh" - - "tests/multi_gpu_tests.sh" - - "tests/python_interface_test.sh" - push: - branches: - - "master" - paths: - - "cmake/**" - - "config/**" - - "python/**" - - "setup.py" - - "include/**" - - "src/**" - - ".github/workflows/gpu-ci.yml" - - "tests/cpp_gpu_tests.sh" - - "tests/multi_gpu_tests.sh" - - "tests/python_interface_test.sh" - workflow_dispatch: - -concurrency: - group: gpu-ci-${{ github.head_ref || github.run_id }} - cancel-in-progress: true - -jobs: - gpu-ci-concierge: - name: GPU CI Concierge - runs-on: ubuntu-20.04 - env: - FLEXFLOW_TOKEN: ${{ secrets.GITHUB_TOKEN }} - steps: - - name: Checkout Git Repository - uses: actions/checkout@v3 - - - name: Wait for daemon to be done - run: | - pip3 install pip --upgrade - pip3 install pyopenssl --upgrade - pip3 install pygithub - python3 .github/workflows/helpers/gpu_ci_helper.py - - python-interface-check: - name: Check Python Interface - runs-on: self-hosted - defaults: - run: - shell: bash -l {0} # required to use an activated conda environment - env: - CONDA: "3" - needs: gpu-ci-concierge - container: - image: ghcr.io/flexflow/flexflow-environment-cuda:latest - options: --gpus all --shm-size=8192m - steps: - - name: Install updated git version - run: sudo add-apt-repository ppa:git-core/ppa -y && sudo apt update -y && sudo apt install -y --no-install-recommends git - - - name: Checkout Git Repository - uses: actions/checkout@v3 - with: - submodules: recursive - - - name: Install conda and FlexFlow dependencies - uses: conda-incubator/setup-miniconda@v2 - with: - miniconda-version: "latest" - activate-environment: flexflow - environment-file: conda/flexflow-cpu.yml - auto-activate-base: false - - - name: Install conda and Pytorch dependencies for pytorch alignment test - run: | - conda env create -f conda/pytorch-gpu.yml - - - name: Build FlexFlow - run: | - export PATH=$CONDA_PREFIX/bin:$PATH - export FF_HOME=$(pwd) - mkdir build - cd build - ../config/config.linux - make -j - - - name: Check FlexFlow Python interface (before installation) - run: | - export PATH=$CONDA_PREFIX/bin:$PATH - export FF_HOME=$(pwd) - export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$CONDA_PREFIX/lib - ./tests/python_interface_test.sh before-installation - - - name: Install FlexFlow - run: | - export PATH=$CONDA_PREFIX/bin:$PATH - export FF_HOME=$(pwd) - cd build - ../config/config.linux - make install - ldconfig - - - name: Check FlexFlow Python interface (after installation) - run: | - export PATH=$CONDA_PREFIX/bin:$PATH - export FF_HOME=$(pwd) - export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$CONDA_PREFIX/lib - ./tests/python_interface_test.sh after-installation - - - name: Run flexflow alignment with pytorch - run: | - # run alingment tests - export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$CONDA_PREFIX/lib - ./tests/align/test_all_operators.sh - - gpu-ci-flexflow: - name: Single Machine, Multiple GPUs Tests - runs-on: self-hosted - needs: gpu-ci-concierge - container: - image: ghcr.io/flexflow/flexflow-environment-cuda:latest - options: --gpus all --shm-size=8192m - steps: - - name: Install updated git version - run: sudo add-apt-repository ppa:git-core/ppa -y && sudo apt update -y && sudo apt install -y --no-install-recommends git - - name: Checkout Git Repository - uses: actions/checkout@v3 - with: - submodules: recursive - - - name: Build and Install FlexFlow - run: | - export PATH=/opt/conda/bin:$PATH - export FF_HOME=$(pwd) - export FF_BUILD_ALL_EXAMPLES=ON - pip install . --verbose - - - name: Check FlexFlow Python interface (pip) - run: | - export PATH=/opt/conda/bin:$PATH - export FF_HOME=$(pwd) - export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/opt/conda/lib - ./tests/python_interface_test.sh after-installation - - - name: Run multi-gpu tests - run: | - export PATH=/opt/conda/bin:$PATH - export CUDNN_DIR=/usr/local/cuda - export CUDA_DIR=/usr/local/cuda - export FF_HOME=$(pwd) - export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/opt/conda/lib - # C++ tests - ./tests/cpp_gpu_tests.sh 4 - # Python tests - ./tests/multi_gpu_tests.sh 4 - diff --git a/.github/workflows/per-lib-check.yml b/.github/workflows/per-lib-check.yml index 28c083189c..a85471b377 100644 --- a/.github/workflows/per-lib-check.yml +++ b/.github/workflows/per-lib-check.yml @@ -13,7 +13,7 @@ jobs: shell: bash -l {0} # required to use an activated conda environment strategy: matrix: - gpu_backend: ["cuda","hip_rocm"] + gpu_backend: ["cuda"] library: ["runtime", "ffi","compiler","kernels","op-attrs","pcg","substitutions","utils"] fail-fast: false steps: From 88e3d61c0bab4de1ef3e75c18e009d720df0d6d7 Mon Sep 17 00:00:00 2001 From: lambda7xx Date: Fri, 1 Sep 2023 02:54:08 +0000 Subject: [PATCH 02/31] update the shell-check --- .github/workflows/shell-check.yml | 1 + run.sh | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/shell-check.yml b/.github/workflows/shell-check.yml index a825d63d9c..090f9279b0 100644 --- a/.github/workflows/shell-check.yml +++ b/.github/workflows/shell-check.yml @@ -9,4 +9,5 @@ jobs: - name: Run ShellCheck uses: ludeeus/action-shellcheck@master with: + shell: bash ignore_paths: ./triton/** diff --git a/run.sh b/run.sh index 108a859a7f..89407ddf17 100644 --- a/run.sh +++ b/run.sh @@ -1,3 +1,3 @@ - +#!/bin/bash cmake -DCMAKE_BUILD_TYPE=Debug -DCMAKE_EXPORT_COMPILE_COMMANDS=ON -DFF_CUDA_ARCH=75 -DCMAKE_CUDA_ARCHITECTURES=75 -DCMAKE_CXX_COMPILER="clang++" -DCMAKE_C_COMPILER="clang" -DCMAKE_CUDA_COMPILER="clang++" -DCMAKE_CUDA_HOST_COMPILER="clang++" .. \ No newline at end of file From 6dde4e8d44471d825a71511fa77e0fc78f9163f4 Mon Sep 17 00:00:00 2001 From: lambda7xx Date: Fri, 1 Sep 2023 03:00:38 +0000 Subject: [PATCH 03/31] update the docker_build.yml --- .github/workflows/docker-build.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/docker-build.yml b/.github/workflows/docker-build.yml index e8bdc69d59..e601a56755 100644 --- a/.github/workflows/docker-build.yml +++ b/.github/workflows/docker-build.yml @@ -47,7 +47,7 @@ jobs: else export FF_CUDA_ARCH=70 fi - ./docker/build.sh flexflow + ../../packaging/docker/build.sh flexflow - name: Check availability of Python flexflow.core module if: ${{ matrix.gpu_backend == 'cuda' }} From 9f935a195353178acec1a67d1ac73b0c6036f0e6 Mon Sep 17 00:00:00 2001 From: lambda7xx Date: Fri, 1 Sep 2023 03:03:28 +0000 Subject: [PATCH 04/31] update the docker_build.yml --- .github/workflows/docker-build.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/docker-build.yml b/.github/workflows/docker-build.yml index e601a56755..a1ef864ef5 100644 --- a/.github/workflows/docker-build.yml +++ b/.github/workflows/docker-build.yml @@ -47,7 +47,7 @@ jobs: else export FF_CUDA_ARCH=70 fi - ../../packaging/docker/build.sh flexflow + packaging/docker/build.sh flexflow - name: Check availability of Python flexflow.core module if: ${{ matrix.gpu_backend == 'cuda' }} From a4e7093cf8c8b24a9e73001262faa2a6ad2139bc Mon Sep 17 00:00:00 2001 From: lambda7xx Date: Tue, 26 Sep 2023 13:51:29 +0000 Subject: [PATCH 05/31] remove the unuseful file and comment --- .github/workflows/gpt-ci.yml-old | 162 ------------------------------ .github/workflows/shell-check.yml | 1 - deps/fmt | 2 +- run.sh | 3 - 4 files changed, 1 insertion(+), 167 deletions(-) delete mode 100644 run.sh diff --git a/.github/workflows/gpt-ci.yml-old b/.github/workflows/gpt-ci.yml-old index 6132f90cba..e69de29bb2 100644 --- a/.github/workflows/gpt-ci.yml-old +++ b/.github/workflows/gpt-ci.yml-old @@ -1,162 +0,0 @@ -# name: "gpu-ci" -# on: -# pull_request: -# paths: -# - "cmake/**" -# - "config/**" -# - "python/**" -# - "setup.py" -# - "include/**" -# - "src/**" -# - ".github/workflows/gpu-ci.yml" -# - "tests/cpp_gpu_tests.sh" -# - "tests/multi_gpu_tests.sh" -# - "tests/python_interface_test.sh" -# push: -# branches: -# - "master" -# paths: -# - "cmake/**" -# - "config/**" -# - "python/**" -# - "setup.py" -# - "include/**" -# - "src/**" -# - ".github/workflows/gpu-ci.yml" -# - "tests/cpp_gpu_tests.sh" -# - "tests/multi_gpu_tests.sh" -# - "tests/python_interface_test.sh" -# workflow_dispatch: - -# concurrency: -# group: gpu-ci-${{ github.head_ref || github.run_id }} -# cancel-in-progress: true - -# jobs: -# gpu-ci-concierge: -# name: GPU CI Concierge -# runs-on: ubuntu-20.04 -# env: -# FLEXFLOW_TOKEN: ${{ secrets.GITHUB_TOKEN }} -# steps: -# - name: Checkout Git Repository -# uses: actions/checkout@v3 - -# - name: Wait for daemon to be done -# run: | -# pip3 install pip --upgrade -# pip3 install pyopenssl --upgrade -# pip3 install pygithub -# python3 .github/workflows/helpers/gpu_ci_helper.py - -# python-interface-check: -# name: Check Python Interface -# runs-on: self-hosted -# defaults: -# run: -# shell: bash -l {0} # required to use an activated conda environment -# env: -# CONDA: "3" -# needs: gpu-ci-concierge -# container: -# image: ghcr.io/flexflow/flexflow-environment-cuda:latest -# options: --gpus all --shm-size=8192m -# steps: -# - name: Install updated git version -# run: sudo add-apt-repository ppa:git-core/ppa -y && sudo apt update -y && sudo apt install -y --no-install-recommends git - -# - name: Checkout Git Repository -# uses: actions/checkout@v3 -# with: -# submodules: recursive - -# - name: Install conda and FlexFlow dependencies -# uses: conda-incubator/setup-miniconda@v2 -# with: -# miniconda-version: "latest" -# activate-environment: flexflow -# environment-file: conda/flexflow-cpu.yml -# auto-activate-base: false - -# - name: Install conda and Pytorch dependencies for pytorch alignment test -# run: | -# conda env create -f conda/pytorch-gpu.yml - -# - name: Build FlexFlow -# run: | -# export PATH=$CONDA_PREFIX/bin:$PATH -# export FF_HOME=$(pwd) -# mkdir build -# cd build -# ../config/config.linux -# make -j - -# - name: Check FlexFlow Python interface (before installation) -# run: | -# export PATH=$CONDA_PREFIX/bin:$PATH -# export FF_HOME=$(pwd) -# export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$CONDA_PREFIX/lib -# ./tests/python_interface_test.sh before-installation - -# - name: Install FlexFlow -# run: | -# export PATH=$CONDA_PREFIX/bin:$PATH -# export FF_HOME=$(pwd) -# cd build -# ../config/config.linux -# make install -# ldconfig - -# - name: Check FlexFlow Python interface (after installation) -# run: | -# export PATH=$CONDA_PREFIX/bin:$PATH -# export FF_HOME=$(pwd) -# export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$CONDA_PREFIX/lib -# ./tests/python_interface_test.sh after-installation - -# - name: Run flexflow alignment with pytorch -# run: | -# # run alingment tests -# export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$CONDA_PREFIX/lib -# ./tests/align/test_all_operators.sh - -# gpu-ci-flexflow: -# name: Single Machine, Multiple GPUs Tests -# runs-on: self-hosted -# needs: gpu-ci-concierge -# container: -# image: ghcr.io/flexflow/flexflow-environment-cuda:latest -# options: --gpus all --shm-size=8192m -# steps: -# - name: Install updated git version -# run: sudo add-apt-repository ppa:git-core/ppa -y && sudo apt update -y && sudo apt install -y --no-install-recommends git -# - name: Checkout Git Repository -# uses: actions/checkout@v3 -# with: -# submodules: recursive - -# - name: Build and Install FlexFlow -# run: | -# export PATH=/opt/conda/bin:$PATH -# export FF_HOME=$(pwd) -# export FF_BUILD_ALL_EXAMPLES=ON -# pip install . --verbose - -# - name: Check FlexFlow Python interface (pip) -# run: | -# export PATH=/opt/conda/bin:$PATH -# export FF_HOME=$(pwd) -# export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/opt/conda/lib -# ./tests/python_interface_test.sh after-installation - -# - name: Run multi-gpu tests -# run: | -# export PATH=/opt/conda/bin:$PATH -# export CUDNN_DIR=/usr/local/cuda -# export CUDA_DIR=/usr/local/cuda -# export FF_HOME=$(pwd) -# export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/opt/conda/lib -# # C++ tests -# ./tests/cpp_gpu_tests.sh 4 -# # Python tests -# ./tests/multi_gpu_tests.sh 4 \ No newline at end of file diff --git a/.github/workflows/shell-check.yml b/.github/workflows/shell-check.yml index 090f9279b0..a825d63d9c 100644 --- a/.github/workflows/shell-check.yml +++ b/.github/workflows/shell-check.yml @@ -9,5 +9,4 @@ jobs: - name: Run ShellCheck uses: ludeeus/action-shellcheck@master with: - shell: bash ignore_paths: ./triton/** diff --git a/deps/fmt b/deps/fmt index f5e54359df..a33701196a 160000 --- a/deps/fmt +++ b/deps/fmt @@ -1 +1 @@ -Subproject commit f5e54359df4c26b6230fc61d38aa294581393084 +Subproject commit a33701196adfad74917046096bf5a2aa0ab0bb50 diff --git a/run.sh b/run.sh deleted file mode 100644 index 89407ddf17..0000000000 --- a/run.sh +++ /dev/null @@ -1,3 +0,0 @@ -#!/bin/bash - -cmake -DCMAKE_BUILD_TYPE=Debug -DCMAKE_EXPORT_COMPILE_COMMANDS=ON -DFF_CUDA_ARCH=75 -DCMAKE_CUDA_ARCHITECTURES=75 -DCMAKE_CXX_COMPILER="clang++" -DCMAKE_C_COMPILER="clang" -DCMAKE_CUDA_COMPILER="clang++" -DCMAKE_CUDA_HOST_COMPILER="clang++" .. \ No newline at end of file From d818954fdb75b87759f8282c92fc00fb0f329520 Mon Sep 17 00:00:00 2001 From: lambda7xx Date: Thu, 28 Sep 2023 22:00:21 +0000 Subject: [PATCH 06/31] remove the makefile in ci --- .github/workflows/build.yml | 44 ------------------------------------- 1 file changed, 44 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 1a9c486f55..ceb17d45f9 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -114,47 +114,3 @@ jobs: cd build ./tests/unit/unit-test - makefile-build: - name: Build FlexFlow with the Makefile - runs-on: ubuntu-20.04 - defaults: - run: - shell: bash -l {0} # required to use an activated conda environment - steps: - - name: Checkout Git Repository - uses: actions/checkout@v3 - with: - submodules: recursive - - - name: Free additional space on runner - run: .github/workflows/helpers/free_space_on_runner.sh - - - name: Install CUDA - uses: Jimver/cuda-toolkit@v0.2.8 - id: cuda-toolkit - with: - cuda: "11.1.1" - use-github-cache: "false" - - - name: Install system dependencies - run: .github/workflows/helpers/install_dependencies.sh - - - name: Install conda and FlexFlow dependencies - uses: conda-incubator/setup-miniconda@v2 - with: - activate-environment: flexflow - environment-file: conda/environment.yml - auto-activate-base: false - - - name: Build FlexFlow - run: | - export CUDNN_DIR=/usr/local/cuda - export CUDA_DIR=/usr/local/cuda - export FF_HOME=$(pwd) - cores_available=$(nproc --all) - n_build_cores=$(( cores_available -1 )) - if (( $n_build_cores < 1 )) ; then n_build_cores=1 ; fi - - cd python - make -j $n_build_cores - python -c 'import flexflow.core' From f2daf25bdfe31d44d2a2f5fc8060ac497324c9bb Mon Sep 17 00:00:00 2001 From: lambda7xx Date: Thu, 28 Sep 2023 22:03:00 +0000 Subject: [PATCH 07/31] format the code --- .github/workflows/build.yml | 12 +-- .github/workflows/pip-install.yml | 12 +-- .../docker/flexflow-environment/Dockerfile | 100 +++++++++--------- 3 files changed, 62 insertions(+), 62 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index ceb17d45f9..8c57d4738b 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -56,12 +56,12 @@ jobs: - name: Install system dependencies run: FF_GPU_BACKEND=${{ matrix.gpu_backend }} .github/workflows/helpers/install_dependencies.sh - - name: Install conda and FlexFlow dependencies - uses: conda-incubator/setup-miniconda@v2 - with: - activate-environment: flexflow - environment-file: conda/environment.yml - auto-activate-base: false + # - name: Install conda and FlexFlow dependencies + # uses: conda-incubator/setup-miniconda@v2 + # with: + # activate-environment: flexflow + # environment-file: conda/environment.yml + # auto-activate-base: false - name: Build FlexFlow run: | diff --git a/.github/workflows/pip-install.yml b/.github/workflows/pip-install.yml index 48e0798008..2f0263b097 100644 --- a/.github/workflows/pip-install.yml +++ b/.github/workflows/pip-install.yml @@ -50,12 +50,12 @@ jobs: - name: Install system dependencies run: .github/workflows/helpers/install_dependencies.sh - - name: Install conda and FlexFlow dependencies - uses: conda-incubator/setup-miniconda@v2 - with: - activate-environment: flexflow - environment-file: conda/environment.yml - auto-activate-base: false + # - name: Install conda and FlexFlow dependencies + # uses: conda-incubator/setup-miniconda@v2 + # with: + # activate-environment: flexflow + # environment-file: conda/environment.yml + # auto-activate-base: false - name: Build and Install FlexFlow run: | diff --git a/packaging/docker/flexflow-environment/Dockerfile b/packaging/docker/flexflow-environment/Dockerfile index 061b63352b..e3c1cf8fc4 100644 --- a/packaging/docker/flexflow-environment/Dockerfile +++ b/packaging/docker/flexflow-environment/Dockerfile @@ -1,57 +1,57 @@ -FROM nvidia/cuda:11.7.0-cudnn8-devel-ubuntu20.04 +# FROM nvidia/cuda:11.7.0-cudnn8-devel-ubuntu20.04 -LABEL org.opencontainers.image.source=https://github.com/flexflow/FlexFlow -LABEL org.opencontainers.image.description="FlexFlow environment container" +# LABEL org.opencontainers.image.source=https://github.com/flexflow/FlexFlow +# LABEL org.opencontainers.image.description="FlexFlow environment container" -# Install basic dependencies -RUN apt-get update && apt-get install -y --no-install-recommends wget sudo binutils git zlib1g-dev lsb-release nano libhdf5-dev && \ - rm -rf /var/lib/apt/lists/* /etc/apt/sources.list.d/cuda.list /etc/apt/sources.list.d/nvidia-ml.list && \ - apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends software-properties-common && \ - apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends build-essential apt-utils \ - ca-certificates libssl-dev curl unzip htop && DEBIAN_FRONTEND=noninteractive \ - apt-get install -y software-properties-common && \ - add-apt-repository ppa:ubuntu-toolchain-r/test && \ - apt-get update -y && \ - apt-get upgrade -y libstdc++6 +# # Install basic dependencies +# RUN apt-get update && apt-get install -y --no-install-recommends wget sudo binutils git zlib1g-dev lsb-release nano libhdf5-dev && \ +# rm -rf /var/lib/apt/lists/* /etc/apt/sources.list.d/cuda.list /etc/apt/sources.list.d/nvidia-ml.list && \ +# apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends software-properties-common && \ +# apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends build-essential apt-utils \ +# ca-certificates libssl-dev curl unzip htop && DEBIAN_FRONTEND=noninteractive \ +# apt-get install -y software-properties-common && \ +# add-apt-repository ppa:ubuntu-toolchain-r/test && \ +# apt-get update -y && \ +# apt-get upgrade -y libstdc++6 -# Install Python3 with Miniconda -RUN wget -c -q https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh && \ - mv Miniconda3-latest-Linux-x86_64.sh ~/Miniconda3-latest-Linux-x86_64.sh && \ - chmod +x ~/Miniconda3-latest-Linux-x86_64.sh && \ - bash ~/Miniconda3-latest-Linux-x86_64.sh -b -p /opt/conda && \ - rm ~/Miniconda3-latest-Linux-x86_64.sh && \ - /opt/conda/bin/conda upgrade --all && \ - /opt/conda/bin/conda install conda-build conda-verify && \ - /opt/conda/bin/conda clean -ya +# # Install Python3 with Miniconda +# RUN wget -c -q https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh && \ +# mv Miniconda3-latest-Linux-x86_64.sh ~/Miniconda3-latest-Linux-x86_64.sh && \ +# chmod +x ~/Miniconda3-latest-Linux-x86_64.sh && \ +# bash ~/Miniconda3-latest-Linux-x86_64.sh -b -p /opt/conda && \ +# rm ~/Miniconda3-latest-Linux-x86_64.sh && \ +# /opt/conda/bin/conda upgrade --all && \ +# /opt/conda/bin/conda install conda-build conda-verify && \ +# /opt/conda/bin/conda clean -ya -# Optionally install HIP dependencies -# Note that amd's docs say to also install the `hip-runtime-nvidia` package. This -# package attempts to re-install cuda even though cuda is already installed -# in the container. It also attempts to install packages for a graphical install. -# For our container, we don't need `hip-runtime-nvidia` -ARG FF_GPU_BACKEND "cuda" -RUN if [ "$FF_GPU_BACKEND" = "hip_cuda" ] || [ "$FF_GPU_BACKEND" = "hip_rocm" ]; then \ - echo "FF_GPU_BACKEND: ${FF_GPU_BACKEND}. Installing HIP dependencies"; \ - wget https://repo.radeon.com/amdgpu-install/22.20.5/ubuntu/bionic/amdgpu-install_22.20.50205-1_all.deb; \ - apt-get install -y ./amdgpu-install_22.20.50205-1_all.deb; \ - rm ./amdgpu-install_22.20.50205-1_all.deb; \ - amdgpu-install -y --usecase=hip,rocm --no-dkms; \ - apt-get install -y hip-dev hipblas miopen-hip rocm-hip-sdk; \ - else \ - echo "FF_GPU_BACKEND: ${FF_GPU_BACKEND}. Skipping installing HIP dependencies"; \ - fi -RUN rm -rf /var/lib/apt/lists/* +# # Optionally install HIP dependencies +# # Note that amd's docs say to also install the `hip-runtime-nvidia` package. This +# # package attempts to re-install cuda even though cuda is already installed +# # in the container. It also attempts to install packages for a graphical install. +# # For our container, we don't need `hip-runtime-nvidia` +# ARG FF_GPU_BACKEND "cuda" +# RUN if [ "$FF_GPU_BACKEND" = "hip_cuda" ] || [ "$FF_GPU_BACKEND" = "hip_rocm" ]; then \ +# echo "FF_GPU_BACKEND: ${FF_GPU_BACKEND}. Installing HIP dependencies"; \ +# wget https://repo.radeon.com/amdgpu-install/22.20.5/ubuntu/bionic/amdgpu-install_22.20.50205-1_all.deb; \ +# apt-get install -y ./amdgpu-install_22.20.50205-1_all.deb; \ +# rm ./amdgpu-install_22.20.50205-1_all.deb; \ +# amdgpu-install -y --usecase=hip,rocm --no-dkms; \ +# apt-get install -y hip-dev hipblas miopen-hip rocm-hip-sdk; \ +# else \ +# echo "FF_GPU_BACKEND: ${FF_GPU_BACKEND}. Skipping installing HIP dependencies"; \ +# fi +# RUN rm -rf /var/lib/apt/lists/* -# Set env vars -ENV PATH /opt/conda/bin:$PATH -ENV CUDNN_DIR /usr/local/cuda -ENV CUDA_DIR /usr/local/cuda +# # Set env vars +# ENV PATH /opt/conda/bin:$PATH +# ENV CUDNN_DIR /usr/local/cuda +# ENV CUDA_DIR /usr/local/cuda -# Install python packages and other dependencies -RUN conda install -c conda-forge cmake make pillow cmake-build-extension pybind11 numpy pandas keras-preprocessing -# Install CPU-only Pytorch and related dependencies -RUN conda install pytorch torchvision torchaudio cpuonly -c pytorch -RUN conda install -c conda-forge onnx transformers sentencepiece -RUN pip3 install tensorflow +# # Install python packages and other dependencies +# RUN conda install -c conda-forge cmake make pillow cmake-build-extension pybind11 numpy pandas keras-preprocessing +# # Install CPU-only Pytorch and related dependencies +# RUN conda install pytorch torchvision torchaudio cpuonly -c pytorch +# RUN conda install -c conda-forge onnx transformers sentencepiece +# RUN pip3 install tensorflow -ENTRYPOINT ["/bin/bash"] +# ENTRYPOINT ["/bin/bash"] From 4a6b2c9328180d905abbf53d0de166f964d662cc Mon Sep 17 00:00:00 2001 From: lambda7xx Date: Thu, 28 Sep 2023 22:11:05 +0000 Subject: [PATCH 08/31] rename the dockerfile --- .../docker/flexflow-environment/{Dockerfile => Dockerfile-old} | 0 packaging/docker/flexflow/{Dockerfile => Dockerfile-old} | 0 2 files changed, 0 insertions(+), 0 deletions(-) rename packaging/docker/flexflow-environment/{Dockerfile => Dockerfile-old} (100%) rename packaging/docker/flexflow/{Dockerfile => Dockerfile-old} (100%) diff --git a/packaging/docker/flexflow-environment/Dockerfile b/packaging/docker/flexflow-environment/Dockerfile-old similarity index 100% rename from packaging/docker/flexflow-environment/Dockerfile rename to packaging/docker/flexflow-environment/Dockerfile-old diff --git a/packaging/docker/flexflow/Dockerfile b/packaging/docker/flexflow/Dockerfile-old similarity index 100% rename from packaging/docker/flexflow/Dockerfile rename to packaging/docker/flexflow/Dockerfile-old From fb46abc1b2fd80a4518dcd80c2c5eedac809e539 Mon Sep 17 00:00:00 2001 From: lambda7xx Date: Thu, 28 Sep 2023 22:17:02 +0000 Subject: [PATCH 09/31] fix the comment --- .../include/substitutions/sub_parallel_computation_graph.h | 2 +- .../include/utils/graph/labelled/labelled_open_interfaces.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/substitutions/include/substitutions/sub_parallel_computation_graph.h b/lib/substitutions/include/substitutions/sub_parallel_computation_graph.h index 96a3b41dfc..24ca543313 100644 --- a/lib/substitutions/include/substitutions/sub_parallel_computation_graph.h +++ b/lib/substitutions/include/substitutions/sub_parallel_computation_graph.h @@ -14,7 +14,7 @@ using SubParallelComputationGraph = CHECK_WELL_BEHAVED_VALUE_TYPE_NO_EQ(SubParallelComputationGraph); -ParallelTensor at(SubParallelComputationGraph const &g, OpenMultiDiEdge const &e); +ParallelTensor at(SubParallelComputationGraph const &, OpenMultiDiEdge const &); } // namespace FlexFlow diff --git a/lib/utils/include/utils/graph/labelled/labelled_open_interfaces.h b/lib/utils/include/utils/graph/labelled/labelled_open_interfaces.h index 20138c4212..09289077fb 100644 --- a/lib/utils/include/utils/graph/labelled/labelled_open_interfaces.h +++ b/lib/utils/include/utils/graph/labelled/labelled_open_interfaces.h @@ -14,8 +14,8 @@ template { -public: +public: std::unordered_set query_edges(MultiDiEdgeQuery const &q) const final { return map_over_unordered_set([](OpenMultiDiEdge const &e) { return get(e); }, From 18cd2413466b0043cca9f12d4b3f93c60660df55 Mon Sep 17 00:00:00 2001 From: lambda7xx Date: Thu, 28 Sep 2023 22:19:48 +0000 Subject: [PATCH 10/31] format the code --- lib/substitutions/src/sub_parallel_computation_graph.cc | 7 ++++--- .../utils/graph/labelled/labelled_open_interfaces.h | 8 +++++--- 2 files changed, 9 insertions(+), 6 deletions(-) diff --git a/lib/substitutions/src/sub_parallel_computation_graph.cc b/lib/substitutions/src/sub_parallel_computation_graph.cc index ac67451c78..e8ab70648f 100644 --- a/lib/substitutions/src/sub_parallel_computation_graph.cc +++ b/lib/substitutions/src/sub_parallel_computation_graph.cc @@ -2,8 +2,9 @@ namespace FlexFlow { -ParallelTensor at(SubParallelComputationGraph const &g, OpenMultiDiEdge const &e) { - return visit([&](const auto &e) { return g.at(e); }, e); +ParallelTensor at(SubParallelComputationGraph const &g, + OpenMultiDiEdge const &e) { + return visit([&](auto const &e) { return g.at(e); }, e); } -} +} // namespace FlexFlow diff --git a/lib/utils/include/utils/graph/labelled/labelled_open_interfaces.h b/lib/utils/include/utils/graph/labelled/labelled_open_interfaces.h index 09289077fb..5cf134a653 100644 --- a/lib/utils/include/utils/graph/labelled/labelled_open_interfaces.h +++ b/lib/utils/include/utils/graph/labelled/labelled_open_interfaces.h @@ -2,8 +2,8 @@ #define _FLEXFLOW_UTILS_INCLUDE_UTILS_GRAPH_LABELLED_LABELLED_OPEN_INTERFACES_H #include "standard_labelled_interfaces.h" -#include "utils/graph/open_graph_interfaces.h" #include "utils/containers.h" +#include "utils/graph/open_graph_interfaces.h" namespace FlexFlow { @@ -18,8 +18,10 @@ struct ILabelledOpenMultiDiGraphView public: std::unordered_set query_edges(MultiDiEdgeQuery const &q) const final { - return map_over_unordered_set([](OpenMultiDiEdge const &e) { return get(e); }, - IOpenMultiDiGraphView::query_edges(static_cast(q))); + return map_over_unordered_set( + [](OpenMultiDiEdge const &e) { return get(e); }, + IOpenMultiDiGraphView::query_edges( + static_cast(q))); } using ILabelledMultiDiGraphView::at; From 01b8ef277666fda465a0a6ff846a9003edbeb1fb Mon Sep 17 00:00:00 2001 From: lambda7xx Date: Thu, 28 Sep 2023 22:23:58 +0000 Subject: [PATCH 11/31] comment the docker --- packaging/docker/build.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packaging/docker/build.sh b/packaging/docker/build.sh index 7e8587bfdf..767a62a6e4 100755 --- a/packaging/docker/build.sh +++ b/packaging/docker/build.sh @@ -27,7 +27,7 @@ else fi # Build the FlexFlow Enviroment docker image -docker build --build-arg "FF_GPU_BACKEND=${FF_GPU_BACKEND}" -t "flexflow-environment-${FF_GPU_BACKEND}" -f docker/flexflow-environment/Dockerfile . +#docker build --build-arg "FF_GPU_BACKEND=${FF_GPU_BACKEND}" -t "flexflow-environment-${FF_GPU_BACKEND}" -f docker/flexflow-environment/Dockerfile . # If the user only wants to build the environment image, we are done if [[ "$image" == "flexflow-environment" ]]; then From 13cf2fd4207112001803f95bbf3b0116af9ab028 Mon Sep 17 00:00:00 2001 From: lambda7xx Date: Thu, 28 Sep 2023 22:28:10 +0000 Subject: [PATCH 12/31] leave config --- .github/workflows/build.yml | 212 ++++++++++++++-------------- .github/workflows/per-lib-check.yml | 36 ++--- packaging/docker/build.sh | 4 +- 3 files changed, 126 insertions(+), 126 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 8c57d4738b..aabfec54fa 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -1,116 +1,116 @@ -name: "build" -on: - pull_request: - paths: - - "include/**" - - "cmake/**" - - "config/**" - - "python/**" - - "src/**" - - ".github/workflows/helpers/install_dependencies.sh" - - ".github/workflows/build.yml" - push: - branches: - - "master" - paths: - - "include/**" - - "cmake/**" - - "config/**" - - "python/**" - - "src/**" - - ".github/workflows/helpers/install_dependencies.sh" - - ".github/workflows/build.yml" - workflow_dispatch: -concurrency: - group: build-${{ github.head_ref || github.run_id }} - cancel-in-progress: true +# name: "build" +# on: +# pull_request: +# paths: +# - "include/**" +# - "cmake/**" +# - "config/**" +# - "python/**" +# - "src/**" +# - ".github/workflows/helpers/install_dependencies.sh" +# - ".github/workflows/build.yml" +# push: +# branches: +# - "master" +# paths: +# - "include/**" +# - "cmake/**" +# - "config/**" +# - "python/**" +# - "src/**" +# - ".github/workflows/helpers/install_dependencies.sh" +# - ".github/workflows/build.yml" +# workflow_dispatch: +# concurrency: +# group: build-${{ github.head_ref || github.run_id }} +# cancel-in-progress: true -jobs: - cmake-build: - name: Build FlexFlow with CMake - runs-on: ubuntu-20.04 - defaults: - run: - shell: bash -l {0} # required to use an activated conda environment - strategy: - matrix: - gpu_backend: ["cuda"] - fail-fast: false - steps: - - name: Checkout Git Repository - uses: actions/checkout@v3 - with: - submodules: recursive +# jobs: +# cmake-build: +# name: Build FlexFlow with CMake +# runs-on: ubuntu-20.04 +# defaults: +# run: +# shell: bash -l {0} # required to use an activated conda environment +# strategy: +# matrix: +# gpu_backend: ["cuda"] +# fail-fast: false +# steps: +# - name: Checkout Git Repository +# uses: actions/checkout@v3 +# with: +# submodules: recursive - - name: Free additional space on runner - run: .github/workflows/helpers/free_space_on_runner.sh +# - name: Free additional space on runner +# run: .github/workflows/helpers/free_space_on_runner.sh - - name: Install CUDA - uses: Jimver/cuda-toolkit@v0.2.8 - id: cuda-toolkit - with: - cuda: "11.1.1" - # Disable caching of the CUDA binaries, since it does not give us any significant performance improvement - use-github-cache: "false" +# - name: Install CUDA +# uses: Jimver/cuda-toolkit@v0.2.8 +# id: cuda-toolkit +# with: +# cuda: "11.1.1" +# # Disable caching of the CUDA binaries, since it does not give us any significant performance improvement +# use-github-cache: "false" - - name: Install system dependencies - run: FF_GPU_BACKEND=${{ matrix.gpu_backend }} .github/workflows/helpers/install_dependencies.sh +# - name: Install system dependencies +# run: FF_GPU_BACKEND=${{ matrix.gpu_backend }} .github/workflows/helpers/install_dependencies.sh - # - name: Install conda and FlexFlow dependencies - # uses: conda-incubator/setup-miniconda@v2 - # with: - # activate-environment: flexflow - # environment-file: conda/environment.yml - # auto-activate-base: false +# # - name: Install conda and FlexFlow dependencies +# # uses: conda-incubator/setup-miniconda@v2 +# # with: +# # activate-environment: flexflow +# # environment-file: conda/environment.yml +# # auto-activate-base: false - - name: Build FlexFlow - run: | - export CUDNN_DIR=/usr/local/cuda - export CUDA_DIR=/usr/local/cuda - export FF_HOME=$(pwd) - export FF_GPU_BACKEND=${{ matrix.gpu_backend }} - export FF_CUDA_ARCH=70 - cores_available=$(nproc --all) - n_build_cores=$(( cores_available -1 )) - if (( $n_build_cores < 1 )) ; then n_build_cores=1 ; fi - mkdir build - cd build - if [[ "${FF_GPU_BACKEND}" == "cuda" ]]; then - export FF_BUILD_ALL_EXAMPLES=ON - export FF_BUILD_UNIT_TESTS=ON - fi - ../config/config.linux - make -j $n_build_cores +# - name: Build FlexFlow +# run: | +# export CUDNN_DIR=/usr/local/cuda +# export CUDA_DIR=/usr/local/cuda +# export FF_HOME=$(pwd) +# export FF_GPU_BACKEND=${{ matrix.gpu_backend }} +# export FF_CUDA_ARCH=70 +# cores_available=$(nproc --all) +# n_build_cores=$(( cores_available -1 )) +# if (( $n_build_cores < 1 )) ; then n_build_cores=1 ; fi +# mkdir build +# cd build +# if [[ "${FF_GPU_BACKEND}" == "cuda" ]]; then +# export FF_BUILD_ALL_EXAMPLES=ON +# export FF_BUILD_UNIT_TESTS=ON +# fi +# ../config/config.linux +# make -j $n_build_cores - - name: Install FlexFlow - run: | - export CUDNN_DIR=/usr/local/cuda - export CUDA_DIR=/usr/local/cuda - export FF_HOME=$(pwd) - export FF_GPU_BACKEND=${{ matrix.gpu_backend }} - export FF_CUDA_ARCH=70 - cd build - if [[ "${FF_GPU_BACKEND}" == "cuda" ]]; then - export FF_BUILD_ALL_EXAMPLES=ON - export FF_BUILD_UNIT_TESTS=ON - fi - ../config/config.linux - sudo make install - sudo ldconfig +# - name: Install FlexFlow +# run: | +# export CUDNN_DIR=/usr/local/cuda +# export CUDA_DIR=/usr/local/cuda +# export FF_HOME=$(pwd) +# export FF_GPU_BACKEND=${{ matrix.gpu_backend }} +# export FF_CUDA_ARCH=70 +# cd build +# if [[ "${FF_GPU_BACKEND}" == "cuda" ]]; then +# export FF_BUILD_ALL_EXAMPLES=ON +# export FF_BUILD_UNIT_TESTS=ON +# fi +# ../config/config.linux +# sudo make install +# sudo ldconfig - - name: Check availability of Python flexflow.core module - if: ${{ matrix.gpu_backend == 'cuda' }} - run: | - python -c "import flexflow.core; exit()" +# - name: Check availability of Python flexflow.core module +# if: ${{ matrix.gpu_backend == 'cuda' }} +# run: | +# python -c "import flexflow.core; exit()" - - name: Run C++ unit tests - if: ${{ matrix.gpu_backend == 'cuda' }} - run: | - export CUDNN_DIR=/usr/local/cuda - export CUDA_DIR=/usr/local/cuda - export FF_HOME=$(pwd) - export LD_LIBRARY_PATH=$CUDA_DIR/lib64/stubs:$LD_LIBRARY_PATH - sudo ln -s /usr/local/cuda/lib64/stubs/libcuda.so /usr/local/cuda/lib64/stubs/libcuda.so.1 - cd build - ./tests/unit/unit-test +# - name: Run C++ unit tests +# if: ${{ matrix.gpu_backend == 'cuda' }} +# run: | +# export CUDNN_DIR=/usr/local/cuda +# export CUDA_DIR=/usr/local/cuda +# export FF_HOME=$(pwd) +# export LD_LIBRARY_PATH=$CUDA_DIR/lib64/stubs:$LD_LIBRARY_PATH +# sudo ln -s /usr/local/cuda/lib64/stubs/libcuda.so /usr/local/cuda/lib64/stubs/libcuda.so.1 +# cd build +# ./tests/unit/unit-test diff --git a/.github/workflows/per-lib-check.yml b/.github/workflows/per-lib-check.yml index 7ea466e94e..de7b5fd845 100644 --- a/.github/workflows/per-lib-check.yml +++ b/.github/workflows/per-lib-check.yml @@ -44,24 +44,24 @@ jobs: environment-file: packaging/conda/environment.yml auto-activate-base: false - - name: Build lib ${{ matrix.library }} - run: | - export CUDNN_DIR=/usr/local/cuda - export CUDA_DIR=/usr/local/cuda - export FF_HOME=$(pwd) - export FF_GPU_BACKEND=${{ matrix.gpu_backend }} - export FF_CUDA_ARCH=70 - cores_available=$(nproc --all) - n_build_cores=$(( cores_available -1 )) - if (( $n_build_cores < 1 )) ; then n_build_cores=1 ; fi - mkdir build - cd build - #if [[ "${FF_GPU_BACKEND}" == "cuda" ]]; then - # export FF_BUILD_ALL_EXAMPLES=ON - # export FF_BUILD_UNIT_TESTS=ON - #fi - ../config/config.linux - make -j $n_build_cores ${{ matrix.library }} + # - name: Build lib ${{ matrix.library }} + # run: | + # export CUDNN_DIR=/usr/local/cuda + # export CUDA_DIR=/usr/local/cuda + # export FF_HOME=$(pwd) + # export FF_GPU_BACKEND=${{ matrix.gpu_backend }} + # export FF_CUDA_ARCH=70 + # cores_available=$(nproc --all) + # n_build_cores=$(( cores_available -1 )) + # if (( $n_build_cores < 1 )) ; then n_build_cores=1 ; fi + # mkdir build + # cd build + # #if [[ "${FF_GPU_BACKEND}" == "cuda" ]]; then + # # export FF_BUILD_ALL_EXAMPLES=ON + # # export FF_BUILD_UNIT_TESTS=ON + # #fi + # ../config/config.linux + # make -j $n_build_cores ${{ matrix.library }} diff --git a/packaging/docker/build.sh b/packaging/docker/build.sh index 767a62a6e4..bca3d99e71 100755 --- a/packaging/docker/build.sh +++ b/packaging/docker/build.sh @@ -81,8 +81,8 @@ fi # Build FlexFlow Docker image # shellcheck source=/dev/null -. config/config.linux get-docker-configs +#. config/config.linux get-docker-configs # Set value of BUILD_CONFIGS get_build_configs -docker build --build-arg "N_BUILD_CORES=${n_build_cores}" --build-arg "FF_GPU_BACKEND=${FF_GPU_BACKEND}" --build-arg "BUILD_CONFIGS=${BUILD_CONFIGS}" -t "flexflow-${FF_GPU_BACKEND}" -f docker/flexflow/Dockerfile . +#docker build --build-arg "N_BUILD_CORES=${n_build_cores}" --build-arg "FF_GPU_BACKEND=${FF_GPU_BACKEND}" --build-arg "BUILD_CONFIGS=${BUILD_CONFIGS}" -t "flexflow-${FF_GPU_BACKEND}" -f docker/flexflow/Dockerfile . From a054943e279df1156f141e75f66b4dc327901552 Mon Sep 17 00:00:00 2001 From: lambda7xx Date: Thu, 28 Sep 2023 22:30:48 +0000 Subject: [PATCH 13/31] comment --- packaging/docker/build.sh | 154 +++++++++++++++++++------------------- 1 file changed, 77 insertions(+), 77 deletions(-) diff --git a/packaging/docker/build.sh b/packaging/docker/build.sh index bca3d99e71..afa5e248f7 100755 --- a/packaging/docker/build.sh +++ b/packaging/docker/build.sh @@ -1,88 +1,88 @@ -#! /usr/bin/env bash -set -euo pipefail +# #! /usr/bin/env bash +# set -euo pipefail -# Usage: ./build.sh +# # Usage: ./build.sh -# https://stackoverflow.com/questions/59895/how-do-i-get-the-directory-where-a-bash-script-is-located-from-within-the-script -SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd ) -# Cd into $FF_HOME. Assumes this script is in $FF_HOME/docker -cd "$SCRIPT_DIR/.." +# # https://stackoverflow.com/questions/59895/how-do-i-get-the-directory-where-a-bash-script-is-located-from-within-the-script +# SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd ) +# # Cd into $FF_HOME. Assumes this script is in $FF_HOME/docker +# cd "$SCRIPT_DIR/.." -# Get name of desired Docker image as input -image="${1:-flexflow}" -if [[ "$image" != @(flexflow-environment|flexflow) ]]; then - echo "Error, image name ${image} is invalid. Choose between 'flexflow-environment' and 'flexflow'." - exit 1 -fi +# # Get name of desired Docker image as input +# image="${1:-flexflow}" +# if [[ "$image" != @(flexflow-environment|flexflow) ]]; then +# echo "Error, image name ${image} is invalid. Choose between 'flexflow-environment' and 'flexflow'." +# exit 1 +# fi -# Set up GPU backend -FF_GPU_BACKEND=${FF_GPU_BACKEND:-cuda} -if [[ "${FF_GPU_BACKEND}" != @(cuda|hip_cuda|hip_rocm|intel) ]]; then - echo "Error, value of FF_GPU_BACKEND (${FF_GPU_BACKEND}) is invalid. Pick between 'cuda', 'hip_cuda', 'hip_rocm' or 'intel'." - exit 1 -elif [[ "${FF_GPU_BACKEND}" != "cuda" ]]; then - echo "Configuring FlexFlow to build for gpu backend: ${FF_GPU_BACKEND}" -else - echo "Letting FlexFlow build for a default GPU backend: cuda" -fi +# # Set up GPU backend +# FF_GPU_BACKEND=${FF_GPU_BACKEND:-cuda} +# if [[ "${FF_GPU_BACKEND}" != @(cuda|hip_cuda|hip_rocm|intel) ]]; then +# echo "Error, value of FF_GPU_BACKEND (${FF_GPU_BACKEND}) is invalid. Pick between 'cuda', 'hip_cuda', 'hip_rocm' or 'intel'." +# exit 1 +# elif [[ "${FF_GPU_BACKEND}" != "cuda" ]]; then +# echo "Configuring FlexFlow to build for gpu backend: ${FF_GPU_BACKEND}" +# else +# echo "Letting FlexFlow build for a default GPU backend: cuda" +# fi -# Build the FlexFlow Enviroment docker image -#docker build --build-arg "FF_GPU_BACKEND=${FF_GPU_BACKEND}" -t "flexflow-environment-${FF_GPU_BACKEND}" -f docker/flexflow-environment/Dockerfile . +# # Build the FlexFlow Enviroment docker image +# #docker build --build-arg "FF_GPU_BACKEND=${FF_GPU_BACKEND}" -t "flexflow-environment-${FF_GPU_BACKEND}" -f docker/flexflow-environment/Dockerfile . -# If the user only wants to build the environment image, we are done -if [[ "$image" == "flexflow-environment" ]]; then - exit 0 -fi +# # If the user only wants to build the environment image, we are done +# if [[ "$image" == "flexflow-environment" ]]; then +# exit 0 +# fi -# Gather arguments needed to build the FlexFlow image -# Get number of cores available on the machine. Build with all cores but one, to prevent RAM choking -cores_available=$(nproc --all) -n_build_cores=$(( cores_available -1 )) +# # Gather arguments needed to build the FlexFlow image +# # Get number of cores available on the machine. Build with all cores but one, to prevent RAM choking +# cores_available=$(nproc --all) +# n_build_cores=$(( cores_available -1 )) -# If FF_CUDA_ARCH is set to autodetect, we need to perform the autodetection here because the Docker -# image will not have access to GPUs during the build phase (due to a Docker restriction). In all other -# cases, we pass the value of FF_CUDA_ARCH directly to Cmake. -if [[ "${FF_CUDA_ARCH:-autodetect}" == "autodetect" ]]; then - # Get CUDA architecture(s), if GPUs are available - cat << EOF > ./get_gpu_arch.cu -#include -int main() { - int count = 0; - if (cudaSuccess != cudaGetDeviceCount(&count)) return -1; - if (count == 0) return -1; - for (int device = 0; device < count; ++device) { - cudaDeviceProp prop; - if (cudaSuccess == cudaGetDeviceProperties(&prop, device)) - printf("%d ", prop.major*10+prop.minor); - } - return 0; -} -EOF - gpu_arch_codes="" - if command -v nvcc &> /dev/null - then - nvcc ./get_gpu_arch.cu -o ./get_gpu_arch - gpu_arch_codes="$(./get_gpu_arch)" - fi - gpu_arch_codes="$(echo "$gpu_arch_codes" | xargs -n1 | sort -u | xargs)" - gpu_arch_codes="${gpu_arch_codes// /,}" - rm -f ./get_gpu_arch.cu ./get_gpu_arch +# # If FF_CUDA_ARCH is set to autodetect, we need to perform the autodetection here because the Docker +# # image will not have access to GPUs during the build phase (due to a Docker restriction). In all other +# # cases, we pass the value of FF_CUDA_ARCH directly to Cmake. +# if [[ "${FF_CUDA_ARCH:-autodetect}" == "autodetect" ]]; then +# # Get CUDA architecture(s), if GPUs are available +# cat << EOF > ./get_gpu_arch.cu +# #include +# int main() { +# int count = 0; +# if (cudaSuccess != cudaGetDeviceCount(&count)) return -1; +# if (count == 0) return -1; +# for (int device = 0; device < count; ++device) { +# cudaDeviceProp prop; +# if (cudaSuccess == cudaGetDeviceProperties(&prop, device)) +# printf("%d ", prop.major*10+prop.minor); +# } +# return 0; +# } +# EOF +# gpu_arch_codes="" +# if command -v nvcc &> /dev/null +# then +# nvcc ./get_gpu_arch.cu -o ./get_gpu_arch +# gpu_arch_codes="$(./get_gpu_arch)" +# fi +# gpu_arch_codes="$(echo "$gpu_arch_codes" | xargs -n1 | sort -u | xargs)" +# gpu_arch_codes="${gpu_arch_codes// /,}" +# rm -f ./get_gpu_arch.cu ./get_gpu_arch - if [[ -n "$gpu_arch_codes" ]]; then - echo "Host machine has GPUs with architecture codes: $gpu_arch_codes" - echo "Configuring FlexFlow to build for the $gpu_arch_codes code(s)." - FF_CUDA_ARCH="${gpu_arch_codes}" - export FF_CUDA_ARCH - else - echo "FF_CUDA_ARCH is set to 'autodetect', but the host machine does not have any compatible GPUs." - exit 1 - fi -fi +# if [[ -n "$gpu_arch_codes" ]]; then +# echo "Host machine has GPUs with architecture codes: $gpu_arch_codes" +# echo "Configuring FlexFlow to build for the $gpu_arch_codes code(s)." +# FF_CUDA_ARCH="${gpu_arch_codes}" +# export FF_CUDA_ARCH +# else +# echo "FF_CUDA_ARCH is set to 'autodetect', but the host machine does not have any compatible GPUs." +# exit 1 +# fi +# fi -# Build FlexFlow Docker image -# shellcheck source=/dev/null -#. config/config.linux get-docker-configs -# Set value of BUILD_CONFIGS -get_build_configs +# # Build FlexFlow Docker image +# # shellcheck source=/dev/null +# #. config/config.linux get-docker-configs +# # Set value of BUILD_CONFIGS +# get_build_configs -#docker build --build-arg "N_BUILD_CORES=${n_build_cores}" --build-arg "FF_GPU_BACKEND=${FF_GPU_BACKEND}" --build-arg "BUILD_CONFIGS=${BUILD_CONFIGS}" -t "flexflow-${FF_GPU_BACKEND}" -f docker/flexflow/Dockerfile . +# #docker build --build-arg "N_BUILD_CORES=${n_build_cores}" --build-arg "FF_GPU_BACKEND=${FF_GPU_BACKEND}" --build-arg "BUILD_CONFIGS=${BUILD_CONFIGS}" -t "flexflow-${FF_GPU_BACKEND}" -f docker/flexflow/Dockerfile . From e1adfd88bb7e7f0a51f626643ad9d9b80fe67665 Mon Sep 17 00:00:00 2001 From: lambda7xx Date: Thu, 28 Sep 2023 22:34:04 +0000 Subject: [PATCH 14/31] add bash --- packaging/docker/build.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packaging/docker/build.sh b/packaging/docker/build.sh index afa5e248f7..d5ddea3ec3 100755 --- a/packaging/docker/build.sh +++ b/packaging/docker/build.sh @@ -1,4 +1,4 @@ -# #! /usr/bin/env bash +#! /usr/bin/env bash # set -euo pipefail # # Usage: ./build.sh From 73d9fd3a3bb2aa0855b0d29d3982b64d7a3a3a40 Mon Sep 17 00:00:00 2001 From: lambda7xx Date: Thu, 28 Sep 2023 22:36:10 +0000 Subject: [PATCH 15/31] remove the docker --- .github/workflows/docker-build.yml | 144 ++++++++++++++--------------- 1 file changed, 72 insertions(+), 72 deletions(-) diff --git a/.github/workflows/docker-build.yml b/.github/workflows/docker-build.yml index a1ef864ef5..de233d7ecf 100644 --- a/.github/workflows/docker-build.yml +++ b/.github/workflows/docker-build.yml @@ -1,79 +1,79 @@ -name: "docker-build" -on: - pull_request: - paths: - - "docker/**" - - "!docker/README.md" - - ".github/workflows/docker-build.yml" - push: - branches: - - "master" - schedule: - # Run every week on Sunday at midnight PT (3am ET / 8am UTC) to keep the docker images updated - - cron: "0 8 * * 0" - workflow_dispatch: +# name: "docker-build" +# on: +# pull_request: +# paths: +# - "docker/**" +# - "!docker/README.md" +# - ".github/workflows/docker-build.yml" +# push: +# branches: +# - "master" +# schedule: +# # Run every week on Sunday at midnight PT (3am ET / 8am UTC) to keep the docker images updated +# - cron: "0 8 * * 0" +# workflow_dispatch: -# Cancel outdated workflows if they are still running -concurrency: - group: docker-build-${{ github.head_ref || github.run_id }} - cancel-in-progress: true +# # Cancel outdated workflows if they are still running +# concurrency: +# group: docker-build-${{ github.head_ref || github.run_id }} +# cancel-in-progress: true -jobs: - docker-build: - name: Build and Install FlexFlow in a Docker Container - runs-on: ubuntu-20.04 - strategy: - matrix: - gpu_backend: ["cuda"] - fail-fast: false - steps: - - name: Checkout Git Repository - uses: actions/checkout@v3 - with: - submodules: recursive +# jobs: +# docker-build: +# name: Build and Install FlexFlow in a Docker Container +# runs-on: ubuntu-20.04 +# strategy: +# matrix: +# gpu_backend: ["cuda"] +# fail-fast: false +# steps: +# - name: Checkout Git Repository +# uses: actions/checkout@v3 +# with: +# submodules: recursive - - name: Free additional space on runner - run: .github/workflows/helpers/free_space_on_runner.sh +# - name: Free additional space on runner +# run: .github/workflows/helpers/free_space_on_runner.sh - - name: Build Docker container - env: - FF_GPU_BACKEND: ${{ matrix.gpu_backend }} - run: | - # On push to master, build for all compatible architectures, so that we can publish - # a pre-built general-purpose image. On all other cases, only build for one architecture - # to save time. - if [[ ( ${{ github.event_name }} == 'push' || ${{ github.event_name }} == 'schedule' ) && ${GITHUB_REF#refs/heads/} == "master" ]]; then - export FF_CUDA_ARCH=all - else - export FF_CUDA_ARCH=70 - fi - packaging/docker/build.sh flexflow +# - name: Build Docker container +# env: +# FF_GPU_BACKEND: ${{ matrix.gpu_backend }} +# run: | +# # On push to master, build for all compatible architectures, so that we can publish +# # a pre-built general-purpose image. On all other cases, only build for one architecture +# # to save time. +# if [[ ( ${{ github.event_name }} == 'push' || ${{ github.event_name }} == 'schedule' ) && ${GITHUB_REF#refs/heads/} == "master" ]]; then +# export FF_CUDA_ARCH=all +# else +# export FF_CUDA_ARCH=70 +# fi +# packaging/docker/build.sh flexflow - - name: Check availability of Python flexflow.core module - if: ${{ matrix.gpu_backend == 'cuda' }} - run: docker run --entrypoint python flexflow-cuda:latest -c "import flexflow.core; exit()" +# # - name: Check availability of Python flexflow.core module +# # if: ${{ matrix.gpu_backend == 'cuda' }} +# # run: docker run --entrypoint python flexflow-cuda:latest -c "import flexflow.core; exit()" - - name: Publish Docker environment image (on push to master) - if: github.repository_owner == 'flexflow' - env: - FLEXFLOW_CONTAINER_TOKEN: ${{ secrets.FLEXFLOW_CONTAINER_TOKEN }} - FF_GPU_BACKEND: ${{ matrix.gpu_backend }} - run: | - if [[ ( ${{ github.event_name }} == 'push' || ${{ github.event_name }} == 'schedule' ) && ${GITHUB_REF#refs/heads/} == "master" ]]; then - ./docker/publish.sh "flexflow-environment-${FF_GPU_BACKEND}" - ./docker/publish.sh "flexflow-${FF_GPU_BACKEND}" - else - echo "No need to update Docker containers in ghrc.io registry at this time." - fi +# - name: Publish Docker environment image (on push to master) +# if: github.repository_owner == 'flexflow' +# env: +# FLEXFLOW_CONTAINER_TOKEN: ${{ secrets.FLEXFLOW_CONTAINER_TOKEN }} +# FF_GPU_BACKEND: ${{ matrix.gpu_backend }} +# run: | +# if [[ ( ${{ github.event_name }} == 'push' || ${{ github.event_name }} == 'schedule' ) && ${GITHUB_REF#refs/heads/} == "master" ]]; then +# ./docker/publish.sh "flexflow-environment-${FF_GPU_BACKEND}" +# ./docker/publish.sh "flexflow-${FF_GPU_BACKEND}" +# else +# echo "No need to update Docker containers in ghrc.io registry at this time." +# fi - notify-slack: - name: Notify Slack in case of failure - runs-on: ubuntu-20.04 - needs: docker-build - if: ${{ failure() && github.event_name == 'schedule' && github.repository_owner == 'flexflow' }} - steps: - - name: Send Slack message - env: - SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }} - run: | - curl -X POST -H 'Content-type: application/json' --data "{\"text\":\"Weekly FlexFlow Docker images build failed! :x: \"}" $SLACK_WEBHOOK +# notify-slack: +# name: Notify Slack in case of failure +# runs-on: ubuntu-20.04 +# needs: docker-build +# if: ${{ failure() && github.event_name == 'schedule' && github.repository_owner == 'flexflow' }} +# steps: +# - name: Send Slack message +# env: +# SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }} +# run: | +# curl -X POST -H 'Content-type: application/json' --data "{\"text\":\"Weekly FlexFlow Docker images build failed! :x: \"}" $SLACK_WEBHOOK From f09f7fd7e38721b55981eafdeede28a4ff3578ef Mon Sep 17 00:00:00 2001 From: lambda7xx Date: Thu, 28 Sep 2023 22:51:22 +0000 Subject: [PATCH 16/31] update --- .github/workflows/pip-install.yml | 122 +++++++++++++++--------------- 1 file changed, 61 insertions(+), 61 deletions(-) diff --git a/.github/workflows/pip-install.yml b/.github/workflows/pip-install.yml index 2f0263b097..75d0a52aee 100644 --- a/.github/workflows/pip-install.yml +++ b/.github/workflows/pip-install.yml @@ -1,68 +1,68 @@ -name: "pip-install" -on: - pull_request: - paths: - - "cmake/**" - - "config/**" - - "python/**" - - "setup.py" - - ".github/workflows/helpers/install_dependencies.sh" - - ".github/workflows/pip-install.yml" - push: - branches: - - "master" - paths: - - "cmake/**" - - "config/**" - - "python/**" - - "setup.py" - - ".github/workflows/helpers/install_dependencies.sh" - - ".github/workflows/pip-install.yml" - workflow_dispatch: -concurrency: - group: pip-install-${{ github.head_ref || github.run_id }} - cancel-in-progress: true +# name: "pip-install" +# on: +# pull_request: +# paths: +# - "cmake/**" +# - "config/**" +# - "python/**" +# - "setup.py" +# - ".github/workflows/helpers/install_dependencies.sh" +# - ".github/workflows/pip-install.yml" +# push: +# branches: +# - "master" +# paths: +# - "cmake/**" +# - "config/**" +# - "python/**" +# - "setup.py" +# - ".github/workflows/helpers/install_dependencies.sh" +# - ".github/workflows/pip-install.yml" +# workflow_dispatch: +# concurrency: +# group: pip-install-${{ github.head_ref || github.run_id }} +# cancel-in-progress: true -jobs: - pip-install-flexflow: - name: Install FlexFlow with pip - runs-on: ubuntu-20.04 - defaults: - run: - shell: bash -l {0} # required to use an activated conda environment - steps: - - name: Checkout Git Repository - uses: actions/checkout@v3 - with: - submodules: recursive +# jobs: +# pip-install-flexflow: +# name: Install FlexFlow with pip +# runs-on: ubuntu-20.04 +# defaults: +# run: +# shell: bash -l {0} # required to use an activated conda environment +# steps: +# - name: Checkout Git Repository +# uses: actions/checkout@v3 +# with: +# submodules: recursive - - name: Free additional space on runner - run: .github/workflows/helpers/free_space_on_runner.sh +# - name: Free additional space on runner +# run: .github/workflows/helpers/free_space_on_runner.sh - - name: Install CUDA - uses: Jimver/cuda-toolkit@v0.2.8 - id: cuda-toolkit - with: - cuda: "11.1.1" - # Disable caching of the CUDA binaries, since it does not give us any significant performance improvement - use-github-cache: "false" +# - name: Install CUDA +# uses: Jimver/cuda-toolkit@v0.2.8 +# id: cuda-toolkit +# with: +# cuda: "11.1.1" +# # Disable caching of the CUDA binaries, since it does not give us any significant performance improvement +# use-github-cache: "false" - - name: Install system dependencies - run: .github/workflows/helpers/install_dependencies.sh +# - name: Install system dependencies +# run: .github/workflows/helpers/install_dependencies.sh - # - name: Install conda and FlexFlow dependencies - # uses: conda-incubator/setup-miniconda@v2 - # with: - # activate-environment: flexflow - # environment-file: conda/environment.yml - # auto-activate-base: false +# # - name: Install conda and FlexFlow dependencies +# # uses: conda-incubator/setup-miniconda@v2 +# # with: +# # activate-environment: flexflow +# # environment-file: conda/environment.yml +# # auto-activate-base: false - - name: Build and Install FlexFlow - run: | - export FF_HOME=$(pwd) - export FF_CUDA_ARCH=70 - pip install . --verbose +# - name: Build and Install FlexFlow +# run: | +# export FF_HOME=$(pwd) +# export FF_CUDA_ARCH=70 +# pip install . --verbose - - name: Check availability of Python flexflow.core module - run: | - python -c "import flexflow.core; exit()" +# - name: Check availability of Python flexflow.core module +# run: | +# python -c "import flexflow.core; exit()" From c87cad5d8436ae8ec9c2f73a29bd1330f6f77f10 Mon Sep 17 00:00:00 2001 From: lambda7xx Date: Sat, 30 Sep 2023 12:59:37 +0000 Subject: [PATCH 17/31] delete the unuseful file --- .github/workflows/build.yml | 116 ----------------------- .github/workflows/docker-build.yml | 79 --------------- .github/workflows/per-lib-check.yml | 36 +++---- .github/workflows/pip-install.yml | 68 ------------- packaging/docker/build.sh | 88 ----------------- packaging/docker/flexflow/Dockerfile-old | 25 ----- 6 files changed, 18 insertions(+), 394 deletions(-) delete mode 100644 .github/workflows/build.yml delete mode 100644 .github/workflows/docker-build.yml delete mode 100644 .github/workflows/pip-install.yml delete mode 100755 packaging/docker/build.sh delete mode 100644 packaging/docker/flexflow/Dockerfile-old diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml deleted file mode 100644 index aabfec54fa..0000000000 --- a/.github/workflows/build.yml +++ /dev/null @@ -1,116 +0,0 @@ -# name: "build" -# on: -# pull_request: -# paths: -# - "include/**" -# - "cmake/**" -# - "config/**" -# - "python/**" -# - "src/**" -# - ".github/workflows/helpers/install_dependencies.sh" -# - ".github/workflows/build.yml" -# push: -# branches: -# - "master" -# paths: -# - "include/**" -# - "cmake/**" -# - "config/**" -# - "python/**" -# - "src/**" -# - ".github/workflows/helpers/install_dependencies.sh" -# - ".github/workflows/build.yml" -# workflow_dispatch: -# concurrency: -# group: build-${{ github.head_ref || github.run_id }} -# cancel-in-progress: true - -# jobs: -# cmake-build: -# name: Build FlexFlow with CMake -# runs-on: ubuntu-20.04 -# defaults: -# run: -# shell: bash -l {0} # required to use an activated conda environment -# strategy: -# matrix: -# gpu_backend: ["cuda"] -# fail-fast: false -# steps: -# - name: Checkout Git Repository -# uses: actions/checkout@v3 -# with: -# submodules: recursive - -# - name: Free additional space on runner -# run: .github/workflows/helpers/free_space_on_runner.sh - -# - name: Install CUDA -# uses: Jimver/cuda-toolkit@v0.2.8 -# id: cuda-toolkit -# with: -# cuda: "11.1.1" -# # Disable caching of the CUDA binaries, since it does not give us any significant performance improvement -# use-github-cache: "false" - -# - name: Install system dependencies -# run: FF_GPU_BACKEND=${{ matrix.gpu_backend }} .github/workflows/helpers/install_dependencies.sh - -# # - name: Install conda and FlexFlow dependencies -# # uses: conda-incubator/setup-miniconda@v2 -# # with: -# # activate-environment: flexflow -# # environment-file: conda/environment.yml -# # auto-activate-base: false - -# - name: Build FlexFlow -# run: | -# export CUDNN_DIR=/usr/local/cuda -# export CUDA_DIR=/usr/local/cuda -# export FF_HOME=$(pwd) -# export FF_GPU_BACKEND=${{ matrix.gpu_backend }} -# export FF_CUDA_ARCH=70 -# cores_available=$(nproc --all) -# n_build_cores=$(( cores_available -1 )) -# if (( $n_build_cores < 1 )) ; then n_build_cores=1 ; fi -# mkdir build -# cd build -# if [[ "${FF_GPU_BACKEND}" == "cuda" ]]; then -# export FF_BUILD_ALL_EXAMPLES=ON -# export FF_BUILD_UNIT_TESTS=ON -# fi -# ../config/config.linux -# make -j $n_build_cores - -# - name: Install FlexFlow -# run: | -# export CUDNN_DIR=/usr/local/cuda -# export CUDA_DIR=/usr/local/cuda -# export FF_HOME=$(pwd) -# export FF_GPU_BACKEND=${{ matrix.gpu_backend }} -# export FF_CUDA_ARCH=70 -# cd build -# if [[ "${FF_GPU_BACKEND}" == "cuda" ]]; then -# export FF_BUILD_ALL_EXAMPLES=ON -# export FF_BUILD_UNIT_TESTS=ON -# fi -# ../config/config.linux -# sudo make install -# sudo ldconfig - -# - name: Check availability of Python flexflow.core module -# if: ${{ matrix.gpu_backend == 'cuda' }} -# run: | -# python -c "import flexflow.core; exit()" - -# - name: Run C++ unit tests -# if: ${{ matrix.gpu_backend == 'cuda' }} -# run: | -# export CUDNN_DIR=/usr/local/cuda -# export CUDA_DIR=/usr/local/cuda -# export FF_HOME=$(pwd) -# export LD_LIBRARY_PATH=$CUDA_DIR/lib64/stubs:$LD_LIBRARY_PATH -# sudo ln -s /usr/local/cuda/lib64/stubs/libcuda.so /usr/local/cuda/lib64/stubs/libcuda.so.1 -# cd build -# ./tests/unit/unit-test - diff --git a/.github/workflows/docker-build.yml b/.github/workflows/docker-build.yml deleted file mode 100644 index de233d7ecf..0000000000 --- a/.github/workflows/docker-build.yml +++ /dev/null @@ -1,79 +0,0 @@ -# name: "docker-build" -# on: -# pull_request: -# paths: -# - "docker/**" -# - "!docker/README.md" -# - ".github/workflows/docker-build.yml" -# push: -# branches: -# - "master" -# schedule: -# # Run every week on Sunday at midnight PT (3am ET / 8am UTC) to keep the docker images updated -# - cron: "0 8 * * 0" -# workflow_dispatch: - -# # Cancel outdated workflows if they are still running -# concurrency: -# group: docker-build-${{ github.head_ref || github.run_id }} -# cancel-in-progress: true - -# jobs: -# docker-build: -# name: Build and Install FlexFlow in a Docker Container -# runs-on: ubuntu-20.04 -# strategy: -# matrix: -# gpu_backend: ["cuda"] -# fail-fast: false -# steps: -# - name: Checkout Git Repository -# uses: actions/checkout@v3 -# with: -# submodules: recursive - -# - name: Free additional space on runner -# run: .github/workflows/helpers/free_space_on_runner.sh - -# - name: Build Docker container -# env: -# FF_GPU_BACKEND: ${{ matrix.gpu_backend }} -# run: | -# # On push to master, build for all compatible architectures, so that we can publish -# # a pre-built general-purpose image. On all other cases, only build for one architecture -# # to save time. -# if [[ ( ${{ github.event_name }} == 'push' || ${{ github.event_name }} == 'schedule' ) && ${GITHUB_REF#refs/heads/} == "master" ]]; then -# export FF_CUDA_ARCH=all -# else -# export FF_CUDA_ARCH=70 -# fi -# packaging/docker/build.sh flexflow - -# # - name: Check availability of Python flexflow.core module -# # if: ${{ matrix.gpu_backend == 'cuda' }} -# # run: docker run --entrypoint python flexflow-cuda:latest -c "import flexflow.core; exit()" - -# - name: Publish Docker environment image (on push to master) -# if: github.repository_owner == 'flexflow' -# env: -# FLEXFLOW_CONTAINER_TOKEN: ${{ secrets.FLEXFLOW_CONTAINER_TOKEN }} -# FF_GPU_BACKEND: ${{ matrix.gpu_backend }} -# run: | -# if [[ ( ${{ github.event_name }} == 'push' || ${{ github.event_name }} == 'schedule' ) && ${GITHUB_REF#refs/heads/} == "master" ]]; then -# ./docker/publish.sh "flexflow-environment-${FF_GPU_BACKEND}" -# ./docker/publish.sh "flexflow-${FF_GPU_BACKEND}" -# else -# echo "No need to update Docker containers in ghrc.io registry at this time." -# fi - -# notify-slack: -# name: Notify Slack in case of failure -# runs-on: ubuntu-20.04 -# needs: docker-build -# if: ${{ failure() && github.event_name == 'schedule' && github.repository_owner == 'flexflow' }} -# steps: -# - name: Send Slack message -# env: -# SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }} -# run: | -# curl -X POST -H 'Content-type: application/json' --data "{\"text\":\"Weekly FlexFlow Docker images build failed! :x: \"}" $SLACK_WEBHOOK diff --git a/.github/workflows/per-lib-check.yml b/.github/workflows/per-lib-check.yml index de7b5fd845..7ea466e94e 100644 --- a/.github/workflows/per-lib-check.yml +++ b/.github/workflows/per-lib-check.yml @@ -44,24 +44,24 @@ jobs: environment-file: packaging/conda/environment.yml auto-activate-base: false - # - name: Build lib ${{ matrix.library }} - # run: | - # export CUDNN_DIR=/usr/local/cuda - # export CUDA_DIR=/usr/local/cuda - # export FF_HOME=$(pwd) - # export FF_GPU_BACKEND=${{ matrix.gpu_backend }} - # export FF_CUDA_ARCH=70 - # cores_available=$(nproc --all) - # n_build_cores=$(( cores_available -1 )) - # if (( $n_build_cores < 1 )) ; then n_build_cores=1 ; fi - # mkdir build - # cd build - # #if [[ "${FF_GPU_BACKEND}" == "cuda" ]]; then - # # export FF_BUILD_ALL_EXAMPLES=ON - # # export FF_BUILD_UNIT_TESTS=ON - # #fi - # ../config/config.linux - # make -j $n_build_cores ${{ matrix.library }} + - name: Build lib ${{ matrix.library }} + run: | + export CUDNN_DIR=/usr/local/cuda + export CUDA_DIR=/usr/local/cuda + export FF_HOME=$(pwd) + export FF_GPU_BACKEND=${{ matrix.gpu_backend }} + export FF_CUDA_ARCH=70 + cores_available=$(nproc --all) + n_build_cores=$(( cores_available -1 )) + if (( $n_build_cores < 1 )) ; then n_build_cores=1 ; fi + mkdir build + cd build + #if [[ "${FF_GPU_BACKEND}" == "cuda" ]]; then + # export FF_BUILD_ALL_EXAMPLES=ON + # export FF_BUILD_UNIT_TESTS=ON + #fi + ../config/config.linux + make -j $n_build_cores ${{ matrix.library }} diff --git a/.github/workflows/pip-install.yml b/.github/workflows/pip-install.yml deleted file mode 100644 index 75d0a52aee..0000000000 --- a/.github/workflows/pip-install.yml +++ /dev/null @@ -1,68 +0,0 @@ -# name: "pip-install" -# on: -# pull_request: -# paths: -# - "cmake/**" -# - "config/**" -# - "python/**" -# - "setup.py" -# - ".github/workflows/helpers/install_dependencies.sh" -# - ".github/workflows/pip-install.yml" -# push: -# branches: -# - "master" -# paths: -# - "cmake/**" -# - "config/**" -# - "python/**" -# - "setup.py" -# - ".github/workflows/helpers/install_dependencies.sh" -# - ".github/workflows/pip-install.yml" -# workflow_dispatch: -# concurrency: -# group: pip-install-${{ github.head_ref || github.run_id }} -# cancel-in-progress: true - -# jobs: -# pip-install-flexflow: -# name: Install FlexFlow with pip -# runs-on: ubuntu-20.04 -# defaults: -# run: -# shell: bash -l {0} # required to use an activated conda environment -# steps: -# - name: Checkout Git Repository -# uses: actions/checkout@v3 -# with: -# submodules: recursive - -# - name: Free additional space on runner -# run: .github/workflows/helpers/free_space_on_runner.sh - -# - name: Install CUDA -# uses: Jimver/cuda-toolkit@v0.2.8 -# id: cuda-toolkit -# with: -# cuda: "11.1.1" -# # Disable caching of the CUDA binaries, since it does not give us any significant performance improvement -# use-github-cache: "false" - -# - name: Install system dependencies -# run: .github/workflows/helpers/install_dependencies.sh - -# # - name: Install conda and FlexFlow dependencies -# # uses: conda-incubator/setup-miniconda@v2 -# # with: -# # activate-environment: flexflow -# # environment-file: conda/environment.yml -# # auto-activate-base: false - -# - name: Build and Install FlexFlow -# run: | -# export FF_HOME=$(pwd) -# export FF_CUDA_ARCH=70 -# pip install . --verbose - -# - name: Check availability of Python flexflow.core module -# run: | -# python -c "import flexflow.core; exit()" diff --git a/packaging/docker/build.sh b/packaging/docker/build.sh deleted file mode 100755 index d5ddea3ec3..0000000000 --- a/packaging/docker/build.sh +++ /dev/null @@ -1,88 +0,0 @@ -#! /usr/bin/env bash -# set -euo pipefail - -# # Usage: ./build.sh - -# # https://stackoverflow.com/questions/59895/how-do-i-get-the-directory-where-a-bash-script-is-located-from-within-the-script -# SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd ) -# # Cd into $FF_HOME. Assumes this script is in $FF_HOME/docker -# cd "$SCRIPT_DIR/.." - -# # Get name of desired Docker image as input -# image="${1:-flexflow}" -# if [[ "$image" != @(flexflow-environment|flexflow) ]]; then -# echo "Error, image name ${image} is invalid. Choose between 'flexflow-environment' and 'flexflow'." -# exit 1 -# fi - -# # Set up GPU backend -# FF_GPU_BACKEND=${FF_GPU_BACKEND:-cuda} -# if [[ "${FF_GPU_BACKEND}" != @(cuda|hip_cuda|hip_rocm|intel) ]]; then -# echo "Error, value of FF_GPU_BACKEND (${FF_GPU_BACKEND}) is invalid. Pick between 'cuda', 'hip_cuda', 'hip_rocm' or 'intel'." -# exit 1 -# elif [[ "${FF_GPU_BACKEND}" != "cuda" ]]; then -# echo "Configuring FlexFlow to build for gpu backend: ${FF_GPU_BACKEND}" -# else -# echo "Letting FlexFlow build for a default GPU backend: cuda" -# fi - -# # Build the FlexFlow Enviroment docker image -# #docker build --build-arg "FF_GPU_BACKEND=${FF_GPU_BACKEND}" -t "flexflow-environment-${FF_GPU_BACKEND}" -f docker/flexflow-environment/Dockerfile . - -# # If the user only wants to build the environment image, we are done -# if [[ "$image" == "flexflow-environment" ]]; then -# exit 0 -# fi - -# # Gather arguments needed to build the FlexFlow image -# # Get number of cores available on the machine. Build with all cores but one, to prevent RAM choking -# cores_available=$(nproc --all) -# n_build_cores=$(( cores_available -1 )) - -# # If FF_CUDA_ARCH is set to autodetect, we need to perform the autodetection here because the Docker -# # image will not have access to GPUs during the build phase (due to a Docker restriction). In all other -# # cases, we pass the value of FF_CUDA_ARCH directly to Cmake. -# if [[ "${FF_CUDA_ARCH:-autodetect}" == "autodetect" ]]; then -# # Get CUDA architecture(s), if GPUs are available -# cat << EOF > ./get_gpu_arch.cu -# #include -# int main() { -# int count = 0; -# if (cudaSuccess != cudaGetDeviceCount(&count)) return -1; -# if (count == 0) return -1; -# for (int device = 0; device < count; ++device) { -# cudaDeviceProp prop; -# if (cudaSuccess == cudaGetDeviceProperties(&prop, device)) -# printf("%d ", prop.major*10+prop.minor); -# } -# return 0; -# } -# EOF -# gpu_arch_codes="" -# if command -v nvcc &> /dev/null -# then -# nvcc ./get_gpu_arch.cu -o ./get_gpu_arch -# gpu_arch_codes="$(./get_gpu_arch)" -# fi -# gpu_arch_codes="$(echo "$gpu_arch_codes" | xargs -n1 | sort -u | xargs)" -# gpu_arch_codes="${gpu_arch_codes// /,}" -# rm -f ./get_gpu_arch.cu ./get_gpu_arch - -# if [[ -n "$gpu_arch_codes" ]]; then -# echo "Host machine has GPUs with architecture codes: $gpu_arch_codes" -# echo "Configuring FlexFlow to build for the $gpu_arch_codes code(s)." -# FF_CUDA_ARCH="${gpu_arch_codes}" -# export FF_CUDA_ARCH -# else -# echo "FF_CUDA_ARCH is set to 'autodetect', but the host machine does not have any compatible GPUs." -# exit 1 -# fi -# fi - -# # Build FlexFlow Docker image -# # shellcheck source=/dev/null -# #. config/config.linux get-docker-configs -# # Set value of BUILD_CONFIGS -# get_build_configs - -# #docker build --build-arg "N_BUILD_CORES=${n_build_cores}" --build-arg "FF_GPU_BACKEND=${FF_GPU_BACKEND}" --build-arg "BUILD_CONFIGS=${BUILD_CONFIGS}" -t "flexflow-${FF_GPU_BACKEND}" -f docker/flexflow/Dockerfile . diff --git a/packaging/docker/flexflow/Dockerfile-old b/packaging/docker/flexflow/Dockerfile-old deleted file mode 100644 index 06e69ba4d3..0000000000 --- a/packaging/docker/flexflow/Dockerfile-old +++ /dev/null @@ -1,25 +0,0 @@ -ARG FF_GPU_BACKEND "cuda" -FROM flexflow-environment-$FF_GPU_BACKEND:latest - -LABEL org.opencontainers.image.source=https://github.com/flexflow/FlexFlow -LABEL org.opencontainers.image.description="FlexFlow container" - -# Copy FlexFlow repository -RUN mkdir FlexFlow -ENV FF_HOME /usr/FlexFlow -WORKDIR ${FF_HOME} -COPY . . - -# Args to build FlexFlow -ARG BUILD_CONFIGS -ARG N_BUILD_CORES - -# Build and install C++ and Python versions of FlexFlow -RUN mkdir -p build && cd build && \ - eval "$BUILD_CONFIGS" ../config/config.linux && \ - make -j $N_BUILD_CORES && \ - eval "$BUILD_CONFIGS" ../config/config.linux && \ - make install && \ - ldconfig - -ENTRYPOINT ["/bin/bash"] From 04bab53181553f936160d4696e8a9598e110ae04 Mon Sep 17 00:00:00 2001 From: lambda7xx Date: Sun, 1 Oct 2023 03:12:33 +0000 Subject: [PATCH 18/31] delete and comment --- .github/workflows/per-lib-check.yml | 34 +++++------ .../flexflow-environment/Dockerfile-old | 57 ------------------- 2 files changed, 17 insertions(+), 74 deletions(-) delete mode 100644 packaging/docker/flexflow-environment/Dockerfile-old diff --git a/.github/workflows/per-lib-check.yml b/.github/workflows/per-lib-check.yml index 7ea466e94e..c66ea0ae39 100644 --- a/.github/workflows/per-lib-check.yml +++ b/.github/workflows/per-lib-check.yml @@ -45,23 +45,23 @@ jobs: auto-activate-base: false - name: Build lib ${{ matrix.library }} - run: | - export CUDNN_DIR=/usr/local/cuda - export CUDA_DIR=/usr/local/cuda - export FF_HOME=$(pwd) - export FF_GPU_BACKEND=${{ matrix.gpu_backend }} - export FF_CUDA_ARCH=70 - cores_available=$(nproc --all) - n_build_cores=$(( cores_available -1 )) - if (( $n_build_cores < 1 )) ; then n_build_cores=1 ; fi - mkdir build - cd build - #if [[ "${FF_GPU_BACKEND}" == "cuda" ]]; then - # export FF_BUILD_ALL_EXAMPLES=ON - # export FF_BUILD_UNIT_TESTS=ON - #fi - ../config/config.linux - make -j $n_build_cores ${{ matrix.library }} + # run: | + # export CUDNN_DIR=/usr/local/cuda + # export CUDA_DIR=/usr/local/cuda + # export FF_HOME=$(pwd) + # export FF_GPU_BACKEND=${{ matrix.gpu_backend }} + # export FF_CUDA_ARCH=70 + # cores_available=$(nproc --all) + # n_build_cores=$(( cores_available -1 )) + # if (( $n_build_cores < 1 )) ; then n_build_cores=1 ; fi + # mkdir build + # cd build + # #if [[ "${FF_GPU_BACKEND}" == "cuda" ]]; then + # # export FF_BUILD_ALL_EXAMPLES=ON + # # export FF_BUILD_UNIT_TESTS=ON + # #fi + # ../config/config.linux + # make -j $n_build_cores ${{ matrix.library }} diff --git a/packaging/docker/flexflow-environment/Dockerfile-old b/packaging/docker/flexflow-environment/Dockerfile-old deleted file mode 100644 index e3c1cf8fc4..0000000000 --- a/packaging/docker/flexflow-environment/Dockerfile-old +++ /dev/null @@ -1,57 +0,0 @@ -# FROM nvidia/cuda:11.7.0-cudnn8-devel-ubuntu20.04 - -# LABEL org.opencontainers.image.source=https://github.com/flexflow/FlexFlow -# LABEL org.opencontainers.image.description="FlexFlow environment container" - -# # Install basic dependencies -# RUN apt-get update && apt-get install -y --no-install-recommends wget sudo binutils git zlib1g-dev lsb-release nano libhdf5-dev && \ -# rm -rf /var/lib/apt/lists/* /etc/apt/sources.list.d/cuda.list /etc/apt/sources.list.d/nvidia-ml.list && \ -# apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends software-properties-common && \ -# apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends build-essential apt-utils \ -# ca-certificates libssl-dev curl unzip htop && DEBIAN_FRONTEND=noninteractive \ -# apt-get install -y software-properties-common && \ -# add-apt-repository ppa:ubuntu-toolchain-r/test && \ -# apt-get update -y && \ -# apt-get upgrade -y libstdc++6 - -# # Install Python3 with Miniconda -# RUN wget -c -q https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh && \ -# mv Miniconda3-latest-Linux-x86_64.sh ~/Miniconda3-latest-Linux-x86_64.sh && \ -# chmod +x ~/Miniconda3-latest-Linux-x86_64.sh && \ -# bash ~/Miniconda3-latest-Linux-x86_64.sh -b -p /opt/conda && \ -# rm ~/Miniconda3-latest-Linux-x86_64.sh && \ -# /opt/conda/bin/conda upgrade --all && \ -# /opt/conda/bin/conda install conda-build conda-verify && \ -# /opt/conda/bin/conda clean -ya - -# # Optionally install HIP dependencies -# # Note that amd's docs say to also install the `hip-runtime-nvidia` package. This -# # package attempts to re-install cuda even though cuda is already installed -# # in the container. It also attempts to install packages for a graphical install. -# # For our container, we don't need `hip-runtime-nvidia` -# ARG FF_GPU_BACKEND "cuda" -# RUN if [ "$FF_GPU_BACKEND" = "hip_cuda" ] || [ "$FF_GPU_BACKEND" = "hip_rocm" ]; then \ -# echo "FF_GPU_BACKEND: ${FF_GPU_BACKEND}. Installing HIP dependencies"; \ -# wget https://repo.radeon.com/amdgpu-install/22.20.5/ubuntu/bionic/amdgpu-install_22.20.50205-1_all.deb; \ -# apt-get install -y ./amdgpu-install_22.20.50205-1_all.deb; \ -# rm ./amdgpu-install_22.20.50205-1_all.deb; \ -# amdgpu-install -y --usecase=hip,rocm --no-dkms; \ -# apt-get install -y hip-dev hipblas miopen-hip rocm-hip-sdk; \ -# else \ -# echo "FF_GPU_BACKEND: ${FF_GPU_BACKEND}. Skipping installing HIP dependencies"; \ -# fi -# RUN rm -rf /var/lib/apt/lists/* - -# # Set env vars -# ENV PATH /opt/conda/bin:$PATH -# ENV CUDNN_DIR /usr/local/cuda -# ENV CUDA_DIR /usr/local/cuda - -# # Install python packages and other dependencies -# RUN conda install -c conda-forge cmake make pillow cmake-build-extension pybind11 numpy pandas keras-preprocessing -# # Install CPU-only Pytorch and related dependencies -# RUN conda install pytorch torchvision torchaudio cpuonly -c pytorch -# RUN conda install -c conda-forge onnx transformers sentencepiece -# RUN pip3 install tensorflow - -# ENTRYPOINT ["/bin/bash"] From 196fc14de63ca60d0112b2c4e436c5a6062d9c67 Mon Sep 17 00:00:00 2001 From: lambda7xx Date: Sun, 1 Oct 2023 03:39:01 +0000 Subject: [PATCH 19/31] delete old --- .github/workflows/gpt-ci.yml-old | 0 1 file changed, 0 insertions(+), 0 deletions(-) delete mode 100644 .github/workflows/gpt-ci.yml-old diff --git a/.github/workflows/gpt-ci.yml-old b/.github/workflows/gpt-ci.yml-old deleted file mode 100644 index e69de29bb2..0000000000 From 6f20c80172f2b7d2bc4b493f7c1a33e96f8dc391 Mon Sep 17 00:00:00 2001 From: lambda7xx Date: Sun, 1 Oct 2023 03:58:48 +0000 Subject: [PATCH 20/31] uncomment --- .github/workflows/per-lib-check.yml | 34 ++++++++++++++--------------- 1 file changed, 17 insertions(+), 17 deletions(-) diff --git a/.github/workflows/per-lib-check.yml b/.github/workflows/per-lib-check.yml index c66ea0ae39..7ea466e94e 100644 --- a/.github/workflows/per-lib-check.yml +++ b/.github/workflows/per-lib-check.yml @@ -45,23 +45,23 @@ jobs: auto-activate-base: false - name: Build lib ${{ matrix.library }} - # run: | - # export CUDNN_DIR=/usr/local/cuda - # export CUDA_DIR=/usr/local/cuda - # export FF_HOME=$(pwd) - # export FF_GPU_BACKEND=${{ matrix.gpu_backend }} - # export FF_CUDA_ARCH=70 - # cores_available=$(nproc --all) - # n_build_cores=$(( cores_available -1 )) - # if (( $n_build_cores < 1 )) ; then n_build_cores=1 ; fi - # mkdir build - # cd build - # #if [[ "${FF_GPU_BACKEND}" == "cuda" ]]; then - # # export FF_BUILD_ALL_EXAMPLES=ON - # # export FF_BUILD_UNIT_TESTS=ON - # #fi - # ../config/config.linux - # make -j $n_build_cores ${{ matrix.library }} + run: | + export CUDNN_DIR=/usr/local/cuda + export CUDA_DIR=/usr/local/cuda + export FF_HOME=$(pwd) + export FF_GPU_BACKEND=${{ matrix.gpu_backend }} + export FF_CUDA_ARCH=70 + cores_available=$(nproc --all) + n_build_cores=$(( cores_available -1 )) + if (( $n_build_cores < 1 )) ; then n_build_cores=1 ; fi + mkdir build + cd build + #if [[ "${FF_GPU_BACKEND}" == "cuda" ]]; then + # export FF_BUILD_ALL_EXAMPLES=ON + # export FF_BUILD_UNIT_TESTS=ON + #fi + ../config/config.linux + make -j $n_build_cores ${{ matrix.library }} From 7294f64ac451f11d2bd544f8278670b1a281e367 Mon Sep 17 00:00:00 2001 From: Colin Unger Date: Thu, 5 Oct 2023 20:00:45 -0700 Subject: [PATCH 21/31] Update fmt version --- deps/fmt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/deps/fmt b/deps/fmt index a33701196a..f5e54359df 160000 --- a/deps/fmt +++ b/deps/fmt @@ -1 +1 @@ -Subproject commit a33701196adfad74917046096bf5a2aa0ab0bb50 +Subproject commit f5e54359df4c26b6230fc61d38aa294581393084 From 4913fa887e657f1ba5bcb437721eadd103f0b4c3 Mon Sep 17 00:00:00 2001 From: Colin Unger Date: Thu, 5 Oct 2023 21:02:07 -0700 Subject: [PATCH 22/31] dematrix build job --- .github/workflows/clang-format-check.yml | 12 +++--------- .github/workflows/per-lib-check.yml | 24 +++++++++++++++++++----- 2 files changed, 22 insertions(+), 14 deletions(-) diff --git a/.github/workflows/clang-format-check.yml b/.github/workflows/clang-format-check.yml index fb93fd6b5b..2c1806d2c5 100644 --- a/.github/workflows/clang-format-check.yml +++ b/.github/workflows/clang-format-check.yml @@ -5,20 +5,14 @@ jobs: name: Formatting Check runs-on: ubuntu-latest strategy: - fail-fast: false + fail-fast: true matrix: path: - - check: "lib/compiler" - - check: "lib/ffi" - - check: "lib/kernels" - - check: "lib/op-attrs" - - check: "lib/pcg" - - check: "lib/runtime" - - check: "lib/substitutions" - - check: "lib/utils" + - check: "lib" - check: "tests" - check: "examples" - check: "bindings" + - check: "bin" steps: - uses: actions/checkout@v2 - name: Run clang-format style check for C/C++/Protobuf programs. diff --git a/.github/workflows/per-lib-check.yml b/.github/workflows/per-lib-check.yml index 7ea466e94e..393c21a3a1 100644 --- a/.github/workflows/per-lib-check.yml +++ b/.github/workflows/per-lib-check.yml @@ -15,7 +15,6 @@ jobs: max-parallel: 1 matrix: gpu_backend: ["cuda"] - library: ["runtime", "ffi","compiler","kernels","op-attrs","pcg","substitutions","utils"] fail-fast: false steps: - name: Checkout Git Repository @@ -44,15 +43,14 @@ jobs: environment-file: packaging/conda/environment.yml auto-activate-base: false - - name: Build lib ${{ matrix.library }} + - name: Run cmake run: | export CUDNN_DIR=/usr/local/cuda export CUDA_DIR=/usr/local/cuda export FF_HOME=$(pwd) export FF_GPU_BACKEND=${{ matrix.gpu_backend }} export FF_CUDA_ARCH=70 - cores_available=$(nproc --all) - n_build_cores=$(( cores_available -1 )) + n_build_cores=$(( $(nproc) cores_available -1 )) if (( $n_build_cores < 1 )) ; then n_build_cores=1 ; fi mkdir build cd build @@ -61,7 +59,23 @@ jobs: # export FF_BUILD_UNIT_TESTS=ON #fi ../config/config.linux - make -j $n_build_cores ${{ matrix.library }} + - name: Build utils + run: | + cd build + make -j $(( $(nproc) < 2 ? 1 : $(nproc)-1 )) utils + - name: Build op-attrs + run: | + cd build + make -j $(( $(nproc) < 2 ? 1 : $(nproc)-1 )) utils + - name: Build pcg + run: | + cd build + make -j $(( $(nproc) < 2 ? 1 : $(nproc)-1 )) utils + + - name: Build kernels + run: | + cd build + make -j $(( $(nproc) < 2 ? 1 : $(nproc)-1 )) utils From d629b82e324c70c00c58a3d3fa4be8199040f84a Mon Sep 17 00:00:00 2001 From: Colin Unger Date: Thu, 5 Oct 2023 21:26:14 -0700 Subject: [PATCH 23/31] Ci fixes --- .github/workflows/build-skip.yml | 32 ---- .github/workflows/clang-format-check.yml | 1 + .github/workflows/docker-build-skip.yml | 24 --- .github/workflows/gpu-ci-daemon.yml | 38 ----- .github/workflows/gpu-ci-skip.yml | 38 ----- .github/workflows/multinode-test.yml | 191 ----------------------- .github/workflows/per-lib-check.yml | 32 ++-- .github/workflows/pip-install-skip.yml | 21 --- 8 files changed, 17 insertions(+), 360 deletions(-) delete mode 100644 .github/workflows/build-skip.yml delete mode 100644 .github/workflows/docker-build-skip.yml delete mode 100644 .github/workflows/gpu-ci-daemon.yml delete mode 100644 .github/workflows/gpu-ci-skip.yml delete mode 100644 .github/workflows/multinode-test.yml delete mode 100644 .github/workflows/pip-install-skip.yml diff --git a/.github/workflows/build-skip.yml b/.github/workflows/build-skip.yml deleted file mode 100644 index e3e23f13f8..0000000000 --- a/.github/workflows/build-skip.yml +++ /dev/null @@ -1,32 +0,0 @@ -name: "build" -on: - pull_request: - paths-ignore: - - "include/**" - - "cmake/**" - - "config/**" - - "python/**" - - "src/**" - - ".github/workflows/helpers/install_dependencies.sh" - - ".github/workflows/build.yml" - workflow_dispatch: -concurrency: - group: build-skip-${{ github.head_ref || github.run_id }} - cancel-in-progress: true - -jobs: - cmake-build: - name: Build FlexFlow with CMake - runs-on: ubuntu-20.04 - strategy: - matrix: - gpu_backend: ["cuda"] - fail-fast: false - steps: - - run: 'echo "No build required"' - - makefile-build: - name: Build FlexFlow with the Makefile - runs-on: ubuntu-20.04 - steps: - - run: 'echo "No build required"' diff --git a/.github/workflows/clang-format-check.yml b/.github/workflows/clang-format-check.yml index 2c1806d2c5..672644388c 100644 --- a/.github/workflows/clang-format-check.yml +++ b/.github/workflows/clang-format-check.yml @@ -13,6 +13,7 @@ jobs: - check: "examples" - check: "bindings" - check: "bin" + exclude: '\.proto$' steps: - uses: actions/checkout@v2 - name: Run clang-format style check for C/C++/Protobuf programs. diff --git a/.github/workflows/docker-build-skip.yml b/.github/workflows/docker-build-skip.yml deleted file mode 100644 index 9fb3f71f48..0000000000 --- a/.github/workflows/docker-build-skip.yml +++ /dev/null @@ -1,24 +0,0 @@ -name: "docker-build" -on: - pull_request: - paths-ignore: - - "docker/**" - - "!docker/README.md" - - ".github/workflows/docker-build.yml" - workflow_dispatch: - -# Cancel outdated workflows if they are still running -concurrency: - group: docker-build-skip-${{ github.head_ref || github.run_id }} - cancel-in-progress: true - -jobs: - docker-build: - name: Build and Install FlexFlow in a Docker Container - runs-on: ubuntu-20.04 - strategy: - matrix: - gpu_backend: ["cuda"] - fail-fast: false - steps: - - run: 'echo "No docker-build required"' diff --git a/.github/workflows/gpu-ci-daemon.yml b/.github/workflows/gpu-ci-daemon.yml deleted file mode 100644 index 603b44c34e..0000000000 --- a/.github/workflows/gpu-ci-daemon.yml +++ /dev/null @@ -1,38 +0,0 @@ -name: "gpu-ci-daemon" -on: - schedule: - # Run every 5 mins - - cron: "*/5 * * * *" - workflow_dispatch: - -concurrency: - group: gpu-ci-daemon-${{ github.head_ref || github.run_id }} - cancel-in-progress: true - -jobs: - gpu-ci-daemon: - name: GPU CI Daemon - # Prevent Github from running the workflow on forks - if: github.repository_owner == 'flexflow' - runs-on: ubuntu-20.04 - env: - FLEXFLOW_TOKEN: ${{ secrets.FLEXFLOW_TOKEN }} - FLEXFLOW_RUNNER_INSTANCE_ID: ${{ secrets.FLEXFLOW_RUNNER_INSTANCE_ID }} - - steps: - - name: Checkout Git Repository - uses: actions/checkout@v3 - - - name: Configure AWS Credentials - uses: aws-actions/configure-aws-credentials@v1 - with: - aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }} - aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }} - aws-region: us-east-2 - - - name: Run daemon - run: | - pip3 install pip --upgrade - pip3 install pyopenssl --upgrade - pip3 install pygithub - python3 .github/workflows/helpers/gpu_ci_helper.py --daemon diff --git a/.github/workflows/gpu-ci-skip.yml b/.github/workflows/gpu-ci-skip.yml deleted file mode 100644 index 012302a57f..0000000000 --- a/.github/workflows/gpu-ci-skip.yml +++ /dev/null @@ -1,38 +0,0 @@ -name: "gpu-ci" -on: - pull_request: - paths-ignore: - - "cmake/**" - - "config/**" - - "python/**" - - "setup.py" - - "include/**" - - "src/**" - - ".github/workflows/gpu-ci.yml" - - "tests/multi_gpu_tests.sh" - workflow_dispatch: - -concurrency: - group: gpu-ci-skip-${{ github.head_ref || github.run_id }} - cancel-in-progress: true - -jobs: - gpu-ci-concierge: - name: GPU CI Concierge - runs-on: ubuntu-20.04 - steps: - - run: 'echo "No gpu-ci required"' - - python-interface-check: - name: Check Python Interface - runs-on: ubuntu-20.04 - needs: gpu-ci-concierge - steps: - - run: 'echo "No gpu-ci required"' - - gpu-ci-flexflow: - name: Single Machine, Multiple GPUs Tests - runs-on: ubuntu-20.04 - needs: gpu-ci-concierge - steps: - - run: 'echo "No gpu-ci required"' diff --git a/.github/workflows/multinode-test.yml b/.github/workflows/multinode-test.yml deleted file mode 100644 index cfe3629211..0000000000 --- a/.github/workflows/multinode-test.yml +++ /dev/null @@ -1,191 +0,0 @@ -name: "multinode-test" -on: - schedule: - # Run every other day (Monday, Wednesday, Friday, and Saturday) at midnight PT (3am ET / 8am UTC) - - cron: "0 8 * * 1,3,5,6" - workflow_dispatch: - -concurrency: - group: multinode-test-${{ github.head_ref || github.run_id }} - cancel-in-progress: true - -jobs: - gpu-ci-concierge: - name: GPU CI Concierge - # Prevent Github from running the workflow on forks - if: github.repository_owner == 'flexflow' - runs-on: ubuntu-20.04 - env: - FLEXFLOW_TOKEN: ${{ secrets.GITHUB_TOKEN }} - steps: - - name: Checkout Git Repository - uses: actions/checkout@v3 - - - name: Wait for daemon to be done - run: | - pip3 install pip --upgrade - pip3 install pyopenssl --upgrade - pip3 install pygithub - python3 .github/workflows/helpers/gpu_ci_helper.py - - multinode-gpu-test-mpi: - name: Multinode GPU Test with MPI - # Prevent Github from running the workflow on forks - if: github.repository_owner == 'flexflow' - runs-on: self-hosted - needs: gpu-ci-concierge - # 10h timeout, instead of default of 360min (6h) - timeout-minutes: 600 - container: - image: ghcr.io/flexflow/flexflow-environment-cuda:latest - options: --gpus all --shm-size=8192m - steps: - - name: Install updated git version - run: sudo add-apt-repository ppa:git-core/ppa -y && sudo apt update -y && sudo apt install -y --no-install-recommends git - - - name: Checkout Git Repository - uses: actions/checkout@v3 - with: - submodules: recursive - - - name: Install MPI - run: sudo apt-get install -y --no-install-recommends openmpi-bin openmpi-common libopenmpi-dev - - - name: Build and Install FlexFlow - run: | - export PATH=/opt/conda/bin:$PATH - export FF_HOME=$(pwd) - export FF_LEGION_NETWORKS=gasnet - export FF_GASNET_CONDUIT=mpi - pip install . --verbose - - - name: Check FlexFlow Python interface (pip) - run: | - export FF_HOME=$(pwd) - export PATH=/opt/conda/bin:$PATH - export FF_HOME=$(pwd) - export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/opt/conda/lib - ./tests/python_interface_test.sh after-installation - - - name: Run multi-gpu tests - run: | - export PATH=/opt/conda/bin:$PATH - export CUDNN_DIR=/usr/local/cuda - export CUDA_DIR=/usr/local/cuda - export FF_HOME=$(pwd) - export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/opt/conda/lib - export OMPI_ALLOW_RUN_AS_ROOT=1 - export OMPI_ALLOW_RUN_AS_ROOT_CONFIRM=1 - export OMPI_MCA_btl_vader_single_copy_mechanism=none - ./tests/multi_gpu_tests.sh 2 2 - - multinode-gpu-test-ucx: - name: Multinode GPU Test with UCX - # Prevent Github from running the workflow on forks - if: github.repository_owner == 'flexflow' - runs-on: self-hosted - needs: gpu-ci-concierge - container: - image: ghcr.io/flexflow/flexflow-environment-cuda:latest - options: --gpus all --shm-size=8192m - # 10h timeout, instead of default of 360min (6h) - timeout-minutes: 600 - steps: - - name: Install updated git version - run: sudo add-apt-repository ppa:git-core/ppa -y && sudo apt update -y && sudo apt install -y --no-install-recommends git - - - name: Checkout Git Repository - uses: actions/checkout@v3 - with: - submodules: recursive - - - name: Install MPI - run: sudo apt-get install -y --no-install-recommends openmpi-bin openmpi-common libopenmpi-dev - - - name: Build and Install FlexFlow - run: | - export PATH=/opt/conda/bin:$PATH - export FF_HOME=$(pwd) - export FF_LEGION_NETWORKS=gasnet - export FF_GASNET_CONDUIT=ucx - pip install . --verbose - - - name: Check FlexFlow Python interface (pip) - run: | - export FF_HOME=$(pwd) - export PATH=/opt/conda/bin:$PATH - export FF_HOME=$(pwd) - export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/opt/conda/lib - ./tests/python_interface_test.sh after-installation - - - name: Run multi-gpu tests - run: | - export PATH=/opt/conda/bin:$PATH - export CUDNN_DIR=/usr/local/cuda - export CUDA_DIR=/usr/local/cuda - export FF_HOME=$(pwd) - export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/opt/conda/lib - export OMPI_ALLOW_RUN_AS_ROOT=1 - export OMPI_ALLOW_RUN_AS_ROOT_CONFIRM=1 - export OMPI_MCA_btl_vader_single_copy_mechanism=none - ./tests/multi_gpu_tests.sh 2 2 - - multinode-gpu-test-native-ucx: - name: Multinode GPU Test with native UCX - # Prevent Github from running the workflow on forks - if: github.repository_owner == 'flexflow' - runs-on: self-hosted - needs: gpu-ci-concierge - container: - image: ghcr.io/flexflow/flexflow-environment-cuda:latest - options: --gpus all --shm-size=8192m - steps: - - name: Install updated git version - run: sudo add-apt-repository ppa:git-core/ppa -y && sudo apt update -y && sudo apt install -y --no-install-recommends git - - - name: Checkout Git Repository - uses: actions/checkout@v3 - with: - submodules: recursive - - - name: Install MPI - run: sudo apt-get install -y --no-install-recommends openmpi-bin openmpi-common libopenmpi-dev - - - name: Build and Install FlexFlow - run: | - export PATH=/opt/conda/bin:$PATH - export FF_HOME=$(pwd) - export FF_LEGION_NETWORKS=ucx - pip install . --verbose - - - name: Check FlexFlow Python interface (pip) - run: | - export FF_HOME=$(pwd) - export PATH=/opt/conda/bin:$PATH - export FF_HOME=$(pwd) - export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/opt/conda/lib - ./tests/python_interface_test.sh after-installation - - - name: Run multi-gpu tests - run: | - export PATH=/opt/conda/bin:$PATH - export CUDNN_DIR=/usr/local/cuda - export CUDA_DIR=/usr/local/cuda - export FF_HOME=$(pwd) - export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/opt/conda/lib - export OMPI_ALLOW_RUN_AS_ROOT=1 - export OMPI_ALLOW_RUN_AS_ROOT_CONFIRM=1 - export OMPI_MCA_btl_vader_single_copy_mechanism=none - ./tests/multi_gpu_tests.sh 2 2 - - notify-slack: - name: Notify Slack in case of failure - runs-on: ubuntu-20.04 - needs: [multinode-gpu-test-mpi, multinode-gpu-test-ucx, multinode-gpu-test-native-ucx] - if: ${{ failure() && github.event_name == 'schedule' }} - steps: - - name: Send Slack message - env: - SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }} - run: | - curl -X POST -H 'Content-type: application/json' --data "{\"text\":\"Weekly multinode GPU test failed! :x: \"}" $SLACK_WEBHOOK diff --git a/.github/workflows/per-lib-check.yml b/.github/workflows/per-lib-check.yml index 393c21a3a1..8ff2503db1 100644 --- a/.github/workflows/per-lib-check.yml +++ b/.github/workflows/per-lib-check.yml @@ -60,22 +60,22 @@ jobs: #fi ../config/config.linux - - name: Build utils - run: | - cd build - make -j $(( $(nproc) < 2 ? 1 : $(nproc)-1 )) utils + - name: Build utils + run: | + cd build + make -j $(( $(nproc) < 2 ? 1 : $(nproc)-1 )) utils - - name: Build op-attrs - run: | - cd build - make -j $(( $(nproc) < 2 ? 1 : $(nproc)-1 )) utils + - name: Build op-attrs + run: | + cd build + make -j $(( $(nproc) < 2 ? 1 : $(nproc)-1 )) utils - - name: Build pcg - run: | - cd build - make -j $(( $(nproc) < 2 ? 1 : $(nproc)-1 )) utils + - name: Build pcg + run: | + cd build + make -j $(( $(nproc) < 2 ? 1 : $(nproc)-1 )) utils - - name: Build kernels - run: | - cd build - make -j $(( $(nproc) < 2 ? 1 : $(nproc)-1 )) utils + - name: Build kernels + run: | + cd build + make -j $(( $(nproc) < 2 ? 1 : $(nproc)-1 )) utils diff --git a/.github/workflows/pip-install-skip.yml b/.github/workflows/pip-install-skip.yml deleted file mode 100644 index 68b1afb9ff..0000000000 --- a/.github/workflows/pip-install-skip.yml +++ /dev/null @@ -1,21 +0,0 @@ -name: "pip-install" -on: - pull_request: - paths-ignore: - - "cmake/**" - - "config/**" - - "python/**" - - "setup.py" - - ".github/workflows/helpers/install_dependencies.sh" - - ".github/workflows/pip-install.yml" - workflow_dispatch: -concurrency: - group: pip-install-skip-${{ github.head_ref || github.run_id }} - cancel-in-progress: true - -jobs: - pip-install-flexflow: - name: Install FlexFlow with pip - runs-on: ubuntu-20.04 - steps: - - run: 'echo "No pip-install required"' From b33de04e20b53e272ea8205b17a2667874ce65d0 Mon Sep 17 00:00:00 2001 From: Colin Unger Date: Thu, 5 Oct 2023 21:35:38 -0700 Subject: [PATCH 24/31] Remove unnecessary conda install --- .github/workflows/per-lib-check.yml | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/.github/workflows/per-lib-check.yml b/.github/workflows/per-lib-check.yml index 8ff2503db1..bf0c783f3f 100644 --- a/.github/workflows/per-lib-check.yml +++ b/.github/workflows/per-lib-check.yml @@ -36,12 +36,12 @@ jobs: - name: Install system dependencies run: FF_GPU_BACKEND=${{ matrix.gpu_backend }} .github/workflows/helpers/install_dependencies.sh - - name: Install conda and FlexFlow dependencies - uses: conda-incubator/setup-miniconda@v2 - with: - activate-environment: flexflow - environment-file: packaging/conda/environment.yml - auto-activate-base: false + # - name: Install conda and FlexFlow dependencies + # uses: conda-incubator/setup-miniconda@v2 + # with: + # activate-environment: flexflow + # environment-file: packaging/conda/environment.yml + # auto-activate-base: false - name: Run cmake run: | From dc8b2ce275135f3577eda007b0d752d328fd349c Mon Sep 17 00:00:00 2001 From: Colin Unger Date: Thu, 5 Oct 2023 21:41:36 -0700 Subject: [PATCH 25/31] Enable ccache CI --- .github/workflows/per-lib-check.yml | 5 ++++- config/config.linux | 13 ++----------- 2 files changed, 6 insertions(+), 12 deletions(-) diff --git a/.github/workflows/per-lib-check.yml b/.github/workflows/per-lib-check.yml index bf0c783f3f..38f03f2172 100644 --- a/.github/workflows/per-lib-check.yml +++ b/.github/workflows/per-lib-check.yml @@ -33,6 +33,9 @@ jobs: # Disable caching of the CUDA binaries, since it does not give us any significant performance improvement use-github-cache: "false" + - name: ccache + uses: hendrikmuhs/ccache-action@v1.2 + - name: Install system dependencies run: FF_GPU_BACKEND=${{ matrix.gpu_backend }} .github/workflows/helpers/install_dependencies.sh @@ -58,7 +61,7 @@ jobs: # export FF_BUILD_ALL_EXAMPLES=ON # export FF_BUILD_UNIT_TESTS=ON #fi - ../config/config.linux + ../config/config.linux -DCMAKE_C_COMPILER_LAUNCHER=ccache -DCMAKE_CXX_COMPILER_LAUNCHER=ccache - name: Build utils run: | diff --git a/config/config.linux b/config/config.linux index a0771e1271..2b87ec0eb5 100755 --- a/config/config.linux +++ b/config/config.linux @@ -79,14 +79,5 @@ function get_build_configs() { BUILD_CONFIGS="FF_CUDA_ARCH=${FF_CUDA_ARCH} CUDNN_DIR=${CUDNN_DIR} CUDA_DIR=${CUDA_DIR} FF_USE_PYTHON=${FF_USE_PYTHON} FF_GASNET_CONDUIT=${FF_GASNET_CONDUIT} FF_UCX_URL=${FF_UCX_URL} FF_LEGION_NETWORKS=${FF_LEGION_NETWORKS} FF_BUILD_ALL_EXAMPLES=${FF_BUILD_ALL_EXAMPLES} FF_BUILD_UNIT_TESTS=${FF_BUILD_UNIT_TESTS} FF_USE_PREBUILT_NCCL=${FF_USE_PREBUILT_NCCL} FF_USE_PREBUILT_LEGION=${FF_USE_PREBUILT_LEGION} FF_USE_ALL_PREBUILT_LIBRARIES=${FF_USE_ALL_PREBUILT_LIBRARIES} FF_USE_AVX2=${FF_USE_AVX2} FF_MAX_DIM=${FF_MAX_DIM} ROCM_PATH=${ROCM_PATH} FF_GPU_BACKEND=${FF_GPU_BACKEND}" } -if [ -n "$1" ]; then - if [ "$1" != "get-docker-configs" ]; then - . $(dirname $0)/config.inc - # You can pass the name of the variable you want to print out as $1. This - # is used in the python setup script to get the cmake config - echo "${!1}" - fi -else - . $(dirname $0)/config.inc - run_cmake $* -fi +. $(dirname $0)/config.inc +run_cmake $* From 3d478fcb8ae05cc9741981650b332754e5a5a97c Mon Sep 17 00:00:00 2001 From: Colin Unger Date: Thu, 5 Oct 2023 21:44:46 -0700 Subject: [PATCH 26/31] Actually build more than just utils --- .github/workflows/per-lib-check.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/per-lib-check.yml b/.github/workflows/per-lib-check.yml index 38f03f2172..3004303ecd 100644 --- a/.github/workflows/per-lib-check.yml +++ b/.github/workflows/per-lib-check.yml @@ -71,14 +71,14 @@ jobs: - name: Build op-attrs run: | cd build - make -j $(( $(nproc) < 2 ? 1 : $(nproc)-1 )) utils + make -j $(( $(nproc) < 2 ? 1 : $(nproc)-1 )) op-attrs - name: Build pcg run: | cd build - make -j $(( $(nproc) < 2 ? 1 : $(nproc)-1 )) utils + make -j $(( $(nproc) < 2 ? 1 : $(nproc)-1 )) pcg - name: Build kernels run: | cd build - make -j $(( $(nproc) < 2 ? 1 : $(nproc)-1 )) utils + make -j $(( $(nproc) < 2 ? 1 : $(nproc)-1 )) kernels From 1809c227c9792d86a12c8015faa714a954405188 Mon Sep 17 00:00:00 2001 From: Colin Unger Date: Thu, 5 Oct 2023 21:56:34 -0700 Subject: [PATCH 27/31] Try to speed up cuda install --- .github/workflows/per-lib-check.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/per-lib-check.yml b/.github/workflows/per-lib-check.yml index 3004303ecd..14b530ebfd 100644 --- a/.github/workflows/per-lib-check.yml +++ b/.github/workflows/per-lib-check.yml @@ -31,7 +31,8 @@ jobs: with: cuda: "12.1.0" # Disable caching of the CUDA binaries, since it does not give us any significant performance improvement - use-github-cache: "false" + use-github-cache: "true" + linux-local-args: ["--toolkit"] - name: ccache uses: hendrikmuhs/ccache-action@v1.2 From 81f08775c522df6c1d42abf2b8e432709e07111b Mon Sep 17 00:00:00 2001 From: Colin Unger Date: Thu, 5 Oct 2023 21:58:00 -0700 Subject: [PATCH 28/31] Fix invalid yaml --- .github/workflows/per-lib-check.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/per-lib-check.yml b/.github/workflows/per-lib-check.yml index 14b530ebfd..76b9d23ae2 100644 --- a/.github/workflows/per-lib-check.yml +++ b/.github/workflows/per-lib-check.yml @@ -32,7 +32,7 @@ jobs: cuda: "12.1.0" # Disable caching of the CUDA binaries, since it does not give us any significant performance improvement use-github-cache: "true" - linux-local-args: ["--toolkit"] + linux-local-args: '["--toolkit"]' - name: ccache uses: hendrikmuhs/ccache-action@v1.2 From d83226a25ab704c66fb31ecf4e05703832be5304 Mon Sep 17 00:00:00 2001 From: Colin Unger Date: Thu, 5 Oct 2023 22:01:21 -0700 Subject: [PATCH 29/31] Try to speed up nvcc builds --- .github/workflows/per-lib-check.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/per-lib-check.yml b/.github/workflows/per-lib-check.yml index 76b9d23ae2..d93a60dcfd 100644 --- a/.github/workflows/per-lib-check.yml +++ b/.github/workflows/per-lib-check.yml @@ -62,7 +62,7 @@ jobs: # export FF_BUILD_ALL_EXAMPLES=ON # export FF_BUILD_UNIT_TESTS=ON #fi - ../config/config.linux -DCMAKE_C_COMPILER_LAUNCHER=ccache -DCMAKE_CXX_COMPILER_LAUNCHER=ccache + ../config/config.linux -DCMAKE_C_COMPILER_LAUNCHER=ccache -DCMAKE_CXX_COMPILER_LAUNCHER=ccache -DCMAKE_CUDA_COMPILER_LAUNCHER=ccache - name: Build utils run: | From 55f99ccbf9038edf6cde0a9bca026322c6bd8138 Mon Sep 17 00:00:00 2001 From: Colin Unger Date: Thu, 5 Oct 2023 22:15:29 -0700 Subject: [PATCH 30/31] Hopefully enable prebuilt nccl --- CMakeLists.txt | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/CMakeLists.txt b/CMakeLists.txt index 3bbdf13b22..418a2a7538 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -3,6 +3,18 @@ project(FlexFlow) set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} ${CMAKE_CURRENT_LIST_DIR}/cmake) +# Detect OS type and Linux version (if it applies) +set(LINUX_VERSION "") +if(${CMAKE_SYSTEM_NAME} MATCHES "Linux") + find_program(LSB_RELEASE_EXEC lsb_release) + if(LSB_RELEASE_EXEC) + execute_process(COMMAND ${LSB_RELEASE_EXEC} -r --short + OUTPUT_VARIABLE LINUX_VERSION + OUTPUT_STRIP_TRAILING_WHITESPACE) + message(STATUS "Linux Version: ${LINUX_VERSION}") + endif() +endif() + set(FF_MAX_DIM "5" CACHE STRING "Maximum tensor order") set(FF_MAX_OPNAME "128" CACHE STRING "Maximum op name length") set(FF_MAX_NUM_OUTPUTS "256" CACHE STRING "Maximum number of outputs (per operator)") From af693020aaba3045946a6677ba3eca765bddd298 Mon Sep 17 00:00:00 2001 From: Colin Unger Date: Thu, 5 Oct 2023 22:46:38 -0700 Subject: [PATCH 31/31] Disable cuda cache --- .github/workflows/per-lib-check.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/per-lib-check.yml b/.github/workflows/per-lib-check.yml index d93a60dcfd..f21621b265 100644 --- a/.github/workflows/per-lib-check.yml +++ b/.github/workflows/per-lib-check.yml @@ -31,7 +31,7 @@ jobs: with: cuda: "12.1.0" # Disable caching of the CUDA binaries, since it does not give us any significant performance improvement - use-github-cache: "true" + use-github-cache: "false" linux-local-args: '["--toolkit"]' - name: ccache