From 3c532184f95e83d2fbc7c21f56c970d6e9f05470 Mon Sep 17 00:00:00 2001 From: Andres Quan Date: Mon, 21 Aug 2023 14:22:23 -0600 Subject: [PATCH 01/14] Add a per-lib-check github action --- .github/workflows/per-lib-check.yml | 84 +++++++++++++++++++++++++++++ 1 file changed, 84 insertions(+) create mode 100644 .github/workflows/per-lib-check.yml diff --git a/.github/workflows/per-lib-check.yml b/.github/workflows/per-lib-check.yml new file mode 100644 index 0000000000..cc25e14da9 --- /dev/null +++ b/.github/workflows/per-lib-check.yml @@ -0,0 +1,84 @@ +name: "per-lib-checks" +on: + pull_request: + paths: + - "include/**" + - "cmake/**" + - "config/**" + - "python/**" + - "src/**" + - ".github/workflows/helpers/install_dependencies.sh" + - ".github/workflows/build.yml" + push: + branches: + - "master" + paths: + - "include/**" + - "cmake/**" + - "config/**" + - "python/**" + - "src/**" + - ".github/workflows/helpers/install_dependencies.sh" + - ".github/workflows/build.yml" + workflow_dispatch: +concurrency: + group: build-${{ github.head_ref || github.run_id }} + cancel-in-progress: true + +jobs: + cmake-build: + name: Build FlexFlow with CMake + runs-on: ubuntu-20.04 + defaults: + run: + shell: bash -l {0} # required to use an activated conda environment + strategy: + matrix: + gpu_backend: ["cuda", "hip_rocm"] + fail-fast: false + steps: + - name: Checkout Git Repository + uses: actions/checkout@v3 + with: + submodules: recursive + + - name: Free additional space on runner + run: .github/workflows/helpers/free_space_on_runner.sh + + - name: Install CUDA + uses: Jimver/cuda-toolkit@v0.2.8 + id: cuda-toolkit + with: + cuda: "11.1.1" + # Disable caching of the CUDA binaries, since it does not give us any significant performance improvement + use-github-cache: "false" + + - name: Install system dependencies + run: FF_GPU_BACKEND=${{ matrix.gpu_backend }} .github/workflows/helpers/install_dependencies.sh + + - name: Install conda and FlexFlow dependencies + uses: conda-incubator/setup-miniconda@v2 + with: + activate-environment: flexflow + environment-file: conda/environment.yml + auto-activate-base: false + + - name: Build FFI libs + run: | + export CUDNN_DIR=/usr/local/cuda + export CUDA_DIR=/usr/local/cuda + export FF_HOME=$(pwd) + export FF_GPU_BACKEND=${{ matrix.gpu_backend }} + export FF_CUDA_ARCH=70 + cores_available=$(nproc --all) + n_build_cores=$(( cores_available -1 )) + if (( $n_build_cores < 1 )) ; then n_build_cores=1 ; fi + mkdir build + cd build + if [[ "${FF_GPU_BACKEND}" == "cuda" ]]; then + export FF_BUILD_ALL_EXAMPLES=ON + export FF_BUILD_UNIT_TESTS=ON + fi + ../config/config.linux + make ffi -j $n_build_cores + From b703a075e69384c0a8712dace43821f88bac7e84 Mon Sep 17 00:00:00 2001 From: Andres Quan Date: Mon, 21 Aug 2023 14:25:19 -0600 Subject: [PATCH 02/14] per lib checks trigger on push for any branch, not just master --- .github/workflows/per-lib-check.yml | 2 -- 1 file changed, 2 deletions(-) diff --git a/.github/workflows/per-lib-check.yml b/.github/workflows/per-lib-check.yml index cc25e14da9..5eaa16ca00 100644 --- a/.github/workflows/per-lib-check.yml +++ b/.github/workflows/per-lib-check.yml @@ -10,8 +10,6 @@ on: - ".github/workflows/helpers/install_dependencies.sh" - ".github/workflows/build.yml" push: - branches: - - "master" paths: - "include/**" - "cmake/**" From d6689d8b266eaac54c49f2861be43a5bdb391ed5 Mon Sep 17 00:00:00 2001 From: Andres Quan Date: Mon, 21 Aug 2023 14:28:11 -0600 Subject: [PATCH 03/14] Attempt two on trying to fix workflow trigger --- .github/workflows/per-lib-check.yml | 21 +-------------------- 1 file changed, 1 insertion(+), 20 deletions(-) diff --git a/.github/workflows/per-lib-check.yml b/.github/workflows/per-lib-check.yml index 5eaa16ca00..0556c51ca4 100644 --- a/.github/workflows/per-lib-check.yml +++ b/.github/workflows/per-lib-check.yml @@ -1,24 +1,5 @@ name: "per-lib-checks" -on: - pull_request: - paths: - - "include/**" - - "cmake/**" - - "config/**" - - "python/**" - - "src/**" - - ".github/workflows/helpers/install_dependencies.sh" - - ".github/workflows/build.yml" - push: - paths: - - "include/**" - - "cmake/**" - - "config/**" - - "python/**" - - "src/**" - - ".github/workflows/helpers/install_dependencies.sh" - - ".github/workflows/build.yml" - workflow_dispatch: +on: [push, pull_request, workflow_dispatch] concurrency: group: build-${{ github.head_ref || github.run_id }} cancel-in-progress: true From 772e2e480c312291166ace186eb200ab215fdb7f Mon Sep 17 00:00:00 2001 From: Andres Quan Date: Mon, 21 Aug 2023 14:44:48 -0600 Subject: [PATCH 04/14] Fix conda's environment.yml location --- .github/workflows/per-lib-check.yml | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/.github/workflows/per-lib-check.yml b/.github/workflows/per-lib-check.yml index 0556c51ca4..ee56c848ad 100644 --- a/.github/workflows/per-lib-check.yml +++ b/.github/workflows/per-lib-check.yml @@ -39,7 +39,7 @@ jobs: uses: conda-incubator/setup-miniconda@v2 with: activate-environment: flexflow - environment-file: conda/environment.yml + environment-file: packaging/conda/environment.yml auto-activate-base: false - name: Build FFI libs @@ -61,3 +61,5 @@ jobs: ../config/config.linux make ffi -j $n_build_cores + + From a8d9d2a38e1a0a507114299f25b4505bad2af3ce Mon Sep 17 00:00:00 2001 From: Andres Quan Date: Mon, 21 Aug 2023 20:46:49 -0600 Subject: [PATCH 05/14] WIP: Testing with op-attrs --- .github/workflows/per-lib-check.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/per-lib-check.yml b/.github/workflows/per-lib-check.yml index ee56c848ad..f3a93c0372 100644 --- a/.github/workflows/per-lib-check.yml +++ b/.github/workflows/per-lib-check.yml @@ -42,7 +42,7 @@ jobs: environment-file: packaging/conda/environment.yml auto-activate-base: false - - name: Build FFI libs + - name: Build op-attrs libs run: | export CUDNN_DIR=/usr/local/cuda export CUDA_DIR=/usr/local/cuda @@ -59,7 +59,7 @@ jobs: export FF_BUILD_UNIT_TESTS=ON fi ../config/config.linux - make ffi -j $n_build_cores + make -j $n_build_cores op-attrs From ff865c059178216cc6f12d232effba5ab372ba8b Mon Sep 17 00:00:00 2001 From: Andres Quan Date: Mon, 21 Aug 2023 22:05:44 -0600 Subject: [PATCH 06/14] Add other lib directories --- .github/workflows/per-lib-check.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/per-lib-check.yml b/.github/workflows/per-lib-check.yml index f3a93c0372..9020d6732d 100644 --- a/.github/workflows/per-lib-check.yml +++ b/.github/workflows/per-lib-check.yml @@ -14,6 +14,7 @@ jobs: strategy: matrix: gpu_backend: ["cuda", "hip_rocm"] + library: ["compiler","ffi","kernels","op-attrs","pcg","runtime","substitutions","triton","utils"] fail-fast: false steps: - name: Checkout Git Repository @@ -59,7 +60,7 @@ jobs: export FF_BUILD_UNIT_TESTS=ON fi ../config/config.linux - make -j $n_build_cores op-attrs + make -j $n_build_cores ${{ matrix.library }} From b2e121d4d1000cfa9159c367f4f673a1f392024d Mon Sep 17 00:00:00 2001 From: Andres Quan Date: Mon, 21 Aug 2023 22:58:58 -0600 Subject: [PATCH 07/14] Use cuda 12 --- .github/workflows/per-lib-check.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/per-lib-check.yml b/.github/workflows/per-lib-check.yml index 9020d6732d..8cedb8c8e0 100644 --- a/.github/workflows/per-lib-check.yml +++ b/.github/workflows/per-lib-check.yml @@ -13,7 +13,7 @@ jobs: shell: bash -l {0} # required to use an activated conda environment strategy: matrix: - gpu_backend: ["cuda", "hip_rocm"] + gpu_backend: ["cuda"] library: ["compiler","ffi","kernels","op-attrs","pcg","runtime","substitutions","triton","utils"] fail-fast: false steps: @@ -29,7 +29,7 @@ jobs: uses: Jimver/cuda-toolkit@v0.2.8 id: cuda-toolkit with: - cuda: "11.1.1" + cuda: "12.0.0" # Disable caching of the CUDA binaries, since it does not give us any significant performance improvement use-github-cache: "false" From a6c0d8d692ec292496fb0c0d8340171a04670ef1 Mon Sep 17 00:00:00 2001 From: Andres Quan Date: Tue, 22 Aug 2023 09:31:33 -0600 Subject: [PATCH 08/14] update cuda toolkit version --- .github/workflows/per-lib-check.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/per-lib-check.yml b/.github/workflows/per-lib-check.yml index 8cedb8c8e0..b1c072193a 100644 --- a/.github/workflows/per-lib-check.yml +++ b/.github/workflows/per-lib-check.yml @@ -26,10 +26,10 @@ jobs: run: .github/workflows/helpers/free_space_on_runner.sh - name: Install CUDA - uses: Jimver/cuda-toolkit@v0.2.8 + uses: Jimver/cuda-toolkit@v0.2.11 id: cuda-toolkit with: - cuda: "12.0.0" + cuda: "12.1.0" # Disable caching of the CUDA binaries, since it does not give us any significant performance improvement use-github-cache: "false" From 29cd459835f281382874e97dd5201491389e3d07 Mon Sep 17 00:00:00 2001 From: Andres Quan Date: Tue, 22 Aug 2023 09:39:10 -0600 Subject: [PATCH 09/14] Fix job names --- .github/workflows/per-lib-check.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/per-lib-check.yml b/.github/workflows/per-lib-check.yml index b1c072193a..846d658ccf 100644 --- a/.github/workflows/per-lib-check.yml +++ b/.github/workflows/per-lib-check.yml @@ -6,14 +6,14 @@ concurrency: jobs: cmake-build: - name: Build FlexFlow with CMake + name: FlexFlow Library build with CMake runs-on: ubuntu-20.04 defaults: run: shell: bash -l {0} # required to use an activated conda environment strategy: matrix: - gpu_backend: ["cuda"] + gpu_backend: ["cuda","hip_rocm"] library: ["compiler","ffi","kernels","op-attrs","pcg","runtime","substitutions","triton","utils"] fail-fast: false steps: @@ -43,7 +43,7 @@ jobs: environment-file: packaging/conda/environment.yml auto-activate-base: false - - name: Build op-attrs libs + - name: Build lib ${{ matrix.library }} run: | export CUDNN_DIR=/usr/local/cuda export CUDA_DIR=/usr/local/cuda From 8dfe79d08c086dc86c322e7f8cee9cc262023fb8 Mon Sep 17 00:00:00 2001 From: Andres Quan Date: Tue, 22 Aug 2023 09:46:34 -0600 Subject: [PATCH 10/14] Shorten job name so that variant is readable --- .github/workflows/per-lib-check.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/per-lib-check.yml b/.github/workflows/per-lib-check.yml index 846d658ccf..1d8d2f847c 100644 --- a/.github/workflows/per-lib-check.yml +++ b/.github/workflows/per-lib-check.yml @@ -6,7 +6,7 @@ concurrency: jobs: cmake-build: - name: FlexFlow Library build with CMake + name: Library CMake Build runs-on: ubuntu-20.04 defaults: run: From 373a74e439888b09979a00bd3ac3ccb6208ceea4 Mon Sep 17 00:00:00 2001 From: Andres Quan Date: Tue, 22 Aug 2023 10:47:53 -0600 Subject: [PATCH 11/14] Comment out unit, and example building --- .github/workflows/per-lib-check.yml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/workflows/per-lib-check.yml b/.github/workflows/per-lib-check.yml index 1d8d2f847c..431905206e 100644 --- a/.github/workflows/per-lib-check.yml +++ b/.github/workflows/per-lib-check.yml @@ -55,10 +55,10 @@ jobs: if (( $n_build_cores < 1 )) ; then n_build_cores=1 ; fi mkdir build cd build - if [[ "${FF_GPU_BACKEND}" == "cuda" ]]; then - export FF_BUILD_ALL_EXAMPLES=ON - export FF_BUILD_UNIT_TESTS=ON - fi + #if [[ "${FF_GPU_BACKEND}" == "cuda" ]]; then + # export FF_BUILD_ALL_EXAMPLES=ON + # export FF_BUILD_UNIT_TESTS=ON + #fi ../config/config.linux make -j $n_build_cores ${{ matrix.library }} From 91005ae220f030774be8cf52ddafbf1752e4061b Mon Sep 17 00:00:00 2001 From: Andres Quan Date: Tue, 22 Aug 2023 11:37:40 -0600 Subject: [PATCH 12/14] Remove libraries with no build target --- .github/workflows/per-lib-check.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/per-lib-check.yml b/.github/workflows/per-lib-check.yml index 431905206e..82afe1e124 100644 --- a/.github/workflows/per-lib-check.yml +++ b/.github/workflows/per-lib-check.yml @@ -14,7 +14,7 @@ jobs: strategy: matrix: gpu_backend: ["cuda","hip_rocm"] - library: ["compiler","ffi","kernels","op-attrs","pcg","runtime","substitutions","triton","utils"] + library: ["compiler","kernels","op-attrs","pcg","substitutions","utils"] fail-fast: false steps: - name: Checkout Git Repository From f368dc5ec6c95a64ae75dd4a308fbcc41b9b350b Mon Sep 17 00:00:00 2001 From: Andres Quan Date: Tue, 22 Aug 2023 15:08:14 -0600 Subject: [PATCH 13/14] Re-enable build targets runtime and ffi --- lib/CMakeLists.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/CMakeLists.txt b/lib/CMakeLists.txt index f5d4c788af..f7c166f0dd 100644 --- a/lib/CMakeLists.txt +++ b/lib/CMakeLists.txt @@ -1,8 +1,8 @@ add_subdirectory(pcg) add_subdirectory(compiler) -# add_subdirectory(runtime) +add_subdirectory(runtime) add_subdirectory(op-attrs) add_subdirectory(kernels) add_subdirectory(utils) -# add_subdirectory(ffi) +add_subdirectory(ffi) add_subdirectory(substitutions) From 662ad87a234dfb1c0f75a514b963c87e70bd4181 Mon Sep 17 00:00:00 2001 From: Andres Quan Date: Tue, 22 Aug 2023 15:10:03 -0600 Subject: [PATCH 14/14] Re-enable checks of runtime and ffi --- .github/workflows/per-lib-check.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/per-lib-check.yml b/.github/workflows/per-lib-check.yml index 82afe1e124..28c083189c 100644 --- a/.github/workflows/per-lib-check.yml +++ b/.github/workflows/per-lib-check.yml @@ -14,7 +14,7 @@ jobs: strategy: matrix: gpu_backend: ["cuda","hip_rocm"] - library: ["compiler","kernels","op-attrs","pcg","substitutions","utils"] + library: ["runtime", "ffi","compiler","kernels","op-attrs","pcg","substitutions","utils"] fail-fast: false steps: - name: Checkout Git Repository