flexflow · reyna-abhyankar · Mar 16, 2024 · Mar 14, 2024 · Mar 14, 2024 · Mar 14, 2024
diff --git a/.flake/pkgs/legion.nix b/.flake/pkgs/legion.nix
@@ -0,0 +1,53 @@
+{ lib
+, stdenv
+, fetchFromGitLab
+, cmake
+, python3
+, cudaPackages ? { }
+, cudaCapabilities ? [ "60" "70" "80" "86" ]
+, maxDim ? 5
+}:
+
+# from https://codeberg.org/Uli/nix-things/src/commit/776519e382c81b136c1d0b10d8c7b52b4acb9192/overlays/cq/python/libclang-python.nix
+
+let
+  # NOTE(review): the `{ }` default for cudaPackages has no `cudatoolkit` attribute, so a real CUDA package set must be supplied.
+  inherit (cudaPackages) cudatoolkit;
+in
+
+stdenv.mkDerivation rec {
+  pname = "legion_flexflow";
+  version = "2024-03-13";
+
+  src = fetchFromGitLab {
+    owner = "StanfordLegion";
+    repo = "legion";
+    rev = "24e8c452341dea41427e0ce61e154d61715e6835";
+    sha256 = "sha256-NjCSjphOIew/V24i74I6DModSGcWKLeiSIjts3cFtx4=";
+    fetchSubmodules = true;
+  };
+
+  nativeBuildInputs = [
+    cmake
+  ];
+
+  # cudaCapabilities is joined with "," for Legion_CUDA_ARCH; maxDim is stringified for Legion_MAX_DIM.
+  cmakeFlags = [
+    "-DLegion_USE_Python=1"
+    "-DLegion_BUILD_BINDINGS=1"
+    "-DLegion_USE_CUDA=1"
+    "-DLegion_CUDA_ARCH=${lib.concatStringsSep "," cudaCapabilities}"
+    "-DLegion_MAX_DIM=${toString maxDim}"
+  ];
+
+  buildInputs = [
+    python3
+    cudatoolkit
+  ];
+
+  meta = with lib; {
+    description = "Legion is a parallel programming model for distributed, heterogeneous machines";
+    homepage = "https://github.com/StanfordLegion/legion";
+    license = licenses.asl20;
+  };
+}
diff --git a/.flake/pkgs/tokenizers-cpp.nix b/.flake/pkgs/tokenizers-cpp.nix
@@ -0,0 +1,36 @@
+{ lib
+, stdenv
+, fetchFromGitHub
+, cmake
+, rustc
+, cargo
+}:
+
+# Packages mlc-ai/tokenizers-cpp at a pinned revision, fetched together with its git submodules.
+stdenv.mkDerivation rec {
+  pname = "tokenizers-cpp";
+  version = "2024-03-13";
+
+  src = fetchFromGitHub {
+    owner = "mlc-ai";
+    repo = "tokenizers-cpp";
+    rev = "4f42c9fa74946d70af86671a3804b6f2433e5dac";
+    sha256 = "sha256-p7OYx9RVnKUAuMexy3WjW2zyfMJ/Q9ss4xFLsbQK7wA=";
+    fetchSubmodules = true;
+  };
+
+  # cargo accompanies rustc; NOTE(review): upstream's CMake build appears to shell out to cargo - confirm.
+  nativeBuildInputs = [
+    cmake
+    rustc
+    cargo
+  ];
+
+  buildInputs = [ ];
+
+  meta = with lib; {
+    description = "Universal cross-platform tokenizers binding to HF and sentencepiece";
+    homepage = "https://github.com/mlc-ai/tokenizers-cpp";
+    license = licenses.asl20;
+  };
+}
diff --git a/.github/workflows/helpers/build_cuda.sh b/.github/workflows/helpers/build_cuda.sh
@@ -0,0 +1,33 @@
+#! /usr/bin/env bash
+
+# CI helper: from the repository root, creates build/ and runs config/config.linux there
+# for the CUDA backend with clang, ccache launchers, and the FF_USE_EXTERNAL_* switches on.
+
+set -euo pipefail
+set -x
+
+DIR="$(realpath -- "$(dirname "${BASH_SOURCE[0]}")")"
+REPO="$(realpath -- "$DIR/../../../")"
+
+export FF_GPU_BACKEND="cuda"
+export FF_CUDA_ARCH=70
+cd "$REPO"
+# -p keeps the script re-runnable: a plain mkdir aborts under `set -e` when build/ already exists.
+mkdir -p build
+cd build
+#if [[ "${FF_GPU_BACKEND}" == "cuda" ]]; then
+#  export FF_BUILD_ALL_EXAMPLES=ON
+#  export FF_BUILD_UNIT_TESTS=ON
+#fi
+../config/config.linux \
+        -DCMAKE_CXX_COMPILER="clang++" \
+        -DCMAKE_C_COMPILER="clang" \
+        -DCMAKE_C_COMPILER_LAUNCHER=ccache \
+        -DCMAKE_CXX_COMPILER_LAUNCHER=ccache \
+        -DCMAKE_CUDA_COMPILER_LAUNCHER=ccache \
+        -DFF_USE_EXTERNAL_LEGION=ON \
+        -DFF_USE_EXTERNAL_JSON=ON \
+        -DFF_USE_EXTERNAL_FMT=ON \
+        -DFF_USE_EXTERNAL_SPDLOG=ON
+
+# vim: set tabstop=2 shiftwidth=2 expandtab:
diff --git a/.github/workflows/per-lib-check.yml b/.github/workflows/per-lib-check.yml
@@ -8,9 +8,7 @@ jobs:
   cmake-build:
     name: Library CMake Build
     runs-on: ubuntu-20.04
-    defaults:
-      run:
-        shell: bash -l {0} # required to use an activated conda environment
+
     strategy:
       max-parallel: 1
       matrix:
@@ -22,23 +20,27 @@ jobs:
         with:
           submodules: recursive
 
-      - name: Free additional space on runner
-        run: .github/workflows/helpers/free_space_on_runner.sh
+      - name: Install nix
+        uses: cachix/install-nix-action@v25
+        with:
+          github_access_token: '${{ secrets.GITHUB_TOKEN }}'
 
-      - name: Install CUDA
-        uses: Jimver/cuda-toolkit@v0.2.11
-        id: cuda-toolkit
+      - uses: cachix/cachix-action@v14
         with:
-          cuda: "12.1.0"
-          # Disable caching of the CUDA binaries, since it does not give us any significant performance improvement
-          use-github-cache: "false"
-          linux-local-args: '["--toolkit"]'
+          name: ff
+          skipPush: true
+          # authToken: '${{ secrets.CACHIX_AUTH_TOKEN }}'
+
+      - name: setup nix develop shell
+        uses: nicknovitski/nix-develop@v1.1.0
+        with:
+          arguments: "--accept-flake-config .#ci"
 
       - name: ccache
         uses: hendrikmuhs/ccache-action@v1.2
 
-      - name: Install system dependencies
-        run: FF_GPU_BACKEND=${{ matrix.gpu_backend }} .github/workflows/helpers/install_dependencies.sh
+      # - name: Install system dependencies
+      #   run: FF_GPU_BACKEND=${{ matrix.gpu_backend }} .github/workflows/helpers/install_dependencies.sh
 
       # - name: Install conda and FlexFlow dependencies
       #   uses: conda-incubator/setup-miniconda@v2
@@ -49,20 +51,7 @@ jobs:
 
       - name: Run cmake
         run: |
-          export CUDNN_DIR=/usr/local/cuda
-          export CUDA_DIR=/usr/local/cuda
-          export FF_HOME=$(pwd)
-          export FF_GPU_BACKEND=${{ matrix.gpu_backend }}
-          export FF_CUDA_ARCH=70
-          n_build_cores=$(( $(nproc) cores_available -1 ))
-          if (( $n_build_cores < 1 )) ; then n_build_cores=1 ; fi
-          mkdir build
-          cd build
-          #if [[ "${FF_GPU_BACKEND}" == "cuda" ]]; then
-          #  export FF_BUILD_ALL_EXAMPLES=ON 
-          #  export FF_BUILD_UNIT_TESTS=ON
-          #fi
-          ../config/config.linux -DCMAKE_C_COMPILER_LAUNCHER=ccache -DCMAKE_CXX_COMPILER_LAUNCHER=ccache -DCMAKE_CUDA_COMPILER_LAUNCHER=ccache
+          .github/workflows/helpers/build_${{ matrix.gpu_backend }}.sh
 
       - name: Build utils
         run: |

diff --git a/.gitmodules b/.gitmodules
@@ -4,18 +4,6 @@
 [submodule "deps/nccl"]
 	path = deps/nccl
 	url = https://github.com/NVIDIA/nccl.git
-[submodule "deps/pybind11"]
-	path = deps/pybind11
-	url = https://github.com/pybind/pybind11.git
-[submodule "deps/googletest"]
-	path = deps/googletest
-	url = https://github.com/google/googletest.git
-[submodule "deps/variant"]
-	path = deps/variant
-	url = https://github.com/mpark/variant
-[submodule "deps/optional"]
-	path = deps/optional
-	url = https://github.com/TartanLlama/optional.git
 [submodule "deps/json"]
 	path = deps/json
 	url = https://github.com/nlohmann/json.git
@@ -37,9 +25,3 @@
 [submodule "deps/fmt"]
 	path = deps/fmt
 	url = https://github.com/fmtlib/fmt.git
-[submodule "deps/invoke"]
-	path = deps/invoke
-	url = https://github.com/BlackMATov/invoke.hpp.git
-[submodule "deps/any"]
-	path = deps/any
-	url = https://github.com/thelink2012/any.git
diff --git a/CMakeLists.txt b/CMakeLists.txt
@@ -1,7 +1,12 @@
 cmake_minimum_required(VERSION 3.10)
 project(FlexFlow)
 
-set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} ${CMAKE_CURRENT_LIST_DIR}/cmake)
+# Let include() and find_package() locate the CMake modules shipped under cmake/ and cmake/Modules.
+list(
+  APPEND CMAKE_MODULE_PATH
+  ${CMAKE_CURRENT_LIST_DIR}/cmake
+  ${CMAKE_CURRENT_LIST_DIR}/cmake/Modules
+)
 
 # Detect OS type and Linux version (if it applies)
 set(LINUX_VERSION "")
@@ -28,7 +33,7 @@ set(FF_MAX_NUM_TASK_REGIONS "20" CACHE STRING
 set(FF_MAX_NUM_TASK_ARGUMENTS "5" CACHE STRING
   "Maximum number of arguments that can be declared in a TaskSignature")
 option(FF_USE_NCCL "Run FlexFlow with NCCL" OFF)
-option(FF_USE_PREBUILT_NCCL "Enable use of NCCL pre-compiled library, if available" ON)
+option(FF_USE_EXTERNAL_NCCL "Enable use of NCCL pre-compiled library, if available" ON)
 option(FF_USE_PREBUILT_LEGION "Enable use of Legion pre-compiled library, if available" ON)
 option(FF_USE_ALL_PREBUILT_LIBRARIES "Enable use of all pre-compiled libraries, if available" OFF)
 option(FF_USE_PYTHON "Enable Python" ON)
@@ -77,20 +82,15 @@ include(nccl)
 # set_property(CACHE FF_GPU_BACKEND PROPERTY STRINGS ${FF_GPU_BACKENDS})
 
 include(json)
-include(optional)
 include(expected)
 include(spdlog)
-include(variant)
 include(doctest)
 include(visit_struct)
 include(CTest)
 include(fmt)
 include(legion)
 include(rapidcheck)
-include(invoke)
-include(any)
 #include(gtest)
-#include(fmt)
 
 include(flexflow-utils)