Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
73 commits
Select commit Hold shift + click to select a range
2e98307
add unit tests for machine mapping and dp algorithm
wmdi Jul 19, 2023
8304b3d
add unit test for unity algorithm
wmdi Jul 19, 2023
8131070
Merge remote-tracking branch 'lockshaw/repo-refactor' into repo-refactor
wmdi Jul 21, 2023
dd33af6
fix compile errors from filter and support_interator_tag
wmdi Jul 20, 2023
766eafa
minor fixes for compiler
wmdi Jul 25, 2023
bf609d8
Merge remote-tracking branch 'lockshaw/repo-refactor' into repo-refactor
wmdi Jul 25, 2023
aa915a4
Merge branch 'repo-refactor' into repo-refactor
lockshaw Jul 26, 2023
908a66c
Merge branch 'repo-refactor' of github.com:wmdi/FlexFlow into repo-re…
wmdi Jul 26, 2023
57994af
clean up generator codes and minor fix
wmdi Aug 2, 2023
2be3f16
format
wmdi Aug 2, 2023
4c7e56e
format
wmdi Aug 2, 2023
edb1c58
serial parallel composition
wmdi Aug 9, 2023
70e2b49
remove commited out codes
wmdi Aug 10, 2023
6283528
view MultiDiGraph as labelled
wmdi Aug 11, 2023
a240092
make machine mapping immutable
wmdi Aug 11, 2023
a8988d9
minor fix & format
wmdi Aug 11, 2023
e6bc14a
move general codes into proper places
wmdi Aug 14, 2023
04a9525
format
wmdi Aug 14, 2023
8bad012
Merge remote-tracking branch 'upstream/repo-refactor' into repo-refactor
wmdi Aug 16, 2023
60b6f59
minor fix & format
wmdi Aug 17, 2023
8fd7ef0
minor fix
wmdi Aug 18, 2023
60e3945
update substitutions to align with latest changes
wmdi Aug 19, 2023
6bb76df
format
wmdi Aug 19, 2023
4d5d8de
draft substitutions
wmdi Aug 21, 2023
fc807b4
format
wmdi Aug 21, 2023
55e8de3
further draft substitution
wmdi Aug 21, 2023
4f0e4d2
format
wmdi Aug 21, 2023
550eb3c
Merge remote-tracking branch 'upstream/repo-refactor' into substitutions
wmdi Aug 22, 2023
cc5837b
minor fix
wmdi Aug 23, 2023
d1aa92f
refactor the pattern graph to be OutputLabelledOpenMultiDiGraph
wmdi Aug 25, 2023
a5e111e
format
wmdi Aug 25, 2023
2fb2c7d
minor fix
wmdi Aug 28, 2023
a1bffc5
updates
wmdi Aug 28, 2023
ee9f7ca
readme for substitutions
wmdi Aug 29, 2023
08dd3fe
format
wmdi Aug 30, 2023
82e2c2c
check substitution validity
wmdi Aug 30, 2023
ae97d59
initialize tests for substitutions
wmdi Aug 31, 2023
31d2ca0
Add partial required fix
lockshaw Sep 1, 2023
7ffd6bf
Merge remote-tracking branch 'origin/repo-refactor' into req-fix
lockshaw Sep 1, 2023
e0559cb
Merge remote-tracking branch 'upstream/repo-refactor' into substitutions
wmdi Sep 1, 2023
c2513c6
fix
wmdi Sep 1, 2023
c2b6b04
format
wmdi Sep 1, 2023
aad550b
Cleanup req and add significant printing/fmt support
lockshaw Sep 2, 2023
cb121a9
Clean up implementation and prepare for code review
lockshaw Sep 4, 2023
e23f187
Fix build
lockshaw Sep 4, 2023
21b8549
remove output tensor computation
wmdi Sep 4, 2023
0c0cb78
Begin implementing new utils library structure
lockshaw Sep 8, 2023
e3b633f
implement get_operator_attrs
wmdi Sep 8, 2023
3041dc7
get parallel operator attributes & minor fix
wmdi Sep 8, 2023
ea1e8e5
format
wmdi Sep 8, 2023
82215b2
More utils reorganization
lockshaw Sep 9, 2023
4d129ab
Fix test cases
lockshaw Sep 11, 2023
bd2d16e
Add template visitable struct
lockshaw Sep 12, 2023
b9cb70f
more testing
lockshaw Sep 13, 2023
1508050
match open graphs
wmdi Sep 13, 2023
ef93d29
Add multiparam template test macro
lockshaw Sep 14, 2023
58a7c62
Merge remote-tracking branch 'upstream/repo-refactor' into substitutions
wmdi Sep 15, 2023
85469cd
More restructuring of utils focusing on getting initial draft of all …
lockshaw Sep 16, 2023
3dcf25c
Refactor find-missing into a generic library
lockshaw Sep 17, 2023
ddb99d1
Add linter framework
lockshaw Sep 17, 2023
3119592
Restructure tools directory, add issue triage script
lockshaw Sep 17, 2023
322d945
minor fix
wmdi Sep 17, 2023
6d4fa2d
Make issue triage an actual python module in tools
lockshaw Sep 19, 2023
19372cd
Move everything into tooling lib
lockshaw Sep 21, 2023
c5356da
Merge remote-tracking branch 'upstream/repo-refactor' into substitutions
wmdi Sep 22, 2023
2d5e877
Merge remote-tracking branch 'upstream/repo-refactor' into substitutions
wmdi Sep 25, 2023
13eb692
Merge remote-tracking branch 'lockshaw/req-fix' into substitutions
wmdi Sep 28, 2023
e2e38a2
initialize graph fix
wmdi Sep 29, 2023
1016578
initialize labelled graph fix
wmdi Sep 29, 2023
3f14183
continue refactoring graph
wmdi Oct 1, 2023
f3c33e5
finalize interface design
wmdi Oct 3, 2023
8f48d5e
finish rewriting all the graphs
wmdi Oct 3, 2023
c2f97ae
refactor get_subgraph for open graphs
wmdi Oct 3, 2023
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
The table of contents is too big for display.
Diff view
Diff view
  •  
  •  
  •  
5 changes: 4 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
@@ -1,11 +1,14 @@
# custom tooling
/.tools/
/.state/

# ctest-created files
Testing/

# clang compilation db
compile_commands.json

# Compiled files
/.tools/
/python/flexflow_python
/python/flexflow/core/legion_cffi.py
python/flexflow/core/flexflow_cffi_header.py
Expand Down
6 changes: 6 additions & 0 deletions .gitmodules
Original file line number Diff line number Diff line change
Expand Up @@ -43,3 +43,9 @@
[submodule "deps/any"]
path = deps/any
url = https://github.com/thelink2012/any.git
[submodule "deps/nameof"]
path = deps/nameof
url = git@github.com:Neargye/nameof.git
[submodule "deps/boost_preprocessor"]
path = deps/boost_preprocessor
url = https://github.com/boostorg/preprocessor.git
77 changes: 2 additions & 75 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -3,84 +3,11 @@ project(FlexFlow)

set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} ${CMAKE_CURRENT_LIST_DIR}/cmake)

set(FF_MAX_DIM "5" CACHE STRING "Maximum tensor order")
set(FF_MAX_OPNAME "128" CACHE STRING "Maximum op name length")
set(FF_MAX_NUM_OUTPUTS "256" CACHE STRING "Maximum number of outputs (per operator)")
set(FF_MAX_NUM_INPUTS "256" CACHE STRING "Maximum number of inputs (per operator)")
set(FF_MAX_NUM_WEIGHTS "64" CACHE STRING "Maximum number of weights (per operator)")
set(FF_MAX_NUM_FUSED_OPERATORS "64" CACHE STRING "Maximum number of fused tensors")
set(FF_MAX_NUM_FUSED_TENSORS "64" CACHE STRING "Maximum number of input and output tensors per fused op")
set(FF_MAX_NUM_WORKERS "1024" CACHE STRING "Maximum number of GPUs")
set(FF_MAX_NUM_TASK_REGIONS "20" CACHE STRING
"Maximum number of regions that can be passed to a task through the TaskSpec interface")
set(FF_MAX_NUM_TASK_ARGUMENTS "5" CACHE STRING
"Maximum number of arguments that can be declared in a TaskSignature")
option(FF_USE_NCCL "Run FlexFlow with NCCL" OFF)
option(FF_USE_PREBUILT_NCCL "Enable use of NCCL pre-compiled library, if available" ON)
option(FF_USE_PREBUILT_LEGION "Enable use of Legion pre-compiled library, if available" ON)
option(FF_USE_ALL_PREBUILT_LIBRARIES "Enable use of all pre-compiled libraries, if available" OFF)
option(FF_USE_PYTHON "Enable Python" ON)
option(FF_BUILD_FROM_PYPI "Build from pypi" OFF)

set(FF_GASNET_CONDUITS aries udp mpi ibv ucx)
set(FF_GASNET_CONDUIT "mpi" CACHE STRING "Select GASNet conduit ${FF_GASNET_CONDUITS}")
set_property(CACHE FF_GASNET_CONDUIT PROPERTY STRINGS ${FF_GASNET_CONDUITS})
set(FF_LEGION_NETWORKS "" CACHE STRING "Network backend(s) to use")

set(FF_GPU_BACKENDS cuda hip_cuda hip_rocm intel)
set(FF_GPU_BACKEND "cuda" CACHE STRING "Select GPU Backend ${FF_GPU_BACKENDS}")
set_property(CACHE FF_GPU_BACKEND PROPERTY STRINGS ${FF_GPU_BACKENDS})

option(FF_USE_EXTERNAL_LEGION "Use pre-installed Legion" OFF)
option(FF_BUILD_RESNET "build resnet example" OFF)
option(FF_BUILD_RESNEXT "build resnext example" OFF)
option(FF_BUILD_ALEXNET "build alexnet example" OFF)
option(FF_BUILD_DLRM "build DLRM example" OFF)
option(FF_BUILD_XDL "build XDL example" OFF)
option(FF_BUILD_INCEPTION "build inception example" OFF)
option(FF_BUILD_CANDLE_UNO "build candle uno example" OFF)
option(FF_BUILD_TRANSFORMER "build transformer example" OFF)
option(FF_BUILD_MOE "build mixture of experts example" OFF)
option(FF_BUILD_MLP_UNIFY "build mlp unify example" OFF)
option(FF_BUILD_SPLIT_TEST "build split test example" OFF)
option(FF_BUILD_SPLIT_TEST_2 "build split test 2 example" OFF)
option(FF_BUILD_ALL_EXAMPLES "build all examples. Overrides others" OFF)
option(FF_BUILD_UNIT_TESTS "build non-operator unit tests" OFF)
option(FF_BUILD_SUBSTITUTION_TOOL "build substitution conversion tool" OFF)
option(FF_BUILD_VISUALIZATION_TOOL "build substitution visualization tool" OFF)
option(FF_BUILD_ARG_PARSER "build command line argument parser" OFF)

set(FF_CUDA_ARCH "autodetect" CACHE STRING "Target CUDA Arch")
if (FF_CUDA_ARCH STREQUAL "")
message(FATAL_ERROR "FF_CUDA_ARCH cannot be an empty string. Set it to `autodetect`, `all`, or pass one or multiple valid CUDA archs.")
endif()

if(${CMAKE_SYSTEM_NAME} MATCHES "Linux")
set(LIBEXT ".so")
endif()

include(cuda)
include(cudnn)
include(nccl)
# set_property(CACHE FF_GPU_BACKEND PROPERTY STRINGS ${FF_GPU_BACKENDS})

include(json)
include(optional)
include(expected)
include(spdlog)
include(variant)
include(doctest)
include(visit_struct)
include(CTest)
include(fmt)
include(legion)
include(rapidcheck)
include(invoke)
include(any)
#include(gtest)
#include(fmt)

include(flexflow-utils)
include(utils)
include(deps)

# TODO @lockshaw remove me
# https://discourse.nixos.org/t/get-clangd-to-find-standard-headers-in-nix-shell/11268/6
Expand Down
16 changes: 0 additions & 16 deletions cmake/any.cmake

This file was deleted.

14 changes: 14 additions & 0 deletions cmake/deps.cmake
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
# Third-party dependency setup for FlexFlow.
# Adds cmake/deps/ to the module search path, then pulls in one small
# module per external dependency; each module is responsible for making
# its dependency's targets available to the rest of the build.
set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} ${CMAKE_CURRENT_LIST_DIR}/deps)

# One include() per module in cmake/deps/, listed alphabetically.
# NOTE(review): include order is kept as written — confirm none of these
# modules depends on another being included first (e.g. cuda before
# cudnn/nccl) before reordering.
include(boost_preprocessor)
include(cuda)
include(cudnn)
include(doctest)
include(fmt)
include(json)
include(legion)
include(nameof)
include(nccl)
include(rapidcheck)
include(spdlog)
include(visit_struct)
1 change: 1 addition & 0 deletions cmake/deps/boost_preprocessor.cmake
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/deps/boost_preprocessor)
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
1 change: 1 addition & 0 deletions cmake/deps/nameof.cmake
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/deps/nameof)
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
4 changes: 0 additions & 4 deletions cmake/expected.cmake

This file was deleted.

5 changes: 0 additions & 5 deletions cmake/invoke.cmake

This file was deleted.

4 changes: 0 additions & 4 deletions cmake/optional.cmake

This file was deleted.

62 changes: 6 additions & 56 deletions cmake/utils.cmake
Original file line number Diff line number Diff line change
@@ -1,56 +1,6 @@
set(known_gpu_archs "")
function(remove_duplicate_args __string)
if(${__string})
set(__list ${${__string}})
separate_arguments(__list)
list(REMOVE_DUPLICATES __list)
foreach(__e ${__list})
set(__str "${__str} ${__e}")
endforeach()
set(${__string} ${__str} PARENT_SCOPE)
endif()
endfunction()
function(detect_installed_gpus out_variable)
if(NOT CUDA_gpu_detect_output)
set(__cufile ${PROJECT_BINARY_DIR}/detect_cuda_archs.cu)
file(WRITE ${__cufile} ""
"#include <cstdio>\n"
"int main()\n"
"{\n"
" int count = 0;\n"
" if (cudaSuccess != cudaGetDeviceCount(&count)) return -1;\n"
" if (count == 0) return -1;\n"
" for (int device = 0; device < count; ++device)\n"
" {\n"
" cudaDeviceProp prop;\n"
" if (cudaSuccess == cudaGetDeviceProperties(&prop, device))\n"
" std::printf(\"%d.%d \", prop.major, prop.minor);\n"
" }\n"
" return 0;\n"
"}\n")
execute_process(COMMAND "${CUDA_NVCC_EXECUTABLE}" "--run" "${__cufile}"
WORKING_DIRECTORY "${PROJECT_BINARY_DIR}/CMakeFiles/"
RESULT_VARIABLE __nvcc_res OUTPUT_VARIABLE __nvcc_out
ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE)
if(__nvcc_res EQUAL 0)
message(STATUS "No result from nvcc so building for 2.0")
string(REPLACE "2.1" "2.1(2.0)" __nvcc_out "${__nvcc_out}")
set(CUDA_gpu_detect_output ${__nvcc_out} CACHE INTERNAL "Returned GPU architetures from detect_gpus tool" FORCE)
endif()
endif()
if(NOT CUDA_gpu_detect_output)
message(STATUS "Automatic GPU detection failed, Architecture is not set: ${known_gpu_archs}.")
set(${out_variable} ${known_gpu_archs} PARENT_SCOPE)
else()
remove_duplicate_args(CUDA_gpu_detect_output)
#Strip leading and trailing whitespaces
string(STRIP "${CUDA_gpu_detect_output}" CUDA_gpu_detect_output)
#Replace spaces in between with commas so you go from "5.2 6.1" to "5.2,6.1"
string(REGEX REPLACE " " "," CUDA_gpu_detect_output "${CUDA_gpu_detect_output}")
# message(${CUDA_gpu_detect_output})
string(REPLACE "." "" CUDA_gpu_detect_output "${CUDA_gpu_detect_output}")
# message(${CUDA_gpu_detect_output})
set(${out_variable} ${CUDA_gpu_detect_output} PARENT_SCOPE)
# message(STATUS "Automatic GPU ARCH detection: ${CUDA_gpu_detect_output}")
endif()
endfunction()
# Entry point for FlexFlow's project-local CMake helper modules.
# Adds cmake/utils/ to the module search path so the include()s below
# resolve against that directory.
set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} ${CMAKE_CURRENT_LIST_DIR}/utils)

# NOTE(review): include order kept as written — confirm whether later
# modules rely on definitions from earlier ones before reordering.
include(flexflow-utils)
include(aliasing)
include(build-options)
include(libext)
File renamed without changes.
51 changes: 51 additions & 0 deletions cmake/utils/build-options.cmake
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
# Build-time configuration for FlexFlow.
# Every knob is an FF_-prefixed cache variable (the cache is global, so
# the prefix avoids collisions); defaults below can be overridden with
# -DFF_<NAME>=<value> on the cmake command line.

# Compile-time capacity limits baked into the runtime.
set(FF_MAX_DIM "5" CACHE STRING "Maximum tensor order")
set(FF_MAX_OPNAME "128" CACHE STRING "Maximum op name length")
set(FF_MAX_NUM_OUTPUTS "256" CACHE STRING "Maximum number of outputs (per operator)")
set(FF_MAX_NUM_INPUTS "256" CACHE STRING "Maximum number of inputs (per operator)")
set(FF_MAX_NUM_WEIGHTS "64" CACHE STRING "Maximum number of weights (per operator)")
set(FF_MAX_NUM_FUSED_OPERATORS "64" CACHE STRING "Maximum number of fused tensors")
set(FF_MAX_NUM_FUSED_TENSORS "64" CACHE STRING "Maximum number of input and output tensors per fused op")
set(FF_MAX_NUM_WORKERS "1024" CACHE STRING "Maximum number of GPUs")
set(FF_MAX_NUM_TASK_REGIONS "20" CACHE STRING
  "Maximum number of regions that can be passed to a task through the TaskSpec interface")
set(FF_MAX_NUM_TASK_ARGUMENTS "5" CACHE STRING
  "Maximum number of arguments that can be declared in a TaskSignature")

# Library usage toggles.
option(FF_USE_NCCL "Run FlexFlow with NCCL" OFF)
option(FF_USE_PREBUILT_NCCL "Enable use of NCCL pre-compiled library, if available" ON)
option(FF_USE_PREBUILT_LEGION "Enable use of Legion pre-compiled library, if available" ON)
option(FF_USE_ALL_PREBUILT_LIBRARIES "Enable use of all pre-compiled libraries, if available" OFF)
option(FF_USE_PYTHON "Enable Python" ON)
option(FF_BUILD_FROM_PYPI "Build from pypi" OFF)

# Networking backend selection; the STRINGS property gives cmake-gui a
# drop-down of the valid GASNet conduits.
set(FF_GASNET_CONDUITS aries udp mpi ibv ucx)
set(FF_GASNET_CONDUIT "mpi" CACHE STRING "Select GASNet conduit ${FF_GASNET_CONDUITS}")
set_property(CACHE FF_GASNET_CONDUIT PROPERTY STRINGS ${FF_GASNET_CONDUITS})
set(FF_LEGION_NETWORKS "" CACHE STRING "Network backend(s) to use")

# GPU backend selection (drop-down via STRINGS, as above).
set(FF_GPU_BACKENDS cuda hip_cuda hip_rocm intel)
set(FF_GPU_BACKEND "cuda" CACHE STRING "Select GPU Backend ${FF_GPU_BACKENDS}")
set_property(CACHE FF_GPU_BACKEND PROPERTY STRINGS ${FF_GPU_BACKENDS})

# Optional components: examples, tests, and tools (all off by default).
option(FF_USE_EXTERNAL_LEGION "Use pre-installed Legion" OFF)
option(FF_BUILD_RESNET "build resnet example" OFF)
option(FF_BUILD_RESNEXT "build resnext example" OFF)
option(FF_BUILD_ALEXNET "build alexnet example" OFF)
option(FF_BUILD_DLRM "build DLRM example" OFF)
option(FF_BUILD_XDL "build XDL example" OFF)
option(FF_BUILD_INCEPTION "build inception example" OFF)
option(FF_BUILD_CANDLE_UNO "build candle uno example" OFF)
option(FF_BUILD_TRANSFORMER "build transformer example" OFF)
option(FF_BUILD_MOE "build mixture of experts example" OFF)
option(FF_BUILD_MLP_UNIFY "build mlp unify example" OFF)
option(FF_BUILD_SPLIT_TEST "build split test example" OFF)
option(FF_BUILD_SPLIT_TEST_2 "build split test 2 example" OFF)
option(FF_BUILD_ALL_EXAMPLES "build all examples. Overrides others" OFF)
option(FF_BUILD_UNIT_TESTS "build non-operator unit tests" OFF)
option(FF_BUILD_SUBSTITUTION_TOOL "build substitution conversion tool" OFF)
option(FF_BUILD_VISUALIZATION_TOOL "build substitution visualization tool" OFF)
option(FF_BUILD_ARG_PARSER "build command line argument parser" OFF)

set(FF_CUDA_ARCH "autodetect" CACHE STRING "Target CUDA Arch")
# Quote the expansion: with an unquoted operand, if() re-dereferences a
# value that happens to name another variable (CMP0054 footgun), and an
# empty value would make the comparison malformed.
if("${FF_CUDA_ARCH}" STREQUAL "")
  message(FATAL_ERROR "FF_CUDA_ARCH cannot be an empty string. Set it to `autodetect`, `all`, or pass one or multiple valid CUDA archs.")
endif()
Loading