diff --git a/.clang-format-for-format-sh b/.clang-format-for-format-sh new file mode 100644 index 0000000000..17e9f8935d --- /dev/null +++ b/.clang-format-for-format-sh @@ -0,0 +1,182 @@ +--- +Language: Cpp +# BasedOnStyle: LLVM +AccessModifierOffset: -2 +AlignAfterOpenBracket: Align +AlignArrayOfStructures: None +AlignConsecutiveMacros: None +AlignConsecutiveAssignments: None +AlignConsecutiveBitFields: None +AlignConsecutiveDeclarations: None +AlignEscapedNewlines: Right +AlignOperands: Align +AlignTrailingComments: true +AllowAllArgumentsOnNextLine: true +AllowAllConstructorInitializersOnNextLine: true +AllowAllParametersOfDeclarationOnNextLine: true +AllowShortEnumsOnASingleLine: true +AllowShortBlocksOnASingleLine: Never +AllowShortCaseLabelsOnASingleLine: false +AllowShortFunctionsOnASingleLine: Empty +AllowShortLambdasOnASingleLine: All +AllowShortIfStatementsOnASingleLine: Never +AllowShortLoopsOnASingleLine: false +AlwaysBreakAfterDefinitionReturnType: None +AlwaysBreakAfterReturnType: None +AlwaysBreakBeforeMultilineStrings: false +AlwaysBreakTemplateDeclarations: Yes +AttributeMacros: + - __capability +BinPackArguments: false +BinPackParameters: false +BraceWrapping: + AfterCaseLabel: false + AfterClass: false + AfterControlStatement: Never + AfterEnum: false + AfterFunction: false + AfterNamespace: false + AfterObjCDeclaration: false + AfterStruct: false + AfterUnion: false + AfterExternBlock: false + BeforeCatch: false + BeforeElse: false + BeforeLambdaBody: false + BeforeWhile: false + IndentBraces: false + SplitEmptyFunction: true + SplitEmptyRecord: true + SplitEmptyNamespace: true +BreakBeforeBinaryOperators: None +BreakBeforeConceptDeclarations: true +BreakBeforeBraces: Attach +BreakBeforeInheritanceComma: false +BreakInheritanceList: BeforeColon +BreakBeforeTernaryOperators: true +BreakConstructorInitializersBeforeComma: false +BreakConstructorInitializers: BeforeColon +BreakAfterJavaFieldAnnotations: false +BreakStringLiterals: true 
+ColumnLimit: 80 +CommentPragmas: '^ IWYU pragma:' +CompactNamespaces: false +ConstructorInitializerAllOnOneLineOrOnePerLine: false +ConstructorInitializerIndentWidth: 4 +ContinuationIndentWidth: 4 +Cpp11BracedListStyle: true +DeriveLineEnding: true +DerivePointerAlignment: false +DisableFormat: false +EmptyLineAfterAccessModifier: Never +EmptyLineBeforeAccessModifier: LogicalBlock +ExperimentalAutoDetectBinPacking: false +FixNamespaceComments: true +ForEachMacros: + - foreach + - Q_FOREACH + - BOOST_FOREACH +IfMacros: + - KJ_IF_MAYBE +IncludeBlocks: Preserve +IncludeCategories: + - Regex: '^"(llvm|llvm-c|clang|clang-c)/' + Priority: 2 + SortPriority: 0 + CaseSensitive: false + - Regex: '^(<|"(gtest|gmock|isl|json)/)' + Priority: 3 + SortPriority: 0 + CaseSensitive: false + - Regex: '.*' + Priority: 1 + SortPriority: 0 + CaseSensitive: false +IncludeIsMainRegex: '(Test)?$' +IncludeIsMainSourceRegex: '' +IndentAccessModifiers: false +IndentCaseLabels: true +IndentCaseBlocks: false +IndentGotoLabels: true +IndentPPDirectives: None +IndentExternBlock: AfterExternBlock +IndentRequires: false +IndentWidth: 2 +IndentWrappedFunctionNames: true +InsertBraces: true +InsertTrailingCommas: None +InsertNewlineAtEOF: true +JavaScriptQuotes: Leave +JavaScriptWrapImports: true +KeepEmptyLinesAtTheStartOfBlocks: true +LambdaBodyIndentation: Signature +LineEnding: LF +MacroBlockBegin: '' +MacroBlockEnd: '' +MaxEmptyLinesToKeep: 1 +NamespaceIndentation: None +ObjCBinPackProtocolList: Auto +ObjCBlockIndentWidth: 2 +ObjCBreakBeforeNestedBlockParam: true +ObjCSpaceAfterProperty: false +ObjCSpaceBeforeProtocolList: true +PenaltyBreakAssignment: 2 +PenaltyBreakBeforeFirstCallParameter: 19 +PenaltyBreakComment: 300 +PenaltyBreakFirstLessLess: 120 +PenaltyBreakString: 1000 +PenaltyBreakTemplateDeclaration: 10 +PenaltyExcessCharacter: 1000000 +PenaltyReturnTypeOnItsOwnLine: 60 +PenaltyIndentedWhitespace: 0 +PointerAlignment: Right +PPIndentWidth: -1 +QualifierAlignment: Right 
+ReferenceAlignment: Pointer +ReflowComments: true +ShortNamespaceLines: 1 +SortIncludes: CaseSensitive +SortJavaStaticImport: Before +SortUsingDeclarations: true +SpaceAfterCStyleCast: false +SpaceAfterLogicalNot: false +SpaceAfterTemplateKeyword: true +SpaceBeforeAssignmentOperators: true +SpaceBeforeCaseColon: false +SpaceBeforeCpp11BracedList: false +SpaceBeforeCtorInitializerColon: true +SpaceBeforeInheritanceColon: true +SpaceBeforeParens: ControlStatements +SpaceAroundPointerQualifiers: Default +SpaceBeforeRangeBasedForLoopColon: true +SpaceInEmptyBlock: false +SpaceInEmptyParentheses: false +SpacesBeforeTrailingComments: 1 +SpacesInAngles: Never +SpacesInConditionalStatement: false +SpacesInContainerLiterals: true +SpacesInCStyleCastParentheses: false +SpacesInLineCommentPrefix: + Minimum: 1 + Maximum: -1 +SpacesInParentheses: false +SpacesInSquareBrackets: false +SpaceBeforeSquareBrackets: false +BitFieldColonSpacing: Both +Standard: Latest +StatementAttributeLikeMacros: + - Q_EMIT +StatementMacros: + - Q_UNUSED + - QT_REQUIRE_VERSION +TabWidth: 8 +UseCRLF: false +UseTab: Never +WhitespaceSensitiveMacros: + - STRINGIZE + - PP_STRINGIZE + - BOOST_PP_STRINGIZE + - NS_SWIFT_NAME + - CF_SWIFT_NAME +... 
+ diff --git a/.github/workflows/clang-format-check.yml b/.github/workflows/clang-format-check.yml index 46c9bf3be2..fb93fd6b5b 100644 --- a/.github/workflows/clang-format-check.yml +++ b/.github/workflows/clang-format-check.yml @@ -5,21 +5,25 @@ jobs: name: Formatting Check runs-on: ubuntu-latest strategy: + fail-fast: false matrix: path: - - check: "src" - exclude: '\.proto$' - - check: "include" - - check: "nmt" - - check: "python" - - check: "scripts" + - check: "lib/compiler" + - check: "lib/ffi" + - check: "lib/kernels" + - check: "lib/op-attrs" + - check: "lib/pcg" + - check: "lib/runtime" + - check: "lib/substitutions" + - check: "lib/utils" - check: "tests" - check: "examples" + - check: "bindings" steps: - uses: actions/checkout@v2 - name: Run clang-format style check for C/C++/Protobuf programs. - uses: jidicula/clang-format-action@v4.8.0 + uses: lockshaw/clang-format-action@v4.11.0-flexflow-3 with: - clang-format-version: "15" + clang-format-version: "16" check-path: ${{ matrix.path['check'] }} exclude-regex: ${{ matrix.path['exclude'] }} diff --git a/examples/cpp/DLRM/strategies/dlrm_strategy.cc b/examples/cpp/DLRM/strategies/dlrm_strategy.cc index 2fcc4173c9..a7fab8c3a8 100644 --- a/examples/cpp/DLRM/strategies/dlrm_strategy.cc +++ b/examples/cpp/DLRM/strategies/dlrm_strategy.cc @@ -356,4 +356,4 @@ int main(int argc, char **argv) { std::to_string(gpu) + "gpus.pb"; std::fstream outputFile(output.c_str(), std::ios::out | std::ios::trunc); strategy.SerializeToOstream(&outputFile); */ -} \ No newline at end of file +} diff --git a/lib/CMakeLists.txt b/lib/CMakeLists.txt index f9ad92a147..ece639cb6a 100644 --- a/lib/CMakeLists.txt +++ b/lib/CMakeLists.txt @@ -1,8 +1,8 @@ #add_subdirectory(pcg) #add_subdirectory(compiler) -# add_subdirectory(runtime) -#add_subdirectory(op-attrs) -#add_subdirectory(kernels) +add_subdirectory(runtime) +add_subdirectory(op-attrs) +add_subdirectory(kernels) add_subdirectory(utils) # add_subdirectory(ffi) 
#add_subdirectory(substitutions) diff --git a/lib/compiler/include/compiler/machine_mapping.h b/lib/compiler/include/compiler/machine_mapping.h index c105221682..400e2770f8 100644 --- a/lib/compiler/include/compiler/machine_mapping.h +++ b/lib/compiler/include/compiler/machine_mapping.h @@ -39,4 +39,4 @@ MachineMapping optimal_cost( MAKE_VISIT_HASHABLE(::FlexFlow::MachineMapping); -#endif \ No newline at end of file +#endif diff --git a/lib/compiler/test/test_dp.cc b/lib/compiler/test/test_dp.cc index 18b098a202..01e4189839 100644 --- a/lib/compiler/test/test_dp.cc +++ b/lib/compiler/test/test_dp.cc @@ -51,4 +51,4 @@ TEST_CASE("optimal_cost") { optimal_cost(pcg, allowed_machine_views, TestCostEstimator{}, resource); // TODO: check result -} \ No newline at end of file +} diff --git a/lib/compiler/test/test_open_graph.cc b/lib/compiler/test/test_open_graph.cc index 6288b481fe..66af736a50 100644 --- a/lib/compiler/test/test_open_graph.cc +++ b/lib/compiler/test/test_open_graph.cc @@ -102,4 +102,4 @@ TEST_CASE("get_cut") { GraphSplit gs1{{ns[0], ns[1], ns[2], ns[3]}, {ns[4]}}; CHECK(get_cut(g, gs1) == std::unordered_set{e3, e4}); -} \ No newline at end of file +} diff --git a/lib/kernels/include/kernels/reshape_kernels.h b/lib/kernels/include/kernels/reshape_kernels.h index fa752b6c2b..972f8ee9b2 100644 --- a/lib/kernels/include/kernels/reshape_kernels.h +++ b/lib/kernels/include/kernels/reshape_kernels.h @@ -30,4 +30,4 @@ void backward_kernel(ffStream_t stream, } // namespace Kernels } // namespace FlexFlow -#endif // _FLEXFLOW_OPS_KERNELS_RESHAPE_KERNELS_H \ No newline at end of file +#endif // _FLEXFLOW_OPS_KERNELS_RESHAPE_KERNELS_H diff --git a/lib/kernels/src/allocation.cc b/lib/kernels/src/allocation.cc new file mode 100644 index 0000000000..fa32a9a705 --- /dev/null +++ b/lib/kernels/src/allocation.cc @@ -0,0 +1,13 @@ +#include "kernels/allocation.h" + +namespace FlexFlow { + +void *Allocator::allocate(size_t size) { + return i_allocator->allocate(size); +} + 
+void Allocator::deallocate(void *ptr) { + i_allocator->deallocate(ptr); +} + +} // namespace FlexFlow diff --git a/lib/kernels/src/array_shape.cc b/lib/kernels/src/array_shape.cc index 44507c14c4..67c3de54dd 100644 --- a/lib/kernels/src/array_shape.cc +++ b/lib/kernels/src/array_shape.cc @@ -10,4 +10,6 @@ std::size_t ArrayShape::get_volume() const { return product(this->dims); } +ArrayShape::ArrayShape(std::vector const &_dims) : dims(_dims) {} + } // namespace FlexFlow diff --git a/lib/kernels/src/hip/aggregate_kernels.cpp b/lib/kernels/src/hip/aggregate_kernels.cpp index ff50a8c7ad..40faaaeeff 100644 --- a/lib/kernels/src/hip/aggregate_kernels.cpp +++ b/lib/kernels/src/hip/aggregate_kernels.cpp @@ -296,4 +296,4 @@ AggregatePerDeviceState::~AggregatePerDeviceState(void) { } // namespace Aggregate } // namespace Kernels -} // namespace FlexFlow \ No newline at end of file +} // namespace FlexFlow diff --git a/lib/kernels/src/hip/optimizer_kernel.cpp b/lib/kernels/src/hip/optimizer_kernel.cpp index 7f57d6a2fb..c22ecd7f5a 100644 --- a/lib/kernels/src/hip/optimizer_kernel.cpp +++ b/lib/kernels/src/hip/optimizer_kernel.cpp @@ -245,4 +245,4 @@ __host__ void AdamOptimizer::nccl_update_task_gpu(AdamOptimizer const *op, } #endif -}; // namespace FlexFlow \ No newline at end of file +}; // namespace FlexFlow diff --git a/lib/op-attrs/include/op-attrs/parallel_tensor_dims.h b/lib/op-attrs/include/op-attrs/parallel_tensor_dims.h index d38ba75232..aeec0c6e01 100644 --- a/lib/op-attrs/include/op-attrs/parallel_tensor_dims.h +++ b/lib/op-attrs/include/op-attrs/parallel_tensor_dims.h @@ -11,6 +11,7 @@ struct ParallelTensorDims : public use_visitable_cmp { size_t get_volume() const; size_t num_dims() const; + std::vector get_dims() const; using iterator = typename FFOrdered::iterator; using const_iterator = typename FFOrdered::const_iterator; diff --git a/lib/op-attrs/include/op-attrs/parallel_tensor_shape.h b/lib/op-attrs/include/op-attrs/parallel_tensor_shape.h index 
fd560352bb..182b2169c3 100644 --- a/lib/op-attrs/include/op-attrs/parallel_tensor_shape.h +++ b/lib/op-attrs/include/op-attrs/parallel_tensor_shape.h @@ -26,7 +26,6 @@ struct ParallelTensorShape : public use_visitable_cmp { ParallelTensorShape(TensorShape const &); int num_dims() const; - ParallelDim const &at(ff_dim_t const &) const; ParallelDim &at(ff_dim_t const &); ParallelDim const &operator[](ff_dim_t const &) const; diff --git a/lib/op-attrs/include/op-attrs/tensor_shape.h b/lib/op-attrs/include/op-attrs/tensor_shape.h index fa34860817..ab0f958b42 100644 --- a/lib/op-attrs/include/op-attrs/tensor_shape.h +++ b/lib/op-attrs/include/op-attrs/tensor_shape.h @@ -16,7 +16,7 @@ struct TensorShape : public use_visitable_cmp { template TensorShape(Dims const &dims, DataType data_type) - : dims(dims), data_type(data_type) {} + : dims(dims), data_type(data_type) {} size_t at(ff_dim_t) const; size_t operator[](ff_dim_t) const; diff --git a/lib/op-attrs/src/datatype.cc b/lib/op-attrs/src/datatype.cc new file mode 100644 index 0000000000..6fb2d41051 --- /dev/null +++ b/lib/op-attrs/src/datatype.cc @@ -0,0 +1,24 @@ +#include "op-attrs/datatype.h" + +namespace FlexFlow { + +size_t size_of(DataType data_type) { + switch (data_type) { + case DataType::BOOL: + return sizeof(bool); + case DataType::INT32: + return sizeof(int32_t); + case DataType::INT64: + return sizeof(int64_t); + case DataType::HALF: + return sizeof(float) / 2; + case DataType::FLOAT: + return sizeof(float); + case DataType::DOUBLE: + return sizeof(double); + default: + throw mk_runtime_error("Unknown data type"); + } +} + +} // namespace FlexFlow diff --git a/lib/op-attrs/src/parallel_tensor_shape.cc b/lib/op-attrs/src/parallel_tensor_shape.cc index 9a36e7d11b..0f4121973f 100644 --- a/lib/op-attrs/src/parallel_tensor_shape.cc +++ b/lib/op-attrs/src/parallel_tensor_shape.cc @@ -16,6 +16,24 @@ static std::vector lift_dims(TensorDims const &dims) { ParallelTensorDims::ParallelTensorDims(TensorDims
const &dims) : data(lift_dims(dims)) {} +std::vector ParallelTensorDims::get_dims() const { + std::vector dims; + for (ParallelDim const &d : this->data) { + dims.push_back(d.size); + } + return dims; +} + +size_t ParallelTensorDims::get_volume() const { + + // this function can use contains.h to optimize the code + size_t volume = 1; + for (ParallelDim const &d : this->data) { + volume *= d.size; + } + return volume; +} + ParallelTensorShape::ParallelTensorShape(TensorShape const &tensor_shape) : dims(tensor_shape.dims), data_type(tensor_shape.data_type) {} @@ -23,6 +41,10 @@ int get_num_replica_dims(ParallelTensorShape const &shape) { return count(shape.dims, is_replica_dim); } +TensorShape get_piece_shape(ParallelTensorShape const ¶ll_tensor_shape) { + return TensorShape(parall_tensor_shape.dims, parall_tensor_shape.data_type); +} + int get_num_replicas(ParallelTensorShape const &shape) { return product( transform(filter(as_vector(shape.dims), is_replica_dim), diff --git a/lib/runtime/src/cuda_allocator.cc b/lib/runtime/src/cuda_allocator.cc new file mode 100644 index 0000000000..f60bf513ee --- /dev/null +++ b/lib/runtime/src/cuda_allocator.cc @@ -0,0 +1,16 @@ +#include "cuda_allocator.h" +#include "kernels/device.h" + +namespace FlexFlow { + +void *CudaAllocator::allocate(size_t size) { + void *ptr; + checkCUDA(cudaMalloc(&ptr, size)); + return ptr; +} + +void CudaAllocator::deallocate(void *ptr) { + checkCUDA(cudaFree(ptr)); +} + +} // namespace FlexFlow diff --git a/lib/runtime/src/cuda_allocator.h b/lib/runtime/src/cuda_allocator.h new file mode 100644 index 0000000000..94f47ba035 --- /dev/null +++ b/lib/runtime/src/cuda_allocator.h @@ -0,0 +1,18 @@ +#ifndef _FLEXFLOW_RUNTIME_CUDA_ALLOCATOR_H +#define _FLEXFLOW_RUNTIME_CUDA_ALLOCATOR_H + +#include "kernels/allocation.h" +#include + +namespace FlexFlow { + +struct CudaAllocator : public IAllocator { + ~CudaAllocator() override; + + void *allocate(size_t) override; + void deallocate(void *) override; +}; + +} 
// namespace FlexFlow + +#endif diff --git a/lib/runtime/src/ops/aggregate.cc b/lib/runtime/src/ops/aggregate.cc index e9874c877a..c1136f513e 100644 --- a/lib/runtime/src/ops/aggregate.cc +++ b/lib/runtime/src/ops/aggregate.cc @@ -467,6 +467,8 @@ CostMetrics fwd_binding.bind_arg(PROFILING, settings); + fwd_binding.bind_arg(ATTRS, attrs); + auto fwd_accessor = env.get_fwd_accessor(AGGREGATE_FWD_TASK_ID, fwd_binding); SimTaskBinding bwd_binding = infer_bwd_binding(fwd_binding); diff --git a/lib/runtime/src/realm_allocator.h b/lib/runtime/src/realm_allocator.h index 210fd8a050..95957b643d 100644 --- a/lib/runtime/src/realm_allocator.h +++ b/lib/runtime/src/realm_allocator.h @@ -15,10 +15,7 @@ struct RealmAllocator : public IAllocator { ~RealmAllocator() override; void *allocate(size_t) override; - void deallocate(void *) override; - -private: - Legion::Memory memory; + void deallocate(void *) override; private: Legion::Memory memory; stack_vector instances; }; diff --git a/lib/runtime/src/sim_environment.cc b/lib/runtime/src/sim_environment.cc new file mode 100644 index 0000000000..8c599ca2e5 --- /dev/null +++ b/lib/runtime/src/sim_environment.cc @@ -0,0 +1,26 @@ +#include "sim_environment.h" + +namespace FlexFlow { + + void SimTaskBinding::bind(slot_id id, ParallelTensorShape const &shape) { + tensor_shape_bindings.insert(id, shape); + } + void SimTaskBinding::bind(slot_id id, TensorShape const &shape) { + tensor_shape_bindings.insert(id, shape); + } + + void SimTaskBinding::bind(slot_id id, + InputVariadicParallelTensorDesc const &desc) { + this->tensor_shape_bindings.insert(id, desc); + } + + void SimTaskBinding::bind_arg(slot_id id, SimArg const &arg) { + arg_bindings.insert(id, arg); + } + + TaskArgumentAccessor SimEnvironment::get_fwd_accessor( + task_id_t tid, SimTaskBinding const &sim_task_binding) { + NOT_IMPLEMENTED(); // TODO + } + +} // namespace FlexFlow diff --git a/lib/runtime/src/sim_environment.h b/lib/runtime/src/sim_environment.h index
d08bef653f..57b00eda34 100644 --- a/lib/runtime/src/sim_environment.h +++ b/lib/runtime/src/sim_environment.h @@ -4,9 +4,11 @@ #include "cost_metrics.h" #include "kernels/accessor.h" #include "kernels/allocation.h" +#include "kernels/profiling.h" #include "op-attrs/parallel_tensor_shape.h" #include "task_spec/op_task_invocation.h" #include "task_spec/task_argument_accessor.h" +#include #include namespace FlexFlow { @@ -23,6 +25,11 @@ struct InputVariadicParallelTensorDesc { IsTrainable trainable; }; +using SimArg = variant; +using SimTensorSpec = variant; + struct SimTaskBinding { void bind(slot_id, ParallelTensorShape const &); void bind_untrainable(slot_id, ParallelTensorShape const &); @@ -35,7 +42,10 @@ struct SimTaskBinding { void bind(slot_id, InputVariadicParallelTensorDesc const &); template - void bind_arg(slot_id, T const &); + void bind_arg(slot_id id, T const &name); + + std::unordered_map arg_bindings; + std::unordered_map tensor_shape_bindings; }; SimTaskBinding infer_bwd_binding(SimTaskBinding const &); diff --git a/lib/runtime/src/task_spec/task_argument_accessor.cc b/lib/runtime/src/task_spec/task_argument_accessor.cc new file mode 100644 index 0000000000..26ad9e3016 --- /dev/null +++ b/lib/runtime/src/task_spec/task_argument_accessor.cc @@ -0,0 +1,80 @@ +#include "task_argument_accessor.h" + +namespace FlexFlow { + + template + T const &LocalTaskArgumentAccessor::get_argument(slot_id slot) const { + if (slot == PROFILING) { + return get(this->arg_bindings.at(slot)); + } + else if (slot == ATTRS) { + return get(this->arg_bindings.at(slot)); + } + else { + throw std::runtime_error( + "Unknown Slot ID in LocalTaskArgumentAccessor::get_argument"); + } + } + + void *LocalTaskArgumentAccessor::allocate(size_t size) { + void *ptr = + local_allocator.allocate(size); // Note: how(when) to free this memory?
+ void *cpu_ptr = malloc(size); + memory_usage += size; // update the usage of memory + memset(cpu_ptr, 0, size); + checkCUDA( + cudaMemcpy(ptr, cpu_ptr, size, cudaMemcpyHostToDevice)); // fill ptr + free(cpu_ptr); + return ptr; + } + + template + privilege_mode_to_accessor LocalTaskArgumentAccessor::get_tensor( + slot_id slot) const { + SimTensorSpec const &spec = this->tensor_shape_bindings.at(slot); + if (slot == GATE_PREDS || slot == GATE_ASSIGN) { + InputParallelTensorDesc gate_preds = get( + this->sim_task_binding->tensor_shape_bindings.at(slot)); + DataType data_type = gate_preds.shape.data_type; + ArrayShape array_shape = { + gate_preds.shape.dims.get_dims()}; // gate_preds.shape.dims.get_dims() + // return std::vector + size_t shape_size = gate_preds.shape.dims.get_volume() * size_of(data_type); + void *ptr = allocate(shape_size); + return gate_preds_accessor{data_type, array_shape, ptr}; + } else if (slot == OUTPUT) { + ParallelTensorShape output_shape = get( + this->sim_task_binding->tensor_shape_bindings.at(slot)); + DataType data_type = output_shape.data_type; + ArrayShape array_shape = { + output_shape.dims.get_dims()}; // output_shape.dims.get_dims() return + // std::vector + size_t shape_size = output_shape.dims.get_volume() * size_of(data_type); + void *ptr = allocate(shape_size); + return {data_type, array_shape, ptr}; + } else { + throw mk_runtime_error( + "Unknown Slot ID in LocalTaskArgumentAccessor::get_tensor"); + } + } + + template + std::vector> get_variadic_tensor( + slot_id slot) const override { + std::vector> result; + InputVariadicParallelTensorDesc const &spec = + get( + this->sim_task_binding->tensor_shape_bindings.at(slot)); + for (auto const &shape : spec.shapes) { + ArrayShape array_shape = { + shape.dims + .get_dims()}; // shape.dims.get_dims() return std::vector + size_t shape_size = shape.dims.get_volume() * size_of(shape.data_type); + void *ptr = allocate(shape_size); + DataType data_type = shape.data_type; + 
result.push_back({data_type, array_shape, ptr}); + } + return result; + } + +} // namespace FlexFlow diff --git a/lib/runtime/src/task_spec/task_argument_accessor.h b/lib/runtime/src/task_spec/task_argument_accessor.h index 4a4cf64512..f064aa4b9a 100644 --- a/lib/runtime/src/task_spec/task_argument_accessor.h +++ b/lib/runtime/src/task_spec/task_argument_accessor.h @@ -2,6 +2,7 @@ #define _FLEXFLOW_RUNTIME_SRC_TASK_ARGUMENT_ACCESSOR_H #include "accessor.h" +#include "kernels/allocation.h" #include "runtime/config.h" #include "task_invocation.h" #include "utils/exception.h" @@ -70,104 +71,154 @@ region_idx_t get_region_idx(TaskArgumentsFormat const &, parallel_tensor_guid_t const &); DataType get_datatype(TaskArgumentsFormat const &, region_idx_t const &); -struct TaskArgumentAccessor { - TaskArgumentAccessor(Legion::Task const *task, - std::vector const ®ions, - Legion::Context ctx, - Legion::Runtime *runtime); +struct ITaskArgumentAccessor { + virtual template + T const &get_argument(slot_id slot) const = 0; + + virtual template + privilege_mode_to_accessor get_tensor(slot_id slot) const = 0; + + virtual template + std::vector> + get_variadic_tensor(slot_id slot) const = 0; + + virtual template + optional get_optional_argument(slot_id) const = 0; + + virtual template + std::vector get_variadic_argument(slot_id) const = 0; + + virtual template + privilege_mode_to_accessor + get_generic_accessor(region_idx_t const &idx) const = 0; + + virtual template + privilege_mode_to_accessor get_tensor_grad(slot_id slot) const = 0; + virtual template + std::vector> + get_variadic_tensor_grad(slot_id slot) const = 0; + + virtual size_t get_device_idx() const = 0; +}; + +struct LegionTaskArgumentAccessor : public ITaskArgumentAccessor { +public: template - T const &get_argument(slot_id slot) const { - TaskArgumentFormat arg_fmt = this->args_fmt.args.at(slot); - std::type_index actual_type = arg_fmt.type; - std::type_index requested_type = {typeid(T)}; + T const 
&get_argument(slot_id slot) const override; - if (actual_type != requested_type) { - throw mk_runtime_error( - "Type mismatch in argument access (\"{}\" != \"{}\")", - actual_type.name(), - requested_type.name()); - } + template + privilege_mode_to_accessor get_tensor(slot_id slot) const override; - void *start_ptr = &((std::uint8_t *)this->task->args)[arg_fmt.start]; - Legion::Deserializer dez(start_ptr, arg_fmt.size()); + template + std::vector> + get_variadic_tensor(slot_id slot) const override; - return ff_task_deserialize(dez); - } + template + optional get_optional_argument(slot_id) const override; template - optional get_optional_argument(slot_id) const { - NOT_IMPLEMENTED(); - } + std::vector get_variadic_argument(slot_id) const override; + + template + privilege_mode_to_accessor get_tensor_grad(slot_id slot) const override; + + template + std::vector> + get_variadic_tensor_grad(slot_id slot) const override; + + size_t get_device_idx() const override; + LegionTaskArgumentAccessor(Legion::Task const *task, + std::vector const ®ions, + Legion::Context ctx, + Legion::Runtime *runtime) + : task(task), regions(regions), ctx(ctx), runtime(runtime) {} + +private: + Legion::Task const *task; + std::vector const ®ions; + Legion::Context ctx; + Legion::Runtime *runtime; + TaskArgumentsFormat const &args_fmt; +}; + +struct LocalTaskArgumentAccessor : public ITaskArgumentAccessor { +public: template - std::vector get_variadic_argument(slot_id) const { - NOT_IMPLEMENTED(); - } + T const &get_argument(slot_id slot) const override; template - privilege_mode_to_accessor - get_generic_accessor(region_idx_t const &idx) const { - auto tensor_privs = get_permissions(this->args_fmt, idx); - if (tensor_privs != PRIV) { - throw mk_runtime_error( - "Privilege mismatch while accessing tensor: {} != {}", - tensor_privs, - PRIV); - } - - return helperGetGenericTensorAccessor( - get_datatype(this->args_fmt, idx), - regions[idx.value()], - task->regions[idx.value()], - FID_DATA, - ctx, 
- runtime); - } + privilege_mode_to_accessor get_tensor(slot_id slot) const override; template - privilege_mode_to_accessor get_tensor(slot_id slot) const { - auto argument_format = - get(this->args_fmt.region_idxs.at(slot)); + std::vector> + get_variadic_tensor(slot_id slot) const override; - return this->get_generic_accessor(argument_format); - } + template + optional get_optional_argument(slot_id) const override; + + template + std::vector get_variadic_argument(slot_id) const override; template - privilege_mode_to_accessor get_tensor_grad(slot_id slot) const { - NOT_IMPLEMENTED(); - } + privilege_mode_to_accessor get_tensor_grad(slot_id slot) const override; template std::vector> - get_variadic_tensor(slot_id slot) const { - std::vector> result; + get_variadic_tensor_grad(slot_id slot) const override; + + size_t get_device_idx() const override; - auto argument_format = - get(this->args_fmt.region_idxs.at(slot)); - for (NonvariadicFormat const &argument : argument_format) { - result.push_back(this->get_generic_accessor(argument)); - } + LocalTaskArgumentAccessor( + std::shared_ptr &sim_task_binding) + : sim_task_binding(sim_task_binding), memory_usage(0) { + local_allocator = Allocator::create(); + } - return result; + size_t get_memory_usage() const { + return memory_usage; + } + + void *allocate(size_t size); + void deallocate(void *ptr); + +private: + std::shared_ptr sim_task_binding; + Allocator local_allocator; + size_t memory_usage; +}; + +struct TaskArgumentAccessor { + template + T const &get_argument(slot_id slot) const { + return this->ptr->get_argument(slot); + } + + template + privilege_mode_to_accessor get_tensor(slot_id slot) const { + return this->ptr->get_tensor(slot); } template std::vector> - get_variadic_tensor_grad(slot_id slot) const { - NOT_IMPLEMENTED(); + get_variadic_tensor(slot_id slot) const { + return this->ptr->get_variadic_tensor(slot); } - size_t get_device_idx() const { - NOT_IMPLEMENTED(); + template + static + typename 
std::enable_if::value, + TaskArgumentAccessor>::type + create(Args &&...args) { + return TaskArgumentAccessor( + std::make_shared(std::forward(args)...)); } private: - Legion::Task const *task; - std::vector const ®ions; - Legion::Context ctx; - Legion::Runtime *runtime; - TaskArgumentsFormat const &args_fmt; + TaskArgumentAccessor(std::shared_ptr &ptr) + : ptr(ptr) {} + std::shared_ptr ptr; }; } // namespace FlexFlow diff --git a/lib/runtime/test/src/test_op_task_spec.cc b/lib/runtime/test/src/test_op_task_spec.cc index 821ef0dba6..bb0bee567c 100644 --- a/lib/runtime/test/src/test_op_task_spec.cc +++ b/lib/runtime/test/src/test_op_task_spec.cc @@ -44,4 +44,4 @@ TEST_CASE("OpTaskBinding") { correct_bwd.bind_grad(2, input_tensor(2).grad()); CHECK(correct_bwd == bwd); -} \ No newline at end of file +} diff --git a/lib/runtime/test/src/test_serialization.cc b/lib/runtime/test/src/test_serialization.cc index caf5cd7c93..ef18764efb 100644 --- a/lib/runtime/test/src/test_serialization.cc +++ b/lib/runtime/test/src/test_serialization.cc @@ -44,4 +44,4 @@ TEST_CASE("Serialization") { RC_ASSERT(post_op == pre_op); })) } -} \ No newline at end of file +} diff --git a/lib/utils/include/utils/containers.h b/lib/utils/include/utils/containers.h index df156c9060..236e4b6eac 100644 --- a/lib/utils/include/utils/containers.h +++ b/lib/utils/include/utils/containers.h @@ -714,4 +714,4 @@ reversed_container_t reversed_container(C const &c) { } // namespace FlexFlow -#endif \ No newline at end of file +#endif diff --git a/lib/utils/include/utils/internal_only_tag.h b/lib/utils/include/utils/internal_only_tag.h index 649ce4cf12..1e5f8571d0 100644 --- a/lib/utils/include/utils/internal_only_tag.h +++ b/lib/utils/include/utils/internal_only_tag.h @@ -7,4 +7,4 @@ struct should_only_be_used_internally_tag_t { }; } // namespace FlexFlow -#endif \ No newline at end of file +#endif diff --git a/lib/utils/include/utils/visitable.h b/lib/utils/include/utils/visitable.h index 
4f0dc50cbe..6a671400cd 100644 --- a/lib/utils/include/utils/visitable.h +++ b/lib/utils/include/utils/visitable.h @@ -457,4 +457,4 @@ struct Arbitrary< _GET_VISITABLE_CASE_FROM_NUM_ARGS(__VA_ARGS__), \ __VA_ARGS__) -#endif \ No newline at end of file +#endif diff --git a/lib/utils/test/src/test_algorithms.cc b/lib/utils/test/src/test_algorithms.cc index 35534f5b3a..7748bd3ff0 100644 --- a/lib/utils/test/src/test_algorithms.cc +++ b/lib/utils/test/src/test_algorithms.cc @@ -220,4 +220,4 @@ TEST_CASE("get_weakly_connected_components") { }; CHECK(get_weakly_connected_components(g) == expected_components); -} \ No newline at end of file +} diff --git a/python/flexflow_c.cc b/python/flexflow_c.cc index 4c16dd82e9..1d3d77afe5 100644 --- a/python/flexflow_c.cc +++ b/python/flexflow_c.cc @@ -33,7 +33,9 @@ class FFCObjectWrapper { t_.impl = const_cast(static_cast(t)); \ return t_; \ } \ - static T unwrap(T_ t_) { return static_cast(t_.impl); } \ + static T unwrap(T_ t_) { \ + return static_cast(t_.impl); \ + } \ static const T unwrap_const(const T_ t_) { \ return static_cast(t_.impl); \ } diff --git a/scripts/format.sh b/scripts/format.sh index 2ed97b8f0a..9610dc2d26 100755 --- a/scripts/format.sh +++ b/scripts/format.sh @@ -6,8 +6,8 @@ GIT_ROOT="$(git rev-parse --show-toplevel)" cd "$GIT_ROOT" TOOLS_PATH="$GIT_ROOT/.tools" -RELEASE="master-1d7ec53d" -CLANG_FORMAT_VERSION="15" +RELEASE="master-f4f85437" +CLANG_FORMAT_VERSION="16" CLANG_FORMAT_PATH="$TOOLS_PATH/clang-format-$CLANG_FORMAT_VERSION-$RELEASE" mkdir -p "$TOOLS_PATH" @@ -68,5 +68,10 @@ if [[ ! -e $CLANG_FORMAT_PATH ]]; then chmod u+x "$CLANG_FORMAT_PATH" fi +CLANG_FORMAT_CONFIG="$GIT_ROOT/.clang-format-for-format-sh" mapfile -t FILES < <(git ls-files ':!:triton/**' '*.h' '*.cc' '*.cpp' '*.cu' '*.c') -"$CLANG_FORMAT_PATH" -i "${FILES[@]}" +if [[ -f $CLANG_FORMAT_CONFIG ]]; then + "$CLANG_FORMAT_PATH" --style=file:"$CLANG_FORMAT_CONFIG" -i "${FILES[@]}" +else + echo "error" +fi