From 14a9ab6b87c8edde81e6d80b04dbed6e8597e5b2 Mon Sep 17 00:00:00 2001 From: Chenfan Date: Wed, 6 Aug 2025 13:44:48 +0800 Subject: [PATCH 1/3] Codegen fix --- src/relax/backend/contrib/codegen_c/codegen_c.h | 15 ++++++--------- src/relax/backend/contrib/cutlass/codegen.cc | 1 + 2 files changed, 7 insertions(+), 9 deletions(-) diff --git a/src/relax/backend/contrib/codegen_c/codegen_c.h b/src/relax/backend/contrib/codegen_c/codegen_c.h index 795b691dec4c..aa03a8bc1a49 100644 --- a/src/relax/backend/contrib/codegen_c/codegen_c.h +++ b/src/relax/backend/contrib/codegen_c/codegen_c.h @@ -85,29 +85,26 @@ class CodegenCBase { code_stream_ << "#endif\n"; code_stream_ << "TVM_DLL int32_t "; code_stream_ << func_name << "("; - code_stream_ << "TVMValue* args, "; - code_stream_ << "int* type_code, "; - code_stream_ << "int num_args, "; - code_stream_ << "TVMValue* out_value, "; - code_stream_ << "int* out_type_code) {\n"; + code_stream_ << "tvm::ffi::PackedArgs args, "; + code_stream_ << "tvm::ffi::AnyView* out_value) {\n"; } /*! - * \brief Adds a line to convert TVMValue args to DLTensors + * \brief Adds a line to convert tvm::ffi::PackedArgs args to DLTensors */ void PrintArgToData(int idx) { PrintIndents(); code_stream_ << "DLTensor* arg" << idx << " = "; - code_stream_ << "(DLTensor*)(((TVMValue*)args)[" << idx << "].v_handle);\n"; + code_stream_ << "(DLTensor*)(args[" << idx << "].cast<DLTensor*>());\n"; } /*! - * \brief Adds a line to convert TVMValue rets to DLTensors + * \brief Adds a line to convert tvm::ffi::PackedArgs rets to DLTensors */ void PrintRetToData(int idx) { PrintIndents(); code_stream_ << "DLTensor* ret" << idx << " = "; - code_stream_ << "(DLTensor*)(((TVMValue*)args)[" << idx << "].v_handle);\n"; + code_stream_ << "(DLTensor*)(args[" << idx << "].cast<DLTensor*>());\n"; } /*! 
diff --git a/src/relax/backend/contrib/cutlass/codegen.cc b/src/relax/backend/contrib/cutlass/codegen.cc index b6307af0237b..b7b9404a5fb1 100644 --- a/src/relax/backend/contrib/cutlass/codegen.cc +++ b/src/relax/backend/contrib/cutlass/codegen.cc @@ -67,6 +67,7 @@ runtime::Module Finalize(const std::string& code, const Array& func_name default_headers << "#include \n"; default_headers << "#include \n"; default_headers << "#include \n"; + default_headers << "#include \n"; const auto pf = tvm::ffi::Function::GetGlobalRequired("runtime.CSourceModuleCreate"); VLOG(1) << "Generated CUTLASS code:" << std::endl << code; From d58c14059eb2096d72c1c94a81b270e540f128fb Mon Sep 17 00:00:00 2001 From: Chenfan Date: Sat, 9 Aug 2025 22:34:56 +0800 Subject: [PATCH 2/3] Update --- python/tvm/contrib/cutlass/conv2d_operation.py | 8 ++++---- python/tvm/contrib/cutlass/gemm_operation.py | 6 +++--- src/relax/backend/contrib/codegen_c/codegen_c.h | 2 +- src/relax/backend/contrib/cutlass/codegen.cc | 1 - 4 files changed, 8 insertions(+), 9 deletions(-) diff --git a/python/tvm/contrib/cutlass/conv2d_operation.py b/python/tvm/contrib/cutlass/conv2d_operation.py index a37e46f4046c..361bcb54e532 100644 --- a/python/tvm/contrib/cutlass/conv2d_operation.py +++ b/python/tvm/contrib/cutlass/conv2d_operation.py @@ -418,17 +418,17 @@ def instantiate_conv2d_template(attrs): size_t workspace_size = conv2d_op.get_workspace_size(arguments); cutlass::device_memory::allocation<uint8_t> workspace(workspace_size); cutlass::Status status = conv2d_op.can_implement(arguments); - CHECK(status == cutlass::Status::kSuccess); + TVM_FFI_ICHECK(status == cutlass::Status::kSuccess); ${split_k_reset} status = conv2d_op.initialize(arguments, workspace.get()); - CHECK(status == cutlass::Status::kSuccess); + TVM_FFI_ICHECK(status == cutlass::Status::kSuccess); ${split_k_update} auto func = tvm::ffi::Function::GetGlobalRequired("runtime.get_cuda_stream"); cudaStream_t stream = static_cast<cudaStream_t>(func().cast<void*>()); status = 
conv2d_op(stream); - CHECK(status == cutlass::Status::kSuccess); + TVM_FFI_ICHECK(status == cutlass::Status::kSuccess); ${split_k_reduction} """ @@ -439,7 +439,7 @@ def instantiate_conv2d_template(attrs): split_k_update = """ arguments.output_op = {ElementComputeEpilogue(1), ElementComputeEpilogue(0)}; status = conv2d_op.update(arguments, workspace.get()); - CHECK(status == cutlass::Status::kSuccess); + TVM_FFI_ICHECK(status == cutlass::Status::kSuccess); """ split_k_reduction = """ diff --git a/python/tvm/contrib/cutlass/gemm_operation.py b/python/tvm/contrib/cutlass/gemm_operation.py index 46b68c29ee63..65dc5da772c1 100644 --- a/python/tvm/contrib/cutlass/gemm_operation.py +++ b/python/tvm/contrib/cutlass/gemm_operation.py @@ -341,15 +341,15 @@ def instantiate_gemm_template(attrs): cutlass::device_memory::allocation<uint8_t> workspace(workspace_size); ${kernel} gemm_op; cutlass::Status status = gemm_op.can_implement(arguments); - CHECK(status == cutlass::Status::kSuccess); + TVM_FFI_ICHECK(status == cutlass::Status::kSuccess); status = gemm_op.initialize(arguments, workspace.get()); - CHECK(status == cutlass::Status::kSuccess); + TVM_FFI_ICHECK(status == cutlass::Status::kSuccess); auto func = tvm::ffi::Function::GetGlobalRequired("runtime.get_cuda_stream"); cudaStream_t stream = static_cast<cudaStream_t>(func().cast<void*>()); status = gemm_op(stream); - CHECK(status == cutlass::Status::kSuccess); + TVM_FFI_ICHECK(status == cutlass::Status::kSuccess); """ op_type = attrs["op_type"] has_bias = "bias" in op_type diff --git a/src/relax/backend/contrib/codegen_c/codegen_c.h b/src/relax/backend/contrib/codegen_c/codegen_c.h index aa03a8bc1a49..48e3dc1d36fb 100644 --- a/src/relax/backend/contrib/codegen_c/codegen_c.h +++ b/src/relax/backend/contrib/codegen_c/codegen_c.h @@ -83,7 +83,7 @@ class CodegenCBase { code_stream_ << "#ifdef __cplusplus\n"; code_stream_ << "extern \"C\" {\n"; code_stream_ << "#endif\n"; - code_stream_ << "TVM_DLL int32_t "; + code_stream_ << "int32_t "; code_stream_ << 
func_name << "("; code_stream_ << "tvm::ffi::PackedArgs args, "; code_stream_ << "tvm::ffi::AnyView* out_value) {\n"; diff --git a/src/relax/backend/contrib/cutlass/codegen.cc b/src/relax/backend/contrib/cutlass/codegen.cc index b7b9404a5fb1..b6307af0237b 100644 --- a/src/relax/backend/contrib/cutlass/codegen.cc +++ b/src/relax/backend/contrib/cutlass/codegen.cc @@ -67,7 +67,6 @@ runtime::Module Finalize(const std::string& code, const Array& func_name default_headers << "#include \n"; default_headers << "#include \n"; default_headers << "#include \n"; - default_headers << "#include \n"; const auto pf = tvm::ffi::Function::GetGlobalRequired("runtime.CSourceModuleCreate"); VLOG(1) << "Generated CUTLASS code:" << std::endl << code; From 9be2638bfd4ba899af62f1ce72faa38aad71d6f2 Mon Sep 17 00:00:00 2001 From: Tianqi Chen Date: Sat, 9 Aug 2025 13:58:37 -0400 Subject: [PATCH 3/3] Update codegen_c.h --- src/relax/backend/contrib/codegen_c/codegen_c.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/relax/backend/contrib/codegen_c/codegen_c.h b/src/relax/backend/contrib/codegen_c/codegen_c.h index 48e3dc1d36fb..7f04091fc178 100644 --- a/src/relax/backend/contrib/codegen_c/codegen_c.h +++ b/src/relax/backend/contrib/codegen_c/codegen_c.h @@ -83,7 +83,7 @@ class CodegenCBase { code_stream_ << "#ifdef __cplusplus\n"; code_stream_ << "extern \"C\" {\n"; code_stream_ << "#endif\n"; - code_stream_ << "int32_t "; + code_stream_ << "TVM_FFI_DLL_EXPORT int32_t "; code_stream_ << func_name << "("; code_stream_ << "tvm::ffi::PackedArgs args, "; code_stream_ << "tvm::ffi::AnyView* out_value) {\n";