From 05645a75e489644239ff188ec60f098d3b95fd06 Mon Sep 17 00:00:00 2001 From: Ruihang Lai Date: Mon, 15 Sep 2025 14:00:46 -0400 Subject: [PATCH] [FlashInfer] Update include path and interface This PR updates the include path for FlashInfer JIT compilation, and also updates the plan function interface for attention prefill computation, to align with the recent interface change in flashinfer-ai/flashinfer#1661. --- python/tvm/relax/backend/cuda/flashinfer.py | 13 +++++++++---- src/runtime/vm/attn_backend.h | 6 ++++-- 2 files changed, 13 insertions(+), 6 deletions(-) diff --git a/python/tvm/relax/backend/cuda/flashinfer.py b/python/tvm/relax/backend/cuda/flashinfer.py index 1fea39e9a221..f1af2f3d1573 100644 --- a/python/tvm/relax/backend/cuda/flashinfer.py +++ b/python/tvm/relax/backend/cuda/flashinfer.py @@ -141,8 +141,8 @@ def get_object_file_path(src: Path) -> Path: ) include_paths += [ Path(tvm_home).resolve() / "include", - Path(tvm_home).resolve() / "ffi" / "include", - Path(tvm_home).resolve() / "ffi" / "3rdparty" / "dlpack" / "include", + Path(tvm_home).resolve() / "3rdparty" / "tvm-ffi" / "include", + Path(tvm_home).resolve() / "3rdparty" / "tvm-ffi" / "3rdparty" / "dlpack" / "include", Path(tvm_home).resolve() / "3rdparty" / "dmlc-core" / "include", ] else: @@ -160,8 +160,13 @@ def get_object_file_path(src: Path) -> Path: # The package is installed from source. 
include_paths += [ tvm_package_path.parent.parent / "include", - tvm_package_path.parent.parent / "ffi" / "include", - tvm_package_path.parent.parent / "ffi" / "3rdparty" / "dlpack" / "include", + tvm_package_path.parent.parent / "3rdparty" / "tvm-ffi" / "include", + tvm_package_path.parent.parent + / "3rdparty" + / "tvm-ffi" + / "3rdparty" + / "dlpack" + / "include", tvm_package_path.parent.parent / "3rdparty" / "dmlc-core" / "include", ] else: diff --git a/src/runtime/vm/attn_backend.h b/src/runtime/vm/attn_backend.h index bc58d1c9e1d8..ea5f49c6c08a 100644 --- a/src/runtime/vm/attn_backend.h +++ b/src/runtime/vm/attn_backend.h @@ -176,7 +176,8 @@ class FlashInferPagedPrefillFunc : public PagedPrefillFunc { plan_func_(float_workspace_buffer, int_workspace_buffer, page_locked_int_workspace_buffer, qo_indptr->as_tensor(), page_indptr->as_tensor(), IntTuple(std::move(kv_len)), total_qo_len, batch_size, num_qo_heads, num_kv_heads, page_size, - /*enable_cuda_graph=*/false, qk_head_dim, v_head_dim, causal, copy_stream) + /*enable_cuda_graph=*/false, qk_head_dim, v_head_dim, causal, + /*window_left=*/-1, copy_stream) .cast(); } else if (attn_kind == AttnKind::kMLA) { plan_info_vec = @@ -280,7 +281,8 @@ class FlashInferRaggedPrefillFunc : public RaggedPrefillFunc { plan_func_(float_workspace_buffer, int_workspace_buffer, page_locked_int_workspace_buffer, qo_indptr->as_tensor(), kv_indptr->as_tensor(), IntTuple(std::move(kv_len)), total_qo_len, batch_size, num_qo_heads, num_kv_heads, /*page_size=*/1, - /*enable_cuda_graph=*/false, qk_head_dim, v_head_dim, causal, copy_stream) + /*enable_cuda_graph=*/false, qk_head_dim, v_head_dim, causal, + /*window_left=*/-1, copy_stream) .cast(); }