From fd54823c101f100b98765449cb2ddd085b2a2cfa Mon Sep 17 00:00:00 2001 From: uvos Date: Wed, 11 Mar 2026 18:35:59 +0100 Subject: [PATCH] Get warp size at runtime warp_size is not known at compile time in HIP host code. --- ggml/src/ggml-cuda/gated_delta_net.cu | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ggml/src/ggml-cuda/gated_delta_net.cu b/ggml/src/ggml-cuda/gated_delta_net.cu index 003bdecacd0..a1b3a70cb49 100644 --- a/ggml/src/ggml-cuda/gated_delta_net.cu +++ b/ggml/src/ggml-cuda/gated_delta_net.cu @@ -146,7 +146,7 @@ static void launch_gated_delta_net( int64_t neqk1, int64_t rq3, float scale, cudaStream_t stream) { //TODO: Add chunked kernel for even faster pre-fill - constexpr uint32_t warp_size = ggml_cuda_get_physical_warp_size(); + const int warp_size = ggml_cuda_info().devices[ggml_cuda_get_device()].warp_size; const int num_warps = 4; dim3 grid_dims(H, n_seqs, (S_v + num_warps - 1) / num_warps); dim3 block_dims(warp_size <= S_v ? warp_size : S_v, num_warps, 1);