From c766229b71f1e92c12296f55c00698028a410266 Mon Sep 17 00:00:00 2001
From: Nexesenex <124105151+Nexesenex@users.noreply.github.com>
Date: Thu, 27 Jun 2024 17:45:09 +0200
Subject: [PATCH] CUDA: fix MMQ stream-k for --split-mode row #8167
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Co-Authored-By: Johannes Gäßler
---
 ggml-cuda/mmq.cuh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/ggml-cuda/mmq.cuh b/ggml-cuda/mmq.cuh
index f97bbd6e738..0b357fc0a1e 100644
--- a/ggml-cuda/mmq.cuh
+++ b/ggml-cuda/mmq.cuh
@@ -2476,7 +2476,7 @@ static void launch_mul_mat_q(ggml_backend_cuda_context & ctx, const mmq_args & a
 
     const dim3 block_nums_mmq(nsm, 1, 1);
 
-    ggml_cuda_pool & pool = ctx.pool();
+    ggml_cuda_pool & pool = ctx.pool(id);
     ggml_cuda_pool_alloc tmp_fixup(pool, block_nums_mmq.x * mmq_x*mmq_y);
 
     if (args.ne01 % mmq_y == 0) {