From 8717ac144ac31378f9efc389b4e66de3db699349 Mon Sep 17 00:00:00 2001 From: Sergiu <8598216+mzsergiu@users.noreply.github.com> Date: Fri, 10 Apr 2026 08:49:21 +0300 Subject: [PATCH 1/2] fix: crash when sending image under 2x2 pixels --- tools/mtmd/mtmd-image.cpp | 53 +++++++++++++++++++++------------------ 1 file changed, 28 insertions(+), 25 deletions(-) diff --git a/tools/mtmd/mtmd-image.cpp b/tools/mtmd/mtmd-image.cpp index 4f4eb5da690..c1a36011788 100644 --- a/tools/mtmd/mtmd-image.cpp +++ b/tools/mtmd/mtmd-image.cpp @@ -198,35 +198,38 @@ struct img_tool { private: // Bilinear resize function static void resize_bilinear(const clip_image_u8 & src, clip_image_u8 & dst, int target_width, int target_height) { - GGML_ASSERT(src.nx >= 2 && src.ny >= 2); + if (src.nx == 0 || src.ny == 0) { dst.nx = dst.ny = 0; dst.buf.clear(); return; } + if (target_width <= 0) target_width = 1; + if (target_height <= 0) target_height = 1; + dst.nx = target_width; dst.ny = target_height; dst.buf.resize(3 * target_width * target_height); - float x_ratio = static_cast(src.nx - 1) / target_width; - float y_ratio = static_cast(src.ny - 1) / target_height; - - for (int y = 0; y < target_height; y++) { - for (int x = 0; x < target_width; x++) { - float px = x_ratio * x; - float py = y_ratio * y; - int x_floor = std::min(static_cast(px), src.nx - 2); - int y_floor = std::min(static_cast(py), src.ny - 2); - float x_lerp = px - x_floor; - float y_lerp = py - y_floor; - - for (int c = 0; c < 3; c++) { - float top = lerp( - static_cast(src.buf[3 * (y_floor * src.nx + x_floor) + c]), - static_cast(src.buf[3 * (y_floor * src.nx + (x_floor + 1)) + c]), - x_lerp - ); - float bottom = lerp( - static_cast(src.buf[3 * ((y_floor + 1) * src.nx + x_floor) + c]), - static_cast(src.buf[3 * ((y_floor + 1) * src.nx + (x_floor + 1)) + c]), - x_lerp - ); - dst.buf[3 * (y * target_width + x) + c] = static_cast(lerp(top, bottom, y_lerp)); + float x_ratio = target_width > 1 ? static_cast(src.nx - 1) / (target_width - 1) : 0.0f; + float y_ratio = target_height > 1 ? static_cast(src.ny - 1) / (target_height - 1) : 0.0f; + + for (int y = 0; y < target_height; ++y) { + for (int x = 0; x < target_width; ++x) { + float px = x * x_ratio; + float py = y * y_ratio; + + int x0 = std::min(static_cast(px), src.nx - 1); + int y0 = std::min(static_cast(py), src.ny - 1); + int x1 = std::min(x0 + 1, src.nx - 1); + int y1 = std::min(y0 + 1, src.ny - 1); + + float xf = px - x0; + float yf = py - y0; + + for (int c = 0; c < 3; ++c) { + float top = lerp(static_cast(src.buf[3 * (y0 * src.nx + x0) + c]), + static_cast(src.buf[3 * (y0 * src.nx + x1) + c]), + xf); + float bottom = lerp(static_cast(src.buf[3 * (y1 * src.nx + x0) + c]), + static_cast(src.buf[3 * (y1 * src.nx + x1) + c]), + xf); + dst.buf[3 * (y * target_width + x) + c] = static_cast(lerp(top, bottom, yf)); } } } From ff5ef8278615a2462b79b50abdf3cc95cfb31c6f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Johannes=20G=C3=A4=C3=9Fler?= Date: Sat, 11 Apr 2026 18:52:11 +0200 Subject: [PATCH 2/2] CUDA: skip compilation of superfluous FA kernels (#21768) --- ggml/src/ggml-cuda/fattn.cu | 28 ++++++++++++++++++---------- 1 file changed, 18 insertions(+), 10 deletions(-) diff --git a/ggml/src/ggml-cuda/fattn.cu b/ggml/src/ggml-cuda/fattn.cu index addf93205ef..ea6607cd337 100644 --- a/ggml/src/ggml-cuda/fattn.cu +++ b/ggml/src/ggml-cuda/fattn.cu @@ -75,13 +75,17 @@ static void ggml_cuda_flash_attn_ext_mma_f16_switch_ncols2(ggml_backend_cuda_con return; } - if (use_gqa_opt && gqa_ratio % 2 == 0) { - ggml_cuda_flash_attn_ext_mma_f16_switch_ncols1(ctx, dst); + if constexpr (DKQ <= 256) { + if (use_gqa_opt && gqa_ratio % 2 == 0) { + ggml_cuda_flash_attn_ext_mma_f16_switch_ncols1(ctx, dst); + return; + } + + ggml_cuda_flash_attn_ext_mma_f16_switch_ncols1(ctx, dst); return; + } else { + GGML_ABORT("fatal error"); } - - ggml_cuda_flash_attn_ext_mma_f16_switch_ncols1(ctx, dst); - return; } if (use_gqa_opt && gqa_ratio > 4) { @@ -94,12 +98,16 @@ static void ggml_cuda_flash_attn_ext_mma_f16_switch_ncols2(ggml_backend_cuda_con return; } - if (use_gqa_opt && gqa_ratio > 1) { - ggml_cuda_flash_attn_ext_mma_f16_switch_ncols1(ctx, dst); - return; - } + if constexpr (DKQ <= 256) { + if (use_gqa_opt && gqa_ratio > 1) { + ggml_cuda_flash_attn_ext_mma_f16_switch_ncols1(ctx, dst); + return; + } - ggml_cuda_flash_attn_ext_mma_f16_switch_ncols1(ctx, dst); + ggml_cuda_flash_attn_ext_mma_f16_switch_ncols1(ctx, dst); + } else { + GGML_ABORT("fatal error"); + } } static void ggml_cuda_flash_attn_ext_mma_f16(ggml_backend_cuda_context & ctx, ggml_tensor * dst) {