From 13ad711c1ac9b1edd9beba1409b07480b7964e50 Mon Sep 17 00:00:00 2001 From: Abhijit Ramesh Date: Thu, 18 Dec 2025 15:24:15 +0400 Subject: [PATCH 1/7] ggml webgpu: add SOFTPLUS unary operator Implements SOFTPLUS (log(1 + exp(x))) with f16/f32 support. Uses f32 precision for intermediate calculations to prevent f16 overflow. * Add shader implementation and 4 variants (f32/f16, inplace/non-inplace) * Register pipelines and device support * Follow Vulkan backend numerical stability pattern --- ggml/src/ggml-webgpu/ggml-webgpu.cpp | 11 +++++++++ .../ggml-webgpu/wgsl-shaders/unary_op.wgsl | 23 ++++++++++++++++++- 2 files changed, 33 insertions(+), 1 deletion(-) diff --git a/ggml/src/ggml-webgpu/ggml-webgpu.cpp b/ggml/src/ggml-webgpu/ggml-webgpu.cpp index d0e99b6fe29..5807ab0fad3 100644 --- a/ggml/src/ggml-webgpu/ggml-webgpu.cpp +++ b/ggml/src/ggml-webgpu/ggml-webgpu.cpp @@ -2273,6 +2273,16 @@ static void ggml_webgpu_init_unary_pipeline(webgpu_context & webgpu_ctx) { ggml_webgpu_create_pipeline(webgpu_ctx->device, wgsl_xielu_inplace_f32, "xielu_inplace_f32", constants); webgpu_ctx->unary_pipelines[GGML_UNARY_OP_XIELU][GGML_TYPE_F16][1] = ggml_webgpu_create_pipeline(webgpu_ctx->device, wgsl_xielu_inplace_f16, "xielu_inplace_f16", constants); + + // SOFTPLUS + webgpu_ctx->unary_pipelines[GGML_UNARY_OP_SOFTPLUS][GGML_TYPE_F32][0] = + ggml_webgpu_create_pipeline(webgpu_ctx->device, wgsl_softplus_f32, "softplus_f32", constants); + webgpu_ctx->unary_pipelines[GGML_UNARY_OP_SOFTPLUS][GGML_TYPE_F16][0] = + ggml_webgpu_create_pipeline(webgpu_ctx->device, wgsl_softplus_f16, "softplus_f16", constants); + webgpu_ctx->unary_pipelines[GGML_UNARY_OP_SOFTPLUS][GGML_TYPE_F32][1] = + ggml_webgpu_create_pipeline(webgpu_ctx->device, wgsl_softplus_inplace_f32, "softplus_inplace_f32", constants); + webgpu_ctx->unary_pipelines[GGML_UNARY_OP_SOFTPLUS][GGML_TYPE_F16][1] = + ggml_webgpu_create_pipeline(webgpu_ctx->device, wgsl_softplus_inplace_f16, "softplus_inplace_f16", constants); } static 
void ggml_webgpu_init_scale_pipeline(webgpu_context & webgpu_ctx) { @@ -2527,6 +2537,7 @@ static bool ggml_backend_webgpu_device_supports_op(ggml_backend_dev_t dev, const case GGML_UNARY_OP_HARDSIGMOID: case GGML_UNARY_OP_EXP: case GGML_UNARY_OP_GELU_ERF: + case GGML_UNARY_OP_SOFTPLUS: case GGML_UNARY_OP_XIELU: supports_op = supports_op = (op->type == GGML_TYPE_F32 || op->type == GGML_TYPE_F16) && (src0->type == op->type); diff --git a/ggml/src/ggml-webgpu/wgsl-shaders/unary_op.wgsl b/ggml/src/ggml-webgpu/wgsl-shaders/unary_op.wgsl index d474ab107b4..582e34f1611 100644 --- a/ggml/src/ggml-webgpu/wgsl-shaders/unary_op.wgsl +++ b/ggml/src/ggml-webgpu/wgsl-shaders/unary_op.wgsl @@ -16,7 +16,8 @@ "HARDSWISH_FUNC": "{{MUTATE}}[dst_i] = src[src_i] * min(1.0, max(0.0, (src[src_i] + 3.0) / 6.0));", "GELU_FUNC": "{{MUTATE}}[dst_i] = 0.5 * src[src_i] * (1.0 + tanh(clamp(sqrt(2.0 / 3.14159265) * (src[src_i] + 0.044715 * pow(src[src_i], 3.0)), -9.010913, 9.010913))); // Regarding tanh() domain restrictions in wgsl https://github.com/gpuweb/gpuweb/issues/4458", "GELU_QUICK_FUNC": "{{MUTATE}}[dst_i] = src[src_i] * 0.5 * (1.0 + tanh(clamp(0.79788456 * (src[src_i] + 0.044715 * src[src_i] * src[src_i] * src[src_i]), -9.010913, 9.010913))); // Regarding tanh() domain restrictions in wgsl https://github.com/gpuweb/gpuweb/issues/4458", - "GELU_ERF_FUNC": "{{MUTATE}}[dst_i] = 0.5 * src[src_i] * (1.0 + tanh(clamp(0.79788456 * (src[src_i] + 0.044715 * src[src_i] * src[src_i] * src[src_i]), -9.010913, 9.010913))); // Regarding tanh() domain restrictions in wgsl https://github.com/gpuweb/gpuweb/issues/4458" + "GELU_ERF_FUNC": "{{MUTATE}}[dst_i] = 0.5 * src[src_i] * (1.0 + tanh(clamp(0.79788456 * (src[src_i] + 0.044715 * src[src_i] * src[src_i] * src[src_i]), -9.010913, 9.010913))); // Regarding tanh() domain restrictions in wgsl https://github.com/gpuweb/gpuweb/issues/4458", + "SOFTPLUS_FUNC": "{ let src_f32 = f32(src[src_i]); {{MUTATE}}[dst_i] = {{TYPE}}(select(log(1.0 + exp(src_f32)), 
src_f32, src_f32 > 20.0)); } // Cast to f32 to prevent exp() overflow with f16 (exp(x) overflows f16 for x > ~11)" } #end(REPL_TEMPLATES) @@ -357,6 +358,26 @@ "SHADER_NAME": "gelu_erf_inplace_f16", "REPLS": { "TYPE": "f16", "FUNC": "GELU_ERF_FUNC", "EXT_PARAMS": "", "MUTATE": "src" }, "DECLS": ["INPLACE"] + }, + { + "SHADER_NAME": "softplus_f32", + "REPLS": { "TYPE": "f32", "FUNC": "SOFTPLUS_FUNC", "EXT_PARAMS": "", "MUTATE": "dst" }, + "DECLS": ["NOT_INPLACE"] + }, + { + "SHADER_NAME": "softplus_f16", + "REPLS": { "TYPE": "f16", "FUNC": "SOFTPLUS_FUNC", "EXT_PARAMS": "", "MUTATE": "dst" }, + "DECLS": ["NOT_INPLACE"] + }, + { + "SHADER_NAME": "softplus_inplace_f32", + "REPLS": { "TYPE": "f32", "FUNC": "SOFTPLUS_FUNC", "EXT_PARAMS": "", "MUTATE": "src" }, + "DECLS": ["INPLACE"] + }, + { + "SHADER_NAME": "softplus_inplace_f16", + "REPLS": { "TYPE": "f16", "FUNC": "SOFTPLUS_FUNC", "EXT_PARAMS": "", "MUTATE": "src" }, + "DECLS": ["INPLACE"] } ] From ac51c622bf2eb3af2a900d4e369812c936a8c4ce Mon Sep 17 00:00:00 2001 From: Abhijit Ramesh Date: Thu, 18 Dec 2025 15:48:25 +0400 Subject: [PATCH 2/7] ggml webgpu: add EXPM1 unary operator Implements EXPM1 (exp(x) - 1) with f16/f32 support. 
* Add shader implementation and 4 variants (f32/f16, inplace/non-inplace) * Register pipelines and device support --- ggml/src/ggml-webgpu/ggml-webgpu.cpp | 11 ++++++++ .../ggml-webgpu/wgsl-shaders/unary_op.wgsl | 25 +++++++++++++++++-- 2 files changed, 34 insertions(+), 2 deletions(-) diff --git a/ggml/src/ggml-webgpu/ggml-webgpu.cpp b/ggml/src/ggml-webgpu/ggml-webgpu.cpp index 5807ab0fad3..ec554868874 100644 --- a/ggml/src/ggml-webgpu/ggml-webgpu.cpp +++ b/ggml/src/ggml-webgpu/ggml-webgpu.cpp @@ -2283,6 +2283,16 @@ static void ggml_webgpu_init_unary_pipeline(webgpu_context & webgpu_ctx) { ggml_webgpu_create_pipeline(webgpu_ctx->device, wgsl_softplus_inplace_f32, "softplus_inplace_f32", constants); webgpu_ctx->unary_pipelines[GGML_UNARY_OP_SOFTPLUS][GGML_TYPE_F16][1] = ggml_webgpu_create_pipeline(webgpu_ctx->device, wgsl_softplus_inplace_f16, "softplus_inplace_f16", constants); + + // EXPM1 + webgpu_ctx->unary_pipelines[GGML_UNARY_OP_EXPM1][GGML_TYPE_F32][0] = + ggml_webgpu_create_pipeline(webgpu_ctx->device, wgsl_expm1_f32, "expm1_f32", constants); + webgpu_ctx->unary_pipelines[GGML_UNARY_OP_EXPM1][GGML_TYPE_F16][0] = + ggml_webgpu_create_pipeline(webgpu_ctx->device, wgsl_expm1_f16, "expm1_f16", constants); + webgpu_ctx->unary_pipelines[GGML_UNARY_OP_EXPM1][GGML_TYPE_F32][1] = + ggml_webgpu_create_pipeline(webgpu_ctx->device, wgsl_expm1_inplace_f32, "expm1_inplace_f32", constants); + webgpu_ctx->unary_pipelines[GGML_UNARY_OP_EXPM1][GGML_TYPE_F16][1] = + ggml_webgpu_create_pipeline(webgpu_ctx->device, wgsl_expm1_inplace_f16, "expm1_inplace_f16", constants); } static void ggml_webgpu_init_scale_pipeline(webgpu_context & webgpu_ctx) { @@ -2538,6 +2548,7 @@ static bool ggml_backend_webgpu_device_supports_op(ggml_backend_dev_t dev, const case GGML_UNARY_OP_EXP: case GGML_UNARY_OP_GELU_ERF: case GGML_UNARY_OP_SOFTPLUS: + case GGML_UNARY_OP_EXPM1: case GGML_UNARY_OP_XIELU: supports_op = supports_op = (op->type == GGML_TYPE_F32 || op->type == GGML_TYPE_F16) && 
(src0->type == op->type); diff --git a/ggml/src/ggml-webgpu/wgsl-shaders/unary_op.wgsl b/ggml/src/ggml-webgpu/wgsl-shaders/unary_op.wgsl index 582e34f1611..7a1f267d29a 100644 --- a/ggml/src/ggml-webgpu/wgsl-shaders/unary_op.wgsl +++ b/ggml/src/ggml-webgpu/wgsl-shaders/unary_op.wgsl @@ -17,7 +17,8 @@ "GELU_FUNC": "{{MUTATE}}[dst_i] = 0.5 * src[src_i] * (1.0 + tanh(clamp(sqrt(2.0 / 3.14159265) * (src[src_i] + 0.044715 * pow(src[src_i], 3.0)), -9.010913, 9.010913))); // Regarding tanh() domain restrictions in wgsl https://github.com/gpuweb/gpuweb/issues/4458", "GELU_QUICK_FUNC": "{{MUTATE}}[dst_i] = src[src_i] * 0.5 * (1.0 + tanh(clamp(0.79788456 * (src[src_i] + 0.044715 * src[src_i] * src[src_i] * src[src_i]), -9.010913, 9.010913))); // Regarding tanh() domain restrictions in wgsl https://github.com/gpuweb/gpuweb/issues/4458", "GELU_ERF_FUNC": "{{MUTATE}}[dst_i] = 0.5 * src[src_i] * (1.0 + tanh(clamp(0.79788456 * (src[src_i] + 0.044715 * src[src_i] * src[src_i] * src[src_i]), -9.010913, 9.010913))); // Regarding tanh() domain restrictions in wgsl https://github.com/gpuweb/gpuweb/issues/4458", - "SOFTPLUS_FUNC": "{ let src_f32 = f32(src[src_i]); {{MUTATE}}[dst_i] = {{TYPE}}(select(log(1.0 + exp(src_f32)), src_f32, src_f32 > 20.0)); } // Cast to f32 to prevent exp() overflow with f16 (exp(x) overflows f16 for x > ~11)" + "SOFTPLUS_FUNC": "{ let src_f32 = f32(src[src_i]); {{MUTATE}}[dst_i] = {{TYPE}}(select(log(1.0 + exp(src_f32)), src_f32, src_f32 > 20.0)); } // Cast to f32 to prevent exp() overflow with f16 (exp(x) overflows f16 for x > ~11)", + "EXPM1_FUNC": "{{MUTATE}}[dst_i] = exp(src[src_i]) - 1.0;" } #end(REPL_TEMPLATES) @@ -378,7 +379,27 @@ "SHADER_NAME": "softplus_inplace_f16", "REPLS": { "TYPE": "f16", "FUNC": "SOFTPLUS_FUNC", "EXT_PARAMS": "", "MUTATE": "src" }, "DECLS": ["INPLACE"] - } + }, + { + "SHADER_NAME": "expm1_f32", + "REPLS": { "TYPE": "f32", "FUNC": "EXPM1_FUNC", "EXT_PARAMS": "", "MUTATE": "dst" }, + "DECLS": ["NOT_INPLACE"] + }, + { + 
"SHADER_NAME": "expm1_f16", + "REPLS": { "TYPE": "f16", "FUNC": "EXPM1_FUNC", "EXT_PARAMS": "", "MUTATE": "dst" }, + "DECLS": ["NOT_INPLACE"] + }, + { + "SHADER_NAME": "expm1_inplace_f32", + "REPLS": { "TYPE": "f32", "FUNC": "EXPM1_FUNC", "EXT_PARAMS": "", "MUTATE": "src" }, + "DECLS": ["INPLACE"] + }, + { + "SHADER_NAME": "expm1_inplace_f16", + "REPLS": { "TYPE": "f16", "FUNC": "EXPM1_FUNC", "EXT_PARAMS": "", "MUTATE": "src" }, + "DECLS": ["INPLACE"] + }, ] #end(VARIANTS) From e2a00cfb0a4eafec0785063433e64fa002e9b6da Mon Sep 17 00:00:00 2001 From: Abhijit Ramesh Date: Thu, 18 Dec 2025 16:06:27 +0400 Subject: [PATCH 3/7] ggml webgpu: add FLOOR unary operator Implements FLOOR (rounds down to nearest integer) with f16/f32 support. * Add shader implementation and 4 variants (f32/f16, inplace/non-inplace) * Register pipelines and device support --- ggml/src/ggml-webgpu/ggml-webgpu.cpp | 11 +++++++++ .../ggml-webgpu/wgsl-shaders/unary_op.wgsl | 23 ++++++++++++++++++- 2 files changed, 33 insertions(+), 1 deletion(-) diff --git a/ggml/src/ggml-webgpu/ggml-webgpu.cpp b/ggml/src/ggml-webgpu/ggml-webgpu.cpp index ec554868874..4e09d4e9c8f 100644 --- a/ggml/src/ggml-webgpu/ggml-webgpu.cpp +++ b/ggml/src/ggml-webgpu/ggml-webgpu.cpp @@ -2293,6 +2293,16 @@ static void ggml_webgpu_init_unary_pipeline(webgpu_context & webgpu_ctx) { ggml_webgpu_create_pipeline(webgpu_ctx->device, wgsl_expm1_inplace_f32, "expm1_inplace_f32", constants); webgpu_ctx->unary_pipelines[GGML_UNARY_OP_EXPM1][GGML_TYPE_F16][1] = ggml_webgpu_create_pipeline(webgpu_ctx->device, wgsl_expm1_inplace_f16, "expm1_inplace_f16", constants); + + // FLOOR + webgpu_ctx->unary_pipelines[GGML_UNARY_OP_FLOOR][GGML_TYPE_F32][0] = + ggml_webgpu_create_pipeline(webgpu_ctx->device, wgsl_floor_f32, "floor_f32", constants); + webgpu_ctx->unary_pipelines[GGML_UNARY_OP_FLOOR][GGML_TYPE_F16][0] = + ggml_webgpu_create_pipeline(webgpu_ctx->device, wgsl_floor_f16, "floor_f16", constants); + 
webgpu_ctx->unary_pipelines[GGML_UNARY_OP_FLOOR][GGML_TYPE_F32][1] = + ggml_webgpu_create_pipeline(webgpu_ctx->device, wgsl_floor_inplace_f32, "floor_inplace_f32", constants); + webgpu_ctx->unary_pipelines[GGML_UNARY_OP_FLOOR][GGML_TYPE_F16][1] = + ggml_webgpu_create_pipeline(webgpu_ctx->device, wgsl_floor_inplace_f16, "floor_inplace_f16", constants); } static void ggml_webgpu_init_scale_pipeline(webgpu_context & webgpu_ctx) { @@ -2549,6 +2559,7 @@ static bool ggml_backend_webgpu_device_supports_op(ggml_backend_dev_t dev, const case GGML_UNARY_OP_GELU_ERF: case GGML_UNARY_OP_SOFTPLUS: case GGML_UNARY_OP_EXPM1: + case GGML_UNARY_OP_FLOOR: case GGML_UNARY_OP_XIELU: supports_op = supports_op = (op->type == GGML_TYPE_F32 || op->type == GGML_TYPE_F16) && (src0->type == op->type); diff --git a/ggml/src/ggml-webgpu/wgsl-shaders/unary_op.wgsl b/ggml/src/ggml-webgpu/wgsl-shaders/unary_op.wgsl index 7a1f267d29a..e369f470069 100644 --- a/ggml/src/ggml-webgpu/wgsl-shaders/unary_op.wgsl +++ b/ggml/src/ggml-webgpu/wgsl-shaders/unary_op.wgsl @@ -18,7 +18,8 @@ "GELU_QUICK_FUNC": "{{MUTATE}}[dst_i] = src[src_i] * 0.5 * (1.0 + tanh(clamp(0.79788456 * (src[src_i] + 0.044715 * src[src_i] * src[src_i] * src[src_i]), -9.010913, 9.010913))); // Regarding tanh() domain restrictions in wgsl https://github.com/gpuweb/gpuweb/issues/4458", "GELU_ERF_FUNC": "{{MUTATE}}[dst_i] = 0.5 * src[src_i] * (1.0 + tanh(clamp(0.79788456 * (src[src_i] + 0.044715 * src[src_i] * src[src_i] * src[src_i]), -9.010913, 9.010913))); // Regarding tanh() domain restrictions in wgsl https://github.com/gpuweb/gpuweb/issues/4458", "SOFTPLUS_FUNC": "{ let src_f32 = f32(src[src_i]); {{MUTATE}}[dst_i] = {{TYPE}}(select(log(1.0 + exp(src_f32)), src_f32, src_f32 > 20.0)); } // Cast to f32 to prevent exp() overflow with f16 (exp(x) overflows f16 for x > ~11)", - "EXPM1_FUNC": "{{MUTATE}}[dst_i] = exp(src[src_i]) - 1.0;" + "EXPM1_FUNC": "{{MUTATE}}[dst_i] = exp(src[src_i]) - 1.0;", + "FLOOR_FUNC": "{{MUTATE}}[dst_i] = 
floor(src[src_i]);" } #end(REPL_TEMPLATES) @@ -400,6 +401,26 @@ "REPLS": { "TYPE": "f16", "FUNC": "EXPM1_FUNC", "EXT_PARAMS": "", "MUTATE": "src" }, "DECLS": ["INPLACE"] }, + { + "SHADER_NAME": "floor_f32", + "REPLS": { "TYPE": "f32", "FUNC": "FLOOR_FUNC", "EXT_PARAMS": "", "MUTATE": "dst" }, + "DECLS": ["NOT_INPLACE"] + }, + { + "SHADER_NAME": "floor_f16", + "REPLS": { "TYPE": "f16", "FUNC": "FLOOR_FUNC", "EXT_PARAMS": "", "MUTATE": "dst" }, + "DECLS": ["NOT_INPLACE"] + }, + { + "SHADER_NAME": "floor_inplace_f32", + "REPLS": { "TYPE": "f32", "FUNC": "FLOOR_FUNC", "EXT_PARAMS": "", "MUTATE": "src" }, + "DECLS": ["INPLACE"] + }, + { + "SHADER_NAME": "floor_inplace_f16", + "REPLS": { "TYPE": "f16", "FUNC": "FLOOR_FUNC", "EXT_PARAMS": "", "MUTATE": "src" }, + "DECLS": ["INPLACE"] + }, ] #end(VARIANTS) From 267d3b4285c4292121bf7dac403055eb532df310 Mon Sep 17 00:00:00 2001 From: Abhijit Ramesh Date: Thu, 18 Dec 2025 16:38:27 +0400 Subject: [PATCH 4/7] ggml webgpu: add CEIL unary operator Implements CEIL (rounds up to nearest integer) with f16/f32 support. 
* Add shader implementation and 4 variants (f32/f16, inplace/non-inplace) * Register pipelines and device support --- ggml/src/ggml-webgpu/ggml-webgpu.cpp | 11 +++++++++ .../ggml-webgpu/wgsl-shaders/unary_op.wgsl | 23 ++++++++++++++++++- 2 files changed, 33 insertions(+), 1 deletion(-) diff --git a/ggml/src/ggml-webgpu/ggml-webgpu.cpp b/ggml/src/ggml-webgpu/ggml-webgpu.cpp index 4e09d4e9c8f..5d94cbc3387 100644 --- a/ggml/src/ggml-webgpu/ggml-webgpu.cpp +++ b/ggml/src/ggml-webgpu/ggml-webgpu.cpp @@ -2303,6 +2303,16 @@ static void ggml_webgpu_init_unary_pipeline(webgpu_context & webgpu_ctx) { ggml_webgpu_create_pipeline(webgpu_ctx->device, wgsl_floor_inplace_f32, "floor_inplace_f32", constants); webgpu_ctx->unary_pipelines[GGML_UNARY_OP_FLOOR][GGML_TYPE_F16][1] = ggml_webgpu_create_pipeline(webgpu_ctx->device, wgsl_floor_inplace_f16, "floor_inplace_f16", constants); + + // CEIL + webgpu_ctx->unary_pipelines[GGML_UNARY_OP_CEIL][GGML_TYPE_F32][0] = + ggml_webgpu_create_pipeline(webgpu_ctx->device, wgsl_ceil_f32, "ceil_f32", constants); + webgpu_ctx->unary_pipelines[GGML_UNARY_OP_CEIL][GGML_TYPE_F16][0] = + ggml_webgpu_create_pipeline(webgpu_ctx->device, wgsl_ceil_f16, "ceil_f16", constants); + webgpu_ctx->unary_pipelines[GGML_UNARY_OP_CEIL][GGML_TYPE_F32][1] = + ggml_webgpu_create_pipeline(webgpu_ctx->device, wgsl_ceil_inplace_f32, "ceil_inplace_f32", constants); + webgpu_ctx->unary_pipelines[GGML_UNARY_OP_CEIL][GGML_TYPE_F16][1] = + ggml_webgpu_create_pipeline(webgpu_ctx->device, wgsl_ceil_inplace_f16, "ceil_inplace_f16", constants); } static void ggml_webgpu_init_scale_pipeline(webgpu_context & webgpu_ctx) { @@ -2560,6 +2570,7 @@ static bool ggml_backend_webgpu_device_supports_op(ggml_backend_dev_t dev, const case GGML_UNARY_OP_SOFTPLUS: case GGML_UNARY_OP_EXPM1: case GGML_UNARY_OP_FLOOR: + case GGML_UNARY_OP_CEIL: case GGML_UNARY_OP_XIELU: supports_op = supports_op = (op->type == GGML_TYPE_F32 || op->type == GGML_TYPE_F16) && (src0->type == op->type); diff --git 
a/ggml/src/ggml-webgpu/wgsl-shaders/unary_op.wgsl b/ggml/src/ggml-webgpu/wgsl-shaders/unary_op.wgsl index e369f470069..1d7c751fc91 100644 --- a/ggml/src/ggml-webgpu/wgsl-shaders/unary_op.wgsl +++ b/ggml/src/ggml-webgpu/wgsl-shaders/unary_op.wgsl @@ -19,7 +19,8 @@ "GELU_ERF_FUNC": "{{MUTATE}}[dst_i] = 0.5 * src[src_i] * (1.0 + tanh(clamp(0.79788456 * (src[src_i] + 0.044715 * src[src_i] * src[src_i] * src[src_i]), -9.010913, 9.010913))); // Regarding tanh() domain restrictions in wgsl https://github.com/gpuweb/gpuweb/issues/4458", "SOFTPLUS_FUNC": "{ let src_f32 = f32(src[src_i]); {{MUTATE}}[dst_i] = {{TYPE}}(select(log(1.0 + exp(src_f32)), src_f32, src_f32 > 20.0)); } // Cast to f32 to prevent exp() overflow with f16 (exp(x) overflows f16 for x > ~11)", "EXPM1_FUNC": "{{MUTATE}}[dst_i] = exp(src[src_i]) - 1.0;", - "FLOOR_FUNC": "{{MUTATE}}[dst_i] = floor(src[src_i]);" + "FLOOR_FUNC": "{{MUTATE}}[dst_i] = floor(src[src_i]);", + "CEIL_FUNC": "{{MUTATE}}[dst_i] = ceil(src[src_i]);" } #end(REPL_TEMPLATES) @@ -421,6 +422,26 @@ "REPLS": { "TYPE": "f16", "FUNC": "FLOOR_FUNC", "EXT_PARAMS": "", "MUTATE": "src" }, "DECLS": ["INPLACE"] }, + { + "SHADER_NAME": "ceil_f32", + "REPLS": { "TYPE": "f32", "FUNC": "CEIL_FUNC", "EXT_PARAMS": "", "MUTATE": "dst" }, + "DECLS": ["NOT_INPLACE"] + }, + { + "SHADER_NAME": "ceil_f16", + "REPLS": { "TYPE": "f16", "FUNC": "CEIL_FUNC", "EXT_PARAMS": "", "MUTATE": "dst" }, + "DECLS": ["NOT_INPLACE"] + }, + { + "SHADER_NAME": "ceil_inplace_f32", + "REPLS": { "TYPE": "f32", "FUNC": "CEIL_FUNC", "EXT_PARAMS": "", "MUTATE": "src" }, + "DECLS": ["INPLACE"] + }, + { + "SHADER_NAME": "ceil_inplace_f16", + "REPLS": { "TYPE": "f16", "FUNC": "CEIL_FUNC", "EXT_PARAMS": "", "MUTATE": "src" }, + "DECLS": ["INPLACE"] + }, ] #end(VARIANTS) From 0e5948740cd9bd000c5303abca0dd9f68e063e2a Mon Sep 17 00:00:00 2001 From: Abhijit Ramesh Date: Thu, 25 Dec 2025 13:26:14 +0530 Subject: [PATCH 5/7] ggml webgpu: add ROUND unary operator Implements ROUND (rounds to nearest 
integer) with f16/f32 support. * Add shader implementation and 4 variants (f32/f16, inplace/non-inplace) * Register pipelines and device support --- ggml/src/ggml-webgpu/ggml-webgpu.cpp | 11 +++++++++ .../ggml-webgpu/wgsl-shaders/unary_op.wgsl | 23 ++++++++++++++++++- 2 files changed, 33 insertions(+), 1 deletion(-) diff --git a/ggml/src/ggml-webgpu/ggml-webgpu.cpp b/ggml/src/ggml-webgpu/ggml-webgpu.cpp index 5d94cbc3387..b521176244c 100644 --- a/ggml/src/ggml-webgpu/ggml-webgpu.cpp +++ b/ggml/src/ggml-webgpu/ggml-webgpu.cpp @@ -2313,6 +2313,16 @@ static void ggml_webgpu_init_unary_pipeline(webgpu_context & webgpu_ctx) { ggml_webgpu_create_pipeline(webgpu_ctx->device, wgsl_ceil_inplace_f32, "ceil_inplace_f32", constants); webgpu_ctx->unary_pipelines[GGML_UNARY_OP_CEIL][GGML_TYPE_F16][1] = ggml_webgpu_create_pipeline(webgpu_ctx->device, wgsl_ceil_inplace_f16, "ceil_inplace_f16", constants); + + // ROUND + webgpu_ctx->unary_pipelines[GGML_UNARY_OP_ROUND][GGML_TYPE_F32][0] = + ggml_webgpu_create_pipeline(webgpu_ctx->device, wgsl_round_f32, "round_f32", constants); + webgpu_ctx->unary_pipelines[GGML_UNARY_OP_ROUND][GGML_TYPE_F16][0] = + ggml_webgpu_create_pipeline(webgpu_ctx->device, wgsl_round_f16, "round_f16", constants); + webgpu_ctx->unary_pipelines[GGML_UNARY_OP_ROUND][GGML_TYPE_F32][1] = + ggml_webgpu_create_pipeline(webgpu_ctx->device, wgsl_round_inplace_f32, "round_inplace_f32", constants); + webgpu_ctx->unary_pipelines[GGML_UNARY_OP_ROUND][GGML_TYPE_F16][1] = + ggml_webgpu_create_pipeline(webgpu_ctx->device, wgsl_round_inplace_f16, "round_inplace_f16", constants); } static void ggml_webgpu_init_scale_pipeline(webgpu_context & webgpu_ctx) { @@ -2571,6 +2581,7 @@ static bool ggml_backend_webgpu_device_supports_op(ggml_backend_dev_t dev, const case GGML_UNARY_OP_EXPM1: case GGML_UNARY_OP_FLOOR: case GGML_UNARY_OP_CEIL: + case GGML_UNARY_OP_ROUND: case GGML_UNARY_OP_XIELU: supports_op = supports_op = (op->type == GGML_TYPE_F32 || op->type == GGML_TYPE_F16) && 
(src0->type == op->type); diff --git a/ggml/src/ggml-webgpu/wgsl-shaders/unary_op.wgsl b/ggml/src/ggml-webgpu/wgsl-shaders/unary_op.wgsl index 1d7c751fc91..ab43bfc4a2a 100644 --- a/ggml/src/ggml-webgpu/wgsl-shaders/unary_op.wgsl +++ b/ggml/src/ggml-webgpu/wgsl-shaders/unary_op.wgsl @@ -20,7 +20,8 @@ "SOFTPLUS_FUNC": "{ let src_f32 = f32(src[src_i]); {{MUTATE}}[dst_i] = {{TYPE}}(select(log(1.0 + exp(src_f32)), src_f32, src_f32 > 20.0)); } // Cast to f32 to prevent exp() overflow with f16 (exp(x) overflows f16 for x > ~11)", "EXPM1_FUNC": "{{MUTATE}}[dst_i] = exp(src[src_i]) - 1.0;", "FLOOR_FUNC": "{{MUTATE}}[dst_i] = floor(src[src_i]);", - "CEIL_FUNC": "{{MUTATE}}[dst_i] = ceil(src[src_i]);" + "CEIL_FUNC": "{{MUTATE}}[dst_i] = ceil(src[src_i]);", + "ROUND_FUNC": "{ let src_f32 = f32(src[src_i]); let src_trunc = trunc(src_f32); let result = select(src_trunc, src_trunc + sign(src_f32), abs(src_f32 - src_trunc) >= 0.5); {{MUTATE}}[dst_i] = {{TYPE}}(result); } // Round half away from zero to match C roundf() behavior, not WGSL round() which uses round-to-even. Built from trunc() because floor(x + 0.5) mis-rounds when x + 0.5 is not exactly representable in f32 (e.g. 0.49999997 or odd integers >= 2^23)" } #end(REPL_TEMPLATES) @@ -442,6 +443,26 @@ "REPLS": { "TYPE": "f16", "FUNC": "CEIL_FUNC", "EXT_PARAMS": "", "MUTATE": "src" }, "DECLS": ["INPLACE"] }, + { + "SHADER_NAME": "round_f32", + "REPLS": { "TYPE": "f32", "FUNC": "ROUND_FUNC", "EXT_PARAMS": "", "MUTATE": "dst" }, + "DECLS": ["NOT_INPLACE"] + }, + { + "SHADER_NAME": "round_f16", + "REPLS": { "TYPE": "f16", "FUNC": "ROUND_FUNC", "EXT_PARAMS": "", "MUTATE": "dst" }, + "DECLS": ["NOT_INPLACE"] + }, + { + "SHADER_NAME": "round_inplace_f32", + "REPLS": { "TYPE": "f32", "FUNC": "ROUND_FUNC", "EXT_PARAMS": "", "MUTATE": "src" }, + "DECLS": ["INPLACE"] + }, + { + "SHADER_NAME": "round_inplace_f16", + "REPLS": { "TYPE": "f16", "FUNC": "ROUND_FUNC", "EXT_PARAMS": "", "MUTATE": "src" }, + "DECLS": ["INPLACE"] + }, ] #end(VARIANTS) From 4f358f761ccc2c917183c9c5d7ff99f9ebd55ced Mon Sep 17 00:00:00 2001 From: Abhijit Ramesh Date: Thu, 25 Dec 2025 13:47:02 +0530 Subject: [PATCH 6/7] ggml webgpu: add TRUNC unary
operator Implements TRUNC (truncates towards zero) with f16/f32 support. * Add shader implementation and 4 variants (f32/f16, inplace/non-inplace) * Register pipelines and device support --- ggml/src/ggml-webgpu/ggml-webgpu.cpp | 11 +++++++++ .../ggml-webgpu/wgsl-shaders/unary_op.wgsl | 23 ++++++++++++++++++- 2 files changed, 33 insertions(+), 1 deletion(-) diff --git a/ggml/src/ggml-webgpu/ggml-webgpu.cpp b/ggml/src/ggml-webgpu/ggml-webgpu.cpp index b521176244c..d9552fbf41e 100644 --- a/ggml/src/ggml-webgpu/ggml-webgpu.cpp +++ b/ggml/src/ggml-webgpu/ggml-webgpu.cpp @@ -2323,6 +2323,16 @@ static void ggml_webgpu_init_unary_pipeline(webgpu_context & webgpu_ctx) { ggml_webgpu_create_pipeline(webgpu_ctx->device, wgsl_round_inplace_f32, "round_inplace_f32", constants); webgpu_ctx->unary_pipelines[GGML_UNARY_OP_ROUND][GGML_TYPE_F16][1] = ggml_webgpu_create_pipeline(webgpu_ctx->device, wgsl_round_inplace_f16, "round_inplace_f16", constants); + + // TRUNC + webgpu_ctx->unary_pipelines[GGML_UNARY_OP_TRUNC][GGML_TYPE_F32][0] = + ggml_webgpu_create_pipeline(webgpu_ctx->device, wgsl_trunc_f32, "trunc_f32", constants); + webgpu_ctx->unary_pipelines[GGML_UNARY_OP_TRUNC][GGML_TYPE_F16][0] = + ggml_webgpu_create_pipeline(webgpu_ctx->device, wgsl_trunc_f16, "trunc_f16", constants); + webgpu_ctx->unary_pipelines[GGML_UNARY_OP_TRUNC][GGML_TYPE_F32][1] = + ggml_webgpu_create_pipeline(webgpu_ctx->device, wgsl_trunc_inplace_f32, "trunc_inplace_f32", constants); + webgpu_ctx->unary_pipelines[GGML_UNARY_OP_TRUNC][GGML_TYPE_F16][1] = + ggml_webgpu_create_pipeline(webgpu_ctx->device, wgsl_trunc_inplace_f16, "trunc_inplace_f16", constants); } static void ggml_webgpu_init_scale_pipeline(webgpu_context & webgpu_ctx) { @@ -2582,6 +2592,7 @@ static bool ggml_backend_webgpu_device_supports_op(ggml_backend_dev_t dev, const case GGML_UNARY_OP_FLOOR: case GGML_UNARY_OP_CEIL: case GGML_UNARY_OP_ROUND: + case GGML_UNARY_OP_TRUNC: case GGML_UNARY_OP_XIELU: supports_op = supports_op = (op->type ==
GGML_TYPE_F32 || op->type == GGML_TYPE_F16) && (src0->type == op->type); diff --git a/ggml/src/ggml-webgpu/wgsl-shaders/unary_op.wgsl b/ggml/src/ggml-webgpu/wgsl-shaders/unary_op.wgsl index ab43bfc4a2a..90efb78bfab 100644 --- a/ggml/src/ggml-webgpu/wgsl-shaders/unary_op.wgsl +++ b/ggml/src/ggml-webgpu/wgsl-shaders/unary_op.wgsl @@ -21,7 +21,8 @@ "EXPM1_FUNC": "{{MUTATE}}[dst_i] = exp(src[src_i]) - 1.0;", "FLOOR_FUNC": "{{MUTATE}}[dst_i] = floor(src[src_i]);", "CEIL_FUNC": "{{MUTATE}}[dst_i] = ceil(src[src_i]);", - "ROUND_FUNC": "{ let src_f32 = f32(src[src_i]); let src_trunc = trunc(src_f32); let result = select(src_trunc, src_trunc + sign(src_f32), abs(src_f32 - src_trunc) >= 0.5); {{MUTATE}}[dst_i] = {{TYPE}}(result); } // Round half away from zero to match C roundf() behavior, not WGSL round() which uses round-to-even. Built from trunc() because floor(x + 0.5) mis-rounds when x + 0.5 is not exactly representable in f32 (e.g. 0.49999997 or odd integers >= 2^23)" + "ROUND_FUNC": "{ let src_f32 = f32(src[src_i]); let src_trunc = trunc(src_f32); let result = select(src_trunc, src_trunc + sign(src_f32), abs(src_f32 - src_trunc) >= 0.5); {{MUTATE}}[dst_i] = {{TYPE}}(result); } // Round half away from zero to match C roundf() behavior, not WGSL round() which uses round-to-even. Built from trunc() because floor(x + 0.5) mis-rounds when x + 0.5 is not exactly representable in f32 (e.g. 0.49999997 or odd integers >= 2^23)", + "TRUNC_FUNC": "{{MUTATE}}[dst_i] = trunc(src[src_i]);" } #end(REPL_TEMPLATES) @@ -463,6 +464,26 @@ "REPLS": { "TYPE": "f16", "FUNC": "ROUND_FUNC", "EXT_PARAMS": "", "MUTATE": "src" }, "DECLS": ["INPLACE"] }, + { + "SHADER_NAME": "trunc_f32", + "REPLS": { "TYPE": "f32", "FUNC": "TRUNC_FUNC", "EXT_PARAMS": "", "MUTATE": "dst" }, + "DECLS": ["NOT_INPLACE"] + }, + { + "SHADER_NAME": "trunc_f16", + "REPLS": { "TYPE": "f16", "FUNC": "TRUNC_FUNC", "EXT_PARAMS": "", "MUTATE": "dst" }, + "DECLS": ["NOT_INPLACE"] + }, + { + "SHADER_NAME": "trunc_inplace_f32", + "REPLS": { "TYPE": "f32", "FUNC": "TRUNC_FUNC", "EXT_PARAMS": "", "MUTATE": "src" }, + "DECLS": ["INPLACE"] + }, + { + "SHADER_NAME": "trunc_inplace_f16", + "REPLS": { "TYPE": "f16", "FUNC": "TRUNC_FUNC", "EXT_PARAMS": "", "MUTATE": "src" }, + "DECLS": ["INPLACE"] + }, ] #end(VARIANTS) From c4c4f7796f18c1e4ecd5521acc2aea165ac2a800 Mon Sep 17 00:00:00 2001 From: Abhijit
Ramesh Date: Thu, 25 Dec 2025 14:38:02 +0530 Subject: [PATCH 7/7] docs : update WebGPU support for unary operators (FLOOR, CEIL, ROUND, TRUNC, EXPM1, SOFTPLUS) --- docs/ops.md | 13 +- docs/ops/WebGPU.csv | 778 ++++++++++++++++++++++---------------------- 2 files changed, 402 insertions(+), 389 deletions(-) diff --git a/docs/ops.md b/docs/ops.md index ef1febccaec..8d466cd1f3c 100644 --- a/docs/ops.md +++ b/docs/ops.md @@ -22,7 +22,7 @@ Legend: | ARANGE | ❌ | ✅ | ✅ | ✅ | ✅ | ❌ | ✅ | ✅ | ❌ | ❌ | ❌ | | ARGMAX | ❌ | ✅ | ✅ | ✅ | ✅ | ❌ | ✅ | ✅ | ❌ | ❌ | ❌ | | ARGSORT | ❌ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❌ | ❌ | ❌ | -| CEIL | ❌ | ❌ | ✅ | 🟡 | ❌ | ❌ | 🟡 | 🟡 | ❌ | ❌ | ❌ | +| CEIL | ❌ | ❌ | ✅ | 🟡 | ❌ | ❌ | 🟡 | 🟡 | ✅ | ❌ | ❌ | | CLAMP | ❌ | ✅ | ✅ | ✅ | 🟡 | 🟡 | 🟡 | 🟡 | ❌ | ❌ | ❌ | | CONCAT | ❌ | ✅ | ✅ | 🟡 | ✅ | 🟡 | ✅ | ✅ | ❌ | ❌ | ❌ | | CONT | ❌ | 🟡 | ✅ | ✅ | ✅ | 🟡 | 🟡 | ✅ | 🟡 | ❌ | ❌ | @@ -37,15 +37,16 @@ Legend: | CROSS_ENTROPY_LOSS | ❌ | ❌ | ✅ | ✅ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | | CROSS_ENTROPY_LOSS_BACK | ❌ | ❌ | ✅ | ✅ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | | CUMSUM | ❌ | ❌ | ✅ | ❌ | ✅ | ❌ | ❌ | ✅ | ❌ | ❌ | ❌ | +| DIAG | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | | DIAG_MASK_INF | ❌ | ✅ | ✅ | ✅ | ❌ | 🟡 | ✅ | ✅ | ❌ | ❌ | ❌ | | DIV | ❌ | ✅ | ✅ | ✅ | 🟡 | 🟡 | ✅ | ✅ | ✅ | ❌ | ❌ | | DUP | ❌ | ✅ | ✅ | 🟡 | 🟡 | 🟡 | ✅ | ✅ | ❌ | ❌ | ❌ | | ELU | ❌ | ✅ | ✅ | 🟡 | 🟡 | ❌ | ✅ | ❌ | ✅ | ❌ | ❌ | | EXP | ❌ | ✅ | ✅ | 🟡 | 🟡 | ❌ | ✅ | 🟡 | ✅ | ❌ | ❌ | -| EXPM1 | ❌ | ❌ | ✅ | 🟡 | 🟡 | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | +| EXPM1 | ❌ | ❌ | ✅ | 🟡 | 🟡 | ❌ | ❌ | ❌ | ✅ | ❌ | ❌ | | FILL | ❌ | ❌ | ✅ | ❌ | ✅ | ❌ | ❌ | ✅ | ❌ | ❌ | ❌ | | FLASH_ATTN_EXT | ❌ | 🟡 | ✅ | 🟡 | 🟡 | ❌ | ❌ | 🟡 | ❌ | ❌ | ❌ | -| FLOOR | ❌ | ❌ | ✅ | 🟡 | ❌ | ❌ | 🟡 | 🟡 | ❌ | ❌ | ❌ | +| FLOOR | ❌ | ❌ | ✅ | 🟡 | ❌ | ❌ | 🟡 | 🟡 | ✅ | ❌ | ❌ | | GATED_LINEAR_ATTN | ❌ | ❌ | ✅ | ✅ | ❌ | ❌ | ✅ | ❌ | ❌ | ❌ | ❌ | | GEGLU | ❌ | ✅ | ✅ | ✅ | 🟡 | ✅ | ✅ | 🟡 | ✅ | ❌ | ❌ | | GEGLU_ERF | ❌ | ✅ | ✅ | ✅ | 🟡 | ✅ | ✅ | 🟡 | ✅ | ❌ | ❌ | @@ -87,7 +88,7 @@ Legend: | ROLL | ❌ | ❌ | ✅ | ✅ | ❌ | ❌ 
| ✅ | ✅ | ❌ | ❌ | ❌ | | ROPE | ❌ | 🟡 | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❌ | ❌ | | ROPE_BACK | ❌ | ❌ | ✅ | ✅ | ❌ | ❌ | ❌ | ✅ | ❌ | ❌ | ❌ | -| ROUND | ❌ | ❌ | ✅ | 🟡 | ❌ | ❌ | 🟡 | 🟡 | ❌ | ❌ | ❌ | +| ROUND | ❌ | ❌ | ✅ | 🟡 | ❌ | ❌ | 🟡 | 🟡 | ✅ | ❌ | ❌ | | RWKV_WKV6 | ❌ | ❌ | ✅ | ✅ | ✅ | ❌ | ✅ | ✅ | ❌ | ❌ | ❌ | | RWKV_WKV7 | ❌ | ❌ | ✅ | ✅ | ✅ | ❌ | ✅ | ✅ | ❌ | ❌ | ❌ | | SCALE | ❌ | 🟡 | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❌ | ❌ | @@ -99,7 +100,7 @@ Legend: | SILU_BACK | ❌ | ❌ | ✅ | ✅ | ❌ | ❌ | ❌ | ✅ | ❌ | ❌ | ❌ | | SIN | ❌ | ✅ | ✅ | ✅ | 🟡 | ❌ | 🟡 | 🟡 | ❌ | ❌ | ❌ | | SOFTCAP | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | -| SOFTPLUS | ❌ | ❌ | ✅ | 🟡 | 🟡 | ❌ | ❌ | 🟡 | ❌ | ❌ | ❌ | +| SOFTPLUS | ❌ | ❌ | ✅ | 🟡 | 🟡 | ❌ | ❌ | 🟡 | ✅ | ❌ | ❌ | | SOFT_MAX | ❌ | 🟡 | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❌ | ❌ | | SOFT_MAX_BACK | ❌ | ❌ | 🟡 | 🟡 | ❌ | ❌ | 🟡 | ✅ | ❌ | ❌ | ❌ | | SOLVE_TRI | ❌ | ❌ | ✅ | ❌ | ❌ | ❌ | ❌ | 🟡 | ❌ | ❌ | ❌ | @@ -117,6 +118,6 @@ Legend: | TIMESTEP_EMBEDDING | ❌ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❌ | ❌ | ❌ | | TOP_K | ❌ | ❌ | ❌ | ❌ | ✅ | ❌ | ❌ | 🟡 | ❌ | ❌ | ❌ | | TRI | ❌ | ❌ | ✅ | ❌ | ✅ | ❌ | ❌ | ✅ | ❌ | ❌ | ❌ | -| TRUNC | ❌ | ❌ | ✅ | 🟡 | ❌ | ❌ | 🟡 | 🟡 | ❌ | ❌ | ❌ | +| TRUNC | ❌ | ❌ | ✅ | 🟡 | ❌ | ❌ | 🟡 | 🟡 | ✅ | ❌ | ❌ | | UPSCALE | ❌ | 🟡 | ✅ | ✅ | 🟡 | ✅ | 🟡 | 🟡 | ❌ | ❌ | ❌ | | XIELU | ❌ | ❌ | ✅ | ❌ | ❌ | ❌ | ❌ | ❌ | ✅ | ❌ | ❌ | diff --git a/docs/ops/WebGPU.csv b/docs/ops/WebGPU.csv index bfff75e66f0..0c71ac7897a 100644 --- a/docs/ops/WebGPU.csv +++ b/docs/ops/WebGPU.csv @@ -27,20 +27,20 @@ "WebGPU: WebGPU","HARDSIGMOID","type=f16,ne_a=[5,7,11,13],v=0","support","1","yes","WebGPU" "WebGPU: WebGPU","EXP","type=f16,ne_a=[128,2,2,2],v=0","support","1","yes","WebGPU" "WebGPU: WebGPU","EXP","type=f16,ne_a=[5,7,11,13],v=0","support","1","yes","WebGPU" -"WebGPU: WebGPU","EXPM1","type=f16,ne_a=[128,2,2,2],v=0","support","0","no","WebGPU" -"WebGPU: WebGPU","EXPM1","type=f16,ne_a=[5,7,11,13],v=0","support","0","no","WebGPU" -"WebGPU: 
WebGPU","SOFTPLUS","type=f16,ne_a=[128,2,2,2],v=0","support","0","no","WebGPU" -"WebGPU: WebGPU","SOFTPLUS","type=f16,ne_a=[5,7,11,13],v=0","support","0","no","WebGPU" +"WebGPU: WebGPU","EXPM1","type=f16,ne_a=[128,2,2,2],v=0","support","1","yes","WebGPU" +"WebGPU: WebGPU","EXPM1","type=f16,ne_a=[5,7,11,13],v=0","support","1","yes","WebGPU" +"WebGPU: WebGPU","SOFTPLUS","type=f16,ne_a=[128,2,2,2],v=0","support","1","yes","WebGPU" +"WebGPU: WebGPU","SOFTPLUS","type=f16,ne_a=[5,7,11,13],v=0","support","1","yes","WebGPU" "WebGPU: WebGPU","GELU_ERF","type=f16,ne_a=[128,2,2,2],v=0","support","1","yes","WebGPU" "WebGPU: WebGPU","GELU_ERF","type=f16,ne_a=[5,7,11,13],v=0","support","1","yes","WebGPU" -"WebGPU: WebGPU","FLOOR","type=f16,ne_a=[128,2,2,2],v=0","support","0","no","WebGPU" -"WebGPU: WebGPU","FLOOR","type=f16,ne_a=[5,7,11,13],v=0","support","0","no","WebGPU" -"WebGPU: WebGPU","CEIL","type=f16,ne_a=[128,2,2,2],v=0","support","0","no","WebGPU" -"WebGPU: WebGPU","CEIL","type=f16,ne_a=[5,7,11,13],v=0","support","0","no","WebGPU" -"WebGPU: WebGPU","ROUND","type=f16,ne_a=[128,2,2,2],v=0","support","0","no","WebGPU" -"WebGPU: WebGPU","ROUND","type=f16,ne_a=[5,7,11,13],v=0","support","0","no","WebGPU" -"WebGPU: WebGPU","TRUNC","type=f16,ne_a=[128,2,2,2],v=0","support","0","no","WebGPU" -"WebGPU: WebGPU","TRUNC","type=f16,ne_a=[5,7,11,13],v=0","support","0","no","WebGPU" +"WebGPU: WebGPU","FLOOR","type=f16,ne_a=[128,2,2,2],v=0","support","1","yes","WebGPU" +"WebGPU: WebGPU","FLOOR","type=f16,ne_a=[5,7,11,13],v=0","support","1","yes","WebGPU" +"WebGPU: WebGPU","CEIL","type=f16,ne_a=[128,2,2,2],v=0","support","1","yes","WebGPU" +"WebGPU: WebGPU","CEIL","type=f16,ne_a=[5,7,11,13],v=0","support","1","yes","WebGPU" +"WebGPU: WebGPU","ROUND","type=f16,ne_a=[128,2,2,2],v=0","support","1","yes","WebGPU" +"WebGPU: WebGPU","ROUND","type=f16,ne_a=[5,7,11,13],v=0","support","1","yes","WebGPU" +"WebGPU: WebGPU","TRUNC","type=f16,ne_a=[128,2,2,2],v=0","support","1","yes","WebGPU" 
+"WebGPU: WebGPU","TRUNC","type=f16,ne_a=[5,7,11,13],v=0","support","1","yes","WebGPU" "WebGPU: WebGPU","ABS","type=f16,ne_a=[128,2,2,2],v=1","support","1","yes","WebGPU" "WebGPU: WebGPU","ABS","type=f16,ne_a=[5,7,11,13],v=1","support","1","yes","WebGPU" "WebGPU: WebGPU","SGN","type=f16,ne_a=[128,2,2,2],v=1","support","1","yes","WebGPU" @@ -69,20 +69,20 @@ "WebGPU: WebGPU","HARDSIGMOID","type=f16,ne_a=[5,7,11,13],v=1","support","1","yes","WebGPU" "WebGPU: WebGPU","EXP","type=f16,ne_a=[128,2,2,2],v=1","support","1","yes","WebGPU" "WebGPU: WebGPU","EXP","type=f16,ne_a=[5,7,11,13],v=1","support","1","yes","WebGPU" -"WebGPU: WebGPU","EXPM1","type=f16,ne_a=[128,2,2,2],v=1","support","0","no","WebGPU" -"WebGPU: WebGPU","EXPM1","type=f16,ne_a=[5,7,11,13],v=1","support","0","no","WebGPU" -"WebGPU: WebGPU","SOFTPLUS","type=f16,ne_a=[128,2,2,2],v=1","support","0","no","WebGPU" -"WebGPU: WebGPU","SOFTPLUS","type=f16,ne_a=[5,7,11,13],v=1","support","0","no","WebGPU" +"WebGPU: WebGPU","EXPM1","type=f16,ne_a=[128,2,2,2],v=1","support","1","yes","WebGPU" +"WebGPU: WebGPU","EXPM1","type=f16,ne_a=[5,7,11,13],v=1","support","1","yes","WebGPU" +"WebGPU: WebGPU","SOFTPLUS","type=f16,ne_a=[128,2,2,2],v=1","support","1","yes","WebGPU" +"WebGPU: WebGPU","SOFTPLUS","type=f16,ne_a=[5,7,11,13],v=1","support","1","yes","WebGPU" "WebGPU: WebGPU","GELU_ERF","type=f16,ne_a=[128,2,2,2],v=1","support","1","yes","WebGPU" "WebGPU: WebGPU","GELU_ERF","type=f16,ne_a=[5,7,11,13],v=1","support","1","yes","WebGPU" -"WebGPU: WebGPU","FLOOR","type=f16,ne_a=[128,2,2,2],v=1","support","0","no","WebGPU" -"WebGPU: WebGPU","FLOOR","type=f16,ne_a=[5,7,11,13],v=1","support","0","no","WebGPU" -"WebGPU: WebGPU","CEIL","type=f16,ne_a=[128,2,2,2],v=1","support","0","no","WebGPU" -"WebGPU: WebGPU","CEIL","type=f16,ne_a=[5,7,11,13],v=1","support","0","no","WebGPU" -"WebGPU: WebGPU","ROUND","type=f16,ne_a=[128,2,2,2],v=1","support","0","no","WebGPU" -"WebGPU: 
WebGPU","ROUND","type=f16,ne_a=[5,7,11,13],v=1","support","0","no","WebGPU" -"WebGPU: WebGPU","TRUNC","type=f16,ne_a=[128,2,2,2],v=1","support","0","no","WebGPU" -"WebGPU: WebGPU","TRUNC","type=f16,ne_a=[5,7,11,13],v=1","support","0","no","WebGPU" +"WebGPU: WebGPU","FLOOR","type=f16,ne_a=[128,2,2,2],v=1","support","1","yes","WebGPU" +"WebGPU: WebGPU","FLOOR","type=f16,ne_a=[5,7,11,13],v=1","support","1","yes","WebGPU" +"WebGPU: WebGPU","CEIL","type=f16,ne_a=[128,2,2,2],v=1","support","1","yes","WebGPU" +"WebGPU: WebGPU","CEIL","type=f16,ne_a=[5,7,11,13],v=1","support","1","yes","WebGPU" +"WebGPU: WebGPU","ROUND","type=f16,ne_a=[128,2,2,2],v=1","support","1","yes","WebGPU" +"WebGPU: WebGPU","ROUND","type=f16,ne_a=[5,7,11,13],v=1","support","1","yes","WebGPU" +"WebGPU: WebGPU","TRUNC","type=f16,ne_a=[128,2,2,2],v=1","support","1","yes","WebGPU" +"WebGPU: WebGPU","TRUNC","type=f16,ne_a=[5,7,11,13],v=1","support","1","yes","WebGPU" "WebGPU: WebGPU","ABS","type=f32,ne_a=[128,2,2,2],v=0","support","1","yes","WebGPU" "WebGPU: WebGPU","ABS","type=f32,ne_a=[5,7,11,13],v=0","support","1","yes","WebGPU" "WebGPU: WebGPU","SGN","type=f32,ne_a=[128,2,2,2],v=0","support","1","yes","WebGPU" @@ -111,20 +111,20 @@ "WebGPU: WebGPU","HARDSIGMOID","type=f32,ne_a=[5,7,11,13],v=0","support","1","yes","WebGPU" "WebGPU: WebGPU","EXP","type=f32,ne_a=[128,2,2,2],v=0","support","1","yes","WebGPU" "WebGPU: WebGPU","EXP","type=f32,ne_a=[5,7,11,13],v=0","support","1","yes","WebGPU" -"WebGPU: WebGPU","EXPM1","type=f32,ne_a=[128,2,2,2],v=0","support","0","no","WebGPU" -"WebGPU: WebGPU","EXPM1","type=f32,ne_a=[5,7,11,13],v=0","support","0","no","WebGPU" -"WebGPU: WebGPU","SOFTPLUS","type=f32,ne_a=[128,2,2,2],v=0","support","0","no","WebGPU" -"WebGPU: WebGPU","SOFTPLUS","type=f32,ne_a=[5,7,11,13],v=0","support","0","no","WebGPU" +"WebGPU: WebGPU","EXPM1","type=f32,ne_a=[128,2,2,2],v=0","support","1","yes","WebGPU" +"WebGPU: WebGPU","EXPM1","type=f32,ne_a=[5,7,11,13],v=0","support","1","yes","WebGPU" 
+"WebGPU: WebGPU","SOFTPLUS","type=f32,ne_a=[128,2,2,2],v=0","support","1","yes","WebGPU" +"WebGPU: WebGPU","SOFTPLUS","type=f32,ne_a=[5,7,11,13],v=0","support","1","yes","WebGPU" "WebGPU: WebGPU","GELU_ERF","type=f32,ne_a=[128,2,2,2],v=0","support","1","yes","WebGPU" "WebGPU: WebGPU","GELU_ERF","type=f32,ne_a=[5,7,11,13],v=0","support","1","yes","WebGPU" -"WebGPU: WebGPU","FLOOR","type=f32,ne_a=[128,2,2,2],v=0","support","0","no","WebGPU" -"WebGPU: WebGPU","FLOOR","type=f32,ne_a=[5,7,11,13],v=0","support","0","no","WebGPU" -"WebGPU: WebGPU","CEIL","type=f32,ne_a=[128,2,2,2],v=0","support","0","no","WebGPU" -"WebGPU: WebGPU","CEIL","type=f32,ne_a=[5,7,11,13],v=0","support","0","no","WebGPU" -"WebGPU: WebGPU","ROUND","type=f32,ne_a=[128,2,2,2],v=0","support","0","no","WebGPU" -"WebGPU: WebGPU","ROUND","type=f32,ne_a=[5,7,11,13],v=0","support","0","no","WebGPU" -"WebGPU: WebGPU","TRUNC","type=f32,ne_a=[128,2,2,2],v=0","support","0","no","WebGPU" -"WebGPU: WebGPU","TRUNC","type=f32,ne_a=[5,7,11,13],v=0","support","0","no","WebGPU" +"WebGPU: WebGPU","FLOOR","type=f32,ne_a=[128,2,2,2],v=0","support","1","yes","WebGPU" +"WebGPU: WebGPU","FLOOR","type=f32,ne_a=[5,7,11,13],v=0","support","1","yes","WebGPU" +"WebGPU: WebGPU","CEIL","type=f32,ne_a=[128,2,2,2],v=0","support","1","yes","WebGPU" +"WebGPU: WebGPU","CEIL","type=f32,ne_a=[5,7,11,13],v=0","support","1","yes","WebGPU" +"WebGPU: WebGPU","ROUND","type=f32,ne_a=[128,2,2,2],v=0","support","1","yes","WebGPU" +"WebGPU: WebGPU","ROUND","type=f32,ne_a=[5,7,11,13],v=0","support","1","yes","WebGPU" +"WebGPU: WebGPU","TRUNC","type=f32,ne_a=[128,2,2,2],v=0","support","1","yes","WebGPU" +"WebGPU: WebGPU","TRUNC","type=f32,ne_a=[5,7,11,13],v=0","support","1","yes","WebGPU" "WebGPU: WebGPU","ABS","type=f32,ne_a=[128,2,2,2],v=1","support","1","yes","WebGPU" "WebGPU: WebGPU","ABS","type=f32,ne_a=[5,7,11,13],v=1","support","1","yes","WebGPU" "WebGPU: WebGPU","SGN","type=f32,ne_a=[128,2,2,2],v=1","support","1","yes","WebGPU" @@ 
-153,20 +153,20 @@ "WebGPU: WebGPU","HARDSIGMOID","type=f32,ne_a=[5,7,11,13],v=1","support","1","yes","WebGPU" "WebGPU: WebGPU","EXP","type=f32,ne_a=[128,2,2,2],v=1","support","1","yes","WebGPU" "WebGPU: WebGPU","EXP","type=f32,ne_a=[5,7,11,13],v=1","support","1","yes","WebGPU" -"WebGPU: WebGPU","EXPM1","type=f32,ne_a=[128,2,2,2],v=1","support","0","no","WebGPU" -"WebGPU: WebGPU","EXPM1","type=f32,ne_a=[5,7,11,13],v=1","support","0","no","WebGPU" -"WebGPU: WebGPU","SOFTPLUS","type=f32,ne_a=[128,2,2,2],v=1","support","0","no","WebGPU" -"WebGPU: WebGPU","SOFTPLUS","type=f32,ne_a=[5,7,11,13],v=1","support","0","no","WebGPU" +"WebGPU: WebGPU","EXPM1","type=f32,ne_a=[128,2,2,2],v=1","support","1","yes","WebGPU" +"WebGPU: WebGPU","EXPM1","type=f32,ne_a=[5,7,11,13],v=1","support","1","yes","WebGPU" +"WebGPU: WebGPU","SOFTPLUS","type=f32,ne_a=[128,2,2,2],v=1","support","1","yes","WebGPU" +"WebGPU: WebGPU","SOFTPLUS","type=f32,ne_a=[5,7,11,13],v=1","support","1","yes","WebGPU" "WebGPU: WebGPU","GELU_ERF","type=f32,ne_a=[128,2,2,2],v=1","support","1","yes","WebGPU" "WebGPU: WebGPU","GELU_ERF","type=f32,ne_a=[5,7,11,13],v=1","support","1","yes","WebGPU" -"WebGPU: WebGPU","FLOOR","type=f32,ne_a=[128,2,2,2],v=1","support","0","no","WebGPU" -"WebGPU: WebGPU","FLOOR","type=f32,ne_a=[5,7,11,13],v=1","support","0","no","WebGPU" -"WebGPU: WebGPU","CEIL","type=f32,ne_a=[128,2,2,2],v=1","support","0","no","WebGPU" -"WebGPU: WebGPU","CEIL","type=f32,ne_a=[5,7,11,13],v=1","support","0","no","WebGPU" -"WebGPU: WebGPU","ROUND","type=f32,ne_a=[128,2,2,2],v=1","support","0","no","WebGPU" -"WebGPU: WebGPU","ROUND","type=f32,ne_a=[5,7,11,13],v=1","support","0","no","WebGPU" -"WebGPU: WebGPU","TRUNC","type=f32,ne_a=[128,2,2,2],v=1","support","0","no","WebGPU" -"WebGPU: WebGPU","TRUNC","type=f32,ne_a=[5,7,11,13],v=1","support","0","no","WebGPU" +"WebGPU: WebGPU","FLOOR","type=f32,ne_a=[128,2,2,2],v=1","support","1","yes","WebGPU" +"WebGPU: 
WebGPU","FLOOR","type=f32,ne_a=[5,7,11,13],v=1","support","1","yes","WebGPU" +"WebGPU: WebGPU","CEIL","type=f32,ne_a=[128,2,2,2],v=1","support","1","yes","WebGPU" +"WebGPU: WebGPU","CEIL","type=f32,ne_a=[5,7,11,13],v=1","support","1","yes","WebGPU" +"WebGPU: WebGPU","ROUND","type=f32,ne_a=[128,2,2,2],v=1","support","1","yes","WebGPU" +"WebGPU: WebGPU","ROUND","type=f32,ne_a=[5,7,11,13],v=1","support","1","yes","WebGPU" +"WebGPU: WebGPU","TRUNC","type=f32,ne_a=[128,2,2,2],v=1","support","1","yes","WebGPU" +"WebGPU: WebGPU","TRUNC","type=f32,ne_a=[5,7,11,13],v=1","support","1","yes","WebGPU" "WebGPU: WebGPU","REGLU","type=f16,ne_a=[128,2,2,2],v=0,swapped=0","support","1","yes","WebGPU" "WebGPU: WebGPU","REGLU","type=f16,ne_a=[5,7,11,13],v=0,swapped=0","support","1","yes","WebGPU" "WebGPU: WebGPU","REGLU","type=f16,ne_a=[128,2,2,2],v=0,swapped=1","support","1","yes","WebGPU" @@ -4964,6 +4964,7 @@ "WebGPU: WebGPU","CONV_TRANSPOSE_1D","ne_input=[2,1,1,1],ne_kernel=[3,1,1,1],s0=1,p0=0,d0=1","support","0","no","WebGPU" "WebGPU: WebGPU","CONV_TRANSPOSE_2D","ne_input=[3,2,3,1],ne_kernel=[2,2,1,3],stride=1","support","0","no","WebGPU" "WebGPU: WebGPU","CONV_TRANSPOSE_2D","ne_input=[10,10,9,1],ne_kernel=[3,3,1,9],stride=2","support","0","no","WebGPU" +"WebGPU: WebGPU","CONV_TRANSPOSE_2D","ne_input=[129,63,35,1],ne_kernel=[3,3,48,35],stride=1","support","0","no","WebGPU" "WebGPU: WebGPU","COUNT_EQUAL","type=f32,ne=[4,500,1,1]","support","0","no","WebGPU" "WebGPU: WebGPU","COUNT_EQUAL","type=f32,ne=[4,5000,1,1]","support","0","no","WebGPU" "WebGPU: WebGPU","ARGMAX","type=f32,ne=[32,1,1,1]","support","0","no","WebGPU" @@ -8661,10 +8662,10 @@ "WebGPU: WebGPU","COS","type=f16,ne=[10,2,2,2]","support","0","no","WebGPU" "WebGPU: WebGPU","CLAMP","type=f16,ne=[10,5,4,3],min=-0.500000,max=0.500000","support","0","no","WebGPU" "WebGPU: WebGPU","LEAKY_RELU","type=f16,ne_a=[10,5,4,3],negative_slope=0.100000","support","0","no","WebGPU" -"WebGPU: 
WebGPU","FLOOR","type=f16,ne=[10,2,2,2]","support","0","no","WebGPU" -"WebGPU: WebGPU","CEIL","type=f16,ne=[10,2,2,2]","support","0","no","WebGPU" -"WebGPU: WebGPU","ROUND","type=f16,ne=[10,2,2,2]","support","0","no","WebGPU" -"WebGPU: WebGPU","TRUNC","type=f16,ne=[10,2,2,2]","support","0","no","WebGPU" +"WebGPU: WebGPU","FLOOR","type=f16,ne=[10,2,2,2]","support","1","yes","WebGPU" +"WebGPU: WebGPU","CEIL","type=f16,ne=[10,2,2,2]","support","1","yes","WebGPU" +"WebGPU: WebGPU","ROUND","type=f16,ne=[10,2,2,2]","support","1","yes","WebGPU" +"WebGPU: WebGPU","TRUNC","type=f16,ne=[10,2,2,2]","support","1","yes","WebGPU" "WebGPU: WebGPU","SQR","type=f16,ne=[7,1,5,3]","support","0","no","WebGPU" "WebGPU: WebGPU","SQRT","type=f16,ne=[7,1,5,3]","support","0","no","WebGPU" "WebGPU: WebGPU","LOG","type=f16,ne=[7,1,5,3]","support","0","no","WebGPU" @@ -8672,14 +8673,14 @@ "WebGPU: WebGPU","COS","type=f16,ne=[7,1,5,3]","support","0","no","WebGPU" "WebGPU: WebGPU","CLAMP","type=f16,ne=[7,1,5,3],min=-0.500000,max=0.500000","support","0","no","WebGPU" "WebGPU: WebGPU","LEAKY_RELU","type=f16,ne_a=[7,1,5,3],negative_slope=0.100000","support","0","no","WebGPU" -"WebGPU: WebGPU","FLOOR","type=f16,ne=[7,1,5,3]","support","0","no","WebGPU" -"WebGPU: WebGPU","FLOOR","type=f16,ne=[1024,1024,1,1]","support","0","no","WebGPU" -"WebGPU: WebGPU","CEIL","type=f16,ne=[7,1,5,3]","support","0","no","WebGPU" -"WebGPU: WebGPU","CEIL","type=f16,ne=[1024,1024,1,1]","support","0","no","WebGPU" -"WebGPU: WebGPU","ROUND","type=f16,ne=[7,1,5,3]","support","0","no","WebGPU" -"WebGPU: WebGPU","ROUND","type=f16,ne=[1024,1024,1,1]","support","0","no","WebGPU" -"WebGPU: WebGPU","TRUNC","type=f16,ne=[7,1,5,3]","support","0","no","WebGPU" -"WebGPU: WebGPU","TRUNC","type=f16,ne=[1024,1024,1,1]","support","0","no","WebGPU" +"WebGPU: WebGPU","FLOOR","type=f16,ne=[7,1,5,3]","support","1","yes","WebGPU" +"WebGPU: WebGPU","FLOOR","type=f16,ne=[1024,1024,1,1]","support","1","yes","WebGPU" +"WebGPU: 
WebGPU","CEIL","type=f16,ne=[7,1,5,3]","support","1","yes","WebGPU" +"WebGPU: WebGPU","CEIL","type=f16,ne=[1024,1024,1,1]","support","1","yes","WebGPU" +"WebGPU: WebGPU","ROUND","type=f16,ne=[7,1,5,3]","support","1","yes","WebGPU" +"WebGPU: WebGPU","ROUND","type=f16,ne=[1024,1024,1,1]","support","1","yes","WebGPU" +"WebGPU: WebGPU","TRUNC","type=f16,ne=[7,1,5,3]","support","1","yes","WebGPU" +"WebGPU: WebGPU","TRUNC","type=f16,ne=[1024,1024,1,1]","support","1","yes","WebGPU" "WebGPU: WebGPU","SQR","type=f32,ne=[10,5,4,3]","support","0","no","WebGPU" "WebGPU: WebGPU","SQRT","type=f32,ne=[10,3,3,2]","support","0","no","WebGPU" "WebGPU: WebGPU","LOG","type=f32,ne=[10,5,4,3]","support","0","no","WebGPU" @@ -8687,10 +8688,10 @@ "WebGPU: WebGPU","COS","type=f32,ne=[10,2,2,2]","support","0","no","WebGPU" "WebGPU: WebGPU","CLAMP","type=f32,ne=[10,5,4,3],min=-0.500000,max=0.500000","support","0","no","WebGPU" "WebGPU: WebGPU","LEAKY_RELU","type=f32,ne_a=[10,5,4,3],negative_slope=0.100000","support","0","no","WebGPU" -"WebGPU: WebGPU","FLOOR","type=f32,ne=[10,2,2,2]","support","0","no","WebGPU" -"WebGPU: WebGPU","CEIL","type=f32,ne=[10,2,2,2]","support","0","no","WebGPU" -"WebGPU: WebGPU","ROUND","type=f32,ne=[10,2,2,2]","support","0","no","WebGPU" -"WebGPU: WebGPU","TRUNC","type=f32,ne=[10,2,2,2]","support","0","no","WebGPU" +"WebGPU: WebGPU","FLOOR","type=f32,ne=[10,2,2,2]","support","1","yes","WebGPU" +"WebGPU: WebGPU","CEIL","type=f32,ne=[10,2,2,2]","support","1","yes","WebGPU" +"WebGPU: WebGPU","ROUND","type=f32,ne=[10,2,2,2]","support","1","yes","WebGPU" +"WebGPU: WebGPU","TRUNC","type=f32,ne=[10,2,2,2]","support","1","yes","WebGPU" "WebGPU: WebGPU","SQR","type=f32,ne=[7,1,5,3]","support","0","no","WebGPU" "WebGPU: WebGPU","SQRT","type=f32,ne=[7,1,5,3]","support","0","no","WebGPU" "WebGPU: WebGPU","LOG","type=f32,ne=[7,1,5,3]","support","0","no","WebGPU" @@ -8698,14 +8699,14 @@ "WebGPU: WebGPU","COS","type=f32,ne=[7,1,5,3]","support","0","no","WebGPU" "WebGPU: 
WebGPU","CLAMP","type=f32,ne=[7,1,5,3],min=-0.500000,max=0.500000","support","0","no","WebGPU" "WebGPU: WebGPU","LEAKY_RELU","type=f32,ne_a=[7,1,5,3],negative_slope=0.100000","support","0","no","WebGPU" -"WebGPU: WebGPU","FLOOR","type=f32,ne=[7,1,5,3]","support","0","no","WebGPU" -"WebGPU: WebGPU","FLOOR","type=f32,ne=[1024,1024,1,1]","support","0","no","WebGPU" -"WebGPU: WebGPU","CEIL","type=f32,ne=[7,1,5,3]","support","0","no","WebGPU" -"WebGPU: WebGPU","CEIL","type=f32,ne=[1024,1024,1,1]","support","0","no","WebGPU" -"WebGPU: WebGPU","ROUND","type=f32,ne=[7,1,5,3]","support","0","no","WebGPU" -"WebGPU: WebGPU","ROUND","type=f32,ne=[1024,1024,1,1]","support","0","no","WebGPU" -"WebGPU: WebGPU","TRUNC","type=f32,ne=[7,1,5,3]","support","0","no","WebGPU" -"WebGPU: WebGPU","TRUNC","type=f32,ne=[1024,1024,1,1]","support","0","no","WebGPU" +"WebGPU: WebGPU","FLOOR","type=f32,ne=[7,1,5,3]","support","1","yes","WebGPU" +"WebGPU: WebGPU","FLOOR","type=f32,ne=[1024,1024,1,1]","support","1","yes","WebGPU" +"WebGPU: WebGPU","CEIL","type=f32,ne=[7,1,5,3]","support","1","yes","WebGPU" +"WebGPU: WebGPU","CEIL","type=f32,ne=[1024,1024,1,1]","support","1","yes","WebGPU" +"WebGPU: WebGPU","ROUND","type=f32,ne=[7,1,5,3]","support","1","yes","WebGPU" +"WebGPU: WebGPU","ROUND","type=f32,ne=[1024,1024,1,1]","support","1","yes","WebGPU" +"WebGPU: WebGPU","TRUNC","type=f32,ne=[7,1,5,3]","support","1","yes","WebGPU" +"WebGPU: WebGPU","TRUNC","type=f32,ne=[1024,1024,1,1]","support","1","yes","WebGPU" "WebGPU: WebGPU","DIAG_MASK_INF","type=f32,ne=[10,10,1,1],n_past=5","support","0","no","WebGPU" "WebGPU: WebGPU","DIAG_MASK_INF","type=f32,ne=[10,10,3,1],n_past=5","support","0","no","WebGPU" "WebGPU: WebGPU","DIAG_MASK_INF","type=f32,ne=[10,10,3,2],n_past=5","support","0","no","WebGPU" @@ -9542,311 +9543,311 @@ "WebGPU: WebGPU","ARGSORT","type=f32,ne=[2048,2,1,3],order=1","support","0","no","WebGPU" "WebGPU: WebGPU","ARGSORT","type=f32,ne=[2049,2,1,3],order=1","support","0","no","WebGPU" 
"WebGPU: WebGPU","ARGSORT","type=f32,ne=[2,8,8192,1],order=1","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[1,1,1,1],k=1","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[12,1,2,1],k=1","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[2,1,1,1],k=1","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[13,1,2,1],k=1","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[2,1,1,1],k=2","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[13,1,2,1],k=2","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[4,1,1,1],k=1","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[15,1,2,1],k=1","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[4,1,1,1],k=2","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[15,1,2,1],k=2","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[4,1,1,1],k=3","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[15,1,2,1],k=3","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[8,1,1,1],k=1","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[19,1,2,1],k=1","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[8,1,1,1],k=2","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[19,1,2,1],k=2","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[8,1,1,1],k=3","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[19,1,2,1],k=3","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[8,1,1,1],k=7","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[19,1,2,1],k=7","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[16,1,1,1],k=1","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[27,1,2,1],k=1","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[16,1,1,1],k=2","support","0","no","WebGPU" -"WebGPU: 
WebGPU","TOP_K","type=f32,ne=[27,1,2,1],k=2","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[16,1,1,1],k=3","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[27,1,2,1],k=3","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[16,1,1,1],k=7","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[27,1,2,1],k=7","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[16,1,1,1],k=15","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[27,1,2,1],k=15","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[32,1,1,1],k=1","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[43,1,2,1],k=1","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[32,1,1,1],k=2","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[43,1,2,1],k=2","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[32,1,1,1],k=3","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[43,1,2,1],k=3","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[32,1,1,1],k=7","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[43,1,2,1],k=7","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[32,1,1,1],k=15","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[43,1,2,1],k=15","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[64,1,1,1],k=1","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[75,1,2,1],k=1","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[64,1,1,1],k=2","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[75,1,2,1],k=2","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[64,1,1,1],k=3","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[75,1,2,1],k=3","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[64,1,1,1],k=7","support","0","no","WebGPU" -"WebGPU: 
WebGPU","TOP_K","type=f32,ne=[75,1,2,1],k=7","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[64,1,1,1],k=15","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[75,1,2,1],k=15","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[128,1,1,1],k=1","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[139,1,2,1],k=1","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[128,1,1,1],k=2","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[139,1,2,1],k=2","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[128,1,1,1],k=3","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[139,1,2,1],k=3","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[128,1,1,1],k=7","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[139,1,2,1],k=7","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[128,1,1,1],k=15","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[139,1,2,1],k=15","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[128,1,1,1],k=100","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[139,1,2,1],k=100","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[256,1,1,1],k=1","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[267,1,2,1],k=1","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[256,1,1,1],k=2","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[267,1,2,1],k=2","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[256,1,1,1],k=3","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[267,1,2,1],k=3","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[256,1,1,1],k=7","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[267,1,2,1],k=7","support","0","no","WebGPU" -"WebGPU: 
WebGPU","TOP_K","type=f32,ne=[256,1,1,1],k=15","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[267,1,2,1],k=15","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[256,1,1,1],k=100","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[267,1,2,1],k=100","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[512,1,1,1],k=1","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[523,1,2,1],k=1","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[512,1,1,1],k=2","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[523,1,2,1],k=2","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[512,1,1,1],k=3","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[523,1,2,1],k=3","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[512,1,1,1],k=7","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[523,1,2,1],k=7","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[512,1,1,1],k=15","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[523,1,2,1],k=15","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[512,1,1,1],k=100","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[523,1,2,1],k=100","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[512,1,1,1],k=500","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[523,1,2,1],k=500","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[1024,1,1,1],k=1","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[1035,1,2,1],k=1","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[1024,1,1,1],k=2","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[1035,1,2,1],k=2","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[1024,1,1,1],k=3","support","0","no","WebGPU" -"WebGPU: 
WebGPU","TOP_K","type=f32,ne=[1035,1,2,1],k=3","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[1024,1,1,1],k=7","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[1035,1,2,1],k=7","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[1024,1,1,1],k=15","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[1035,1,2,1],k=15","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[1024,1,1,1],k=100","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[1035,1,2,1],k=100","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[1024,1,1,1],k=500","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[1035,1,2,1],k=500","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[1024,1,1,1],k=1023","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[1035,1,2,1],k=1023","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[2048,1,1,1],k=1","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[2059,1,2,1],k=1","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[2048,1,1,1],k=2","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[2059,1,2,1],k=2","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[2048,1,1,1],k=3","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[2059,1,2,1],k=3","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[2048,1,1,1],k=7","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[2059,1,2,1],k=7","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[2048,1,1,1],k=15","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[2059,1,2,1],k=15","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[2048,1,1,1],k=100","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[2059,1,2,1],k=100","support","0","no","WebGPU" -"WebGPU: 
WebGPU","TOP_K","type=f32,ne=[2048,1,1,1],k=500","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[2059,1,2,1],k=500","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[2048,1,1,1],k=1023","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[2059,1,2,1],k=1023","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[4096,1,1,1],k=1","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[4107,1,2,1],k=1","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[4096,1,1,1],k=2","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[4107,1,2,1],k=2","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[4096,1,1,1],k=3","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[4107,1,2,1],k=3","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[4096,1,1,1],k=7","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[4107,1,2,1],k=7","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[4096,1,1,1],k=15","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[4107,1,2,1],k=15","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[4096,1,1,1],k=100","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[4107,1,2,1],k=100","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[4096,1,1,1],k=500","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[4107,1,2,1],k=500","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[4096,1,1,1],k=1023","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[4107,1,2,1],k=1023","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[8192,1,1,1],k=1","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[8203,1,2,1],k=1","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[8192,1,1,1],k=2","support","0","no","WebGPU" -"WebGPU: 
WebGPU","TOP_K","type=f32,ne=[8203,1,2,1],k=2","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[8192,1,1,1],k=3","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[8203,1,2,1],k=3","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[8192,1,1,1],k=7","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[8203,1,2,1],k=7","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[8192,1,1,1],k=15","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[8203,1,2,1],k=15","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[8192,1,1,1],k=100","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[8203,1,2,1],k=100","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[8192,1,1,1],k=500","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[8203,1,2,1],k=500","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[8192,1,1,1],k=1023","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[8203,1,2,1],k=1023","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[16384,1,1,1],k=1","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[16395,1,2,1],k=1","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[16384,1,1,1],k=2","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[16395,1,2,1],k=2","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[16384,1,1,1],k=3","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[16395,1,2,1],k=3","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[16384,1,1,1],k=7","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[16395,1,2,1],k=7","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[16384,1,1,1],k=15","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[16395,1,2,1],k=15","support","0","no","WebGPU" -"WebGPU: 
WebGPU","TOP_K","type=f32,ne=[16384,1,1,1],k=100","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[16395,1,2,1],k=100","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[16384,1,1,1],k=500","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[16395,1,2,1],k=500","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[16384,1,1,1],k=1023","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[16395,1,2,1],k=1023","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[16384,1,1,1],k=9999","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[16395,1,2,1],k=9999","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[32768,1,1,1],k=1","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[32779,1,2,1],k=1","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[32768,1,1,1],k=2","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[32779,1,2,1],k=2","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[32768,1,1,1],k=3","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[32779,1,2,1],k=3","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[32768,1,1,1],k=7","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[32779,1,2,1],k=7","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[32768,1,1,1],k=15","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[32779,1,2,1],k=15","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[32768,1,1,1],k=100","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[32779,1,2,1],k=100","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[32768,1,1,1],k=500","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[32779,1,2,1],k=500","support","0","no","WebGPU" -"WebGPU: 
WebGPU","TOP_K","type=f32,ne=[32768,1,1,1],k=1023","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[32779,1,2,1],k=1023","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[32768,1,1,1],k=9999","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[32779,1,2,1],k=9999","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[65536,1,1,1],k=1","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[65547,1,2,1],k=1","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[65536,1,1,1],k=2","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[65547,1,2,1],k=2","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[65536,1,1,1],k=3","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[65547,1,2,1],k=3","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[65536,1,1,1],k=7","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[65547,1,2,1],k=7","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[65536,1,1,1],k=15","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[65547,1,2,1],k=15","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[65536,1,1,1],k=100","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[65547,1,2,1],k=100","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[65536,1,1,1],k=500","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[65547,1,2,1],k=500","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[65536,1,1,1],k=1023","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[65547,1,2,1],k=1023","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[65536,1,1,1],k=9999","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[65547,1,2,1],k=9999","support","0","no","WebGPU" -"WebGPU: 
WebGPU","TOP_K","type=f32,ne=[131072,1,1,1],k=1","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[131083,1,2,1],k=1","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[131072,1,1,1],k=2","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[131083,1,2,1],k=2","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[131072,1,1,1],k=3","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[131083,1,2,1],k=3","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[131072,1,1,1],k=7","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[131083,1,2,1],k=7","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[131072,1,1,1],k=15","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[131083,1,2,1],k=15","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[131072,1,1,1],k=100","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[131083,1,2,1],k=100","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[131072,1,1,1],k=500","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[131083,1,2,1],k=500","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[131072,1,1,1],k=1023","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[131083,1,2,1],k=1023","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[131072,1,1,1],k=9999","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[131083,1,2,1],k=9999","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[262144,1,1,1],k=1","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[262155,1,2,1],k=1","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[262144,1,1,1],k=2","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[262155,1,2,1],k=2","support","0","no","WebGPU" -"WebGPU: 
WebGPU","TOP_K","type=f32,ne=[262144,1,1,1],k=3","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[262155,1,2,1],k=3","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[262144,1,1,1],k=7","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[262155,1,2,1],k=7","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[262144,1,1,1],k=15","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[262155,1,2,1],k=15","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[262144,1,1,1],k=100","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[262155,1,2,1],k=100","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[262144,1,1,1],k=500","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[262155,1,2,1],k=500","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[262144,1,1,1],k=1023","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[262155,1,2,1],k=1023","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[262144,1,1,1],k=9999","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[262155,1,2,1],k=9999","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[524288,1,1,1],k=1","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[524299,1,2,1],k=1","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[524288,1,1,1],k=2","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[524299,1,2,1],k=2","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[524288,1,1,1],k=3","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[524299,1,2,1],k=3","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[524288,1,1,1],k=7","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[524299,1,2,1],k=7","support","0","no","WebGPU" -"WebGPU: 
WebGPU","TOP_K","type=f32,ne=[524288,1,1,1],k=15","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[524299,1,2,1],k=15","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[524288,1,1,1],k=100","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[524299,1,2,1],k=100","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[524288,1,1,1],k=500","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[524299,1,2,1],k=500","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[524288,1,1,1],k=1023","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[524299,1,2,1],k=1023","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[524288,1,1,1],k=9999","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[524299,1,2,1],k=9999","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[16,10,10,10],k=1","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[60,10,10,10],k=1","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[1023,2,1,3],k=1","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[1024,2,1,3],k=1","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[1025,2,1,3],k=1","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[16384,1,1,1],k=1","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[2047,2,1,3],k=1","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[2048,2,1,3],k=1","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[2049,2,1,3],k=1","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[16,10,10,10],k=2","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[60,10,10,10],k=2","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[1023,2,1,3],k=2","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[1024,2,1,3],k=2","support","0","no","WebGPU" 
-"WebGPU: WebGPU","TOP_K","type=f32,ne=[1025,2,1,3],k=2","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[16384,1,1,1],k=2","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[2047,2,1,3],k=2","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[2048,2,1,3],k=2","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[2049,2,1,3],k=2","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[16,10,10,10],k=3","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[60,10,10,10],k=3","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[1023,2,1,3],k=3","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[1024,2,1,3],k=3","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[1025,2,1,3],k=3","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[16384,1,1,1],k=3","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[2047,2,1,3],k=3","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[2048,2,1,3],k=3","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[2049,2,1,3],k=3","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[16,10,10,10],k=7","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[60,10,10,10],k=7","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[1023,2,1,3],k=7","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[1024,2,1,3],k=7","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[1025,2,1,3],k=7","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[16384,1,1,1],k=7","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[2047,2,1,3],k=7","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[2048,2,1,3],k=7","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[2049,2,1,3],k=7","support","0","no","WebGPU" -"WebGPU: 
WebGPU","TOP_K","type=f32,ne=[16,10,10,10],k=15","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[60,10,10,10],k=15","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[1023,2,1,3],k=15","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[1024,2,1,3],k=15","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[1025,2,1,3],k=15","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[16384,1,1,1],k=15","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[2047,2,1,3],k=15","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[2048,2,1,3],k=15","support","0","no","WebGPU" -"WebGPU: WebGPU","TOP_K","type=f32,ne=[2049,2,1,3],k=15","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[1,1,1,1],k=1,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[12,1,2,1],k=1,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[2,1,1,1],k=1,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[13,1,2,1],k=1,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[2,1,1,1],k=2,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[13,1,2,1],k=2,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[4,1,1,1],k=1,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[15,1,2,1],k=1,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[4,1,1,1],k=2,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[15,1,2,1],k=2,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[4,1,1,1],k=3,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[15,1,2,1],k=3,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[8,1,1,1],k=1,ties=0","support","0","no","WebGPU" +"WebGPU: 
WebGPU","TOP_K","type=f32,ne=[19,1,2,1],k=1,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[8,1,1,1],k=2,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[19,1,2,1],k=2,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[8,1,1,1],k=3,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[19,1,2,1],k=3,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[8,1,1,1],k=7,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[19,1,2,1],k=7,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[16,1,1,1],k=1,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[27,1,2,1],k=1,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[16,1,1,1],k=2,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[27,1,2,1],k=2,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[16,1,1,1],k=3,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[27,1,2,1],k=3,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[16,1,1,1],k=7,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[27,1,2,1],k=7,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[16,1,1,1],k=15,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[27,1,2,1],k=15,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[32,1,1,1],k=1,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[43,1,2,1],k=1,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[32,1,1,1],k=2,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[43,1,2,1],k=2,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[32,1,1,1],k=3,ties=0","support","0","no","WebGPU" +"WebGPU: 
WebGPU","TOP_K","type=f32,ne=[43,1,2,1],k=3,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[32,1,1,1],k=7,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[43,1,2,1],k=7,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[32,1,1,1],k=15,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[43,1,2,1],k=15,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[64,1,1,1],k=1,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[75,1,2,1],k=1,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[64,1,1,1],k=2,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[75,1,2,1],k=2,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[64,1,1,1],k=3,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[75,1,2,1],k=3,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[64,1,1,1],k=7,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[75,1,2,1],k=7,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[64,1,1,1],k=15,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[75,1,2,1],k=15,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[128,1,1,1],k=1,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[139,1,2,1],k=1,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[128,1,1,1],k=2,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[139,1,2,1],k=2,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[128,1,1,1],k=3,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[139,1,2,1],k=3,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[128,1,1,1],k=7,ties=0","support","0","no","WebGPU" +"WebGPU: 
WebGPU","TOP_K","type=f32,ne=[139,1,2,1],k=7,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[128,1,1,1],k=15,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[139,1,2,1],k=15,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[128,1,1,1],k=100,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[139,1,2,1],k=100,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[256,1,1,1],k=1,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[267,1,2,1],k=1,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[256,1,1,1],k=2,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[267,1,2,1],k=2,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[256,1,1,1],k=3,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[267,1,2,1],k=3,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[256,1,1,1],k=7,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[267,1,2,1],k=7,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[256,1,1,1],k=15,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[267,1,2,1],k=15,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[256,1,1,1],k=100,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[267,1,2,1],k=100,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[512,1,1,1],k=1,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[523,1,2,1],k=1,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[512,1,1,1],k=2,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[523,1,2,1],k=2,ties=0","support","0","no","WebGPU" +"WebGPU: 
WebGPU","TOP_K","type=f32,ne=[512,1,1,1],k=3,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[523,1,2,1],k=3,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[512,1,1,1],k=7,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[523,1,2,1],k=7,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[512,1,1,1],k=15,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[523,1,2,1],k=15,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[512,1,1,1],k=100,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[523,1,2,1],k=100,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[512,1,1,1],k=500,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[523,1,2,1],k=500,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[1024,1,1,1],k=1,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[1035,1,2,1],k=1,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[1024,1,1,1],k=2,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[1035,1,2,1],k=2,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[1024,1,1,1],k=3,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[1035,1,2,1],k=3,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[1024,1,1,1],k=7,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[1035,1,2,1],k=7,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[1024,1,1,1],k=15,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[1035,1,2,1],k=15,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[1024,1,1,1],k=100,ties=0","support","0","no","WebGPU" +"WebGPU: 
WebGPU","TOP_K","type=f32,ne=[1035,1,2,1],k=100,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[1024,1,1,1],k=500,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[1035,1,2,1],k=500,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[1024,1,1,1],k=1023,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[1035,1,2,1],k=1023,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[2048,1,1,1],k=1,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[2059,1,2,1],k=1,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[2048,1,1,1],k=2,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[2059,1,2,1],k=2,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[2048,1,1,1],k=3,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[2059,1,2,1],k=3,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[2048,1,1,1],k=7,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[2059,1,2,1],k=7,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[2048,1,1,1],k=15,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[2059,1,2,1],k=15,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[2048,1,1,1],k=100,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[2059,1,2,1],k=100,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[2048,1,1,1],k=500,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[2059,1,2,1],k=500,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[2048,1,1,1],k=1023,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[2059,1,2,1],k=1023,ties=0","support","0","no","WebGPU" +"WebGPU: 
WebGPU","TOP_K","type=f32,ne=[4096,1,1,1],k=1,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[4107,1,2,1],k=1,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[4096,1,1,1],k=2,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[4107,1,2,1],k=2,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[4096,1,1,1],k=3,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[4107,1,2,1],k=3,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[4096,1,1,1],k=7,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[4107,1,2,1],k=7,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[4096,1,1,1],k=15,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[4107,1,2,1],k=15,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[4096,1,1,1],k=100,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[4107,1,2,1],k=100,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[4096,1,1,1],k=500,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[4107,1,2,1],k=500,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[4096,1,1,1],k=1023,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[4107,1,2,1],k=1023,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[8192,1,1,1],k=1,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[8203,1,2,1],k=1,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[8192,1,1,1],k=2,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[8203,1,2,1],k=2,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[8192,1,1,1],k=3,ties=0","support","0","no","WebGPU" +"WebGPU: 
WebGPU","TOP_K","type=f32,ne=[8203,1,2,1],k=3,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[8192,1,1,1],k=7,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[8203,1,2,1],k=7,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[8192,1,1,1],k=15,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[8203,1,2,1],k=15,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[8192,1,1,1],k=100,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[8203,1,2,1],k=100,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[8192,1,1,1],k=500,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[8203,1,2,1],k=500,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[8192,1,1,1],k=1023,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[8203,1,2,1],k=1023,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[16384,1,1,1],k=1,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[16395,1,2,1],k=1,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[16384,1,1,1],k=2,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[16395,1,2,1],k=2,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[16384,1,1,1],k=3,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[16395,1,2,1],k=3,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[16384,1,1,1],k=7,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[16395,1,2,1],k=7,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[16384,1,1,1],k=15,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[16395,1,2,1],k=15,ties=0","support","0","no","WebGPU" +"WebGPU: 
WebGPU","TOP_K","type=f32,ne=[16384,1,1,1],k=100,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[16395,1,2,1],k=100,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[16384,1,1,1],k=500,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[16395,1,2,1],k=500,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[16384,1,1,1],k=1023,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[16395,1,2,1],k=1023,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[16384,1,1,1],k=9999,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[16395,1,2,1],k=9999,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[32768,1,1,1],k=1,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[32779,1,2,1],k=1,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[32768,1,1,1],k=2,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[32779,1,2,1],k=2,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[32768,1,1,1],k=3,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[32779,1,2,1],k=3,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[32768,1,1,1],k=7,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[32779,1,2,1],k=7,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[32768,1,1,1],k=15,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[32779,1,2,1],k=15,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[32768,1,1,1],k=100,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[32779,1,2,1],k=100,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[32768,1,1,1],k=500,ties=0","support","0","no","WebGPU" +"WebGPU: 
WebGPU","TOP_K","type=f32,ne=[32779,1,2,1],k=500,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[32768,1,1,1],k=1023,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[32779,1,2,1],k=1023,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[32768,1,1,1],k=9999,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[32779,1,2,1],k=9999,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[65536,1,1,1],k=1,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[65547,1,2,1],k=1,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[65536,1,1,1],k=2,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[65547,1,2,1],k=2,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[65536,1,1,1],k=3,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[65547,1,2,1],k=3,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[65536,1,1,1],k=7,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[65547,1,2,1],k=7,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[65536,1,1,1],k=15,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[65547,1,2,1],k=15,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[65536,1,1,1],k=100,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[65547,1,2,1],k=100,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[65536,1,1,1],k=500,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[65547,1,2,1],k=500,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[65536,1,1,1],k=1023,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[65547,1,2,1],k=1023,ties=0","support","0","no","WebGPU" +"WebGPU: 
WebGPU","TOP_K","type=f32,ne=[65536,1,1,1],k=9999,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[65547,1,2,1],k=9999,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[131072,1,1,1],k=1,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[131083,1,2,1],k=1,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[131072,1,1,1],k=2,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[131083,1,2,1],k=2,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[131072,1,1,1],k=3,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[131083,1,2,1],k=3,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[131072,1,1,1],k=7,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[131083,1,2,1],k=7,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[131072,1,1,1],k=15,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[131083,1,2,1],k=15,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[131072,1,1,1],k=100,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[131083,1,2,1],k=100,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[131072,1,1,1],k=500,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[131083,1,2,1],k=500,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[131072,1,1,1],k=1023,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[131083,1,2,1],k=1023,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[131072,1,1,1],k=9999,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[131083,1,2,1],k=9999,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[262144,1,1,1],k=1,ties=0","support","0","no","WebGPU" +"WebGPU: 
WebGPU","TOP_K","type=f32,ne=[262155,1,2,1],k=1,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[262144,1,1,1],k=2,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[262155,1,2,1],k=2,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[262144,1,1,1],k=3,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[262155,1,2,1],k=3,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[262144,1,1,1],k=7,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[262155,1,2,1],k=7,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[262144,1,1,1],k=15,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[262155,1,2,1],k=15,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[262144,1,1,1],k=100,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[262155,1,2,1],k=100,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[262144,1,1,1],k=500,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[262155,1,2,1],k=500,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[262144,1,1,1],k=1023,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[262155,1,2,1],k=1023,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[262144,1,1,1],k=9999,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[262155,1,2,1],k=9999,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[524288,1,1,1],k=1,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[524299,1,2,1],k=1,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[524288,1,1,1],k=2,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[524299,1,2,1],k=2,ties=0","support","0","no","WebGPU" +"WebGPU: 
WebGPU","TOP_K","type=f32,ne=[524288,1,1,1],k=3,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[524299,1,2,1],k=3,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[524288,1,1,1],k=7,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[524299,1,2,1],k=7,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[524288,1,1,1],k=15,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[524299,1,2,1],k=15,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[524288,1,1,1],k=100,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[524299,1,2,1],k=100,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[524288,1,1,1],k=500,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[524299,1,2,1],k=500,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[524288,1,1,1],k=1023,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[524299,1,2,1],k=1023,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[524288,1,1,1],k=9999,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[524299,1,2,1],k=9999,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[16,10,10,10],k=1,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[60,10,10,10],k=1,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[1023,2,1,3],k=1,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[1024,2,1,3],k=1,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[1025,2,1,3],k=1,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[16384,1,1,1],k=1,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[2047,2,1,3],k=1,ties=0","support","0","no","WebGPU" +"WebGPU: 
WebGPU","TOP_K","type=f32,ne=[2048,2,1,3],k=1,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[2049,2,1,3],k=1,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[16,10,10,10],k=2,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[60,10,10,10],k=2,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[1023,2,1,3],k=2,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[1024,2,1,3],k=2,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[1025,2,1,3],k=2,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[16384,1,1,1],k=2,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[2047,2,1,3],k=2,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[2048,2,1,3],k=2,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[2049,2,1,3],k=2,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[16,10,10,10],k=3,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[60,10,10,10],k=3,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[1023,2,1,3],k=3,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[1024,2,1,3],k=3,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[1025,2,1,3],k=3,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[16384,1,1,1],k=3,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[2047,2,1,3],k=3,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[2048,2,1,3],k=3,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[2049,2,1,3],k=3,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[16,10,10,10],k=7,ties=0","support","0","no","WebGPU" +"WebGPU: 
WebGPU","TOP_K","type=f32,ne=[60,10,10,10],k=7,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[1023,2,1,3],k=7,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[1024,2,1,3],k=7,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[1025,2,1,3],k=7,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[16384,1,1,1],k=7,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[2047,2,1,3],k=7,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[2048,2,1,3],k=7,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[2049,2,1,3],k=7,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[16,10,10,10],k=15,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[60,10,10,10],k=15,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[1023,2,1,3],k=15,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[1024,2,1,3],k=15,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[1025,2,1,3],k=15,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[16384,1,1,1],k=15,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[2047,2,1,3],k=15,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[2048,2,1,3],k=15,ties=0","support","0","no","WebGPU" +"WebGPU: WebGPU","TOP_K","type=f32,ne=[2049,2,1,3],k=15,ties=0","support","0","no","WebGPU" "WebGPU: WebGPU","UPSCALE","type=f32,ne=[512,512,3,2],scale_factor=2,mode=nearest,transpose=0","support","0","no","WebGPU" "WebGPU: WebGPU","UPSCALE","type=f32,ne=[512,512,3,2],scale_factor=2,mode=nearest,transpose=1","support","0","no","WebGPU" "WebGPU: WebGPU","UPSCALE","type=f32,ne=[2,5,7,11],ne_tgt=[5,7,11,13],mode=nearest,flags=none","support","0","no","WebGPU" @@ -9891,8 +9892,9 @@ "WebGPU: 
WebGPU","GROUP_NORM","type=f32,ne=[64,64,320,1],num_groups=32,eps=0.000001","support","0","no","WebGPU" "WebGPU: WebGPU","GROUP_NORM","type=f32,ne=[9,9,1280,1],num_groups=32,eps=0.000001","support","0","no","WebGPU" "WebGPU: WebGPU","ACC","type=f32,ne_a=[256,17,1,1],ne_b=[256,16,1,1]","support","0","no","WebGPU" -"WebGPU: WebGPU","PAD","type=f32,ne_a=[512,512,1,1],pad_0=1,pad_1=1","support","0","no","WebGPU" -"WebGPU: WebGPU","PAD","type=f32,ne_a=[512,512,3,1],lp0=1,rp0=1,lp1=1,rp1=1,lp2=1,rp2=1,lp3=1,rp3=1,v=0","support","0","no","WebGPU" +"WebGPU: WebGPU","PAD","type=f32,ne_a=[512,512,1,1],pad_0=1,pad_1=1,circular=0","support","0","no","WebGPU" +"WebGPU: WebGPU","PAD","type=f32,ne_a=[33,17,2,1],pad_0=4,pad_1=3,circular=1","support","0","no","WebGPU" +"WebGPU: WebGPU","PAD","type=f32,ne_a=[512,512,3,1],lp0=1,rp0=1,lp1=1,rp1=1,lp2=1,rp2=1,lp3=1,rp3=1,v=0,circular=0","support","0","no","WebGPU" "WebGPU: WebGPU","PAD_REFLECT_1D","type=f32,ne_a=[512,34,2,1],pad_0=10,pad_1=9","support","0","no","WebGPU" "WebGPU: WebGPU","PAD_REFLECT_1D","type=f32,ne_a=[3000,384,4,1],pad_0=10,pad_1=9","support","0","no","WebGPU" "WebGPU: WebGPU","ROLL","shift0=3,shift1=-2,shift3=1,shift4=-1","support","0","no","WebGPU" @@ -9903,6 +9905,7 @@ "WebGPU: WebGPU","CUMSUM","type=f32,ne=[10,5,4,3]","support","0","no","WebGPU" "WebGPU: WebGPU","CUMSUM","type=f32,ne=[127,5,4,3]","support","0","no","WebGPU" "WebGPU: WebGPU","CUMSUM","type=f32,ne=[128,5,4,3]","support","0","no","WebGPU" +"WebGPU: WebGPU","CUMSUM","type=f32,ne=[128,128,4,4]","support","0","no","WebGPU" "WebGPU: WebGPU","CUMSUM","type=f32,ne=[255,5,4,3]","support","0","no","WebGPU" "WebGPU: WebGPU","CUMSUM","type=f32,ne=[256,5,4,3]","support","0","no","WebGPU" "WebGPU: WebGPU","CUMSUM","type=f32,ne=[511,5,4,3]","support","0","no","WebGPU" @@ -9922,6 +9925,9 @@ "WebGPU: WebGPU","FILL","type=f32,ne=[303,207,11,3],c=2.000000","support","0","no","WebGPU" "WebGPU: 
WebGPU","FILL","type=f32,ne=[800,600,4,4],c=-152.000000","support","0","no","WebGPU" "WebGPU: WebGPU","FILL","type=f32,ne=[2048,512,2,2],c=3.500000","support","0","no","WebGPU" +"WebGPU: WebGPU","DIAG","type=f32,ne=[10,1,4,3]","support","0","no","WebGPU" +"WebGPU: WebGPU","DIAG","type=f32,ne=[79,1,19,13]","support","0","no","WebGPU" +"WebGPU: WebGPU","DIAG","type=f32,ne=[256,1,8,16]","support","0","no","WebGPU" "WebGPU: WebGPU","SOLVE_TRI","type=f32,ne_lhs=[10,10,4,3],ne_rhs=[3,10,4,3]","support","0","no","WebGPU" "WebGPU: WebGPU","SOLVE_TRI","type=f32,ne_lhs=[11,11,1,1],ne_rhs=[5,11,1,1]","support","0","no","WebGPU" "WebGPU: WebGPU","SOLVE_TRI","type=f32,ne_lhs=[17,17,2,4],ne_rhs=[9,17,2,4]","support","0","no","WebGPU" @@ -9929,10 +9935,16 @@ "WebGPU: WebGPU","SOLVE_TRI","type=f32,ne_lhs=[42,42,5,2],ne_rhs=[10,42,5,2]","support","0","no","WebGPU" "WebGPU: WebGPU","SOLVE_TRI","type=f32,ne_lhs=[64,64,2,2],ne_rhs=[10,64,2,2]","support","0","no","WebGPU" "WebGPU: WebGPU","SOLVE_TRI","type=f32,ne_lhs=[100,100,4,4],ne_rhs=[41,100,4,4]","support","0","no","WebGPU" -"WebGPU: WebGPU","PAD","type=f32,ne_a=[512,512,1,1],lp0=0,rp0=1,lp1=0,rp1=1,lp2=0,rp2=0,lp3=0,rp3=0,v=0","support","0","no","WebGPU" -"WebGPU: WebGPU","PAD","type=f32,ne_a=[11,22,33,44],lp0=1,rp0=2,lp1=3,rp1=4,lp2=5,rp2=6,lp3=7,rp3=8,v=0","support","0","no","WebGPU" -"WebGPU: WebGPU","PAD","type=f32,ne_a=[512,512,1,1],lp0=0,rp0=1,lp1=0,rp1=1,lp2=0,rp2=0,lp3=0,rp3=0,v=1","support","0","no","WebGPU" -"WebGPU: WebGPU","PAD","type=f32,ne_a=[11,22,33,44],lp0=1,rp0=2,lp1=3,rp1=4,lp2=5,rp2=6,lp3=7,rp3=8,v=1","support","0","no","WebGPU" +"WebGPU: WebGPU","SOLVE_TRI","type=f32,ne_lhs=[128,128,4,4],ne_rhs=[31,128,4,4]","support","0","no","WebGPU" +"WebGPU: WebGPU","SOLVE_TRI","type=f32,ne_lhs=[64,64,4,4],ne_rhs=[300,64,4,4]","support","0","no","WebGPU" +"WebGPU: WebGPU","PAD","type=f32,ne_a=[512,512,1,1],lp0=0,rp0=1,lp1=0,rp1=1,lp2=0,rp2=0,lp3=0,rp3=0,v=0,circular=0","support","0","no","WebGPU" +"WebGPU: 
WebGPU","PAD","type=f32,ne_a=[11,22,33,44],lp0=1,rp0=2,lp1=3,rp1=4,lp2=5,rp2=6,lp3=7,rp3=8,v=0,circular=0","support","0","no","WebGPU" +"WebGPU: WebGPU","PAD","type=f32,ne_a=[512,512,1,1],lp0=0,rp0=1,lp1=0,rp1=1,lp2=0,rp2=0,lp3=0,rp3=0,v=0,circular=1","support","0","no","WebGPU" +"WebGPU: WebGPU","PAD","type=f32,ne_a=[11,22,33,44],lp0=1,rp0=2,lp1=3,rp1=4,lp2=5,rp2=6,lp3=7,rp3=8,v=0,circular=1","support","0","no","WebGPU" +"WebGPU: WebGPU","PAD","type=f32,ne_a=[512,512,1,1],lp0=0,rp0=1,lp1=0,rp1=1,lp2=0,rp2=0,lp3=0,rp3=0,v=1,circular=0","support","0","no","WebGPU" +"WebGPU: WebGPU","PAD","type=f32,ne_a=[11,22,33,44],lp0=1,rp0=2,lp1=3,rp1=4,lp2=5,rp2=6,lp3=7,rp3=8,v=1,circular=0","support","0","no","WebGPU" +"WebGPU: WebGPU","PAD","type=f32,ne_a=[512,512,1,1],lp0=0,rp0=1,lp1=0,rp1=1,lp2=0,rp2=0,lp3=0,rp3=0,v=1,circular=1","support","0","no","WebGPU" +"WebGPU: WebGPU","PAD","type=f32,ne_a=[11,22,33,44],lp0=1,rp0=2,lp1=3,rp1=4,lp2=5,rp2=6,lp3=7,rp3=8,v=1,circular=1","support","0","no","WebGPU" "WebGPU: WebGPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f32,permute=[0,1,2,3]","support","0","no","WebGPU" "WebGPU: WebGPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","no","WebGPU" "WebGPU: WebGPU","FLASH_ATTN_EXT","hsk=40,hsv=40,nh=4,nr23=[1,1],kv=113,nb=1,mask=1,sinks=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","no","WebGPU"