From fd714bc952649d686e0091de2a2014ece7ea2203 Mon Sep 17 00:00:00 2001 From: jonahwilliams Date: Sun, 2 Apr 2023 10:08:52 -0700 Subject: [PATCH 1/4] [Impeller] reduce gaussian sampling by 2x --- .../filters/gaussian_blur_filter_contents.cc | 2 ++ .../shaders/gaussian_blur/gaussian_blur.glsl | 30 ++++++++++++++----- 2 files changed, 24 insertions(+), 8 deletions(-) diff --git a/impeller/entity/contents/filters/gaussian_blur_filter_contents.cc b/impeller/entity/contents/filters/gaussian_blur_filter_contents.cc index 3f0a1f467c9d4..1b4c92765c687 100644 --- a/impeller/entity/contents/filters/gaussian_blur_filter_contents.cc +++ b/impeller/entity/contents/filters/gaussian_blur_filter_contents.cc @@ -240,6 +240,8 @@ std::optional DirectionalGaussianBlurFilterContents::RenderFilter( source_descriptor.height_address_mode = SamplerAddressMode::kRepeat; break; } + input_descriptor.mag_filter = MinMagFilter::kLinear; + input_descriptor.min_filter = MinMagFilter::kLinear; bool has_alpha_mask = blur_style_ != BlurStyle::kNormal; bool has_decal_specialization = diff --git a/impeller/entity/shaders/gaussian_blur/gaussian_blur.glsl b/impeller/entity/shaders/gaussian_blur/gaussian_blur.glsl index 9df092e1299e4..d394a4bf7961c 100644 --- a/impeller/entity/shaders/gaussian_blur/gaussian_blur.glsl +++ b/impeller/entity/shaders/gaussian_blur/gaussian_blur.glsl @@ -60,14 +60,28 @@ void main() { f16vec4 total_color = f16vec4(0.0hf); float16_t gaussian_integral = 0.0hf; - for (float16_t i = -blur_info.blur_radius; i <= blur_info.blur_radius; i++) { - float16_t gaussian = IPGaussian(i, blur_info.blur_sigma); - gaussian_integral += gaussian; - total_color += - gaussian * Sample(texture_sampler, // sampler - v_texture_coords + blur_info.blur_uv_offset * - i // texture coordinates - ); + for (float16_t i = 0.0hf; i <= blur_info.blur_radius; i += 2.0hf) { + float16_t w1 = IPGaussian(i, blur_info.blur_sigma); + float16_t w2 = IPGaussian(i + 1.0hf, blur_info.blur_sigma); + float16_t gaussian = w1 + w2; + + f16vec2 offset_1 = blur_info.blur_uv_offset * i; + f16vec2 offset_2 = offset_1 + blur_info.blur_uv_offset; + vec2 pos_c1 = v_texture_coords + offset_1; + vec2 pos_c2 = v_texture_coords + offset_2; + vec2 neg_c1 = v_texture_coords - offset_1; + vec2 neg_c2 = v_texture_coords - offset_2; + + vec2 pos_coord = (w1 * pos_c1 + w2 * pos_c2) / gaussian; + vec2 neg_coord = (w1 * neg_c1 + w2 * neg_c2) / gaussian; + + gaussian_integral += (gaussian + gaussian); + total_color += gaussian * Sample(texture_sampler, // sampler + pos_coord // texture coordinates + ); + total_color += gaussian * Sample(texture_sampler, // sampler + neg_coord // texture coordinates + ); } frag_color = total_color / gaussian_integral; From 4dc88334d12fa2b0c530446a51c8adeb85a4c71d Mon Sep 17 00:00:00 2001 From: jonahwilliams Date: Mon, 3 Apr 2023 10:58:15 -0700 Subject: [PATCH 2/4] patch malioc --- impeller/tools/malioc.json | 229 ++++++++++++++++++------------------- 1 file changed, 114 insertions(+), 115 deletions(-) diff --git a/impeller/tools/malioc.json b/impeller/tools/malioc.json index 1853a5d3beb13..65f8bb1fea7de 100644 --- a/impeller/tools/malioc.json +++ b/impeller/tools/malioc.json @@ -3242,7 +3242,7 @@ "uses_late_zs_update": false, "variants": { "Main": { - "fp16_arithmetic": 53, + "fp16_arithmetic": 38, "has_stack_spilling": false, "performance": { "longest_path_bound_pipelines": [ @@ -3271,9 +3271,9 @@ "arith_cvt" ], "shortest_path_cycles": [ - 0.328125, + 0.3125, 0.078125, - 0.328125, + 0.3125, 0.25, 0.0, 0.25, @@ -3281,22 +3281,22 @@ ], "total_bound_pipelines": [ "arith_total", - "arith_cvt" + "arith_sfu" ], "total_cycles": [ - 0.578125, - 0.25, - 0.578125, - 0.5, + 0.8125, + 0.699999988079071, + 0.737500011920929, + 0.8125, 0.0, 0.5, - 0.5 + 0.75 ] }, "stack_spill_bytes": 0, "thread_occupancy": 100, "uniform_registers_used": 12, - "work_registers_used": 20 + "work_registers_used": 27 } } } @@ -3314,7 +3314,7 @@ "uses_late_zs_update": false, "variants": { "Main": { - "fp16_arithmetic": 45, + "fp16_arithmetic": 32, "has_stack_spilling": false, "performance": { "longest_path_bound_pipelines": [ @@ -3343,32 +3343,31 @@ "texture" ], "shortest_path_cycles": [ - 0.1875, + 0.171875, 0.078125, - 0.1875, + 0.171875, 0.0625, 0.0, 0.25, 0.25 ], "total_bound_pipelines": [ - "varying", "texture" ], "total_cycles": [ - 0.34375, + 0.699999988079071, + 0.699999988079071, + 0.40625, 0.25, - 0.34375, - 0.125, 0.0, 0.5, - 0.5 + 0.75 ] }, "stack_spill_bytes": 0, "thread_occupancy": 100, - "uniform_registers_used": 14, - "work_registers_used": 14 + "uniform_registers_used": 12, + "work_registers_used": 25 } } } @@ -3386,7 +3385,7 @@ "uses_late_zs_update": false, "variants": { "Main": { - "fp16_arithmetic": 42, + "fp16_arithmetic": 32, "has_stack_spilling": false, "performance": { "longest_path_bound_pipelines": [ @@ -3415,9 +3414,9 @@ "arith_cvt" ], "shortest_path_cycles": [ - 0.109375, + 0.09375, 0.03125, - 0.109375, + 0.09375, 0.0625, 0.0, 0.0, @@ -3425,22 +3424,22 @@ ], "total_bound_pipelines": [ "arith_total", - "arith_sfu" + "arith_fma" ], "total_cycles": [ - 0.3125, - 0.203125, - 0.296875, - 0.3125, + 0.65625, + 0.65625, + 0.453125, + 0.625, 0.0, 0.25, - 0.25 + 0.5 ] }, "stack_spill_bytes": 0, "thread_occupancy": 100, - "uniform_registers_used": 10, - "work_registers_used": 16 + "uniform_registers_used": 8, + "work_registers_used": 23 } } } @@ -3458,7 +3457,7 @@ "uses_late_zs_update": false, "variants": { "Main": { - "fp16_arithmetic": 35, + "fp16_arithmetic": 27, "has_stack_spilling": false, "performance": { "longest_path_bound_pipelines": [ @@ -3487,32 +3486,32 @@ "arith_cvt" ], "shortest_path_cycles": [ - 0.109375, + 0.09375, 0.03125, - 0.109375, + 0.09375, 0.0625, 0.0, 0.0, 0.0 ], "total_bound_pipelines": [ - "varying", - "texture" + "arith_total", + "arith_fma" ], "total_cycles": [ - 0.203125, - 0.203125, - 0.203125, - 0.125, + 0.65625, + 0.65625, + 0.265625, + 0.25, 0.0, 0.25, - 0.25 + 0.5 ] }, "stack_spill_bytes": 0, "thread_occupancy": 100, - "uniform_registers_used": 10, - "work_registers_used": 13 + "uniform_registers_used": 8, + "work_registers_used": 26 } } } @@ -6603,7 +6602,7 @@ "uses_late_zs_update": false, "variants": { "Main": { - "fp16_arithmetic": 66, + "fp16_arithmetic": 45, "has_stack_spilling": false, "performance": { "longest_path_bound_pipelines": [ @@ -6644,22 +6643,22 @@ ], "total_bound_pipelines": [ "arith_total", - "arith_cvt" + "arith_sfu" ], "total_cycles": [ - 0.53125, - 0.328125, - 0.53125, - 0.5, + 0.8125, + 0.75, + 0.675000011920929, + 0.8125, 0.0, 0.5, - 0.5 + 0.75 ] }, "stack_spill_bytes": 0, "thread_occupancy": 100, "uniform_registers_used": 12, - "work_registers_used": 21 + "work_registers_used": 27 } } }, @@ -6670,7 +6669,7 @@ "type": "Fragment", "variants": { "Main": { - "has_stack_spilling": false, + "has_stack_spilling": true, "performance": { "longest_path_bound_pipelines": [ null @@ -6686,20 +6685,20 @@ "texture" ], "shortest_path_bound_pipelines": [ - "arithmetic" + "load_store" ], "shortest_path_cycles": [ - 3.299999952316284, - 2.0, + 3.9600000381469727, + 6.0, 0.0 ], "total_bound_pipelines": [ - "arithmetic" + "load_store" ], "total_cycles": [ - 7.666666507720947, - 2.0, - 2.0 + 14.0, + 15.0, + 3.0 ] }, "thread_occupancy": 100, @@ -6722,7 +6721,7 @@ "uses_late_zs_update": false, "variants": { "Main": { - "fp16_arithmetic": 61, + "fp16_arithmetic": 40, "has_stack_spilling": false, "performance": { "longest_path_bound_pipelines": [ @@ -6751,32 +6750,33 @@ "texture" ], "shortest_path_cycles": [ - 0.140625, + 0.15625, 0.125, - 0.140625, + 0.15625, 0.0625, 0.0, 0.25, 0.25 ], "total_bound_pipelines": [ - "varying", + "arith_total", + "arith_fma", "texture" ], "total_cycles": [ - 0.328125, - 0.328125, - 0.328125, - 0.125, + 0.75, + 0.75, + 0.375, + 0.25, 0.0, 0.5, - 0.5 + 0.75 ] }, "stack_spill_bytes": 0, "thread_occupancy": 100, "uniform_registers_used": 12, - "work_registers_used": 20 + "work_registers_used": 23 } } }, @@ -6787,7 +6787,7 @@ "type": "Fragment", "variants": { "Main": { - "has_stack_spilling": false, + "has_stack_spilling": true, "performance": { "longest_path_bound_pipelines": [ null @@ -6803,25 +6803,25 @@ "texture" ], "shortest_path_bound_pipelines": [ - "arithmetic" + "load_store" ], "shortest_path_cycles": [ - 2.309999942779541, - 2.0, + 2.640000104904175, + 6.0, 1.0 ], "total_bound_pipelines": [ - "arithmetic" + "load_store" ], "total_cycles": [ - 5.0, - 2.0, - 2.0 + 8.333333015441895, + 10.0, + 3.0 ] }, "thread_occupancy": 100, "uniform_registers_used": 1, - "work_registers_used": 3 + "work_registers_used": 4 } } } @@ -6839,7 +6839,7 @@ "uses_late_zs_update": false, "variants": { "Main": { - "fp16_arithmetic": 70, + "fp16_arithmetic": 43, "has_stack_spilling": false, "performance": { "longest_path_bound_pipelines": [ @@ -6879,22 +6879,22 @@ ], "total_bound_pipelines": [ "arith_total", - "arith_sfu" + "arith_fma" ], "total_cycles": [ - 0.3125, - 0.234375, - 0.28125, - 0.3125, + 0.65625, + 0.65625, + 0.421875, + 0.625, 0.0, 0.25, - 0.25 + 0.5 ] }, "stack_spill_bytes": 0, "thread_occupancy": 100, "uniform_registers_used": 8, - "work_registers_used": 20 + "work_registers_used": 23 } } }, @@ -6905,7 +6905,7 @@ "type": "Fragment", "variants": { "Main": { - "has_stack_spilling": false, + "has_stack_spilling": true, "performance": { "longest_path_bound_pipelines": [ null @@ -6921,20 +6921,20 @@ "texture" ], "shortest_path_bound_pipelines": [ - "arithmetic" + "load_store" ], "shortest_path_cycles": [ - 1.649999976158142, - 1.0, + 1.9800000190734863, + 5.0, 0.0 ], "total_bound_pipelines": [ - "arithmetic" + "load_store" ], "total_cycles": [ - 5.0, - 1.0, - 1.0 + 11.0, + 14.0, + 2.0 ] }, "thread_occupancy": 100, @@ -6957,7 +6957,7 @@ "uses_late_zs_update": false, "variants": { "Main": { - "fp16_arithmetic": 66, + "fp16_arithmetic": 38, "has_stack_spilling": false, "performance": { "longest_path_bound_pipelines": [ @@ -6983,36 +6983,35 @@ ], "shortest_path_bound_pipelines": [ "arith_total", - "arith_cvt", - "arith_sfu" + "arith_cvt" ], "shortest_path_cycles": [ - 0.0625, + 0.078125, 0.03125, - 0.0625, + 0.078125, 0.0625, 0.0, 0.0, 0.0 ], "total_bound_pipelines": [ - "varying", - "texture" + "arith_total", + "arith_fma" ], "total_cycles": [ - 0.234375, - 0.234375, - 0.1875, - 0.125, + 0.65625, + 0.65625, + 0.25, + 0.25, 0.0, 0.25, - 0.25 + 0.5 ] }, "stack_spill_bytes": 0, "thread_occupancy": 100, "uniform_registers_used": 8, - "work_registers_used": 19 + "work_registers_used": 23 } } }, @@ -7023,7 +7022,7 @@ "type": "Fragment", "variants": { "Main": { - "has_stack_spilling": false, + "has_stack_spilling": true, "performance": { "longest_path_bound_pipelines": [ null @@ -7039,25 +7038,25 @@ "texture" ], "shortest_path_bound_pipelines": [ - "arithmetic" + "load_store" ], "shortest_path_cycles": [ 1.649999976158142, - 1.0, + 5.0, 0.0 ], "total_bound_pipelines": [ - "arithmetic" + "load_store" ], "total_cycles": [ - 3.6666667461395264, - 1.0, - 1.0 + 6.666666507720947, + 9.0, + 2.0 ] }, "thread_occupancy": 100, "uniform_registers_used": 1, - "work_registers_used": 2 + "work_registers_used": 4 } } } From 7c19f90909f46ca893362cb435dcc8918d678eea Mon Sep 17 00:00:00 2001 From: jonahwilliams Date: Mon, 3 Apr 2023 15:47:07 -0700 Subject: [PATCH 3/4] bdero review --- .../filters/gaussian_blur_filter_contents.cc | 2 +- .../shaders/gaussian_blur/gaussian_blur.glsl | 15 +++++---------- 2 files changed, 6 insertions(+), 11 deletions(-) diff --git a/impeller/entity/contents/filters/gaussian_blur_filter_contents.cc b/impeller/entity/contents/filters/gaussian_blur_filter_contents.cc index 1b4c92765c687..49350b66941b1 100644 --- a/impeller/entity/contents/filters/gaussian_blur_filter_contents.cc +++ b/impeller/entity/contents/filters/gaussian_blur_filter_contents.cc @@ -195,7 +195,7 @@ std::optional DirectionalGaussianBlurFilterContents::RenderFilter( FS::BlurInfo frag_info; auto r = Radius{transformed_blur_radius_length}; frag_info.blur_sigma = Sigma{r}.sigma; - frag_info.blur_radius = r.radius; + frag_info.blur_radius = std::round(r.radius); // The blur direction is in input UV space. frag_info.blur_uv_offset = diff --git a/impeller/entity/shaders/gaussian_blur/gaussian_blur.glsl b/impeller/entity/shaders/gaussian_blur/gaussian_blur.glsl index d394a4bf7961c..c97b25c343470 100644 --- a/impeller/entity/shaders/gaussian_blur/gaussian_blur.glsl +++ b/impeller/entity/shaders/gaussian_blur/gaussian_blur.glsl @@ -60,7 +60,8 @@ void main() { f16vec4 total_color = f16vec4(0.0hf); float16_t gaussian_integral = 0.0hf; - for (float16_t i = 0.0hf; i <= blur_info.blur_radius; i += 2.0hf) { + for (float16_t i = -blur_info.blur_radius; i <= blur_info.blur_radius; + i += 2.0hf) { float16_t w1 = IPGaussian(i, blur_info.blur_sigma); float16_t w2 = IPGaussian(i + 1.0hf, blur_info.blur_sigma); float16_t gaussian = w1 + w2; @@ -69,18 +70,12 @@ void main() { f16vec2 offset_2 = offset_1 + blur_info.blur_uv_offset; vec2 pos_c1 = v_texture_coords + offset_1; vec2 pos_c2 = v_texture_coords + offset_2; - vec2 neg_c1 = v_texture_coords - offset_1; - vec2 neg_c2 = v_texture_coords - offset_2; - vec2 pos_coord = (w1 * pos_c1 + w2 * pos_c2) / gaussian; - vec2 neg_coord = (w1 * neg_c1 + w2 * neg_c2) / gaussian; + vec2 coords = (w1 * pos_c1 + w2 * pos_c2) / gaussian; - gaussian_integral += (gaussian + gaussian); + gaussian_integral += gaussian; total_color += gaussian * Sample(texture_sampler, // sampler - pos_coord // texture coordinates - ); - total_color += gaussian * Sample(texture_sampler, // sampler - neg_coord // texture coordinates + coords // texture coordinates ); } From 2c2a19a3b1c13ed297cf2b8aeb4b9cf5a19ce061 Mon Sep 17 00:00:00 2001 From: jonahwilliams Date: Mon, 3 Apr 2023 16:30:31 -0700 Subject: [PATCH 4/4] ++ --- impeller/tools/malioc.json | 195 +++++++++++++++++++------------------ 1 file changed, 98 insertions(+), 97 deletions(-) diff --git a/impeller/tools/malioc.json b/impeller/tools/malioc.json index 65f8bb1fea7de..b34a59e2b62dc 100644 --- a/impeller/tools/malioc.json +++ b/impeller/tools/malioc.json @@ -3242,7 +3242,7 @@ "uses_late_zs_update": false, "variants": { "Main": { - "fp16_arithmetic": 38, + "fp16_arithmetic": 33, "has_stack_spilling": false, "performance": { "longest_path_bound_pipelines": [ @@ -3271,9 +3271,9 @@ "arith_cvt" ], "shortest_path_cycles": [ - 0.3125, + 0.328125, 0.078125, - 0.3125, + 0.328125, 0.25, 0.0, 0.25, @@ -3284,19 +3284,19 @@ "arith_sfu" ], "total_cycles": [ - 0.8125, - 0.699999988079071, - 0.737500011920929, - 0.8125, + 0.625, + 0.515625, + 0.609375, + 0.625, 0.0, 0.5, - 0.75 + 0.5 ] }, "stack_spill_bytes": 0, "thread_occupancy": 100, "uniform_registers_used": 12, - "work_registers_used": 27 + "work_registers_used": 22 } } } @@ -3314,7 +3314,7 @@ "uses_late_zs_update": false, "variants": { "Main": { - "fp16_arithmetic": 32, + "fp16_arithmetic": 26, "has_stack_spilling": false, "performance": { "longest_path_bound_pipelines": [ @@ -3343,31 +3343,32 @@ "texture" ], "shortest_path_cycles": [ - 0.171875, + 0.1875, 0.078125, - 0.171875, + 0.1875, 0.0625, 0.0, 0.25, 0.25 ], "total_bound_pipelines": [ - "texture" + "arith_total", + "arith_fma" ], "total_cycles": [ - 0.699999988079071, - 0.699999988079071, - 0.40625, + 0.515625, + 0.515625, + 0.375, 0.25, 0.0, 0.5, - 0.75 + 0.5 ] }, "stack_spill_bytes": 0, "thread_occupancy": 100, - "uniform_registers_used": 12, - "work_registers_used": 25 + "uniform_registers_used": 14, + "work_registers_used": 21 } } } @@ -3385,7 +3386,7 @@ "uses_late_zs_update": false, "variants": { "Main": { - "fp16_arithmetic": 32, + "fp16_arithmetic": 23, "has_stack_spilling": false, "performance": { "longest_path_bound_pipelines": [ @@ -3414,9 +3415,9 @@ "arith_cvt" ], "shortest_path_cycles": [ - 0.09375, + 0.109375, 0.03125, - 0.09375, + 0.109375, 0.0625, 0.0, 0.0, @@ -3427,19 +3428,19 @@ "arith_fma" ], "total_cycles": [ - 0.65625, - 0.65625, - 0.453125, - 0.625, + 0.46875, + 0.46875, + 0.328125, + 0.4375, 0.0, 0.25, - 0.5 + 0.25 ] }, "stack_spill_bytes": 0, "thread_occupancy": 100, - "uniform_registers_used": 8, - "work_registers_used": 23 + "uniform_registers_used": 10, + "work_registers_used": 18 } } } @@ -3457,7 +3458,7 @@ "uses_late_zs_update": false, "variants": { "Main": { - "fp16_arithmetic": 27, + "fp16_arithmetic": 19, "has_stack_spilling": false, "performance": { "longest_path_bound_pipelines": [ @@ -3486,9 +3487,9 @@ "arith_cvt" ], "shortest_path_cycles": [ - 0.09375, + 0.109375, 0.03125, - 0.09375, + 0.109375, 0.0625, 0.0, 0.0, @@ -3499,19 +3500,19 @@ "arith_fma" ], "total_cycles": [ - 0.65625, - 0.65625, - 0.265625, + 0.46875, + 0.46875, + 0.234375, 0.25, 0.0, 0.25, - 0.5 + 0.25 ] }, "stack_spill_bytes": 0, "thread_occupancy": 100, - "uniform_registers_used": 8, - "work_registers_used": 26 + "uniform_registers_used": 10, + "work_registers_used": 19 } } } @@ -6602,7 +6603,7 @@ "uses_late_zs_update": false, "variants": { "Main": { - "fp16_arithmetic": 45, + "fp16_arithmetic": 52, "has_stack_spilling": false, "performance": { "longest_path_bound_pipelines": [ @@ -6646,19 +6647,19 @@ "arith_sfu" ], "total_cycles": [ - 0.8125, - 0.75, - 0.675000011920929, - 0.8125, + 0.625, + 0.578125, + 0.546875, + 0.625, 0.0, 0.5, - 0.75 + 0.5 ] }, "stack_spill_bytes": 0, "thread_occupancy": 100, "uniform_registers_used": 12, - "work_registers_used": 27 + "work_registers_used": 25 } } }, @@ -6685,20 +6686,20 @@ "texture" ], "shortest_path_bound_pipelines": [ - "load_store" + "arithmetic" ], "shortest_path_cycles": [ - 3.9600000381469727, - 6.0, + 3.630000114440918, + 1.0, 0.0 ], "total_bound_pipelines": [ - "load_store" + "arithmetic" ], "total_cycles": [ - 14.0, - 15.0, - 3.0 + 10.333333015441895, + 6.0, + 2.0 ] }, "thread_occupancy": 100, @@ -6721,7 +6722,7 @@ "uses_late_zs_update": false, "variants": { "Main": { - "fp16_arithmetic": 40, + "fp16_arithmetic": 47, "has_stack_spilling": false, "performance": { "longest_path_bound_pipelines": [ @@ -6750,9 +6751,9 @@ "texture" ], "shortest_path_cycles": [ - 0.15625, + 0.140625, 0.125, - 0.15625, + 0.140625, 0.0625, 0.0, 0.25, @@ -6760,23 +6761,22 @@ ], "total_bound_pipelines": [ "arith_total", - "arith_fma", - "texture" + "arith_fma" ], "total_cycles": [ - 0.75, - 0.75, - 0.375, + 0.578125, + 0.578125, + 0.34375, 0.25, 0.0, 0.5, - 0.75 + 0.5 ] }, "stack_spill_bytes": 0, "thread_occupancy": 100, "uniform_registers_used": 12, - "work_registers_used": 23 + "work_registers_used": 22 } } }, @@ -6806,17 +6806,17 @@ "load_store" ], "shortest_path_cycles": [ - 2.640000104904175, - 6.0, + 2.9700000286102295, + 7.0, 1.0 ], "total_bound_pipelines": [ "load_store" ], "total_cycles": [ - 8.333333015441895, - 10.0, - 3.0 + 8.0, + 11.0, + 2.0 ] }, "thread_occupancy": 100, @@ -6839,7 +6839,7 @@ "uses_late_zs_update": false, "variants": { "Main": { - "fp16_arithmetic": 43, + "fp16_arithmetic": 50, "has_stack_spilling": false, "performance": { "longest_path_bound_pipelines": [ @@ -6882,13 +6882,13 @@ "arith_fma" ], "total_cycles": [ - 0.65625, - 0.65625, - 0.421875, - 0.625, + 0.484375, + 0.484375, + 0.296875, + 0.4375, 0.0, 0.25, - 0.5 + 0.25 ] }, "stack_spill_bytes": 0, @@ -6921,20 +6921,20 @@ "texture" ], "shortest_path_bound_pipelines": [ - "load_store" + "arithmetic" ], "shortest_path_cycles": [ 1.9800000190734863, - 5.0, + 0.0, 0.0 ], "total_bound_pipelines": [ - "load_store" + "arithmetic" ], "total_cycles": [ - 11.0, - 14.0, - 2.0 + 7.666666507720947, + 5.0, + 1.0 ] }, "thread_occupancy": 100, @@ -6957,7 +6957,7 @@ "uses_late_zs_update": false, "variants": { "Main": { - "fp16_arithmetic": 38, + "fp16_arithmetic": 47, "has_stack_spilling": false, "performance": { "longest_path_bound_pipelines": [ @@ -6983,12 +6983,13 @@ ], "shortest_path_bound_pipelines": [ "arith_total", - "arith_cvt" + "arith_cvt", + "arith_sfu" ], "shortest_path_cycles": [ - 0.078125, + 0.0625, 0.03125, - 0.078125, + 0.0625, 0.0625, 0.0, 0.0, @@ -6999,19 +7000,19 @@ "arith_fma" ], "total_cycles": [ - 0.65625, - 0.65625, - 0.25, + 0.484375, + 0.484375, + 0.203125, 0.25, 0.0, 0.25, - 0.5 + 0.25 ] }, "stack_spill_bytes": 0, "thread_occupancy": 100, "uniform_registers_used": 8, - "work_registers_used": 23 + "work_registers_used": 22 } } }, @@ -7022,7 +7023,7 @@ "type": "Fragment", "variants": { "Main": { - "has_stack_spilling": true, + "has_stack_spilling": false, "performance": { "longest_path_bound_pipelines": [ null @@ -7038,25 +7039,25 @@ "texture" ], "shortest_path_bound_pipelines": [ - "load_store" + "arithmetic" ], "shortest_path_cycles": [ 1.649999976158142, - 5.0, + 1.0, 0.0 ], "total_bound_pipelines": [ - "load_store" + "arithmetic" ], "total_cycles": [ - 6.666666507720947, - 9.0, - 2.0 + 5.666666507720947, + 1.0, + 1.0 ] }, - "thread_occupancy": 100, + "thread_occupancy": 50, "uniform_registers_used": 1, - "work_registers_used": 4 + "work_registers_used": 6 } } }