diff --git a/impeller/entity/contents/filters/gaussian_blur_filter_contents.cc b/impeller/entity/contents/filters/gaussian_blur_filter_contents.cc index 3f0a1f467c9d4..49350b66941b1 100644 --- a/impeller/entity/contents/filters/gaussian_blur_filter_contents.cc +++ b/impeller/entity/contents/filters/gaussian_blur_filter_contents.cc @@ -195,7 +195,7 @@ std::optional DirectionalGaussianBlurFilterContents::RenderFilter( FS::BlurInfo frag_info; auto r = Radius{transformed_blur_radius_length}; frag_info.blur_sigma = Sigma{r}.sigma; - frag_info.blur_radius = r.radius; + frag_info.blur_radius = std::round(r.radius); // The blur direction is in input UV space. frag_info.blur_uv_offset = @@ -240,6 +240,8 @@ std::optional DirectionalGaussianBlurFilterContents::RenderFilter( source_descriptor.height_address_mode = SamplerAddressMode::kRepeat; break; } + input_descriptor.mag_filter = MinMagFilter::kLinear; + input_descriptor.min_filter = MinMagFilter::kLinear; bool has_alpha_mask = blur_style_ != BlurStyle::kNormal; bool has_decal_specialization = diff --git a/impeller/entity/shaders/gaussian_blur/gaussian_blur.glsl b/impeller/entity/shaders/gaussian_blur/gaussian_blur.glsl index 9df092e1299e4..c97b25c343470 100644 --- a/impeller/entity/shaders/gaussian_blur/gaussian_blur.glsl +++ b/impeller/entity/shaders/gaussian_blur/gaussian_blur.glsl @@ -60,14 +60,23 @@ void main() { f16vec4 total_color = f16vec4(0.0hf); float16_t gaussian_integral = 0.0hf; - for (float16_t i = -blur_info.blur_radius; i <= blur_info.blur_radius; i++) { - float16_t gaussian = IPGaussian(i, blur_info.blur_sigma); + for (float16_t i = -blur_info.blur_radius; i <= blur_info.blur_radius; + i += 2.0hf) { + float16_t w1 = IPGaussian(i, blur_info.blur_sigma); + float16_t w2 = IPGaussian(i + 1.0hf, blur_info.blur_sigma); + float16_t gaussian = w1 + w2; + + f16vec2 offset_1 = blur_info.blur_uv_offset * i; + f16vec2 offset_2 = offset_1 + blur_info.blur_uv_offset; + vec2 pos_c1 = v_texture_coords + offset_1; + vec2 pos_c2 = v_texture_coords + offset_2; + + vec2 coords = (w1 * pos_c1 + w2 * pos_c2) / gaussian; + gaussian_integral += gaussian; - total_color += - gaussian * Sample(texture_sampler, // sampler - v_texture_coords + blur_info.blur_uv_offset * - i // texture coordinates - ); + total_color += gaussian * Sample(texture_sampler, // sampler + coords // texture coordinates + ); } frag_color = total_color / gaussian_integral; diff --git a/impeller/tools/malioc.json b/impeller/tools/malioc.json index 1853a5d3beb13..b34a59e2b62dc 100644 --- a/impeller/tools/malioc.json +++ b/impeller/tools/malioc.json @@ -3242,7 +3242,7 @@ "uses_late_zs_update": false, "variants": { "Main": { - "fp16_arithmetic": 53, + "fp16_arithmetic": 33, "has_stack_spilling": false, "performance": { "longest_path_bound_pipelines": [ @@ -3281,13 +3281,13 @@ ], "total_bound_pipelines": [ "arith_total", - "arith_cvt" + "arith_sfu" ], "total_cycles": [ - 0.578125, - 0.25, - 0.578125, - 0.5, + 0.625, + 0.515625, + 0.609375, + 0.625, 0.0, 0.5, 0.5 @@ -3296,7 +3296,7 @@ "stack_spill_bytes": 0, "thread_occupancy": 100, "uniform_registers_used": 12, - "work_registers_used": 20 + "work_registers_used": 22 } } } @@ -3314,7 +3314,7 @@ "uses_late_zs_update": false, "variants": { "Main": { - "fp16_arithmetic": 45, + "fp16_arithmetic": 26, "has_stack_spilling": false, "performance": { "longest_path_bound_pipelines": [ @@ -3352,14 +3352,14 @@ 0.25 ], "total_bound_pipelines": [ - "varying", - "texture" + "arith_total", + "arith_fma" ], "total_cycles": [ - 0.34375, + 0.515625, + 0.515625, + 0.375, 0.25, - 0.34375, - 0.125, 0.0, 0.5, 0.5 @@ -3368,7 +3368,7 @@ "stack_spill_bytes": 0, "thread_occupancy": 100, "uniform_registers_used": 14, - "work_registers_used": 14 + "work_registers_used": 21 } } } @@ -3386,7 +3386,7 @@ "uses_late_zs_update": false, "variants": { "Main": { - "fp16_arithmetic": 42, + "fp16_arithmetic": 23, "has_stack_spilling": false, "performance": { "longest_path_bound_pipelines": [ @@ -3425,13 +3425,13 @@ ], "total_bound_pipelines": [ "arith_total", - "arith_sfu" + "arith_fma" ], "total_cycles": [ - 0.3125, - 0.203125, - 0.296875, - 0.3125, + 0.46875, + 0.46875, + 0.328125, + 0.4375, 0.0, 0.25, 0.25 @@ -3440,7 +3440,7 @@ "stack_spill_bytes": 0, "thread_occupancy": 100, "uniform_registers_used": 10, - "work_registers_used": 16 + "work_registers_used": 18 } } } @@ -3458,7 +3458,7 @@ "uses_late_zs_update": false, "variants": { "Main": { - "fp16_arithmetic": 35, + "fp16_arithmetic": 19, "has_stack_spilling": false, "performance": { "longest_path_bound_pipelines": [ @@ -3496,14 +3496,14 @@ 0.0 ], "total_bound_pipelines": [ - "varying", - "texture" + "arith_total", + "arith_fma" ], "total_cycles": [ - 0.203125, - 0.203125, - 0.203125, - 0.125, + 0.46875, + 0.46875, + 0.234375, + 0.25, 0.0, 0.25, 0.25 @@ -3512,7 +3512,7 @@ "stack_spill_bytes": 0, "thread_occupancy": 100, "uniform_registers_used": 10, - "work_registers_used": 13 + "work_registers_used": 19 } } } @@ -6603,7 +6603,7 @@ "uses_late_zs_update": false, "variants": { "Main": { - "fp16_arithmetic": 66, + "fp16_arithmetic": 52, "has_stack_spilling": false, "performance": { "longest_path_bound_pipelines": [ @@ -6644,13 +6644,13 @@ ], "total_bound_pipelines": [ "arith_total", - "arith_cvt" + "arith_sfu" ], "total_cycles": [ - 0.53125, - 0.328125, - 0.53125, - 0.5, + 0.625, + 0.578125, + 0.546875, + 0.625, 0.0, 0.5, 0.5 @@ -6659,7 +6659,7 @@ "stack_spill_bytes": 0, "thread_occupancy": 100, "uniform_registers_used": 12, - "work_registers_used": 21 + "work_registers_used": 25 } } }, @@ -6670,7 +6670,7 @@ "type": "Fragment", "variants": { "Main": { - "has_stack_spilling": false, + "has_stack_spilling": true, "performance": { "longest_path_bound_pipelines": [ null @@ -6689,16 +6689,16 @@ "arithmetic" ], "shortest_path_cycles": [ - 3.299999952316284, - 2.0, + 3.630000114440918, + 1.0, 0.0 ], "total_bound_pipelines": [ "arithmetic" ], "total_cycles": [ - 7.666666507720947, - 2.0, + 10.333333015441895, + 6.0, 2.0 ] }, @@ -6722,7 +6722,7 @@ "uses_late_zs_update": false, "variants": { "Main": { - "fp16_arithmetic": 61, + "fp16_arithmetic": 47, "has_stack_spilling": false, "performance": { "longest_path_bound_pipelines": [ @@ -6760,14 +6760,14 @@ 0.25 ], "total_bound_pipelines": [ - "varying", - "texture" + "arith_total", + "arith_fma" ], "total_cycles": [ - 0.328125, - 0.328125, - 0.328125, - 0.125, + 0.578125, + 0.578125, + 0.34375, + 0.25, 0.0, 0.5, 0.5 @@ -6776,7 +6776,7 @@ "stack_spill_bytes": 0, "thread_occupancy": 100, "uniform_registers_used": 12, - "work_registers_used": 20 + "work_registers_used": 22 } } }, @@ -6787,7 +6787,7 @@ "type": "Fragment", "variants": { "Main": { - "has_stack_spilling": false, + "has_stack_spilling": true, "performance": { "longest_path_bound_pipelines": [ null @@ -6803,25 +6803,25 @@ "texture" ], "shortest_path_bound_pipelines": [ - "arithmetic" + "load_store" ], "shortest_path_cycles": [ - 2.309999942779541, - 2.0, + 2.9700000286102295, + 7.0, 1.0 ], "total_bound_pipelines": [ - "arithmetic" + "load_store" ], "total_cycles": [ - 5.0, - 2.0, + 8.0, + 11.0, 2.0 ] }, "thread_occupancy": 100, "uniform_registers_used": 1, - "work_registers_used": 3 + "work_registers_used": 4 } } } @@ -6839,7 +6839,7 @@ "uses_late_zs_update": false, "variants": { "Main": { - "fp16_arithmetic": 70, + "fp16_arithmetic": 50, "has_stack_spilling": false, "performance": { "longest_path_bound_pipelines": [ @@ -6879,13 +6879,13 @@ ], "total_bound_pipelines": [ "arith_total", - "arith_sfu" + "arith_fma" ], "total_cycles": [ - 0.3125, - 0.234375, - 0.28125, - 0.3125, + 0.484375, + 0.484375, + 0.296875, + 0.4375, 0.0, 0.25, 0.25 @@ -6894,7 +6894,7 @@ "stack_spill_bytes": 0, "thread_occupancy": 100, "uniform_registers_used": 8, - "work_registers_used": 20 + "work_registers_used": 23 } } }, @@ -6905,7 +6905,7 @@ "type": "Fragment", "variants": { "Main": { - "has_stack_spilling": false, + "has_stack_spilling": true, "performance": { "longest_path_bound_pipelines": [ null @@ -6924,16 +6924,16 @@ "arithmetic" ], "shortest_path_cycles": [ - 1.649999976158142, - 1.0, + 1.9800000190734863, + 0.0, 0.0 ], "total_bound_pipelines": [ "arithmetic" ], "total_cycles": [ + 7.666666507720947, 5.0, - 1.0, 1.0 ] }, @@ -6957,7 +6957,7 @@ "uses_late_zs_update": false, "variants": { "Main": { - "fp16_arithmetic": 66, + "fp16_arithmetic": 47, "has_stack_spilling": false, "performance": { "longest_path_bound_pipelines": [ @@ -6996,14 +6996,14 @@ 0.0 ], "total_bound_pipelines": [ - "varying", - "texture" + "arith_total", + "arith_fma" ], "total_cycles": [ - 0.234375, - 0.234375, - 0.1875, - 0.125, + 0.484375, + 0.484375, + 0.203125, + 0.25, 0.0, 0.25, 0.25 @@ -7012,7 +7012,7 @@ "stack_spill_bytes": 0, "thread_occupancy": 100, "uniform_registers_used": 8, - "work_registers_used": 19 + "work_registers_used": 22 } } }, @@ -7050,14 +7050,14 @@ "arithmetic" ], "total_cycles": [ - 3.6666667461395264, + 5.666666507720947, 1.0, 1.0 ] }, - "thread_occupancy": 100, + "thread_occupancy": 50, "uniform_registers_used": 1, - "work_registers_used": 2 + "work_registers_used": 6 } } }