From c7d73f8c04e2a976db5bcd8eaf690c6e608d7686 Mon Sep 17 00:00:00 2001 From: jonahwilliams Date: Tue, 28 Mar 2023 21:50:40 -0700 Subject: [PATCH 1/7] [Impeller] gaussian functions to half precision --- .../shader_lib/impeller/gaussian.glsl | 40 ++++++------- .../compiler/shader_lib/impeller/texture.glsl | 9 +++ impeller/entity/shaders/border_mask_blur.frag | 42 +++++++------- impeller/entity/shaders/border_mask_blur.vert | 4 +- .../shaders/gaussian_blur/gaussian_blur.glsl | 47 +++++++-------- .../shaders/gaussian_blur/gaussian_blur.vert | 8 +-- impeller/entity/shaders/rrect_blur.frag | 57 ++++++++++--------- impeller/entity/shaders/rrect_blur.vert | 2 +- 8 files changed, 111 insertions(+), 98 deletions(-) diff --git a/impeller/compiler/shader_lib/impeller/gaussian.glsl b/impeller/compiler/shader_lib/impeller/gaussian.glsl index 62874bec96d9c..9dd104d6e4ce2 100644 --- a/impeller/compiler/shader_lib/impeller/gaussian.glsl +++ b/impeller/compiler/shader_lib/impeller/gaussian.glsl @@ -6,51 +6,53 @@ #define GAUSSIAN_GLSL_ #include +#include /// Gaussian distribution function. -float IPGaussian(float x, float sigma) { - float variance = sigma * sigma; - return exp(-0.5 * x * x / variance) / (kSqrtTwoPi * sigma); +float16_t IPGaussian(float16_t x, float16_t sigma) { + float16_t variance = sigma * sigma; + return exp(-0.5hf * x * x / variance) / (float16_t(kSqrtTwoPi) * sigma); } /// Abramowitz and Stegun erf approximation. -float IPErf(float x) { - float a = abs(x); +float16_t IPErf(float16_t x) { + float16_t a = abs(x); // 0.278393*x + 0.230389*x^2 + 0.078108*x^4 + 1 - float b = (0.278393 + (0.230389 + 0.078108 * a * a) * a) * a + 1.0; - return sign(x) * (1 - 1 / (b * b * b * b)); + float16_t b = + (0.278393hf + (0.230389hf + 0.078108hf * a * a) * a) * a + 1.0hf; + return sign(x) * (1.0hf - 1.0hf / (b * b * b * b)); } /// Vec2 variation for the Abramowitz and Stegun erf approximation. -vec2 IPVec2Erf(vec2 x) { - vec2 a = abs(x); +f16vec2 IPVec2Erf(f16vec2 x) { + f16vec2 a = abs(x); // 0.278393*x + 0.230389*x^2 + 0.078108*x^4 + 1 - vec2 b = (0.278393 + (0.230389 + 0.078108 * a * a) * a) * a + 1.0; - return sign(x) * (1 - 1 / (b * b * b * b)); + f16vec2 b = (0.278393hf + (0.230389hf + 0.078108hf * a * a) * a) * a + 1.0hf; + return sign(x) * (1.0hf - 1.0hf / (b * b * b * b)); } /// The indefinite integral of the Gaussian function. /// Uses a very close approximation of Erf. -float IPGaussianIntegral(float x, float sigma) { +float16_t IPGaussianIntegral(float16_t x, float16_t sigma) { // ( 1 + erf( x * (sqrt(2) / (2 * sigma) ) ) / 2 - return (1 + IPErf(x * (kHalfSqrtTwo / sigma))) * 0.5; + return (1.0hf + IPErf(x * (float16_t(kHalfSqrtTwo) / sigma))) * 0.5hf; } /// Vec2 variation for the indefinite integral of the Gaussian function. /// Uses a very close approximation of Erf. -vec2 IPVec2GaussianIntegral(vec2 x, float sigma) { +f16vec2 IPVec2GaussianIntegral(f16vec2 x, float16_t sigma) { // ( 1 + erf( x * (sqrt(2) / (2 * sigma) ) ) / 2 - return (1 + IPVec2Erf(x * (kHalfSqrtTwo / sigma))) * 0.5; + return (1.0hf + IPVec2Erf(x * (float16_t(kHalfSqrtTwo) / sigma))) * 0.5hf; } /// Simpler (but less accurate) approximation of the Gaussian integral. -vec2 IPVec2FastGaussianIntegral(vec2 x, float sigma) { - return 1 / (1 + exp(-kSqrtThree / sigma * x)); +f16vec2 IPVec2FastGaussianIntegral(f16vec2 x, float16_t sigma) { + return 1.0hf / (1.0hf + exp(float16_t(-kSqrtThree) / sigma * x)); } /// Simple logistic sigmoid with a domain of [-1, 1] and range of [0, 1]. -float IPSigmoid(float x) { - return 1.03731472073 / (1 + exp(-4 * x)) - 0.0186573603638; +float16_t IPSigmoid(float16_t x) { + return 1.03731472073hf / (1.0hf + exp(-4.0hf * x)) - 0.0186573603638hf; } #endif diff --git a/impeller/compiler/shader_lib/impeller/texture.glsl b/impeller/compiler/shader_lib/impeller/texture.glsl index 7979e897ea922..f143ec2b5dd61 100644 --- a/impeller/compiler/shader_lib/impeller/texture.glsl +++ b/impeller/compiler/shader_lib/impeller/texture.glsl @@ -122,6 +122,15 @@ vec4 IPSampleDecal(sampler2D texture_sampler, vec2 coords) { return texture(texture_sampler, coords); } +/// Sample a texture with decal tile mode. +vec4 IPHalfSampleDecal(f16sampler2D texture_sampler, f16vec2 coords) { + if (any(lessThan(coords, f16vec2(0.0hf))) || + any(greaterThanEqual(coords, f16vec2(1.0)))) { + return f16vec4(0.0); + } + return texture(texture_sampler, coords); +} + /// Sample a texture, emulating a specific tile mode. /// /// This is useful for Impeller graphics backend that don't have native support diff --git a/impeller/entity/shaders/border_mask_blur.frag b/impeller/entity/shaders/border_mask_blur.frag index b28dfc8210380..e0e89b2e8edc5 100644 --- a/impeller/entity/shaders/border_mask_blur.frag +++ b/impeller/entity/shaders/border_mask_blur.frag @@ -15,42 +15,42 @@ // integral (using an erf approximation) to the 4 edges of the UV rectangle and // multiplying them. -uniform sampler2D texture_sampler; +uniform f16sampler2D texture_sampler; uniform FragInfo { - float src_factor; - float inner_blur_factor; - float outer_blur_factor; + float16_t src_factor; + float16_t inner_blur_factor; + float16_t outer_blur_factor; - vec2 sigma_uv; + f16vec2 sigma_uv; } frag_info; -in vec2 v_texture_coords; +in f16vec2 v_texture_coords; -out vec4 frag_color; +out f16vec4 frag_color; -float BoxBlurMask(vec2 uv) { +float16_t BoxBlurMask(f16vec2 uv) { // LTRB - return IPGaussianIntegral(uv.x, frag_info.sigma_uv.x) * // - IPGaussianIntegral(uv.y, frag_info.sigma_uv.y) * // - IPGaussianIntegral(1 - uv.x, frag_info.sigma_uv.x) * // - IPGaussianIntegral(1 - uv.y, frag_info.sigma_uv.y); + return IPGaussianIntegral(uv.x, frag_info.sigma_uv.x) * // + IPGaussianIntegral(uv.y, frag_info.sigma_uv.y) * // + IPGaussianIntegral(1.0hf - uv.x, frag_info.sigma_uv.x) * // + IPGaussianIntegral(1.0hf - uv.y, frag_info.sigma_uv.y); } void main() { - vec4 image_color = texture(texture_sampler, v_texture_coords); - float blur_factor = BoxBlurMask(v_texture_coords); + f16vec4 image_color = texture(texture_sampler, v_texture_coords); + float16_t blur_factor = BoxBlurMask(v_texture_coords); - float within_bounds = - float(v_texture_coords.x >= 0 && v_texture_coords.y >= 0 && - v_texture_coords.x < 1 && v_texture_coords.y < 1); - float inner_factor = + float16_t within_bounds = + float16_t(v_texture_coords.x >= 0.0hf && v_texture_coords.y >= 0.0hf && + v_texture_coords.x < 1.0hf && v_texture_coords.y < 1.0hf); + float16_t inner_factor = (frag_info.inner_blur_factor * blur_factor + frag_info.src_factor) * within_bounds; - float outer_factor = - frag_info.outer_blur_factor * blur_factor * (1 - within_bounds); + float16_t outer_factor = + frag_info.outer_blur_factor * blur_factor * (1.0hf - within_bounds); - float mask_factor = inner_factor + outer_factor; + float16_t mask_factor = inner_factor + outer_factor; frag_color = image_color * mask_factor; } diff --git a/impeller/entity/shaders/border_mask_blur.vert b/impeller/entity/shaders/border_mask_blur.vert index bff59a4747e65..9b87041d22117 100644 --- a/impeller/entity/shaders/border_mask_blur.vert +++ b/impeller/entity/shaders/border_mask_blur.vert @@ -19,6 +19,6 @@ out vec2 v_texture_coords; void main() { gl_Position = frame_info.mvp * vec4(vertices, 0.0, 1.0); - v_texture_coords = - IPRemapCoords(texture_coords, frame_info.texture_sampler_y_coord_scale); + v_texture_coords = f16vec2( + IPRemapCoords(texture_coords, frame_info.texture_sampler_y_coord_scale)); } diff --git a/impeller/entity/shaders/gaussian_blur/gaussian_blur.glsl b/impeller/entity/shaders/gaussian_blur/gaussian_blur.glsl index 4a218303efc27..c99adfe449e24 100644 --- a/impeller/entity/shaders/gaussian_blur/gaussian_blur.glsl +++ b/impeller/entity/shaders/gaussian_blur/gaussian_blur.glsl @@ -18,52 +18,52 @@ #include #include -uniform sampler2D texture_sampler; +uniform f16sampler2D texture_sampler; uniform BlurInfo { - vec2 texture_size; - vec2 blur_direction; + f16vec2 texture_size; + f16vec2 blur_direction; // The blur sigma and radius have a linear relationship which is defined // host-side, but both are useful controls here. Sigma (pixels per standard // deviation) is used to define the gaussian function itself, whereas the // radius is used to limit how much of the function is integrated. - float blur_sigma; - float blur_radius; + float16_t blur_sigma; + float16_t blur_radius; } blur_info; #if ENABLE_ALPHA_MASK -uniform sampler2D alpha_mask_sampler; +uniform f16sampler2D alpha_mask_sampler; uniform MaskInfo { - float src_factor; - float inner_blur_factor; - float outer_blur_factor; + float16_t src_factor; + float16_t inner_blur_factor; + float16_t outer_blur_factor; } mask_info; #endif -vec4 Sample(sampler2D tex, vec2 coords) { +f16vec4 Sample(f16sampler2D tex, f16vec2 coords) { #if ENABLE_DECAL_SPECIALIZATION - return IPSampleDecal(tex, coords); + return IPHalfSampleDecal(tex, coords); #else return texture(tex, coords); #endif } -in vec2 v_texture_coords; -in vec2 v_src_texture_coords; +in f16vec2 v_texture_coords; +in f16vec2 v_src_texture_coords; -out vec4 frag_color; +out f16vec4 frag_color; void main() { - vec4 total_color = vec4(0); - float gaussian_integral = 0; - vec2 blur_uv_offset = blur_info.blur_direction / blur_info.texture_size; + f16vec4 total_color = f16vec4(0.0hf); + float16_t gaussian_integral = 0.0hf; + f16vec2 blur_uv_offset = blur_info.blur_direction / blur_info.texture_size; - for (float i = -blur_info.blur_radius; i <= blur_info.blur_radius; i++) { - float gaussian = IPGaussian(i, blur_info.blur_sigma); + for (float16_t i = -blur_info.blur_radius; i <= blur_info.blur_radius; i++) { + float16_t gaussian = IPGaussian(i, blur_info.blur_sigma); gaussian_integral += gaussian; total_color += gaussian * @@ -75,11 +75,12 @@ void main() { frag_color = total_color / gaussian_integral; #if ENABLE_ALPHA_MASK - vec4 src_color = Sample(alpha_mask_sampler, // sampler - v_src_texture_coords // texture coordinates + f16vec4 src_color = Sample(alpha_mask_sampler, // sampler + v_src_texture_coords // texture coordinates ); - float blur_factor = mask_info.inner_blur_factor * float(src_color.a > 0) + - mask_info.outer_blur_factor * float(src_color.a == 0); + float16_t blur_factor = + mask_info.inner_blur_factor * float16_t(src_color.a > 0.0hf) + + mask_info.outer_blur_factor * float16_t(src_color.a == 0.0hf); frag_color = frag_color * blur_factor + src_color * mask_info.src_factor; #endif diff --git a/impeller/entity/shaders/gaussian_blur/gaussian_blur.vert b/impeller/entity/shaders/gaussian_blur/gaussian_blur.vert index f402003b13bf1..28b8f29fc8fcc 100644 --- a/impeller/entity/shaders/gaussian_blur/gaussian_blur.vert +++ b/impeller/entity/shaders/gaussian_blur/gaussian_blur.vert @@ -21,8 +21,8 @@ out vec2 v_src_texture_coords; void main() { gl_Position = frame_info.mvp * vec4(vertices, 0.0, 1.0); - v_texture_coords = - IPRemapCoords(texture_coords, frame_info.texture_sampler_y_coord_scale); - v_src_texture_coords = IPRemapCoords( - src_texture_coords, frame_info.alpha_mask_sampler_y_coord_scale); + v_texture_coords = f16vec2( + IPRemapCoords(texture_coords, frame_info.texture_sampler_y_coord_scale)); + v_src_texture_coords = f16vec2(IPRemapCoords( + src_texture_coords, frame_info.alpha_mask_sampler_y_coord_scale)); } diff --git a/impeller/entity/shaders/rrect_blur.frag b/impeller/entity/shaders/rrect_blur.frag index 5b0ddff80976c..adfb99493a5d0 100644 --- a/impeller/entity/shaders/rrect_blur.frag +++ b/impeller/entity/shaders/rrect_blur.frag @@ -6,58 +6,59 @@ #include uniform FragInfo { - vec4 color; - float blur_sigma; - vec2 rect_size; - float corner_radius; + f16vec4 color; + f16vec2 rect_size; + float16_t blur_sigma; + float16_t corner_radius; } frag_info; -in vec2 v_position; +in f16vec2 v_position; -out vec4 frag_color; +out f16vec4 frag_color; const int kSampleCount = 4; -float RRectDistance(vec2 sample_position, vec2 half_size) { - vec2 space = abs(sample_position) - half_size + frag_info.corner_radius; - return length(max(space, 0.0)) + min(max(space.x, space.y), 0.0) - +float16_t RRectDistance(f16vec2 sample_position, f16vec2 half_size) { + f16vec2 space = abs(sample_position) - half_size + frag_info.corner_radius; + return length(max(space, 0.0hf)) + min(max(space.x, space.y), 0.0hf) - frag_info.corner_radius; } /// Closed form unidirectional rounded rect blur mask solution using the /// analytical Gaussian integral (with approximated erf). -float RRectShadowX(vec2 sample_position, vec2 half_size) { +float16_t RRectShadowX(f16vec2 sample_position, f16vec2 half_size) { // Compute the X direction distance field (not incorporating the Y distance) // for the rounded rect. - float space = - min(0, half_size.y - frag_info.corner_radius - abs(sample_position.y)); - float rrect_distance = + float16_t space = min( + 0.0hf, half_size.y - frag_info.corner_radius - abs(sample_position.y)); + float16_t rrect_distance = half_size.x - frag_info.corner_radius + - sqrt(max(0, frag_info.corner_radius * frag_info.corner_radius - - space * space)); + sqrt(max(0.0hf, frag_info.corner_radius * frag_info.corner_radius - + space * space)); // Map the linear distance field to the approximate Gaussian integral. - vec2 integral = IPVec2FastGaussianIntegral( - sample_position.x + vec2(-rrect_distance, rrect_distance), + f16vec2 integral = IPVec2FastGaussianIntegral( + sample_position.x + f16vec2(-rrect_distance, rrect_distance), frag_info.blur_sigma); return integral.y - integral.x; } -float RRectShadow(vec2 sample_position, vec2 half_size) { +float16_t RRectShadow(f16vec2 sample_position, f16vec2 half_size) { // Limit the sampling range to 3 standard deviations in the Y direction from // the kernel center to incorporate 99.7% of the color contribution. - float half_sampling_range = frag_info.blur_sigma * 3; + float16_t half_sampling_range = frag_info.blur_sigma * 3.0hf; - float begin_y = max(-half_sampling_range, sample_position.y - half_size.y); - float end_y = min(half_sampling_range, sample_position.y + half_size.y); - float interval = (end_y - begin_y) / kSampleCount; + float16_t begin_y = + max(-half_sampling_range, sample_position.y - half_size.y); + float16_t end_y = min(half_sampling_range, sample_position.y + half_size.y); + float16_t interval = (end_y - begin_y) / float16_t(kSampleCount); // Sample the X blur kSampleCount times, weighted by the Gaussian function. - float result = 0; + float16_t result = 0.0hf; for (int sample_i = 0; sample_i < kSampleCount; sample_i++) { - float y = begin_y + interval * (sample_i + 0.5); - result += RRectShadowX(vec2(sample_position.x, sample_position.y - y), + float16_t y = begin_y + interval * (float16_t(sample_i) + 0.5hf); + result += RRectShadowX(f16vec2(sample_position.x, sample_position.y - y), half_size) * IPGaussian(y, frag_info.blur_sigma) * interval; } @@ -68,10 +69,10 @@ float RRectShadow(vec2 sample_position, vec2 half_size) { void main() { frag_color = frag_info.color; - vec2 half_size = frag_info.rect_size * 0.5; - vec2 sample_position = v_position - half_size; + f16vec2 half_size = frag_info.rect_size * 0.5hf; + f16vec2 sample_position = v_position - half_size; - if (frag_info.blur_sigma > 0) { + if (frag_info.blur_sigma > 0.0hf) { frag_color *= RRectShadow(sample_position, half_size); } else { frag_color *= -RRectDistance(sample_position, half_size); diff --git a/impeller/entity/shaders/rrect_blur.vert b/impeller/entity/shaders/rrect_blur.vert index 87382f6b4dcbe..12ebd0d3a4125 100644 --- a/impeller/entity/shaders/rrect_blur.vert +++ b/impeller/entity/shaders/rrect_blur.vert @@ -16,5 +16,5 @@ out vec2 v_position; void main() { gl_Position = frame_info.mvp * vec4(position, 0.0, 1.0); // The fragment stage uses local coordinates to compute the blur. - v_position = position; + v_position = f16vec2(position); } From 68c84a2365cdaa206cdad4ece9664524760cf94a Mon Sep 17 00:00:00 2001 From: jonahwilliams Date: Tue, 28 Mar 2023 22:04:54 -0700 Subject: [PATCH 2/7] ++ --- impeller/compiler/shader_lib/impeller/texture.glsl | 2 +- impeller/entity/shaders/rrect_blur.frag | 14 ++++++++------ 2 files changed, 9 insertions(+), 7 deletions(-) diff --git a/impeller/compiler/shader_lib/impeller/texture.glsl b/impeller/compiler/shader_lib/impeller/texture.glsl index 196fda3c1cfd8..d8b244294a756 100644 --- a/impeller/compiler/shader_lib/impeller/texture.glsl +++ b/impeller/compiler/shader_lib/impeller/texture.glsl @@ -144,7 +144,7 @@ vec4 IPSampleDecal(sampler2D texture_sampler, vec2 coords) { } /// Sample a texture with decal tile mode. -vec4 IPHalfSampleDecal(f16sampler2D texture_sampler, f16vec2 coords) { +f16vec4 IPHalfSampleDecal(f16sampler2D texture_sampler, f16vec2 coords) { if (any(lessThan(coords, f16vec2(0.0hf))) || any(greaterThanEqual(coords, f16vec2(1.0)))) { return f16vec4(0.0); diff --git a/impeller/entity/shaders/rrect_blur.frag b/impeller/entity/shaders/rrect_blur.frag index adfb99493a5d0..9b342481187b2 100644 --- a/impeller/entity/shaders/rrect_blur.frag +++ b/impeller/entity/shaders/rrect_blur.frag @@ -21,8 +21,8 @@ const int kSampleCount = 4; float16_t RRectDistance(f16vec2 sample_position, f16vec2 half_size) { f16vec2 space = abs(sample_position) - half_size + frag_info.corner_radius; - return length(max(space, 0.0hf)) + min(max(space.x, space.y), 0.0hf) - - frag_info.corner_radius; + return length(max(space, float16_t(0.0hf))) + + min(max(space.x, space.y), float16_t(0.0hf)) - frag_info.corner_radius; } /// Closed form unidirectional rounded rect blur mask solution using the @@ -30,12 +30,14 @@ float16_t RRectDistance(f16vec2 sample_position, f16vec2 half_size) { float16_t RRectShadowX(f16vec2 sample_position, f16vec2 half_size) { // Compute the X direction distance field (not incorporating the Y distance) // for the rounded rect. - float16_t space = min( - 0.0hf, half_size.y - frag_info.corner_radius - abs(sample_position.y)); + float16_t space = + min(float16_t(0.0hf), + half_size.y - frag_info.corner_radius - abs(sample_position.y)); float16_t rrect_distance = half_size.x - frag_info.corner_radius + - sqrt(max(0.0hf, frag_info.corner_radius * frag_info.corner_radius - - space * space)); + sqrt(max( + float16_t(0.0hf), + frag_info.corner_radius * frag_info.corner_radius - space * space)); // Map the linear distance field to the approximate Gaussian integral. f16vec2 integral = IPVec2FastGaussianIntegral( From f7f7398b59f661f55b9f03f579d6de9a4ffbffba Mon Sep 17 00:00:00 2001 From: jonahwilliams Date: Tue, 28 Mar 2023 22:14:52 -0700 Subject: [PATCH 3/7] ++ --- impeller/entity/shaders/border_mask_blur.vert | 2 +- impeller/entity/shaders/gaussian_blur/gaussian_blur.vert | 4 ++-- impeller/entity/shaders/rrect_blur.vert | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/impeller/entity/shaders/border_mask_blur.vert b/impeller/entity/shaders/border_mask_blur.vert index 9b87041d22117..74b9ae422483a 100644 --- a/impeller/entity/shaders/border_mask_blur.vert +++ b/impeller/entity/shaders/border_mask_blur.vert @@ -15,7 +15,7 @@ frame_info; in vec2 vertices; in vec2 texture_coords; -out vec2 v_texture_coords; +out f16vec2 v_texture_coords; void main() { gl_Position = frame_info.mvp * vec4(vertices, 0.0, 1.0); diff --git a/impeller/entity/shaders/gaussian_blur/gaussian_blur.vert b/impeller/entity/shaders/gaussian_blur/gaussian_blur.vert index 28b8f29fc8fcc..96b2ccf5c66c8 100644 --- a/impeller/entity/shaders/gaussian_blur/gaussian_blur.vert +++ b/impeller/entity/shaders/gaussian_blur/gaussian_blur.vert @@ -16,8 +16,8 @@ in vec2 vertices; in vec2 texture_coords; in vec2 src_texture_coords; -out vec2 v_texture_coords; -out vec2 v_src_texture_coords; +out f16vec2 v_texture_coords; +out f16vec2 v_src_texture_coords; void main() { gl_Position = frame_info.mvp * vec4(vertices, 0.0, 1.0); diff --git a/impeller/entity/shaders/rrect_blur.vert b/impeller/entity/shaders/rrect_blur.vert index 12ebd0d3a4125..6ca9e06bba4b8 100644 --- a/impeller/entity/shaders/rrect_blur.vert +++ b/impeller/entity/shaders/rrect_blur.vert @@ -11,7 +11,7 @@ frame_info; in vec2 position; -out vec2 v_position; +out f16vec2 v_position; void main() { gl_Position = frame_info.mvp * vec4(position, 0.0, 1.0); From 1fb034f71324bd64989a6c54e85ab5b1d200fd91 Mon Sep 17 00:00:00 2001 From: jonahwilliams Date: Wed, 29 Mar 2023 08:58:10 -0700 Subject: [PATCH 4/7] update malioc diff --- impeller/tools/malioc.json | 220 ++++++++++++++++++------------------- 1 file changed, 109 insertions(+), 111 deletions(-) diff --git a/impeller/tools/malioc.json b/impeller/tools/malioc.json index d9e3b4b4226ea..8f535e602f2f4 100644 --- a/impeller/tools/malioc.json +++ b/impeller/tools/malioc.json @@ -178,7 +178,7 @@ }, "stack_spill_bytes": 0, "thread_occupancy": 100, - "uniform_registers_used": 10, + "uniform_registers_used": 12, "work_registers_used": 15 } } @@ -252,7 +252,7 @@ }, "stack_spill_bytes": 0, "thread_occupancy": 100, - "uniform_registers_used": 8, + "uniform_registers_used": 10, "work_registers_used": 16 } } @@ -326,7 +326,7 @@ }, "stack_spill_bytes": 0, "thread_occupancy": 100, - "uniform_registers_used": 8, + "uniform_registers_used": 10, "work_registers_used": 14 } } @@ -403,7 +403,7 @@ }, "stack_spill_bytes": 0, "thread_occupancy": 100, - "uniform_registers_used": 6, + "uniform_registers_used": 8, "work_registers_used": 11 } } @@ -481,7 +481,7 @@ }, "stack_spill_bytes": 0, "thread_occupancy": 100, - "uniform_registers_used": 6, + "uniform_registers_used": 8, "work_registers_used": 11 } } @@ -560,7 +560,7 @@ }, "stack_spill_bytes": 0, "thread_occupancy": 100, - "uniform_registers_used": 6, + "uniform_registers_used": 8, "work_registers_used": 11 } } @@ -637,7 +637,7 @@ }, "stack_spill_bytes": 0, "thread_occupancy": 100, - "uniform_registers_used": 8, + "uniform_registers_used": 10, "work_registers_used": 15 } } @@ -710,7 +710,7 @@ }, "stack_spill_bytes": 0, "thread_occupancy": 100, - "uniform_registers_used": 10, + "uniform_registers_used": 12, "work_registers_used": 15 } } @@ -787,7 +787,7 @@ }, "stack_spill_bytes": 0, "thread_occupancy": 100, - "uniform_registers_used": 6, + "uniform_registers_used": 8, "work_registers_used": 11 } } @@ -860,7 +860,7 @@ }, "stack_spill_bytes": 0, "thread_occupancy": 100, - "uniform_registers_used": 10, + "uniform_registers_used": 12, "work_registers_used": 15 } } @@ -938,7 +938,7 @@ }, "stack_spill_bytes": 0, "thread_occupancy": 100, - "uniform_registers_used": 6, + "uniform_registers_used": 8, "work_registers_used": 11 } } @@ -1015,7 +1015,7 @@ }, "stack_spill_bytes": 0, "thread_occupancy": 100, - "uniform_registers_used": 8, + "uniform_registers_used": 10, "work_registers_used": 15 } } @@ -1088,7 +1088,7 @@ }, "stack_spill_bytes": 0, "thread_occupancy": 100, - "uniform_registers_used": 10, + "uniform_registers_used": 12, "work_registers_used": 15 } } @@ -1166,7 +1166,7 @@ }, "stack_spill_bytes": 0, "thread_occupancy": 100, - "uniform_registers_used": 6, + "uniform_registers_used": 8, "work_registers_used": 11 } } @@ -1239,7 +1239,7 @@ }, "stack_spill_bytes": 0, "thread_occupancy": 100, - "uniform_registers_used": 12, + "uniform_registers_used": 14, "work_registers_used": 25 } } @@ -1440,7 +1440,7 @@ "uses_late_zs_update": false, "variants": { "Main": { - "fp16_arithmetic": 5, + "fp16_arithmetic": 44, "has_stack_spilling": false, "performance": { "longest_path_bound_pipelines": [ @@ -1448,8 +1448,8 @@ "arith_fma" ], "longest_path_cycles": [ - 0.8125, - 0.8125, + 0.875, + 0.875, 0.203125, 0.25, 0.0, @@ -1470,8 +1470,8 @@ "arith_fma" ], "shortest_path_cycles": [ - 0.8125, - 0.8125, + 0.875, + 0.875, 0.203125, 0.25, 0.0, @@ -1483,8 +1483,8 @@ "arith_fma" ], "total_cycles": [ - 0.8125, - 0.8125, + 0.875, + 0.875, 0.203125, 0.25, 0.0, @@ -1495,7 +1495,7 @@ "stack_spill_bytes": 0, "thread_occupancy": 100, "uniform_registers_used": 12, - "work_registers_used": 22 + "work_registers_used": 18 } } } @@ -3332,7 +3332,7 @@ }, "stack_spill_bytes": 0, "thread_occupancy": 100, - "uniform_registers_used": 12, + "uniform_registers_used": 14, "work_registers_used": 20 } } @@ -3404,7 +3404,7 @@ }, "stack_spill_bytes": 0, "thread_occupancy": 100, - "uniform_registers_used": 14, + "uniform_registers_used": 16, "work_registers_used": 15 } } @@ -5806,7 +5806,7 @@ "uses_late_zs_update": false, "variants": { "Main": { - "fp16_arithmetic": 10, + "fp16_arithmetic": 86, "has_stack_spilling": false, "performance": { "longest_path_bound_pipelines": [ @@ -5814,9 +5814,9 @@ "arith_fma" ], "longest_path_cycles": [ - 0.8125, - 0.8125, - 0.234375, + 0.90625, + 0.90625, + 0.265625, 0.25, 0.0, 0.25, @@ -5836,9 +5836,9 @@ "arith_fma" ], "shortest_path_cycles": [ - 0.8125, - 0.8125, - 0.203125, + 0.90625, + 0.90625, + 0.234375, 0.25, 0.0, 0.25, @@ -5849,9 +5849,9 @@ "arith_fma" ], "total_cycles": [ - 0.8125, - 0.8125, - 0.234375, + 0.90625, + 0.90625, + 0.265625, 0.25, 0.0, 0.25, @@ -5860,8 +5860,8 @@ }, "stack_spill_bytes": 0, "thread_occupancy": 100, - "uniform_registers_used": 10, - "work_registers_used": 32 + "uniform_registers_used": 12, + "work_registers_used": 29 } } }, @@ -5906,7 +5906,7 @@ }, "thread_occupancy": 100, "uniform_registers_used": 1, - "work_registers_used": 3 + "work_registers_used": 2 } } } @@ -6636,7 +6636,7 @@ "uses_late_zs_update": false, "variants": { "Main": { - "fp16_arithmetic": 63, + "fp16_arithmetic": 68, "has_stack_spilling": false, "performance": { "longest_path_bound_pipelines": [ @@ -6662,14 +6662,13 @@ ], "shortest_path_bound_pipelines": [ "arith_total", - "arith_cvt", "arith_sfu", "varying" ], "shortest_path_cycles": [ 0.25, - 0.171875, - 0.25, + 0.15625, + 0.1875, 0.25, 0.0, 0.25, @@ -6684,7 +6683,7 @@ "total_cycles": [ 0.5, 0.359375, - 0.484375, + 0.421875, 0.5, 0.0, 0.5, @@ -6693,7 +6692,7 @@ }, "stack_spill_bytes": 0, "thread_occupancy": 100, - "uniform_registers_used": 10, + "uniform_registers_used": 12, "work_registers_used": 21 } } @@ -6724,7 +6723,7 @@ "arithmetic" ], "shortest_path_cycles": [ - 4.619999885559082, + 3.9600000381469727, 2.0, 0.0 ], @@ -6732,7 +6731,7 @@ "arithmetic" ], "total_cycles": [ - 8.666666984558105, + 8.0, 2.0, 2.0 ] @@ -6757,7 +6756,7 @@ "uses_late_zs_update": false, "variants": { "Main": { - "fp16_arithmetic": 58, + "fp16_arithmetic": 64, "has_stack_spilling": false, "performance": { "longest_path_bound_pipelines": [ @@ -6786,9 +6785,9 @@ "texture" ], "shortest_path_cycles": [ - 0.171875, - 0.171875, - 0.109375, + 0.15625, + 0.15625, + 0.09375, 0.0625, 0.0, 0.25, @@ -6801,7 +6800,7 @@ "total_cycles": [ 0.359375, 0.359375, - 0.234375, + 0.21875, 0.125, 0.0, 0.5, @@ -6811,7 +6810,7 @@ "stack_spill_bytes": 0, "thread_occupancy": 100, "uniform_registers_used": 12, - "work_registers_used": 20 + "work_registers_used": 19 } } }, @@ -6841,7 +6840,7 @@ "arithmetic" ], "shortest_path_cycles": [ - 3.299999952316284, + 2.9700000286102295, 2.0, 1.0 ], @@ -6849,14 +6848,14 @@ "arithmetic" ], "total_cycles": [ - 5.333333492279053, + 5.0, 2.0, 2.0 ] }, "thread_occupancy": 100, "uniform_registers_used": 2, - "work_registers_used": 4 + "work_registers_used": 3 } } } @@ -6874,7 +6873,7 @@ "uses_late_zs_update": false, "variants": { "Main": { - "fp16_arithmetic": 61, + "fp16_arithmetic": 70, "has_stack_spilling": false, "performance": { "longest_path_bound_pipelines": [ @@ -6900,12 +6899,13 @@ ], "shortest_path_bound_pipelines": [ "arith_total", - "arith_cvt" + "arith_cvt", + "arith_sfu" ], "shortest_path_cycles": [ - 0.078125, - 0.046875, - 0.078125, + 0.0625, + 0.03125, + 0.0625, 0.0625, 0.0, 0.0, @@ -6918,7 +6918,7 @@ "total_cycles": [ 0.3125, 0.234375, - 0.296875, + 0.28125, 0.3125, 0.0, 0.25, @@ -6958,7 +6958,7 @@ "arithmetic" ], "shortest_path_cycles": [ - 2.9700000286102295, + 2.309999942779541, 1.0, 0.0 ], @@ -6966,7 +6966,7 @@ "arithmetic" ], "total_cycles": [ - 6.666666507720947, + 6.0, 1.0, 1.0 ] @@ -6991,7 +6991,7 @@ "uses_late_zs_update": false, "variants": { "Main": { - "fp16_arithmetic": 57, + "fp16_arithmetic": 66, "has_stack_spilling": false, "performance": { "longest_path_bound_pipelines": [ @@ -7017,12 +7017,13 @@ ], "shortest_path_bound_pipelines": [ "arith_total", - "arith_cvt" + "arith_cvt", + "arith_sfu" ], "shortest_path_cycles": [ - 0.078125, - 0.046875, - 0.078125, + 0.0625, + 0.03125, + 0.0625, 0.0625, 0.0, 0.0, @@ -7035,7 +7036,7 @@ "total_cycles": [ 0.234375, 0.234375, - 0.203125, + 0.1875, 0.125, 0.0, 0.25, @@ -7045,7 +7046,7 @@ "stack_spill_bytes": 0, "thread_occupancy": 100, "uniform_registers_used": 8, - "work_registers_used": 20 + "work_registers_used": 19 } } }, @@ -7075,7 +7076,7 @@ "arithmetic" ], "shortest_path_cycles": [ - 2.309999942779541, + 1.9800000190734863, 1.0, 0.0 ], @@ -7083,14 +7084,14 @@ "arithmetic" ], "total_cycles": [ - 4.333333492279053, + 4.0, 1.0, 1.0 ] }, "thread_occupancy": 100, "uniform_registers_used": 1, - "work_registers_used": 4 + "work_registers_used": 3 } } } @@ -8920,17 +8921,17 @@ "uses_late_zs_update": false, "variants": { "Main": { - "fp16_arithmetic": 33, + "fp16_arithmetic": 68, "has_stack_spilling": false, "performance": { "longest_path_bound_pipelines": [ "arith_total", - "arith_fma" + "arith_sfu" ], "longest_path_cycles": [ - 1.5125000476837158, - 1.5125000476837158, - 0.546875, + 1.5, + 1.3875000476837158, + 0.737500011920929, 1.5, 0.0, 0.125, @@ -8960,12 +8961,12 @@ ], "total_bound_pipelines": [ "arith_total", - "arith_fma" + "arith_sfu" ], "total_cycles": [ - 1.6375000476837158, - 1.6375000476837158, - 0.578125, + 1.5625, + 1.5125000476837158, + 0.762499988079071, 1.5625, 0.0, 0.125, @@ -8974,7 +8975,7 @@ }, "stack_spill_bytes": 0, "thread_occupancy": 100, - "uniform_registers_used": 20, + "uniform_registers_used": 16, "work_registers_used": 32 } } @@ -8989,12 +8990,12 @@ "has_stack_spilling": false, "performance": { "longest_path_bound_pipelines": [ - null + "arithmetic" ], "longest_path_cycles": [ - null, - null, - null + 22.110000610351562, + 1.0, + 0.0 ], "pipelines": [ "arithmetic", @@ -9013,14 +9014,14 @@ "arithmetic" ], "total_cycles": [ - 10.666666984558105, + 10.0, 1.0, 0.0 ] }, "thread_occupancy": 100, - "uniform_registers_used": 1, - "work_registers_used": 4 + "uniform_registers_used": 2, + "work_registers_used": 3 } } } @@ -12273,17 +12274,17 @@ "uses_late_zs_update": false, "variants": { "Main": { - "fp16_arithmetic": 37, + "fp16_arithmetic": 65, "has_stack_spilling": false, "performance": { "longest_path_bound_pipelines": [ "arith_total", - "arith_fma" + "arith_sfu" ], "longest_path_cycles": [ - 1.5499999523162842, - 1.5499999523162842, - 0.515625, + 1.5, + 1.4249999523162842, + 0.699999988079071, 1.5, 0.0, 0.125, @@ -12313,12 +12314,12 @@ ], "total_bound_pipelines": [ "arith_total", - "arith_fma" + "arith_sfu" ], "total_cycles": [ - 1.6749999523162842, - 1.6749999523162842, - 0.5625, + 1.5625, + 1.5499999523162842, + 0.75, 1.5625, 0.0, 0.125, @@ -12328,7 +12329,7 @@ "stack_spill_bytes": 0, "thread_occupancy": 100, "uniform_registers_used": 18, - "work_registers_used": 32 + "work_registers_used": 31 } } } @@ -13477,17 +13478,16 @@ "has_stack_spilling": false, "performance": { "longest_path_bound_pipelines": [ - "varying", "texture" ], "longest_path_cycles": [ 0.15625, 0.15625, - 0.0, + 0.015625, 0.0, 0.0, 0.25, - 0.25 + 0.5 ], "pipelines": [ "arith_total", @@ -13499,36 +13499,34 @@ "texture" ], "shortest_path_bound_pipelines": [ - "varying", "texture" ], "shortest_path_cycles": [ 0.15625, 0.15625, - 0.0, + 0.015625, 0.0, 0.0, 0.25, - 0.25 + 0.5 ], "total_bound_pipelines": [ - "varying", "texture" ], "total_cycles": [ 0.15625, 0.15625, - 0.0, + 0.015625, 0.0, 0.0, 0.25, - 0.25 + 0.5 ] }, "stack_spill_bytes": 0, "thread_occupancy": 100, - "uniform_registers_used": 12, - "work_registers_used": 6 + "uniform_registers_used": 14, + "work_registers_used": 8 } } } From 0eecb918696f65eff5aef67bec3a5dd5d0156aef Mon Sep 17 00:00:00 2001 From: jonahwilliams Date: Wed, 29 Mar 2023 10:00:01 -0700 Subject: [PATCH 5/7] revert --- impeller/tools/malioc.json | 345 +++++++++++++++++++++++++------------ 1 file changed, 236 insertions(+), 109 deletions(-) diff --git a/impeller/tools/malioc.json b/impeller/tools/malioc.json index 8f535e602f2f4..9f6418deddff2 100644 --- a/impeller/tools/malioc.json +++ b/impeller/tools/malioc.json @@ -178,7 +178,7 @@ }, "stack_spill_bytes": 0, "thread_occupancy": 100, - "uniform_registers_used": 12, + "uniform_registers_used": 10, "work_registers_used": 15 } } @@ -252,7 +252,7 @@ }, "stack_spill_bytes": 0, "thread_occupancy": 100, - "uniform_registers_used": 10, + "uniform_registers_used": 8, "work_registers_used": 16 } } @@ -326,7 +326,7 @@ }, "stack_spill_bytes": 0, "thread_occupancy": 100, - "uniform_registers_used": 10, + "uniform_registers_used": 8, "work_registers_used": 14 } } @@ -403,7 +403,7 @@ }, "stack_spill_bytes": 0, "thread_occupancy": 100, - "uniform_registers_used": 8, + "uniform_registers_used": 6, "work_registers_used": 11 } } @@ -481,7 +481,7 @@ }, "stack_spill_bytes": 0, "thread_occupancy": 100, - "uniform_registers_used": 8, + "uniform_registers_used": 6, "work_registers_used": 11 } } @@ -560,7 +560,7 @@ }, "stack_spill_bytes": 0, "thread_occupancy": 100, - "uniform_registers_used": 8, + "uniform_registers_used": 6, "work_registers_used": 11 } } @@ -637,7 +637,7 @@ }, "stack_spill_bytes": 0, "thread_occupancy": 100, - "uniform_registers_used": 10, + "uniform_registers_used": 8, "work_registers_used": 15 } } @@ -710,7 +710,7 @@ }, "stack_spill_bytes": 0, "thread_occupancy": 100, - "uniform_registers_used": 12, + "uniform_registers_used": 10, "work_registers_used": 15 } } @@ -787,7 +787,7 @@ }, "stack_spill_bytes": 0, "thread_occupancy": 100, - "uniform_registers_used": 8, + "uniform_registers_used": 6, "work_registers_used": 11 } } @@ -860,7 +860,7 @@ }, "stack_spill_bytes": 0, "thread_occupancy": 100, - "uniform_registers_used": 12, + "uniform_registers_used": 10, "work_registers_used": 15 } } @@ -938,7 +938,7 @@ }, "stack_spill_bytes": 0, "thread_occupancy": 100, - "uniform_registers_used": 8, + "uniform_registers_used": 6, "work_registers_used": 11 } } @@ -1015,7 +1015,7 @@ }, "stack_spill_bytes": 0, "thread_occupancy": 100, - "uniform_registers_used": 10, + "uniform_registers_used": 8, "work_registers_used": 15 } } @@ -1088,7 +1088,7 @@ }, "stack_spill_bytes": 0, "thread_occupancy": 100, - "uniform_registers_used": 12, + "uniform_registers_used": 10, "work_registers_used": 15 } } @@ -1166,7 +1166,7 @@ }, "stack_spill_bytes": 0, "thread_occupancy": 100, - "uniform_registers_used": 8, + "uniform_registers_used": 6, "work_registers_used": 11 } } @@ -1239,7 +1239,7 @@ }, "stack_spill_bytes": 0, "thread_occupancy": 100, - "uniform_registers_used": 14, + "uniform_registers_used": 12, "work_registers_used": 25 } } @@ -1440,7 +1440,7 @@ "uses_late_zs_update": false, "variants": { "Main": { - "fp16_arithmetic": 44, + "fp16_arithmetic": 5, "has_stack_spilling": false, "performance": { "longest_path_bound_pipelines": [ @@ -1448,8 +1448,8 @@ "arith_fma" ], "longest_path_cycles": [ - 0.875, - 0.875, + 0.8125, + 0.8125, 0.203125, 0.25, 0.0, @@ -1470,8 +1470,8 @@ "arith_fma" ], "shortest_path_cycles": [ - 0.875, - 0.875, + 0.8125, + 0.8125, 0.203125, 0.25, 0.0, @@ -1483,8 +1483,8 @@ "arith_fma" ], "total_cycles": [ - 0.875, - 0.875, + 0.8125, + 0.8125, 0.203125, 0.25, 0.0, @@ -1495,7 +1495,7 @@ "stack_spill_bytes": 0, "thread_occupancy": 100, "uniform_registers_used": 12, - "work_registers_used": 18 + "work_registers_used": 22 } } } @@ -3332,7 +3332,7 @@ }, "stack_spill_bytes": 0, "thread_occupancy": 100, - "uniform_registers_used": 14, + "uniform_registers_used": 12, "work_registers_used": 20 } } @@ -3404,7 +3404,7 @@ }, "stack_spill_bytes": 0, "thread_occupancy": 100, - "uniform_registers_used": 16, + "uniform_registers_used": 14, "work_registers_used": 15 } } @@ -5806,7 +5806,7 @@ "uses_late_zs_update": false, "variants": { "Main": { - "fp16_arithmetic": 86, + "fp16_arithmetic": 10, "has_stack_spilling": false, "performance": { "longest_path_bound_pipelines": [ @@ -5814,9 +5814,9 @@ "arith_fma" ], "longest_path_cycles": [ - 0.90625, - 0.90625, - 0.265625, + 0.8125, + 0.8125, + 0.234375, 0.25, 0.0, 0.25, @@ -5836,9 +5836,9 @@ "arith_fma" ], "shortest_path_cycles": [ - 0.90625, - 0.90625, - 0.234375, + 0.8125, + 0.8125, + 0.203125, 0.25, 0.0, 0.25, @@ -5849,9 +5849,9 @@ "arith_fma" ], "total_cycles": [ - 0.90625, - 0.90625, - 0.265625, + 0.8125, + 0.8125, + 0.234375, 0.25, 0.0, 0.25, @@ -5860,8 +5860,8 @@ }, "stack_spill_bytes": 0, "thread_occupancy": 100, - "uniform_registers_used": 12, - "work_registers_used": 29 + "uniform_registers_used": 10, + "work_registers_used": 32 } } }, @@ -5906,7 +5906,7 @@ }, "thread_occupancy": 100, "uniform_registers_used": 1, - "work_registers_used": 2 + "work_registers_used": 3 } } } @@ -6636,7 +6636,7 @@ "uses_late_zs_update": false, "variants": { "Main": { - "fp16_arithmetic": 68, + "fp16_arithmetic": 63, "has_stack_spilling": false, "performance": { "longest_path_bound_pipelines": [ @@ -6662,13 +6662,14 @@ ], "shortest_path_bound_pipelines": [ "arith_total", + "arith_cvt", "arith_sfu", "varying" ], "shortest_path_cycles": [ 0.25, - 0.15625, - 0.1875, + 0.171875, + 0.25, 0.25, 0.0, 0.25, @@ -6683,7 +6684,7 @@ "total_cycles": [ 0.5, 0.359375, - 0.421875, + 0.484375, 0.5, 0.0, 0.5, @@ -6692,7 +6693,7 @@ }, "stack_spill_bytes": 0, "thread_occupancy": 100, - "uniform_registers_used": 12, + "uniform_registers_used": 10, "work_registers_used": 21 } } @@ -6723,7 +6724,7 @@ "arithmetic" ], "shortest_path_cycles": [ - 3.9600000381469727, + 4.619999885559082, 2.0, 0.0 ], @@ -6731,7 +6732,7 @@ "arithmetic" ], "total_cycles": [ - 8.0, + 8.666666984558105, 2.0, 2.0 ] @@ -6756,7 +6757,7 @@ "uses_late_zs_update": false, "variants": { "Main": { - "fp16_arithmetic": 64, + "fp16_arithmetic": 58, "has_stack_spilling": false, "performance": { "longest_path_bound_pipelines": [ @@ -6785,9 +6786,9 @@ "texture" ], "shortest_path_cycles": [ - 0.15625, - 0.15625, - 0.09375, + 0.171875, + 0.171875, + 0.109375, 0.0625, 0.0, 0.25, @@ -6800,7 +6801,7 @@ "total_cycles": [ 0.359375, 0.359375, - 0.21875, + 0.234375, 0.125, 0.0, 0.5, @@ -6810,7 +6811,7 @@ "stack_spill_bytes": 0, "thread_occupancy": 100, "uniform_registers_used": 12, - "work_registers_used": 19 + "work_registers_used": 20 } } }, @@ -6840,7 +6841,7 @@ "arithmetic" ], "shortest_path_cycles": [ - 2.9700000286102295, + 3.299999952316284, 2.0, 1.0 ], @@ -6848,14 +6849,14 @@ "arithmetic" ], "total_cycles": [ - 5.0, + 5.333333492279053, 2.0, 2.0 ] }, "thread_occupancy": 100, "uniform_registers_used": 2, - "work_registers_used": 3 + "work_registers_used": 4 } } } @@ -6873,7 +6874,7 @@ "uses_late_zs_update": false, "variants": { "Main": { - "fp16_arithmetic": 70, + "fp16_arithmetic": 61, "has_stack_spilling": false, "performance": { "longest_path_bound_pipelines": [ @@ -6899,13 +6900,12 @@ ], "shortest_path_bound_pipelines": [ "arith_total", - "arith_cvt", - "arith_sfu" + "arith_cvt" ], "shortest_path_cycles": [ - 0.0625, - 0.03125, - 0.0625, + 0.078125, + 0.046875, + 0.078125, 0.0625, 0.0, 0.0, @@ -6918,7 +6918,7 @@ "total_cycles": [ 0.3125, 0.234375, - 0.28125, + 0.296875, 0.3125, 0.0, 0.25, @@ -6958,7 +6958,7 @@ "arithmetic" ], "shortest_path_cycles": [ - 2.309999942779541, + 2.9700000286102295, 1.0, 0.0 ], @@ -6966,7 +6966,7 @@ "arithmetic" ], "total_cycles": [ - 6.0, + 6.666666507720947, 1.0, 1.0 ] @@ -6991,7 +6991,7 @@ "uses_late_zs_update": false, "variants": { "Main": { - "fp16_arithmetic": 66, + "fp16_arithmetic": 57, "has_stack_spilling": false, "performance": { "longest_path_bound_pipelines": [ @@ -7017,13 +7017,12 @@ ], "shortest_path_bound_pipelines": [ "arith_total", - "arith_cvt", - "arith_sfu" + "arith_cvt" ], "shortest_path_cycles": [ - 0.0625, - 0.03125, - 0.0625, + 0.078125, + 0.046875, + 0.078125, 0.0625, 0.0, 0.0, @@ -7036,7 +7035,7 @@ "total_cycles": [ 0.234375, 0.234375, - 0.1875, + 0.203125, 0.125, 0.0, 0.25, @@ -7046,7 +7045,7 @@ "stack_spill_bytes": 0, "thread_occupancy": 100, "uniform_registers_used": 8, - "work_registers_used": 19 + "work_registers_used": 20 } } }, @@ -7076,7 +7075,7 @@ "arithmetic" ], "shortest_path_cycles": [ - 1.9800000190734863, + 2.309999942779541, 1.0, 0.0 ], @@ -7084,14 +7083,14 @@ "arithmetic" ], "total_cycles": [ - 4.0, + 4.333333492279053, 1.0, 1.0 ] }, "thread_occupancy": 100, "uniform_registers_used": 1, - "work_registers_used": 3 + "work_registers_used": 4 } } } @@ -8921,17 +8920,17 @@ "uses_late_zs_update": false, "variants": { "Main": { - "fp16_arithmetic": 68, + "fp16_arithmetic": 33, "has_stack_spilling": false, "performance": { "longest_path_bound_pipelines": [ "arith_total", - "arith_sfu" + "arith_fma" ], "longest_path_cycles": [ - 1.5, - 1.3875000476837158, - 0.737500011920929, + 1.5125000476837158, + 1.5125000476837158, + 0.546875, 1.5, 0.0, 0.125, @@ -8961,12 +8960,12 @@ ], "total_bound_pipelines": [ "arith_total", - "arith_sfu" + "arith_fma" ], "total_cycles": [ - 1.5625, - 1.5125000476837158, - 0.762499988079071, + 1.6375000476837158, + 1.6375000476837158, + 0.578125, 1.5625, 0.0, 0.125, @@ -8975,7 +8974,7 @@ }, "stack_spill_bytes": 0, "thread_occupancy": 100, - "uniform_registers_used": 16, + "uniform_registers_used": 20, "work_registers_used": 32 } } @@ -8990,12 +8989,12 @@ "has_stack_spilling": false, "performance": { "longest_path_bound_pipelines": [ - "arithmetic" + null ], "longest_path_cycles": [ - 22.110000610351562, - 1.0, - 0.0 + null, + null, + null ], "pipelines": [ "arithmetic", @@ -9014,14 +9013,14 @@ "arithmetic" ], "total_cycles": [ - 10.0, + 10.666666984558105, 1.0, 0.0 ] }, "thread_occupancy": 100, - "uniform_registers_used": 2, - "work_registers_used": 3 + "uniform_registers_used": 1, + "work_registers_used": 4 } } } @@ -12274,17 +12273,17 @@ "uses_late_zs_update": false, "variants": { "Main": { - "fp16_arithmetic": 65, + "fp16_arithmetic": 37, "has_stack_spilling": false, "performance": { "longest_path_bound_pipelines": [ "arith_total", - "arith_sfu" + "arith_fma" ], "longest_path_cycles": [ - 1.5, - 1.4249999523162842, - 0.699999988079071, + 1.5499999523162842, + 1.5499999523162842, + 0.515625, 1.5, 0.0, 0.125, @@ -12314,12 +12313,12 @@ ], "total_bound_pipelines": [ "arith_total", - "arith_sfu" + "arith_fma" ], "total_cycles": [ - 1.5625, - 1.5499999523162842, - 0.75, + 1.6749999523162842, + 1.6749999523162842, + 0.5625, 1.5625, 0.0, 0.125, @@ -12329,7 +12328,7 @@ "stack_spill_bytes": 0, "thread_occupancy": 100, "uniform_registers_used": 18, - "work_registers_used": 31 + "work_registers_used": 32 } } } @@ -13478,16 +13477,17 @@ "has_stack_spilling": false, "performance": { "longest_path_bound_pipelines": [ + "varying", "texture" ], "longest_path_cycles": [ 0.15625, 0.15625, - 0.015625, + 0.0, 0.0, 0.0, 0.25, - 0.5 + 0.25 ], "pipelines": [ "arith_total", @@ -13499,34 +13499,36 @@ "texture" ], "shortest_path_bound_pipelines": [ + "varying", "texture" ], "shortest_path_cycles": [ 0.15625, 0.15625, - 0.015625, + 0.0, 0.0, 0.0, 0.25, - 0.5 + 0.25 ], "total_bound_pipelines": [ + "varying", "texture" ], "total_cycles": [ 0.15625, 0.15625, - 0.015625, + 0.0, 0.0, 0.0, 0.25, - 0.5 + 0.25 ] }, "stack_spill_bytes": 0, "thread_occupancy": 100, - "uniform_registers_used": 14, - "work_registers_used": 8 + "uniform_registers_used": 12, + "work_registers_used": 6 } } } @@ -13643,6 +13645,131 @@ } } }, + "flutter/impeller/renderer/path_polyline.comp.vkspv": { + "Mali-G78": { + "core": "Mali-G78", + "filename": "flutter/impeller/renderer/path_polyline.comp.vkspv", + "has_uniform_computation": true, + "type": "Compute", + "variants": { + "Main": { + "fp16_arithmetic": 0, + "has_stack_spilling": false, + "performance": { + "longest_path_bound_pipelines": [ + null + ], + "longest_path_cycles": [ + null, + null, + null, + null, + null, + null + ], + "pipelines": [ + "arith_total", + "arith_fma", + "arith_cvt", + "arith_sfu", + "load_store", + "texture" + ], + "shortest_path_bound_pipelines": [ + "load_store" + ], + "shortest_path_cycles": [ + 0.75, + 0.0, + 0.75, + 0.0, + 2.0, + 0.0 + ], + "total_bound_pipelines": [ + "load_store" + ], + "total_cycles": [ + 5.9375, + 2.737499952316284, + 4.824999809265137, + 5.9375, + 35.0, + 0.0 + ] + }, + "shared_storage_used": 12288, + "stack_spill_bytes": 0, + "thread_occupancy": 50, + "uniform_registers_used": 34, + "work_registers_used": 55 + } + } + } + }, + "flutter/impeller/renderer/stroke.comp.vkspv": { + "Mali-G78": { + "core": "Mali-G78", + "filename": "flutter/impeller/renderer/stroke.comp.vkspv", + "has_uniform_computation": true, + "type": "Compute", + "variants": { + "Main": { + "fp16_arithmetic": 0, + "has_stack_spilling": false, + "performance": { + "longest_path_bound_pipelines": [ + "load_store" + ], + "longest_path_cycles": [ + 0.3125, + 0.3125, + 0.1875, + 0.125, + 7.0, + 0.0 + ], + "pipelines": [ + "arith_total", + "arith_fma", + "arith_cvt", + "arith_sfu", + "load_store", + "texture" + ], + "shortest_path_bound_pipelines": [ + "arith_total", + "arith_cvt" + ], + "shortest_path_cycles": [ + 0.0625, + 0.0, + 0.0625, + 0.0, + 0.0, + 0.0 + ], + "total_bound_pipelines": [ + "load_store" + ], + "total_cycles": [ + 0.3125, + 0.3125, + 0.1875, + 0.125, + 7.0, + 0.0 + ] + }, + "shared_storage_used": 0, + "stack_spill_bytes": 0, + "thread_occupancy": 100, + "uniform_registers_used": 10, + "work_registers_used": 24 + } + } + } + }, "flutter/impeller/scene/shaders/gles/skinned.vert.gles": { "Mali-G78": { "core": "Mali-G78", From e471d77c34b8a9107afc518cd5e1eef79f8f9954 Mon Sep 17 00:00:00 2001 From: jonahwilliams Date: Wed, 29 Mar 2023 10:04:39 -0700 Subject: [PATCH 6/7] ++ --- impeller/tools/malioc.json | 345 ++++++++++++------------------------- 1 file changed, 109 insertions(+), 236 deletions(-) diff --git a/impeller/tools/malioc.json b/impeller/tools/malioc.json index 9f6418deddff2..8f535e602f2f4 100644 --- a/impeller/tools/malioc.json +++ b/impeller/tools/malioc.json @@ -178,7 +178,7 @@ }, "stack_spill_bytes": 0, "thread_occupancy": 100, - "uniform_registers_used": 10, + "uniform_registers_used": 12, "work_registers_used": 15 } } @@ -252,7 +252,7 @@ }, "stack_spill_bytes": 0, "thread_occupancy": 100, - "uniform_registers_used": 8, + "uniform_registers_used": 10, "work_registers_used": 16 } } @@ -326,7 +326,7 @@ }, "stack_spill_bytes": 0, "thread_occupancy": 100, - "uniform_registers_used": 8, + "uniform_registers_used": 10, "work_registers_used": 14 } } @@ -403,7 +403,7 @@ }, "stack_spill_bytes": 0, "thread_occupancy": 100, - "uniform_registers_used": 6, + "uniform_registers_used": 8, "work_registers_used": 11 } } @@ -481,7 +481,7 @@ }, "stack_spill_bytes": 0, "thread_occupancy": 100, - "uniform_registers_used": 6, + "uniform_registers_used": 8, "work_registers_used": 11 } } @@ -560,7 +560,7 @@ }, "stack_spill_bytes": 0, "thread_occupancy": 100, - "uniform_registers_used": 6, + "uniform_registers_used": 8, "work_registers_used": 11 } } @@ -637,7 +637,7 @@ }, "stack_spill_bytes": 0, "thread_occupancy": 100, - "uniform_registers_used": 8, + "uniform_registers_used": 10, "work_registers_used": 15 } } @@ -710,7 +710,7 @@ }, "stack_spill_bytes": 0, "thread_occupancy": 100, - "uniform_registers_used": 10, + "uniform_registers_used": 12, "work_registers_used": 15 } } @@ -787,7 +787,7 @@ }, "stack_spill_bytes": 0, "thread_occupancy": 100, - "uniform_registers_used": 6, + "uniform_registers_used": 8, "work_registers_used": 11 } } @@ -860,7 +860,7 @@ }, "stack_spill_bytes": 0, "thread_occupancy": 100, - "uniform_registers_used": 10, + "uniform_registers_used": 12, "work_registers_used": 15 } } @@ -938,7 +938,7 @@ }, "stack_spill_bytes": 0, "thread_occupancy": 100, - "uniform_registers_used": 6, + "uniform_registers_used": 8, "work_registers_used": 11 } } @@ -1015,7 +1015,7 @@ }, "stack_spill_bytes": 0, "thread_occupancy": 100, - "uniform_registers_used": 8, + "uniform_registers_used": 10, "work_registers_used": 15 } } @@ -1088,7 +1088,7 @@ }, "stack_spill_bytes": 0, "thread_occupancy": 100, - "uniform_registers_used": 10, + "uniform_registers_used": 12, "work_registers_used": 15 } } @@ -1166,7 +1166,7 @@ }, "stack_spill_bytes": 0, "thread_occupancy": 100, - "uniform_registers_used": 6, + "uniform_registers_used": 8, "work_registers_used": 11 } } @@ -1239,7 +1239,7 @@ }, "stack_spill_bytes": 0, "thread_occupancy": 100, - "uniform_registers_used": 12, + "uniform_registers_used": 14, "work_registers_used": 25 } } @@ -1440,7 +1440,7 @@ "uses_late_zs_update": false, "variants": { "Main": { - "fp16_arithmetic": 5, + "fp16_arithmetic": 44, "has_stack_spilling": false, "performance": { "longest_path_bound_pipelines": [ @@ -1448,8 +1448,8 @@ "arith_fma" ], "longest_path_cycles": [ - 0.8125, - 0.8125, + 0.875, + 0.875, 0.203125, 0.25, 0.0, @@ -1470,8 +1470,8 @@ "arith_fma" ], "shortest_path_cycles": [ - 0.8125, - 0.8125, + 0.875, + 0.875, 0.203125, 0.25, 0.0, @@ -1483,8 +1483,8 @@ "arith_fma" ], "total_cycles": [ - 0.8125, - 0.8125, + 0.875, + 0.875, 0.203125, 0.25, 0.0, @@ -1495,7 +1495,7 @@ "stack_spill_bytes": 0, "thread_occupancy": 100, "uniform_registers_used": 12, - "work_registers_used": 22 + "work_registers_used": 18 } } } @@ -3332,7 +3332,7 @@ }, "stack_spill_bytes": 0, "thread_occupancy": 100, - "uniform_registers_used": 12, + "uniform_registers_used": 14, "work_registers_used": 20 } } @@ -3404,7 +3404,7 @@ }, "stack_spill_bytes": 0, "thread_occupancy": 100, - "uniform_registers_used": 14, + "uniform_registers_used": 16, "work_registers_used": 15 } } @@ -5806,7 +5806,7 @@ "uses_late_zs_update": false, "variants": { "Main": { - "fp16_arithmetic": 10, + "fp16_arithmetic": 86, "has_stack_spilling": false, "performance": { "longest_path_bound_pipelines": [ @@ -5814,9 +5814,9 @@ "arith_fma" ], "longest_path_cycles": [ - 0.8125, - 0.8125, - 0.234375, + 0.90625, + 0.90625, + 0.265625, 0.25, 0.0, 0.25, @@ -5836,9 +5836,9 @@ "arith_fma" ], "shortest_path_cycles": [ - 0.8125, - 0.8125, - 0.203125, + 0.90625, + 0.90625, + 0.234375, 0.25, 0.0, 0.25, @@ -5849,9 +5849,9 @@ "arith_fma" ], "total_cycles": [ - 0.8125, - 0.8125, - 0.234375, + 0.90625, + 0.90625, + 0.265625, 0.25, 0.0, 0.25, @@ -5860,8 +5860,8 @@ }, "stack_spill_bytes": 0, "thread_occupancy": 100, - "uniform_registers_used": 10, - "work_registers_used": 32 + "uniform_registers_used": 12, + "work_registers_used": 29 } } }, @@ -5906,7 +5906,7 @@ }, "thread_occupancy": 100, "uniform_registers_used": 1, - "work_registers_used": 3 + "work_registers_used": 2 } } } @@ -6636,7 +6636,7 @@ "uses_late_zs_update": false, "variants": { "Main": { - "fp16_arithmetic": 63, + "fp16_arithmetic": 68, "has_stack_spilling": false, "performance": { "longest_path_bound_pipelines": [ @@ -6662,14 +6662,13 @@ ], "shortest_path_bound_pipelines": [ "arith_total", - "arith_cvt", "arith_sfu", "varying" ], "shortest_path_cycles": [ 0.25, - 0.171875, - 0.25, + 0.15625, + 0.1875, 0.25, 0.0, 0.25, @@ -6684,7 +6683,7 @@ "total_cycles": [ 0.5, 0.359375, - 0.484375, + 0.421875, 0.5, 0.0, 0.5, @@ -6693,7 +6692,7 @@ }, "stack_spill_bytes": 0, "thread_occupancy": 100, - "uniform_registers_used": 10, + "uniform_registers_used": 12, "work_registers_used": 21 } } @@ -6724,7 +6723,7 @@ "arithmetic" ], "shortest_path_cycles": [ - 4.619999885559082, + 3.9600000381469727, 2.0, 0.0 ], @@ -6732,7 +6731,7 @@ "arithmetic" ], "total_cycles": [ - 8.666666984558105, + 8.0, 2.0, 2.0 ] @@ -6757,7 +6756,7 @@ "uses_late_zs_update": false, "variants": { "Main": { - "fp16_arithmetic": 58, + "fp16_arithmetic": 64, "has_stack_spilling": false, "performance": { "longest_path_bound_pipelines": [ @@ -6786,9 +6785,9 @@ "texture" ], "shortest_path_cycles": [ - 0.171875, - 0.171875, - 0.109375, + 0.15625, + 0.15625, + 0.09375, 0.0625, 0.0, 0.25, @@ -6801,7 +6800,7 @@ "total_cycles": [ 0.359375, 0.359375, - 0.234375, + 0.21875, 0.125, 0.0, 0.5, @@ -6811,7 +6810,7 @@ "stack_spill_bytes": 0, "thread_occupancy": 100, "uniform_registers_used": 12, - "work_registers_used": 20 + "work_registers_used": 19 } } }, @@ -6841,7 +6840,7 @@ "arithmetic" ], "shortest_path_cycles": [ - 3.299999952316284, + 2.9700000286102295, 2.0, 1.0 ], @@ -6849,14 +6848,14 @@ "arithmetic" ], "total_cycles": [ - 5.333333492279053, + 5.0, 2.0, 2.0 ] }, "thread_occupancy": 100, "uniform_registers_used": 2, - "work_registers_used": 4 + "work_registers_used": 3 } } } @@ -6874,7 +6873,7 @@ "uses_late_zs_update": false, "variants": { "Main": { - "fp16_arithmetic": 61, + "fp16_arithmetic": 70, "has_stack_spilling": false, "performance": { "longest_path_bound_pipelines": [ @@ -6900,12 +6899,13 @@ ], "shortest_path_bound_pipelines": [ "arith_total", - "arith_cvt" + "arith_cvt", + "arith_sfu" ], "shortest_path_cycles": [ - 0.078125, - 0.046875, - 0.078125, + 0.0625, + 0.03125, + 0.0625, 0.0625, 0.0, 0.0, @@ -6918,7 +6918,7 @@ "total_cycles": [ 0.3125, 0.234375, - 0.296875, + 0.28125, 0.3125, 0.0, 0.25, @@ -6958,7 +6958,7 @@ "arithmetic" ], "shortest_path_cycles": [ - 2.9700000286102295, + 2.309999942779541, 1.0, 0.0 ], @@ -6966,7 +6966,7 @@ "arithmetic" ], "total_cycles": [ - 6.666666507720947, + 6.0, 1.0, 1.0 ] @@ -6991,7 +6991,7 @@ "uses_late_zs_update": false, "variants": { "Main": { - "fp16_arithmetic": 57, + "fp16_arithmetic": 66, "has_stack_spilling": false, "performance": { "longest_path_bound_pipelines": [ @@ -7017,12 +7017,13 @@ ], "shortest_path_bound_pipelines": [ "arith_total", - "arith_cvt" + "arith_cvt", + "arith_sfu" ], "shortest_path_cycles": [ - 0.078125, - 0.046875, - 0.078125, + 0.0625, + 0.03125, + 0.0625, 0.0625, 0.0, 0.0, @@ -7035,7 +7036,7 @@ "total_cycles": [ 0.234375, 0.234375, - 0.203125, + 0.1875, 0.125, 0.0, 0.25, @@ -7045,7 +7046,7 @@ "stack_spill_bytes": 0, "thread_occupancy": 100, "uniform_registers_used": 8, - "work_registers_used": 20 + "work_registers_used": 19 } } }, @@ -7075,7 +7076,7 @@ "arithmetic" ], "shortest_path_cycles": [ - 2.309999942779541, + 1.9800000190734863, 1.0, 0.0 ], @@ -7083,14 +7084,14 @@ "arithmetic" ], "total_cycles": [ - 4.333333492279053, + 4.0, 1.0, 1.0 ] }, "thread_occupancy": 100, "uniform_registers_used": 1, - "work_registers_used": 4 + "work_registers_used": 3 } } } @@ -8920,17 +8921,17 @@ "uses_late_zs_update": false, "variants": { "Main": { - "fp16_arithmetic": 33, + "fp16_arithmetic": 68, "has_stack_spilling": false, "performance": { "longest_path_bound_pipelines": [ "arith_total", - "arith_fma" + "arith_sfu" ], "longest_path_cycles": [ - 1.5125000476837158, - 1.5125000476837158, - 0.546875, + 1.5, + 1.3875000476837158, + 0.737500011920929, 1.5, 0.0, 0.125, @@ -8960,12 +8961,12 @@ ], "total_bound_pipelines": [ "arith_total", - "arith_fma" + "arith_sfu" ], "total_cycles": [ - 1.6375000476837158, - 1.6375000476837158, - 0.578125, + 1.5625, + 1.5125000476837158, + 0.762499988079071, 1.5625, 0.0, 0.125, @@ -8974,7 +8975,7 @@ }, "stack_spill_bytes": 0, "thread_occupancy": 100, - "uniform_registers_used": 20, + "uniform_registers_used": 16, "work_registers_used": 32 } } @@ -8989,12 +8990,12 @@ "has_stack_spilling": false, "performance": { "longest_path_bound_pipelines": [ - null + "arithmetic" ], "longest_path_cycles": [ - null, - null, - null + 22.110000610351562, + 1.0, + 0.0 ], "pipelines": [ "arithmetic", @@ -9013,14 +9014,14 @@ "arithmetic" ], "total_cycles": [ - 10.666666984558105, + 10.0, 1.0, 0.0 ] }, "thread_occupancy": 100, - "uniform_registers_used": 1, - "work_registers_used": 4 + "uniform_registers_used": 2, + "work_registers_used": 3 } } } @@ -12273,17 +12274,17 @@ "uses_late_zs_update": false, "variants": { "Main": { - "fp16_arithmetic": 37, + "fp16_arithmetic": 65, "has_stack_spilling": false, "performance": { "longest_path_bound_pipelines": [ "arith_total", - "arith_fma" + "arith_sfu" ], "longest_path_cycles": [ - 1.5499999523162842, - 1.5499999523162842, - 0.515625, + 1.5, + 1.4249999523162842, + 0.699999988079071, 1.5, 0.0, 0.125, @@ -12313,12 +12314,12 @@ ], "total_bound_pipelines": [ "arith_total", - "arith_fma" + "arith_sfu" ], "total_cycles": [ - 1.6749999523162842, - 1.6749999523162842, - 0.5625, + 1.5625, + 1.5499999523162842, + 0.75, 1.5625, 0.0, 0.125, @@ -12328,7 +12329,7 @@ "stack_spill_bytes": 0, "thread_occupancy": 100, "uniform_registers_used": 18, - "work_registers_used": 32 + "work_registers_used": 31 } } } @@ -13477,17 +13478,16 @@ "has_stack_spilling": false, "performance": { "longest_path_bound_pipelines": [ - "varying", "texture" ], "longest_path_cycles": [ 0.15625, 0.15625, - 0.0, + 0.015625, 0.0, 0.0, 0.25, - 0.25 + 0.5 ], "pipelines": [ "arith_total", @@ -13499,36 +13499,34 @@ "texture" ], "shortest_path_bound_pipelines": [ - "varying", "texture" ], "shortest_path_cycles": [ 0.15625, 0.15625, - 0.0, + 0.015625, 0.0, 0.0, 0.25, - 0.25 + 0.5 ], "total_bound_pipelines": [ - "varying", "texture" ], "total_cycles": [ 0.15625, 0.15625, - 0.0, + 0.015625, 0.0, 0.0, 0.25, - 0.25 + 0.5 ] }, "stack_spill_bytes": 0, "thread_occupancy": 100, - "uniform_registers_used": 12, - "work_registers_used": 6 + "uniform_registers_used": 14, + "work_registers_used": 8 } } } @@ -13645,131 +13643,6 @@ } } }, - "flutter/impeller/renderer/path_polyline.comp.vkspv": { - "Mali-G78": { - "core": "Mali-G78", - "filename": "flutter/impeller/renderer/path_polyline.comp.vkspv", - "has_uniform_computation": true, - "type": "Compute", - "variants": { - "Main": { - "fp16_arithmetic": 0, - "has_stack_spilling": false, - "performance": { - "longest_path_bound_pipelines": [ - null - ], - "longest_path_cycles": [ - null, - null, - null, - null, - null, - null - ], - "pipelines": [ - "arith_total", - "arith_fma", - "arith_cvt", - "arith_sfu", - "load_store", - "texture" - ], - "shortest_path_bound_pipelines": [ - "load_store" - ], - "shortest_path_cycles": [ - 0.75, - 0.0, - 0.75, - 0.0, - 2.0, - 0.0 - ], - "total_bound_pipelines": [ - "load_store" - ], - "total_cycles": [ - 5.9375, - 2.737499952316284, - 4.824999809265137, - 5.9375, - 35.0, - 0.0 - ] - }, - "shared_storage_used": 12288, - "stack_spill_bytes": 0, - "thread_occupancy": 50, - "uniform_registers_used": 34, - "work_registers_used": 55 - } - } - } - }, - "flutter/impeller/renderer/stroke.comp.vkspv": { - "Mali-G78": { - "core": "Mali-G78", - "filename": "flutter/impeller/renderer/stroke.comp.vkspv", - "has_uniform_computation": true, - "type": "Compute", - "variants": { - "Main": { - "fp16_arithmetic": 0, - "has_stack_spilling": false, - "performance": { - "longest_path_bound_pipelines": [ - "load_store" - ], - "longest_path_cycles": [ - 0.3125, - 0.3125, - 0.1875, - 0.125, - 7.0, - 0.0 - ], - "pipelines": [ - "arith_total", - "arith_fma", - "arith_cvt", - "arith_sfu", - "load_store", - "texture" - ], - "shortest_path_bound_pipelines": [ - "arith_total", - "arith_cvt" - ], - "shortest_path_cycles": [ - 0.0625, - 0.0, - 0.0625, - 0.0, - 0.0, - 0.0 - ], - "total_bound_pipelines": [ - "load_store" - ], - "total_cycles": [ - 0.3125, - 0.3125, - 0.1875, - 0.125, - 7.0, - 0.0 - ] - }, - "shared_storage_used": 0, - "stack_spill_bytes": 0, - "thread_occupancy": 100, - "uniform_registers_used": 10, - "work_registers_used": 24 - } - } - } - }, "flutter/impeller/scene/shaders/gles/skinned.vert.gles": { "Mali-G78": { "core": "Mali-G78", From acdab2fa7c170057cc82b6024fc09af204669c1d Mon Sep 17 00:00:00 2001 From: jonahwilliams Date: Wed, 29 Mar 2023 10:12:06 -0700 Subject: [PATCH 7/7] ++ --- impeller/tools/malioc.json | 53 ++++++++++++++++++++------------------ 1 file changed, 28 insertions(+), 25 deletions(-) diff --git a/impeller/tools/malioc.json b/impeller/tools/malioc.json index 8f535e602f2f4..2462e32f89745 100644 --- a/impeller/tools/malioc.json +++ b/impeller/tools/malioc.json @@ -178,7 +178,7 @@ }, "stack_spill_bytes": 0, "thread_occupancy": 100, - "uniform_registers_used": 12, + "uniform_registers_used": 10, "work_registers_used": 15 } } @@ -252,7 +252,7 @@ }, "stack_spill_bytes": 0, "thread_occupancy": 100, - "uniform_registers_used": 10, + "uniform_registers_used": 8, "work_registers_used": 16 } } @@ -326,7 +326,7 @@ }, "stack_spill_bytes": 0, "thread_occupancy": 100, - "uniform_registers_used": 10, + "uniform_registers_used": 8, "work_registers_used": 14 } } @@ -403,7 +403,7 @@ }, "stack_spill_bytes": 0, "thread_occupancy": 100, - "uniform_registers_used": 8, + "uniform_registers_used": 6, "work_registers_used": 11 } } @@ -481,7 +481,7 @@ }, "stack_spill_bytes": 0, "thread_occupancy": 100, - "uniform_registers_used": 8, + "uniform_registers_used": 6, "work_registers_used": 11 } } @@ -560,7 +560,7 @@ }, "stack_spill_bytes": 0, "thread_occupancy": 100, - "uniform_registers_used": 8, + "uniform_registers_used": 6, "work_registers_used": 11 } } @@ -637,7 +637,7 @@ }, "stack_spill_bytes": 0, "thread_occupancy": 100, - "uniform_registers_used": 10, + "uniform_registers_used": 8, "work_registers_used": 15 } } @@ -710,7 +710,7 @@ }, "stack_spill_bytes": 0, "thread_occupancy": 100, - "uniform_registers_used": 12, + "uniform_registers_used": 10, "work_registers_used": 15 } } @@ -787,7 +787,7 @@ }, "stack_spill_bytes": 0, "thread_occupancy": 100, - "uniform_registers_used": 8, + "uniform_registers_used": 6, "work_registers_used": 11 } } @@ -860,7 +860,7 @@ }, "stack_spill_bytes": 0, "thread_occupancy": 100, - "uniform_registers_used": 12, + "uniform_registers_used": 10, "work_registers_used": 15 } } @@ -938,7 +938,7 @@ }, "stack_spill_bytes": 0, "thread_occupancy": 100, - "uniform_registers_used": 8, + "uniform_registers_used": 6, "work_registers_used": 11 } } @@ -1015,7 +1015,7 @@ }, "stack_spill_bytes": 0, "thread_occupancy": 100, - "uniform_registers_used": 10, + "uniform_registers_used": 8, "work_registers_used": 15 } } @@ -1088,7 +1088,7 @@ }, "stack_spill_bytes": 0, "thread_occupancy": 100, - "uniform_registers_used": 12, + "uniform_registers_used": 10, "work_registers_used": 15 } } @@ -1166,7 +1166,7 @@ }, "stack_spill_bytes": 0, "thread_occupancy": 100, - "uniform_registers_used": 8, + "uniform_registers_used": 6, "work_registers_used": 11 } } @@ -1239,7 +1239,7 @@ }, "stack_spill_bytes": 0, "thread_occupancy": 100, - "uniform_registers_used": 14, + "uniform_registers_used": 12, "work_registers_used": 25 } } @@ -3332,7 +3332,7 @@ }, "stack_spill_bytes": 0, "thread_occupancy": 100, - "uniform_registers_used": 14, + "uniform_registers_used": 12, "work_registers_used": 20 } } @@ -3404,7 +3404,7 @@ }, "stack_spill_bytes": 0, "thread_occupancy": 100, - "uniform_registers_used": 16, + "uniform_registers_used": 14, "work_registers_used": 15 } } @@ -13478,16 +13478,17 @@ "has_stack_spilling": false, "performance": { "longest_path_bound_pipelines": [ + "varying", "texture" ], "longest_path_cycles": [ 0.15625, 0.15625, - 0.015625, + 0.0, 0.0, 0.0, 0.25, - 0.5 + 0.25 ], "pipelines": [ "arith_total", @@ -13499,34 +13500,36 @@ "texture" ], "shortest_path_bound_pipelines": [ + "varying", "texture" ], "shortest_path_cycles": [ 0.15625, 0.15625, - 0.015625, + 0.0, 0.0, 0.0, 0.25, - 0.5 + 0.25 ], "total_bound_pipelines": [ + "varying", "texture" ], "total_cycles": [ 0.15625, 0.15625, - 0.015625, + 0.0, 0.0, 0.0, 0.25, - 0.5 + 0.25 ] }, "stack_spill_bytes": 0, "thread_occupancy": 100, - "uniform_registers_used": 14, - "work_registers_used": 8 + "uniform_registers_used": 12, + "work_registers_used": 6 } } }