From 0b8542138a89da89ccd931f9cad75b1a8c4b452a Mon Sep 17 00:00:00 2001 From: Abdelaziz Mahdy Date: Tue, 10 Feb 2026 20:06:00 -0400 Subject: [PATCH 1/2] [ET-VK] Prevent decomposition of activation ops with native shaders Add hardswish, hardsigmoid, and hardshrink to the Vulkan partitioner's ops_not_to_decompose list, and register hardswish and hardsigmoid in the op_registry. These ops have native GLSL shader implementations in the Vulkan backend but were being decomposed by PyTorch's default decomposition table into primitive ops (mul/add/clamp/div with constant tensors) before the partitioner could claim them. The decomposed paths produce NaN/Inf on PowerVR GPUs due to constant tensor loading issues in the decomposed graph. With this fix, to_edge_transform_and_lower() automatically preserves these ops via the partitioner's ops_to_not_decompose() method, allowing the native Vulkan shaders to handle them directly. Tested on Pixel 10 Pro (PowerVR D-Series DXT-48-1536): - MobileNet V3 Small: NaN eliminated (was 1000/1000 NaN, now 0/1000) - Isolated hardswish test: perfect match with XNNPACK reference Fixes #17299 --- backends/vulkan/op_registry.py | 2 ++ backends/vulkan/partitioner/vulkan_partitioner.py | 7 +++++++ 2 files changed, 9 insertions(+) diff --git a/backends/vulkan/op_registry.py b/backends/vulkan/op_registry.py index 3dc873ac21c..32c29587505 100644 --- a/backends/vulkan/op_registry.py +++ b/backends/vulkan/op_registry.py @@ -240,6 +240,8 @@ def register_binary_scalar_op(): exir_ops.edge.aten.exp.default, exir_ops.edge.aten.gelu.default, exir_ops.edge.aten.hardshrink.default, + exir_ops.edge.aten.hardsigmoid.default, + exir_ops.edge.aten.hardswish.default, exir_ops.edge.aten.hardtanh.default, exir_ops.edge.aten.neg.default, exir_ops.edge.aten.relu.default, diff --git a/backends/vulkan/partitioner/vulkan_partitioner.py b/backends/vulkan/partitioner/vulkan_partitioner.py index 69308b902e6..c0d1a26380b 100644 --- 
a/backends/vulkan/partitioner/vulkan_partitioner.py +++ b/backends/vulkan/partitioner/vulkan_partitioner.py @@ -47,6 +47,13 @@ # pyre-ignore ops_not_to_decompose = [ torch.ops.aten.upsample_nearest2d.vec, + # Activation ops with native Vulkan shaders that PyTorch's default + # decomposition table would otherwise decompose into primitive ops. + # The decomposed paths produce NaN/Inf on some GPUs (e.g. PowerVR) + # due to constant tensor loading issues in the decomposed graph. + torch.ops.aten.hardsigmoid.default, + torch.ops.aten.hardswish.default, + torch.ops.aten.hardshrink.default, ] logger: logging.Logger = logging.getLogger("") From 0b8e58c3d8ce10513ef54f119ee468c00fefa720 Mon Sep 17 00:00:00 2001 From: Abdelaziz Mahdy Date: Wed, 11 Feb 2026 21:45:43 -0400 Subject: [PATCH 2/2] Revert accidental merge conflict changes in op_registry.py Restore register_pow_tensor_scalar which was accidentally replaced with a duplicate register_unary_op during merge conflict resolution. --- backends/vulkan/op_registry.py | 25 ++----------------------- 1 file changed, 2 insertions(+), 23 deletions(-) diff --git a/backends/vulkan/op_registry.py b/backends/vulkan/op_registry.py index 9249092ac41..49fa4c59830 100644 --- a/backends/vulkan/op_registry.py +++ b/backends/vulkan/op_registry.py @@ -249,29 +249,8 @@ def register_binaryop_cpp_ops(): # ============================================================================= -@update_features( - [ - exir_ops.edge.aten.abs.default, - exir_ops.edge.aten.clamp.default, - exir_ops.edge.aten.cos.default, - exir_ops.edge.aten.exp.default, - exir_ops.edge.aten.gelu.default, - exir_ops.edge.aten.hardshrink.default, - exir_ops.edge.aten.hardsigmoid.default, - exir_ops.edge.aten.hardswish.default, - exir_ops.edge.aten.hardtanh.default, - exir_ops.edge.aten.neg.default, - exir_ops.edge.aten.relu.default, - exir_ops.edge.aten.sigmoid.default, - exir_ops.edge.aten.sin.default, - exir_ops.edge.aten.sqrt.default, - exir_ops.edge.aten.rsqrt.default, - 
exir_ops.edge.aten.tanh.default, - exir_ops.edge.aten.round.default, - exir_ops.edge.aten.leaky_relu.default, - ] -) -def register_unary_op(): +@update_features(exir_ops.edge.aten.pow.Tensor_Scalar) +def register_pow_tensor_scalar(): return OpFeatures( inputs_storage=utils.ANY_STORAGE, inputs_dtypes=utils.FP_T,