diff --git a/backends/vulkan/runtime/api/Context.cpp b/backends/vulkan/runtime/api/Context.cpp
index 80aef97fc04..f6740e189b3 100644
--- a/backends/vulkan/runtime/api/Context.cpp
+++ b/backends/vulkan/runtime/api/Context.cpp
@@ -46,7 +46,11 @@ Context::Context(vkapi::Adapter* adapter, const ContextConfig& config)
       image_clearlist_mutex_{},
       images_to_clear_{},
       preferred_image_tiling_{VK_IMAGE_TILING_OPTIMAL} {
-  if (adapter_p_->linear_tiling_3d_enabled()) {
+  // PowerVR GPUs may report linear tiling support for 3D images but not
+  // handle it correctly in compute shaders (e.g., imageStore may produce
+  // incorrect results). Force optimal tiling on PowerVR for correctness.
+  if (adapter_p_->linear_tiling_3d_enabled() &&
+      adapter_p_->device_type() != vkapi::DeviceType::POWERVR) {
     preferred_image_tiling_ = VK_IMAGE_TILING_LINEAR;
   }
 }
diff --git a/backends/vulkan/runtime/graph/ComputeGraph.cpp b/backends/vulkan/runtime/graph/ComputeGraph.cpp
index bb2df30a174..3552b300d63 100644
--- a/backends/vulkan/runtime/graph/ComputeGraph.cpp
+++ b/backends/vulkan/runtime/graph/ComputeGraph.cpp
@@ -1115,7 +1115,11 @@ void ComputeGraph::prepack() {
   int i = 0;
   bool submitted = false;
   const bool reduce_peak_memory = total_constant_nbytes_ > 500 * MB;
-  // int count = 0;
+  // On PowerVR GPUs, batching multiple prepack compute dispatches in a single
+  // command buffer produces incorrect results for the second and subsequent
+  // constants. Submit and wait after each prepack node to work around this
+  // driver issue.
+  const bool serialize_prepack = device_is_powervr();
   context_->set_cmd();
   for (std::unique_ptr<PrepackNode>& node : prepack_nodes_) {
     // Do not trigger on the first or last prepack node.
@@ -1142,6 +1146,13 @@ void ComputeGraph::prepack() {
 
     node->encode(this);
     i++;
+
+    if (serialize_prepack && i < static_cast<int>(prepack_nodes_.size())) {
+      submit_current_cmd_and_wait();
+      context_->flush();
+      staging_nbytes_in_cmd_ = 0;
+      context_->set_cmd();
+    }
   }
   submit_current_cmd_and_wait(/*final_use=*/true);
   context_->flush();
diff --git a/backends/vulkan/runtime/graph/ComputeGraph.h b/backends/vulkan/runtime/graph/ComputeGraph.h
index a7c8cffffd1..1b44a032850 100644
--- a/backends/vulkan/runtime/graph/ComputeGraph.h
+++ b/backends/vulkan/runtime/graph/ComputeGraph.h
@@ -661,6 +661,9 @@ class ComputeGraph final {
   inline bool device_is_adreno() {
     return context_->adapter_ptr()->device_type() == vkapi::DeviceType::ADRENO;
   }
+  inline bool device_is_powervr() {
+    return context_->adapter_ptr()->device_type() == vkapi::DeviceType::POWERVR;
+  }
   const std::string& device_name() {
     return context()->adapter_ptr()->device_name();
   }
diff --git a/backends/vulkan/runtime/graph/ops/impl/Convolution.cpp b/backends/vulkan/runtime/graph/ops/impl/Convolution.cpp
index efd61848af1..79537d72adc 100644
--- a/backends/vulkan/runtime/graph/ops/impl/Convolution.cpp
+++ b/backends/vulkan/runtime/graph/ops/impl/Convolution.cpp
@@ -401,6 +401,25 @@ utils::uvec3 conv2d_local_wg_size(
     method = Conv2dMethod::SlidingWindow;
   }
 
+  // PowerVR GPUs may not handle large workgroup sizes well. Use smaller,
+  // more conservative workgroup sizes to avoid potential hardware issues
+  // with the TBDR architecture.
+  if (graph->device_is_powervr()) {
+    if (method == Conv2dMethod::Pointwise) {
+      uint32_t local_wg_size_y = 1;
+      if (global_workgroup_size[1] % 4 == 0) {
+        local_wg_size_y = 4;
+      } else if (global_workgroup_size[1] % 2 == 0) {
+        local_wg_size_y = 2;
+      }
+      return {32 / local_wg_size_y, local_wg_size_y, 1};
+    } else if (method == Conv2dMethod::Depthwise) {
+      return {32, 1, 1};
+    } else {
+      return graph->create_local_wg_size(global_workgroup_size);
+    }
+  }
+
   if (method == Conv2dMethod::Pointwise) {
     uint32_t local_wg_size_y = 1;
     if (global_workgroup_size[1] % 8 == 0) {
@@ -515,30 +534,6 @@ void add_conv2d_node(
       stride_equals_dilation,
       stride_1_padding_0);
 
-  utils::uvec3 wg_size = create_conv2d_global_wg_size(
-      graph, method, out, weight_data, stride_equals_dilation);
-
-  utils::uvec3 local_wg_size;
-  if (method == Conv2dMethod::Depthwise || method == Conv2dMethod::Pointwise) {
-    wg_size = {wg_size[0] * wg_size[1], wg_size[2], 1};
-  }
-
-  if (method == Conv2dMethod::Pointwise) {
-    uint32_t local_wg_size_y = 1;
-    if (wg_size[1] % 8 == 0) {
-      local_wg_size_y = 8;
-    } else if (wg_size[1] % 4 == 0) {
-      local_wg_size_y = 4;
-    } else if (wg_size[1] % 2 == 0) {
-      local_wg_size_y = 2;
-    }
-    local_wg_size = {64 / local_wg_size_y, local_wg_size_y, 1};
-  } else if (method == Conv2dMethod::Depthwise) {
-    local_wg_size = {64, 1, 1};
-  } else {
-    local_wg_size = graph.create_local_wg_size(wg_size);
-  }
-
   vkapi::ParamsBindList param_buffers;
   std::vector<PushConstantDataInfo> push_constants;
   if (method == Conv2dMethod::Pointwise) {
diff --git a/backends/vulkan/runtime/vk_api/Adapter.cpp b/backends/vulkan/runtime/vk_api/Adapter.cpp
index 0a5b1601dea..96805d1b391 100644
--- a/backends/vulkan/runtime/vk_api/Adapter.cpp
+++ b/backends/vulkan/runtime/vk_api/Adapter.cpp
@@ -131,6 +131,20 @@ VkDevice create_logical_device(
       enabled_device_extensions,
       requested_device_extensions);
 
+  // Enable robustBufferAccess on PowerVR devices to provide more well-defined
+  // behavior for out-of-bounds buffer descriptor accesses. Without this,
+  // PowerVR drivers may return zeros or undefined values for some edge cases
+  // in compute shaders. This has a minor performance cost but improves
+  // correctness.
+  VkPhysicalDeviceFeatures enabled_features{};
+  if (physical_device.device_type == DeviceType::POWERVR) {
+    VkPhysicalDeviceFeatures supported_features{};
+    vkGetPhysicalDeviceFeatures(physical_device.handle, &supported_features);
+    if (supported_features.robustBufferAccess == VK_TRUE) {
+      enabled_features.robustBufferAccess = VK_TRUE;
+    }
+  }
+
   VkDeviceCreateInfo device_create_info{
       VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO, // sType
       nullptr, // pNext
@@ -142,7 +156,9 @@ VkDevice create_logical_device(
       static_cast<uint32_t>(
           enabled_device_extensions.size()), // enabledExtensionCount
       enabled_device_extensions.data(), // ppEnabledExtensionNames
-      nullptr, // pEnabledFeatures
+      physical_device.device_type == DeviceType::POWERVR
+          ? &enabled_features
+          : nullptr, // pEnabledFeatures
   };
 
   void* extension_list_top = nullptr;
diff --git a/backends/vulkan/runtime/vk_api/Device.cpp b/backends/vulkan/runtime/vk_api/Device.cpp
index 249038ed51e..7893ea4e312 100644
--- a/backends/vulkan/runtime/vk_api/Device.cpp
+++ b/backends/vulkan/runtime/vk_api/Device.cpp
@@ -126,6 +126,8 @@ PhysicalDevice::PhysicalDevice(
     device_type = DeviceType::NVIDIA;
   } else if (device_name.find("mali") != std::string::npos) {
     device_type = DeviceType::MALI;
+  } else if (device_name.find("powervr") != std::string::npos) {
+    device_type = DeviceType::POWERVR;
   }
 }
 
diff --git a/backends/vulkan/runtime/vk_api/Device.h b/backends/vulkan/runtime/vk_api/Device.h
index 2bc3075ffb4..dc59367d03f 100644
--- a/backends/vulkan/runtime/vk_api/Device.h
+++ b/backends/vulkan/runtime/vk_api/Device.h
@@ -24,6 +24,7 @@ enum class DeviceType : uint32_t {
   MALI,
   ADRENO,
   SWIFTSHADER,
+  POWERVR,
 };
 
 struct PhysicalDevice final {