Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 5 additions & 1 deletion backends/vulkan/runtime/api/Context.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,11 @@ Context::Context(vkapi::Adapter* adapter, const ContextConfig& config)
image_clearlist_mutex_{},
images_to_clear_{},
preferred_image_tiling_{VK_IMAGE_TILING_OPTIMAL} {
if (adapter_p_->linear_tiling_3d_enabled()) {
// PowerVR GPUs may report linear tiling support for 3D images but not
// handle it correctly in compute shaders (e.g., imageStore may produce
// incorrect results). Force optimal tiling on PowerVR for correctness.
if (adapter_p_->linear_tiling_3d_enabled() &&
adapter_p_->device_type() != vkapi::DeviceType::POWERVR) {
preferred_image_tiling_ = VK_IMAGE_TILING_LINEAR;
}
}
Expand Down
3 changes: 3 additions & 0 deletions backends/vulkan/runtime/graph/ComputeGraph.h
Original file line number Diff line number Diff line change
Expand Up @@ -661,6 +661,9 @@ class ComputeGraph final {
inline bool device_is_adreno() {
return context_->adapter_ptr()->device_type() == vkapi::DeviceType::ADRENO;
}
inline bool device_is_powervr() {
return context_->adapter_ptr()->device_type() == vkapi::DeviceType::POWERVR;
}
const std::string& device_name() {
return context()->adapter_ptr()->device_name();
}
Expand Down
43 changes: 19 additions & 24 deletions backends/vulkan/runtime/graph/ops/impl/Convolution.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -401,6 +401,25 @@ utils::uvec3 conv2d_local_wg_size(
method = Conv2dMethod::SlidingWindow;
}

// PowerVR GPUs may not handle large workgroup sizes well. Use smaller,
// more conservative workgroup sizes to avoid potential hardware issues
// with the TBDR architecture.
if (graph->device_is_powervr()) {
if (method == Conv2dMethod::Pointwise) {
uint32_t local_wg_size_y = 1;
if (global_workgroup_size[1] % 4 == 0) {
local_wg_size_y = 4;
} else if (global_workgroup_size[1] % 2 == 0) {
local_wg_size_y = 2;
}
return {32 / local_wg_size_y, local_wg_size_y, 1};
} else if (method == Conv2dMethod::Depthwise) {
return {32, 1, 1};
} else {
return graph->create_local_wg_size(global_workgroup_size);
}
}

if (method == Conv2dMethod::Pointwise) {
uint32_t local_wg_size_y = 1;
if (global_workgroup_size[1] % 8 == 0) {
Expand Down Expand Up @@ -515,30 +534,6 @@ void add_conv2d_node(
stride_equals_dilation,
stride_1_padding_0);

utils::uvec3 wg_size = create_conv2d_global_wg_size(
graph, method, out, weight_data, stride_equals_dilation);

utils::uvec3 local_wg_size;
if (method == Conv2dMethod::Depthwise || method == Conv2dMethod::Pointwise) {
wg_size = {wg_size[0] * wg_size[1], wg_size[2], 1};
}

if (method == Conv2dMethod::Pointwise) {
uint32_t local_wg_size_y = 1;
if (wg_size[1] % 8 == 0) {
local_wg_size_y = 8;
} else if (wg_size[1] % 4 == 0) {
local_wg_size_y = 4;
} else if (wg_size[1] % 2 == 0) {
local_wg_size_y = 2;
}
local_wg_size = {64 / local_wg_size_y, local_wg_size_y, 1};
} else if (method == Conv2dMethod::Depthwise) {
local_wg_size = {64, 1, 1};
} else {
local_wg_size = graph.create_local_wg_size(wg_size);
}

vkapi::ParamsBindList param_buffers;
std::vector<PushConstantDataInfo> push_constants;
if (method == Conv2dMethod::Pointwise) {
Expand Down
18 changes: 17 additions & 1 deletion backends/vulkan/runtime/vk_api/Adapter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -131,6 +131,20 @@ VkDevice create_logical_device(
enabled_device_extensions,
requested_device_extensions);

// Enable robustBufferAccess on PowerVR devices to provide more well-defined
// behavior for out-of-bounds buffer descriptor accesses. Without this,
// PowerVR drivers may return zeros or undefined values for some edge cases
// in compute shaders. This has a minor performance cost but improves
// correctness.
VkPhysicalDeviceFeatures enabled_features{};
if (physical_device.device_type == DeviceType::POWERVR) {
VkPhysicalDeviceFeatures supported_features{};
vkGetPhysicalDeviceFeatures(physical_device.handle, &supported_features);
if (supported_features.robustBufferAccess == VK_TRUE) {
enabled_features.robustBufferAccess = VK_TRUE;
}
}

VkDeviceCreateInfo device_create_info{
VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO, // sType
nullptr, // pNext
Expand All @@ -142,7 +156,9 @@ VkDevice create_logical_device(
static_cast<uint32_t>(
enabled_device_extensions.size()), // enabledExtensionCount
enabled_device_extensions.data(), // ppEnabledExtensionNames
nullptr, // pEnabledFeatures
physical_device.device_type == DeviceType::POWERVR
? &enabled_features
: nullptr, // pEnabledFeatures
};

void* extension_list_top = nullptr;
Expand Down
2 changes: 2 additions & 0 deletions backends/vulkan/runtime/vk_api/Device.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -126,6 +126,8 @@ PhysicalDevice::PhysicalDevice(
device_type = DeviceType::NVIDIA;
} else if (device_name.find("mali") != std::string::npos) {
device_type = DeviceType::MALI;
} else if (device_name.find("powervr") != std::string::npos) {
device_type = DeviceType::POWERVR;
}
}

Expand Down
1 change: 1 addition & 0 deletions backends/vulkan/runtime/vk_api/Device.h
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ enum class DeviceType : uint32_t {
MALI,
ADRENO,
SWIFTSHADER,
POWERVR,
Copy link

Copilot AI Feb 10, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The PR description mentions StagingBuffer.h changes (vmaFlushAllocation additions), but these changes are not present in the PR diff. Upon inspection, these changes already exist in the codebase (lines 77, 91, 125 of StagingBuffer.h). Please update the PR description to clarify that the StagingBuffer correctness fixes are already present in the codebase and are not part of this PR's changes.

Copilot uses AI. Check for mistakes.
};

struct PhysicalDevice final {
Expand Down