Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 5 additions & 1 deletion backends/vulkan/runtime/api/Context.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,11 @@ Context::Context(vkapi::Adapter* adapter, const ContextConfig& config)
image_clearlist_mutex_{},
images_to_clear_{},
preferred_image_tiling_{VK_IMAGE_TILING_OPTIMAL} {
if (adapter_p_->linear_tiling_3d_enabled()) {
// PowerVR GPUs may report linear tiling support for 3D images but not
// handle it correctly in compute shaders (e.g., imageStore may produce
// incorrect results). Force optimal tiling on PowerVR for correctness.
if (adapter_p_->linear_tiling_3d_enabled() &&
adapter_p_->device_type() != vkapi::DeviceType::POWERVR) {
preferred_image_tiling_ = VK_IMAGE_TILING_LINEAR;
}
}
Expand Down
13 changes: 12 additions & 1 deletion backends/vulkan/runtime/graph/ComputeGraph.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1115,7 +1115,11 @@ void ComputeGraph::prepack() {
int i = 0;
bool submitted = false;
const bool reduce_peak_memory = total_constant_nbytes_ > 500 * MB;
// int count = 0;
// On PowerVR GPUs, batching multiple prepack compute dispatches in a single
// command buffer produces incorrect results for the second and subsequent
// constants. Submit and wait after each prepack node to work around this
// driver issue.
const bool serialize_prepack = device_is_powervr();
context_->set_cmd();
for (std::unique_ptr<PrepackNode>& node : prepack_nodes_) {
// Do not trigger on the first or last prepack node.
Expand All @@ -1142,6 +1146,13 @@ void ComputeGraph::prepack() {

node->encode(this);
i++;

if (serialize_prepack && i < static_cast<int>(prepack_nodes_.size())) {
submit_current_cmd_and_wait();
context_->flush();
staging_nbytes_in_cmd_ = 0;
context_->set_cmd();
}
}
submit_current_cmd_and_wait(/*final_use=*/true);
context_->flush();
Expand Down
3 changes: 3 additions & 0 deletions backends/vulkan/runtime/graph/ComputeGraph.h
Original file line number Diff line number Diff line change
Expand Up @@ -661,6 +661,9 @@ class ComputeGraph final {
inline bool device_is_adreno() {
return context_->adapter_ptr()->device_type() == vkapi::DeviceType::ADRENO;
}
inline bool device_is_powervr() {
return context_->adapter_ptr()->device_type() == vkapi::DeviceType::POWERVR;
}
const std::string& device_name() {
return context()->adapter_ptr()->device_name();
}
Expand Down
43 changes: 19 additions & 24 deletions backends/vulkan/runtime/graph/ops/impl/Convolution.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -401,6 +401,25 @@ utils::uvec3 conv2d_local_wg_size(
method = Conv2dMethod::SlidingWindow;
}

// PowerVR GPUs may not handle large workgroup sizes well. Use smaller,
// more conservative workgroup sizes to avoid potential hardware issues
// with the TBDR architecture.
if (graph->device_is_powervr()) {
if (method == Conv2dMethod::Pointwise) {
uint32_t local_wg_size_y = 1;
if (global_workgroup_size[1] % 4 == 0) {
local_wg_size_y = 4;
} else if (global_workgroup_size[1] % 2 == 0) {
local_wg_size_y = 2;
}
return {32 / local_wg_size_y, local_wg_size_y, 1};
} else if (method == Conv2dMethod::Depthwise) {
return {32, 1, 1};
} else {
return graph->create_local_wg_size(global_workgroup_size);
}
}

if (method == Conv2dMethod::Pointwise) {
uint32_t local_wg_size_y = 1;
if (global_workgroup_size[1] % 8 == 0) {
Expand Down Expand Up @@ -515,30 +534,6 @@ void add_conv2d_node(
stride_equals_dilation,
stride_1_padding_0);

utils::uvec3 wg_size = create_conv2d_global_wg_size(
graph, method, out, weight_data, stride_equals_dilation);

utils::uvec3 local_wg_size;
if (method == Conv2dMethod::Depthwise || method == Conv2dMethod::Pointwise) {
wg_size = {wg_size[0] * wg_size[1], wg_size[2], 1};
}

if (method == Conv2dMethod::Pointwise) {
uint32_t local_wg_size_y = 1;
if (wg_size[1] % 8 == 0) {
local_wg_size_y = 8;
} else if (wg_size[1] % 4 == 0) {
local_wg_size_y = 4;
} else if (wg_size[1] % 2 == 0) {
local_wg_size_y = 2;
}
local_wg_size = {64 / local_wg_size_y, local_wg_size_y, 1};
} else if (method == Conv2dMethod::Depthwise) {
local_wg_size = {64, 1, 1};
} else {
local_wg_size = graph.create_local_wg_size(wg_size);
}

vkapi::ParamsBindList param_buffers;
std::vector<PushConstantDataInfo> push_constants;
if (method == Conv2dMethod::Pointwise) {
Expand Down
18 changes: 17 additions & 1 deletion backends/vulkan/runtime/vk_api/Adapter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -131,6 +131,20 @@ VkDevice create_logical_device(
enabled_device_extensions,
requested_device_extensions);

// Enable robustBufferAccess on PowerVR devices to provide more well-defined
// behavior for out-of-bounds buffer descriptor accesses. Without this,
// PowerVR drivers may return zeros or undefined values for some edge cases
// in compute shaders. This has a minor performance cost but improves
// correctness.
VkPhysicalDeviceFeatures enabled_features{};
if (physical_device.device_type == DeviceType::POWERVR) {
VkPhysicalDeviceFeatures supported_features{};
vkGetPhysicalDeviceFeatures(physical_device.handle, &supported_features);
if (supported_features.robustBufferAccess == VK_TRUE) {
enabled_features.robustBufferAccess = VK_TRUE;
}
}

VkDeviceCreateInfo device_create_info{
VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO, // sType
nullptr, // pNext
Expand All @@ -142,7 +156,9 @@ VkDevice create_logical_device(
static_cast<uint32_t>(
enabled_device_extensions.size()), // enabledExtensionCount
enabled_device_extensions.data(), // ppEnabledExtensionNames
nullptr, // pEnabledFeatures
physical_device.device_type == DeviceType::POWERVR
? &enabled_features
: nullptr, // pEnabledFeatures
};

void* extension_list_top = nullptr;
Expand Down
2 changes: 2 additions & 0 deletions backends/vulkan/runtime/vk_api/Device.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -126,6 +126,8 @@ PhysicalDevice::PhysicalDevice(
device_type = DeviceType::NVIDIA;
} else if (device_name.find("mali") != std::string::npos) {
device_type = DeviceType::MALI;
} else if (device_name.find("powervr") != std::string::npos) {
device_type = DeviceType::POWERVR;
}
}

Expand Down
1 change: 1 addition & 0 deletions backends/vulkan/runtime/vk_api/Device.h
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ enum class DeviceType : uint32_t {
MALI,
ADRENO,
SWIFTSHADER,
POWERVR,
};

struct PhysicalDevice final {
Expand Down
Loading