diff --git a/backends/vulkan/runtime/api/containers/Tensor.cpp b/backends/vulkan/runtime/api/containers/Tensor.cpp
index 351b920e805..2aa533f7aa8 100644
--- a/backends/vulkan/runtime/api/containers/Tensor.cpp
+++ b/backends/vulkan/runtime/api/containers/Tensor.cpp
@@ -564,14 +564,19 @@ size_t calculate_max_ubo_nbytes(
     const size_t min_nbytes_per_ubo,
     const utils::StorageType storage_type) {
   size_t ivec4_ubo_nbytes = utils::align_up(size_t(16), min_nbytes_per_ubo);
-  size_t uvec3_ubo_nbytes = utils::align_up(size_t(12), min_nbytes_per_ubo);
+  // TextureLimits has alignas(16) so sizeof(TextureLimits) == 16, not 12.
+  // Use 16 to match the actual sizeof used by metadata_ubo_impl().
+  size_t uvec3_ubo_nbytes = utils::align_up(size_t(16), min_nbytes_per_ubo);
   size_t int32_ubo_nbytes = utils::align_up(size_t(4), min_nbytes_per_ubo);
   if (storage_type == utils::kBuffer) {
     // sizes, strides, dim order, numel
     return 3 * ivec4_ubo_nbytes + int32_ubo_nbytes;
   }
-  // sizes, logical limits
-  return ivec4_ubo_nbytes + uvec3_ubo_nbytes;
+  // sizes, strides, dim_order, numel, logical_limits
+  // Ops like Linear and MatMul unconditionally request strides/numel UBOs on
+  // all tensors regardless of storage type, so texture tensors need the same
+  // metadata budget as buffer tensors plus logical_limits.
+  return 3 * ivec4_ubo_nbytes + int32_ubo_nbytes + uvec3_ubo_nbytes;
 }
 
 //
@@ -1161,9 +1166,10 @@ bool vTensor::is_contiguous() const {
 }
 
 size_t vTensor::get_max_ubo_nbytes(const size_t nbytes_per_ubo) const {
-  // For texture backed tensors, the metadata fields needed are:
-  // sizes, logical limits
-  size_t max_metadata_field_count = 2u;
+  // Ops like Linear and MatMul unconditionally request strides/numel UBOs on
+  // all tensors regardless of storage type, so texture tensors need the same
+  // metadata budget as buffer tensors plus logical_limits (5 fields total).
+  size_t max_metadata_field_count = 5u;
   if (storage_type() == utils::kBuffer) {
     // sizes, strides, dim order, numel
     max_metadata_field_count = 4u;
diff --git a/backends/vulkan/test/vulkan_compute_api_test.cpp b/backends/vulkan/test/vulkan_compute_api_test.cpp
index 20c9ac2b14f..261d3f72d01 100644
--- a/backends/vulkan/test/vulkan_compute_api_test.cpp
+++ b/backends/vulkan/test/vulkan_compute_api_test.cpp
@@ -920,6 +920,56 @@ TEST_F(VulkanComputeAPITest, tensor_layout_metadata_test_against_golden) {
   }
 }
 
+// Test that texture-backed tensors can serve all metadata UBO requests
+// (sizes, strides, dim_order, numel, logical_limits) without exceeding the
+// pre-allocated UBO budget. This is a regression test for an issue where
+// calculate_max_ubo_nbytes() only allocated 2 fields for texture tensors
+// (sizes + logical_limits), but operators like Linear/MatMul unconditionally
+// request strides_ubo() and numel_ubo() on all tensors regardless of storage
+// type, causing an assertion failure:
+//   "Uniform data allocation has exceeded Tensor uniform buffer size"
+TEST_F(VulkanComputeAPITest, texture_tensor_ubo_metadata_budget_test) {
+  // Create a texture-backed tensor (the default for most Vulkan ops)
+  std::vector<int64_t> sizes = {4, 8, 8};
+  vTensor texture_tensor = vTensor(
+      context(),
+      sizes,
+      vkapi::kFloat,
+      utils::StorageType::TEXTURE_3D,
+      utils::GPUMemoryLayout::TENSOR_CHANNELS_PACKED);
+
+  // These two UBOs are within the original 2-field texture budget:
+  // Field 1: sizes (ivec4)
+  EXPECT_NO_THROW(texture_tensor.sizes_ubo());
+  // Field 2: logical_limits (uvec3)
+  EXPECT_NO_THROW(texture_tensor.logical_limits_ubo());
+
+  // These UBOs exceed the original 2-field texture budget but are
+  // unconditionally requested by ops like Linear, MatMul, etc.
+  // Without the fix, these will trigger:
+  //   VK_CHECK_COND((uniforms_size_ + ubo_nbytes) <= max_ubo_nbytes_)
+  // Field 3: strides (ivec4) - FAILS without fix
+  EXPECT_NO_THROW(texture_tensor.strides_ubo());
+  // Field 4: numel (int32) - FAILS without fix
+  EXPECT_NO_THROW(texture_tensor.numel_ubo());
+  // Field 5: dim_order (ivec4) - FAILS without fix
+  EXPECT_NO_THROW(texture_tensor.dim_order_ubo());
+
+  // Also verify a buffer-backed tensor still works (should always have had
+  // enough budget for all 4+ fields)
+  vTensor buffer_tensor = vTensor(
+      context(),
+      sizes,
+      vkapi::kFloat,
+      utils::StorageType::BUFFER,
+      utils::GPUMemoryLayout::TENSOR_CHANNELS_PACKED);
+
+  EXPECT_NO_THROW(buffer_tensor.sizes_ubo());
+  EXPECT_NO_THROW(buffer_tensor.strides_ubo());
+  EXPECT_NO_THROW(buffer_tensor.dim_order_ubo());
+  EXPECT_NO_THROW(buffer_tensor.numel_ubo());
+}
+
 TEST_F(VulkanComputeAPITest, virtual_transpose_test) {
   std::vector<int64_t> sizes = {7, 9, 11, 13};
   // (dim0, dim1), new_sizes, new_dim_order, new_axis_map, new_packed_dim_idx