diff --git a/backends/vulkan/runtime/api/QueryPool.cpp b/backends/vulkan/runtime/api/QueryPool.cpp index ec6e15404c7..03d4115c495 100644 --- a/backends/vulkan/runtime/api/QueryPool.cpp +++ b/backends/vulkan/runtime/api/QueryPool.cpp @@ -111,7 +111,7 @@ void QueryPool::shader_profile_begin( uint32_t query_idx = write_timestamp(cmd); ShaderDuration log_entry{ - api::utils::safe_downcast(shader_durations_.size()), + utils::safe_downcast(shader_durations_.size()), // Execution Properties dispatch_id, kernel_name, diff --git a/backends/vulkan/runtime/api/Tensor.cpp b/backends/vulkan/runtime/api/Tensor.cpp index 0b77a066f06..819eb7fc0af 100644 --- a/backends/vulkan/runtime/api/Tensor.cpp +++ b/backends/vulkan/runtime/api/Tensor.cpp @@ -23,9 +23,8 @@ std::vector calculate_strides( size_t ndim = sizes.size(); std::vector strides(ndim); - const int64_t last_dim_size = texel_strides - ? api::utils::div_up_4(sizes.at(last_dim)) - : sizes.at(last_dim); + const int64_t last_dim_size = + texel_strides ? utils::div_up_4(sizes.at(last_dim)) : sizes.at(last_dim); for (int stride_d = ndim - 1; stride_d >= 0; stride_d--) { strides.at(stride_d) = 1; @@ -51,31 +50,30 @@ std::vector calculate_padded_sizes( } // Tensor sizes will be unsqueezed up to the next multiple of 4 - const int64_t ndim_up4 = api::utils::align_up_4(ndim); + const int64_t ndim_up4 = utils::align_up_4(ndim); std::vector padded_sizes(ndim_up4); for (int64_t i = 0; i < ndim_up4; ++i) { - padded_sizes.at(i) = api::utils::val_at(i - ndim_up4, sizes); + padded_sizes.at(i) = utils::val_at(i - ndim_up4, sizes); } // Pad the packed dim to the next multiple of 4. const int64_t dim_offset = api::to_packed_dim_nchw_offset(memory_layout); - const int64_t padded_dim_size = api::utils::val_at(-dim_offset, sizes); - padded_sizes.at(ndim_up4 - dim_offset) = - api::utils::align_up_4(padded_dim_size); + const int64_t padded_dim_size = utils::val_at(-dim_offset, sizes); + padded_sizes.at(ndim_up4 - dim_offset) = utils::align_up_4(padded_dim_size); return padded_sizes; } -api::utils::uvec3 calculate_image_extents( +utils::uvec3 calculate_image_extents( const std::vector& padded_sizes, const api::GPUMemoryLayout memory_layout) { VK_CHECK_COND(padded_sizes.size() == 4); - uint32_t N = api::utils::safe_downcast(padded_sizes.at(0)); - uint32_t C = api::utils::safe_downcast(padded_sizes.at(1)); - uint32_t H = api::utils::safe_downcast(padded_sizes.at(2)); - uint32_t W = api::utils::safe_downcast(padded_sizes.at(3)); + uint32_t N = utils::safe_downcast(padded_sizes.at(0)); + uint32_t C = utils::safe_downcast(padded_sizes.at(1)); + uint32_t H = utils::safe_downcast(padded_sizes.at(2)); + uint32_t W = utils::safe_downcast(padded_sizes.at(3)); switch (memory_layout) { case api::kWidthPacked: @@ -126,10 +124,10 @@ vTensor::vTensor( dtype_, allocate_memory) { if (storage_type != api::kBuffer) { - texture_limits_.limits = api::utils::ivec3{ - api::utils::safe_downcast(storage_.image_extents_.data[0]), - api::utils::safe_downcast(storage_.image_extents_.data[1]), - api::utils::safe_downcast(storage_.image_extents_.data[2])}; + texture_limits_.limits = utils::ivec3{ + utils::safe_downcast(storage_.image_extents_.data[0]), + utils::safe_downcast(storage_.image_extents_.data[1]), + utils::safe_downcast(storage_.image_extents_.data[2])}; } if (dtype == api::kHalf) { @@ -172,8 +170,8 @@ api::VulkanBuffer& vTensor::buffer( const api::BufferBindInfo vTensor::sizes_ubo() { if (!sizes_uniform_.buffer()) { - sizes_uniform_ = api::ParamsBuffer( - storage_.context_, api::utils::make_whcn_ivec4(sizes_)); + sizes_uniform_ = + api::ParamsBuffer(storage_.context_, utils::make_whcn_ivec4(sizes_)); } return api::BufferBindInfo(sizes_uniform_.buffer()); } @@ -190,7 +188,7 @@ const api::BufferBindInfo vTensor::texel_strides_ubo() { if (!texel_strides_uniform_.buffer()) { texel_strides_uniform_ = api::ParamsBuffer( storage_.context_, - api::utils::make_whcn_ivec4( + utils::make_whcn_ivec4( calculate_strides(padded_sizes_, memory_layout_))); } return api::BufferBindInfo(texel_strides_uniform_.buffer()); @@ -243,23 +241,23 @@ void vTensor::update_size_metadata(const std::vector& new_sizes) { // Calculate the extents of the image texture that would have been required // for a tensor of the new sizes. - api::utils::uvec3 virtual_extents = + utils::uvec3 virtual_extents = calculate_image_extents(padded_sizes_, memory_layout_); // Update the texture limits to reflect the new virtual extents. - texture_limits_.limits = api::utils::ivec3{ - api::utils::safe_downcast(virtual_extents.data[0]), - api::utils::safe_downcast(virtual_extents.data[1]), - api::utils::safe_downcast(virtual_extents.data[2])}; + texture_limits_.limits = utils::ivec3{ + utils::safe_downcast(virtual_extents.data[0]), + utils::safe_downcast(virtual_extents.data[1]), + utils::safe_downcast(virtual_extents.data[2])}; if (sizes_uniform_.buffer()) { - sizes_uniform_.update(api::utils::make_whcn_ivec4(sizes_)); + sizes_uniform_.update(utils::make_whcn_ivec4(sizes_)); } if (texture_limits_uniform_.buffer()) { texture_limits_uniform_.update(texture_limits_); } if (texel_strides_uniform_.buffer()) { - texel_strides_uniform_.update(api::utils::make_whcn_ivec4( + texel_strides_uniform_.update(utils::make_whcn_ivec4( calculate_strides(padded_sizes_, memory_layout_))); } if (ntexels_uniform_.buffer()) { @@ -279,7 +277,7 @@ void vTensor::virtual_resize(const std::vector& new_sizes) { if (storage_type() != api::kBuffer) { // For texture storage check that the current texture is large enough for // the new sizes of the tensor. - api::utils::uvec3 virtual_extents = + utils::uvec3 virtual_extents = calculate_image_extents(padded_sizes_, memory_layout_); bool valid_resize = virtual_extents.data[0] <= image_extents().data[0]; @@ -302,7 +300,7 @@ void vTensor::virtual_resize(const std::vector& new_sizes) { api::VulkanImage allocate_image( api::Context* const context_ptr, - api::utils::uvec3& image_extents, + utils::uvec3& image_extents, const api::StorageType storage_type, const VkFormat image_format, const bool allocate_memory) { @@ -375,7 +373,7 @@ vTensorStorage::vTensorStorage( : context_(context), storage_type_{storage_type}, image_extents_(calculate_image_extents(padded_sizes, gpu_memory_layout)), - buffer_length_{api::utils::multiply_integers(padded_sizes)}, + buffer_length_{utils::multiply_integers(padded_sizes)}, image_(allocate_image( context_, image_extents_, @@ -474,7 +472,7 @@ void vTensorStorage::discard_and_reallocate( api::to_vkformat(dtype), image_owns_memory); - buffer_length_ = api::utils::multiply_integers(padded_sizes); + buffer_length_ = utils::multiply_integers(padded_sizes); buffer_ = allocate_buffer( context_, buffer_length_, storage_type_, dtype, buffer_owns_memory); } diff --git a/backends/vulkan/runtime/api/Tensor.h b/backends/vulkan/runtime/api/Tensor.h index 1d57e35ba3f..95e059fc71f 100644 --- a/backends/vulkan/runtime/api/Tensor.h +++ b/backends/vulkan/runtime/api/Tensor.h @@ -54,7 +54,7 @@ std::vector calculate_padded_sizes( * Given the padded sizes of a tensor and the GPU memory layout, calculate the * 3D image extents required to store the tensor data as an image texture. */ -api::utils::uvec3 calculate_image_extents( +utils::uvec3 calculate_image_extents( const std::vector& padded_sizes, const api::GPUMemoryLayout memory_layout); @@ -102,7 +102,7 @@ class vTensorStorage final { api::StorageType storage_type_; // Resource sizings - api::utils::uvec3 image_extents_{}; + utils::uvec3 image_extents_{}; int64_t buffer_length_{}; // GPU Storage @@ -141,7 +141,7 @@ class vTensor final { // Alignment is required to conform with Vulkan specification; a 3 or 4 // component vector with components of size N must have base alignment of // 4N. - alignas(16) api::utils::ivec3 limits; + alignas(16) utils::ivec3 limits; }; public: @@ -231,7 +231,7 @@ class vTensor final { return storage_.storage_type_ == api::kBuffer; } - inline const api::utils::uvec3& image_extents() const { + inline const utils::uvec3& image_extents() const { return storage_.image_extents_; } @@ -291,12 +291,12 @@ class vTensor final { */ const api::BufferBindInfo ntexels_ubo(); - inline const api::utils::ivec3 texture_limits() const { + inline const utils::ivec3 texture_limits() const { return texture_limits_.limits; } inline size_t numel() const { - return api::utils::multiply_integers(sizes()); + return utils::multiply_integers(sizes()); } inline size_t nbytes() const { @@ -307,7 +307,7 @@ class vTensor final { * Returns numel but based on padded_sizes_ instead of sizes_ */ inline size_t gpu_numel() const { - return api::utils::multiply_integers(padded_sizes_); + return utils::multiply_integers(padded_sizes_); } /* @@ -315,7 +315,7 @@ class vTensor final { * store the tensor's data. */ inline int32_t texel_numel() const { - return api::utils::safe_downcast(gpu_numel() / 4); + return utils::safe_downcast(gpu_numel() / 4); } /* diff --git a/backends/vulkan/runtime/api/Utils.h b/backends/vulkan/runtime/api/Utils.h index b327f3153e5..4bf4dc271ae 100644 --- a/backends/vulkan/runtime/api/Utils.h +++ b/backends/vulkan/runtime/api/Utils.h @@ -27,8 +27,6 @@ #endif //_MSC_VER namespace vkcompute { -namespace api { - namespace utils { // @@ -299,7 +297,7 @@ inline detail::vec divup_vec( const detail::vec& b) { detail::vec result; for (uint32_t i = 0; i < N; ++i) { - result.data[i] = api::utils::div_up(a.data[i], b.data[i]); + result.data[i] = utils::div_up(a.data[i], b.data[i]); } return result; } @@ -462,6 +460,8 @@ inline int64_t multiply_integers(Iter begin, Iter end) { } // namespace utils +namespace api { + inline VkExtent3D create_extent3d(const utils::uvec3& extents) { return VkExtent3D{extents.data[0u], extents.data[1u], extents.data[2u]}; } diff --git a/backends/vulkan/runtime/graph/ComputeGraph.cpp b/backends/vulkan/runtime/graph/ComputeGraph.cpp index f1b8f24d2a0..06c09d05ca4 100644 --- a/backends/vulkan/runtime/graph/ComputeGraph.cpp +++ b/backends/vulkan/runtime/graph/ComputeGraph.cpp @@ -106,7 +106,7 @@ api::GPUMemoryLayout ComputeGraph::suggested_memory_layout( } // For 3 dimensional tensors that only have a channels dimension of 1, still // prefer width packed. - if (api::utils::val_at(-3, sizes) == 1) { + if (utils::val_at(-3, sizes) == 1) { return api::kWidthPacked; } return api::kChannelsPacked; @@ -312,14 +312,14 @@ void ComputeGraph::update_descriptor_counts( } } -api::utils::uvec3 ComputeGraph::create_global_wg_size(const ValueRef idx) { +utils::uvec3 ComputeGraph::create_global_wg_size(const ValueRef idx) { if (is_buffer_storage(idx)) { return {uint32_t(texel_numel_of(idx)), 1u, 1u}; } return image_extents_of(idx); } -api::utils::uvec3 ComputeGraph::create_local_wg_size(const ValueRef idx) { +utils::uvec3 ComputeGraph::create_local_wg_size(const ValueRef idx) { if (config_.enable_local_wg_size_override) { return config_.local_wg_size_override; } @@ -328,8 +328,8 @@ api::utils::uvec3 ComputeGraph::create_local_wg_size(const ValueRef idx) { return {64u, 1u, 1u}; } - const api::utils::uvec3 image_extents = image_extents_of(idx); - api::utils::uvec3 local_group_size = {4, 4, 4}; + const utils::uvec3 image_extents = image_extents_of(idx); + utils::uvec3 local_group_size = {4, 4, 4}; if (image_extents.data[2u] == 1) { if (image_extents.data[1u] == 1) { diff --git a/backends/vulkan/runtime/graph/ComputeGraph.h b/backends/vulkan/runtime/graph/ComputeGraph.h index 1c1a2b9f986..d49d0adc5d6 100644 --- a/backends/vulkan/runtime/graph/ComputeGraph.h +++ b/backends/vulkan/runtime/graph/ComputeGraph.h @@ -186,7 +186,7 @@ class ComputeGraph final { api::ScalarType dtype_of(const ValueRef idx) const; - inline api::utils::uvec3 image_extents_of(const ValueRef idx) const { + inline utils::uvec3 image_extents_of(const ValueRef idx) const { return values_.at(idx).toConstTensor().image_extents(); } @@ -454,7 +454,7 @@ class ComputeGraph final { * All other components will be set to 1 (i.e. {ntexels, 1, 1} will be * returned). */ - api::utils::uvec3 create_global_wg_size(const ValueRef idx); + utils::uvec3 create_global_wg_size(const ValueRef idx); /* * Suggest a local workgroup size for a given `vTensor` value, assuming that @@ -466,7 +466,7 @@ class ComputeGraph final { * Currently, the local workgroup size is hard-coded to contain a total of 64 * shader invocations. In the future, this value can be configured. */ - api::utils::uvec3 create_local_wg_size(const ValueRef idx); + utils::uvec3 create_local_wg_size(const ValueRef idx); // // Input/Output diff --git a/backends/vulkan/runtime/graph/GraphConfig.cpp b/backends/vulkan/runtime/graph/GraphConfig.cpp index 9656721c0c0..242817f56e4 100644 --- a/backends/vulkan/runtime/graph/GraphConfig.cpp +++ b/backends/vulkan/runtime/graph/GraphConfig.cpp @@ -77,7 +77,7 @@ void GraphConfig::set_memory_layout_override( } void GraphConfig::set_local_wg_size_override( - const api::utils::uvec3& local_wg_size) { + const utils::uvec3& local_wg_size) { enable_local_wg_size_override = true; local_wg_size_override = local_wg_size; } diff --git a/backends/vulkan/runtime/graph/GraphConfig.h b/backends/vulkan/runtime/graph/GraphConfig.h index efd8b21ad9a..7c732329f26 100644 --- a/backends/vulkan/runtime/graph/GraphConfig.h +++ b/backends/vulkan/runtime/graph/GraphConfig.h @@ -31,14 +31,14 @@ struct GraphConfig final { bool enable_querypool; bool enable_local_wg_size_override; - api::utils::uvec3 local_wg_size_override; + utils::uvec3 local_wg_size_override; // Generate a default graph config with pre-configured settings explicit GraphConfig(); void set_storage_type_override(api::StorageType storage_type); void set_memory_layout_override(api::GPUMemoryLayout memory_layout); - void set_local_wg_size_override(const api::utils::uvec3& local_wg_size); + void set_local_wg_size_override(const utils::uvec3& local_wg_size); }; } // namespace vkcompute diff --git a/backends/vulkan/runtime/graph/Logging.h b/backends/vulkan/runtime/graph/Logging.h index 447d52d16bd..76aaf885f66 100644 --- a/backends/vulkan/runtime/graph/Logging.h +++ b/backends/vulkan/runtime/graph/Logging.h @@ -26,20 +26,20 @@ inline std::ostream& operator<<(std::ostream& os, const std::vector& vec) { return os; // Return the ostream to allow chaining } -inline std::ostream& operator<<(std::ostream& os, const api::utils::uvec3& v) { - return api::utils::operator<<(os, v); +inline std::ostream& operator<<(std::ostream& os, const utils::uvec3& v) { + return utils::operator<<(os, v); } -inline std::ostream& operator<<(std::ostream& os, const api::utils::uvec4& v) { - return api::utils::operator<<(os, v); +inline std::ostream& operator<<(std::ostream& os, const utils::uvec4& v) { + return utils::operator<<(os, v); } -inline std::ostream& operator<<(std::ostream& os, const api::utils::ivec3& v) { - return api::utils::operator<<(os, v); +inline std::ostream& operator<<(std::ostream& os, const utils::ivec3& v) { + return utils::operator<<(os, v); } -inline std::ostream& operator<<(std::ostream& os, const api::utils::ivec4& v) { - return api::utils::operator<<(os, v); +inline std::ostream& operator<<(std::ostream& os, const utils::ivec4& v) { + return utils::operator<<(os, v); } template diff --git a/backends/vulkan/runtime/graph/ops/ExecuteNode.cpp b/backends/vulkan/runtime/graph/ops/ExecuteNode.cpp index e3d03ce6a1c..18e6f7ea3c8 100644 --- a/backends/vulkan/runtime/graph/ops/ExecuteNode.cpp +++ b/backends/vulkan/runtime/graph/ops/ExecuteNode.cpp @@ -17,8 +17,8 @@ namespace vkcompute { ExecuteNode::ExecuteNode( ComputeGraph& graph, const api::ShaderInfo& shader, - const api::utils::uvec3& global_workgroup_size, - const api::utils::uvec3& local_workgroup_size, + const utils::uvec3& global_workgroup_size, + const utils::uvec3& local_workgroup_size, const std::vector& args, const api::ParamsBindList& params, const api::SpecVarList& spec_vars, diff --git a/backends/vulkan/runtime/graph/ops/ExecuteNode.h b/backends/vulkan/runtime/graph/ops/ExecuteNode.h index 6e5824a7be5..763d37cf81a 100644 --- a/backends/vulkan/runtime/graph/ops/ExecuteNode.h +++ b/backends/vulkan/runtime/graph/ops/ExecuteNode.h @@ -51,8 +51,8 @@ class ExecuteNode final { ExecuteNode( ComputeGraph& graph, const api::ShaderInfo& shader, - const api::utils::uvec3& global_workgroup_size, - const api::utils::uvec3& local_workgroup_size, + const utils::uvec3& global_workgroup_size, + const utils::uvec3& local_workgroup_size, const std::vector& args, const api::ParamsBindList& params, const api::SpecVarList& spec_vars = {}, @@ -76,8 +76,8 @@ class ExecuteNode final { protected: uint32_t node_id_; const api::ShaderInfo shader_; - const api::utils::uvec3 global_workgroup_size_; - const api::utils::uvec3 local_workgroup_size_; + const utils::uvec3 global_workgroup_size_; + const utils::uvec3 local_workgroup_size_; const std::vector args_; const api::ParamsBindList params_; const api::SpecVarList spec_vars_; diff --git a/backends/vulkan/runtime/graph/ops/PrepackNode.cpp b/backends/vulkan/runtime/graph/ops/PrepackNode.cpp index 3671b900abc..98bbc63b615 100644 --- a/backends/vulkan/runtime/graph/ops/PrepackNode.cpp +++ b/backends/vulkan/runtime/graph/ops/PrepackNode.cpp @@ -27,8 +27,8 @@ api::ShaderInfo get_noop_shader(ComputeGraph& graph, const ValueRef packed) { PrepackNode::PrepackNode( ComputeGraph& graph, const api::ShaderInfo& shader, - const api::utils::uvec3& global_workgroup_size, - const api::utils::uvec3& local_workgroup_size, + const utils::uvec3& global_workgroup_size, + const utils::uvec3& local_workgroup_size, const ValueRef tref, const ValueRef packed, const api::ParamsBindList& params, @@ -51,7 +51,7 @@ api::StorageBuffer PrepackNode::create_staging_buffer(ComputeGraph* graph) { // If no TensorRef is provided, create a staging buffer of zeros according to // the vTensor metadata. if (graph->val_is_none(tref_)) { - size_t numel = api::utils::multiply_integers(packed->sizes()); + size_t numel = utils::multiply_integers(packed->sizes()); api::StorageBuffer staging(graph->context(), packed->dtype(), numel); size_t nbytes = numel * api::element_size(packed->dtype()); set_staging_zeros(staging, nbytes); @@ -59,7 +59,7 @@ api::StorageBuffer PrepackNode::create_staging_buffer(ComputeGraph* graph) { } TensorRefPtr tref = graph->get_tref(tref_); - size_t numel = api::utils::multiply_integers(tref->sizes); + size_t numel = utils::multiply_integers(tref->sizes); api::StorageBuffer staging(graph->context(), tref->dtype, numel); size_t nbytes = numel * api::element_size(tref->dtype); copy_ptr_to_staging(tref->data, staging, nbytes); diff --git a/backends/vulkan/runtime/graph/ops/PrepackNode.h b/backends/vulkan/runtime/graph/ops/PrepackNode.h index 730877f3a31..0b0c32fa876 100644 --- a/backends/vulkan/runtime/graph/ops/PrepackNode.h +++ b/backends/vulkan/runtime/graph/ops/PrepackNode.h @@ -29,8 +29,8 @@ class PrepackNode final { PrepackNode( ComputeGraph& graph, const api::ShaderInfo& shader, - const api::utils::uvec3& global_workgroup_size, - const api::utils::uvec3& local_workgroup_size, + const utils::uvec3& global_workgroup_size, + const utils::uvec3& local_workgroup_size, const ValueRef tref, const ValueRef packed, const api::ParamsBindList& params, @@ -48,8 +48,8 @@ class PrepackNode final { uint32_t node_id_; const api::ShaderInfo shader_; api::ShaderInfo noop_shader_; - const api::utils::uvec3 global_workgroup_size_; - const api::utils::uvec3 local_workgroup_size_; + const utils::uvec3 global_workgroup_size_; + const utils::uvec3 local_workgroup_size_; const ValueRef tref_; const ValueRef packed_; const api::ParamsBindList params_; diff --git a/backends/vulkan/runtime/graph/ops/impl/Arange.cpp b/backends/vulkan/runtime/graph/ops/impl/Arange.cpp index 8e7dcea2a5f..84ed0adb3ac 100644 --- a/backends/vulkan/runtime/graph/ops/impl/Arange.cpp +++ b/backends/vulkan/runtime/graph/ops/impl/Arange.cpp @@ -33,7 +33,7 @@ void resize_arange_node( } std::vector out_sizes = { - api::utils::div_up(end_val - start_val, step_val)}; + utils::div_up(end_val - start_val, step_val)}; out->virtual_resize(out_sizes); } diff --git a/backends/vulkan/runtime/graph/ops/impl/BatchNorm.cpp b/backends/vulkan/runtime/graph/ops/impl/BatchNorm.cpp index 4135817c0f3..67f43882302 100644 --- a/backends/vulkan/runtime/graph/ops/impl/BatchNorm.cpp +++ b/backends/vulkan/runtime/graph/ops/impl/BatchNorm.cpp @@ -78,7 +78,7 @@ void add_native_batch_norm_node( add_dtype_suffix(kernel_name, *t_out); int32_t num_texel_per_batch = - api::utils::div_up_4((dim_at(t_in->sizes()))); + utils::div_up_4((dim_at(t_in->sizes()))); graph.execute_nodes().emplace_back(new ExecuteNode( graph, diff --git a/backends/vulkan/runtime/graph/ops/impl/BinaryOp.cpp b/backends/vulkan/runtime/graph/ops/impl/BinaryOp.cpp index 81433aba2e0..62b72dc7c56 100644 --- a/backends/vulkan/runtime/graph/ops/impl/BinaryOp.cpp +++ b/backends/vulkan/runtime/graph/ops/impl/BinaryOp.cpp @@ -68,8 +68,7 @@ void add_binary_op_node( alpha_val = graph.extract_scalar(alpha); } - const api::utils::ivec2 broadcast_params = - create_broadcast_params(*t_in1, *t_in2); + const utils::ivec2 broadcast_params = create_broadcast_params(*t_in1, *t_in2); std::string kernel_name("binary_"); kernel_name.reserve(kShaderNameReserve); diff --git a/backends/vulkan/runtime/graph/ops/impl/Cat.cpp b/backends/vulkan/runtime/graph/ops/impl/Cat.cpp index 6266f06e3c0..a2697af27f8 100644 --- a/backends/vulkan/runtime/graph/ops/impl/Cat.cpp +++ b/backends/vulkan/runtime/graph/ops/impl/Cat.cpp @@ -35,35 +35,35 @@ void add_cat_default_node( // TODO: Find ways to factor out the similar code for width, height, and batch if (dim_index == kWidth4D) { - api::utils::ivec3 src_offset = api::utils::make_ivec3({0, 0, 0}, false); - api::utils::ivec3 dst_offset = api::utils::make_ivec3({0, 0, 0}, false); + utils::ivec3 src_offset = utils::make_ivec3({0, 0, 0}, false); + utils::ivec3 dst_offset = utils::make_ivec3({0, 0, 0}, false); for (ValueRef input_ref : *input_list) { vTensorPtr t_in = graph.get_tensor(input_ref); - api::utils::ivec3 range = t_in->texture_limits(); + utils::ivec3 range = t_in->texture_limits(); add_copy_offset_node( graph, input_ref, range, src_offset, dst_offset, out); dst_offset.data[0] += range.data[0]; } } else if (dim_index == kHeight4D) { - api::utils::ivec3 src_offset = api::utils::make_ivec3({0, 0, 0}, false); - api::utils::ivec3 dst_offset = api::utils::make_ivec3({0, 0, 0}, false); + utils::ivec3 src_offset = utils::make_ivec3({0, 0, 0}, false); + utils::ivec3 dst_offset = utils::make_ivec3({0, 0, 0}, false); for (ValueRef input_ref : *input_list) { vTensorPtr t_in = graph.get_tensor(input_ref); - api::utils::ivec3 range = t_in->texture_limits(); + utils::ivec3 range = t_in->texture_limits(); add_copy_offset_node( graph, input_ref, range, src_offset, dst_offset, out); dst_offset.data[1] += range.data[1]; } } else if (dim_index == kBatch4D) { - api::utils::ivec3 src_offset = api::utils::make_ivec3({0, 0, 0}, false); - api::utils::ivec3 dst_offset = api::utils::make_ivec3({0, 0, 0}, false); + utils::ivec3 src_offset = utils::make_ivec3({0, 0, 0}, false); + utils::ivec3 dst_offset = utils::make_ivec3({0, 0, 0}, false); for (ValueRef input_ref : *input_list) { vTensorPtr t_in = graph.get_tensor(input_ref); - api::utils::ivec3 range = t_in->texture_limits(); + utils::ivec3 range = t_in->texture_limits(); add_copy_offset_node( graph, input_ref, range, src_offset, dst_offset, out); dst_offset.data[2] += range.data[2]; diff --git a/backends/vulkan/runtime/graph/ops/impl/Convolution.cpp b/backends/vulkan/runtime/graph/ops/impl/Convolution.cpp index e231347b284..857e9c695c6 100644 --- a/backends/vulkan/runtime/graph/ops/impl/Convolution.cpp +++ b/backends/vulkan/runtime/graph/ops/impl/Convolution.cpp @@ -169,12 +169,11 @@ api::ShaderInfo get_conv2d_shader( std::vector get_final_sizes( const std::vector& original_sizes, const Conv2dMethod method) { - int64_t batch_padded = - api::utils::align_up_4(api::utils::val_at(-4, original_sizes)); + int64_t batch_padded = utils::align_up_4(utils::val_at(-4, original_sizes)); int64_t channels_padded = - api::utils::align_up_4(api::utils::val_at(-3, original_sizes)); - int64_t height = api::utils::val_at(-2, original_sizes); - int64_t width = api::utils::val_at(-1, original_sizes); + utils::align_up_4(utils::val_at(-3, original_sizes)); + int64_t height = utils::val_at(-2, original_sizes); + int64_t width = utils::val_at(-1, original_sizes); switch (method) { case Conv2dMethod::Depthwise: @@ -212,7 +211,7 @@ ValueRef prepack_weights( v, {t->sizes_ubo(), graph.create_params_buffer( - api::utils::make_ivec4(original_sizes, /*reverse = */ true))}, + utils::make_ivec4(original_sizes, /*reverse = */ true))}, // Specialization constants {SV(t->packed_dim_whcn_idx())})); @@ -225,7 +224,7 @@ void check_conv_args(const vTensor& in, const vTensor& out) { } struct Conv2dParams final { - api::utils::ivec2 overlay_region; + utils::ivec2 overlay_region; int in_group_size; }; @@ -239,16 +238,15 @@ Conv2dParams create_conv2d_params( const ValueRef weight, const Kernel2dParams& p, const bool transposed) { - const auto& overlay_region = api::utils::make_ivec2({ + const auto& overlay_region = utils::make_ivec2({ p.kernel_size.data[0] + (p.kernel_size.data[0] - 1) * (p.dilation.data[0] - 1), p.kernel_size.data[1] + (p.kernel_size.data[1] - 1) * (p.dilation.data[1] - 1), }); const auto weight_sizes = graph.sizes_of(weight); - const int32_t in_group_size = - api::utils::safe_downcast(api::utils::align_up_4( - transposed ? weight_sizes.at(0) : weight_sizes.at(1))); + const int32_t in_group_size = utils::safe_downcast( + utils::align_up_4(transposed ? weight_sizes.at(0) : weight_sizes.at(1))); return {overlay_region, in_group_size}; } @@ -289,15 +287,15 @@ Conv2dMethod get_conv2d_method( return Conv2dMethod::SlidingWindow; } -api::utils::uvec3 create_conv2d_global_wg_size( +utils::uvec3 create_conv2d_global_wg_size( ComputeGraph& graph, const Conv2dMethod method, const ValueRef out) { if (method == Conv2dMethod::Pointwise) { - const api::utils::uvec3 image_extents = graph.image_extents_of(out); + const utils::uvec3 image_extents = graph.image_extents_of(out); return { - api::utils::div_up(image_extents.data[0u], 2u), - api::utils::div_up(image_extents.data[1u], 2u), + utils::div_up(image_extents.data[0u], 2u), + utils::div_up(image_extents.data[1u], 2u), image_extents.data[2u]}; } else { return graph.create_global_wg_size(out); @@ -445,8 +443,8 @@ void add_conv1d_node( int32_t in_group_size = static_cast(in_channels / groups_val); int32_t out_group_size = static_cast(out_channels / groups_val); - api::utils::uvec3 global_size = {1, static_cast(out_channels), 1}; - api::utils::uvec3 local_size = {1, 1, 1}; + utils::uvec3 global_size = {1, static_cast(out_channels), 1}; + utils::uvec3 local_size = {1, 1, 1}; Kernel1dParams kernel_params = { kernel_size, diff --git a/backends/vulkan/runtime/graph/ops/impl/Copy.cpp b/backends/vulkan/runtime/graph/ops/impl/Copy.cpp index 41691757481..3bd8c2c6666 100644 --- a/backends/vulkan/runtime/graph/ops/impl/Copy.cpp +++ b/backends/vulkan/runtime/graph/ops/impl/Copy.cpp @@ -15,8 +15,8 @@ namespace vkcompute { -using api::utils::ivec3; -using api::utils::uvec3; +using utils::ivec3; +using utils::uvec3; void add_copy_offset_node( ComputeGraph& graph, @@ -132,17 +132,17 @@ void add_copy_channel_offset_node( // the actual coordinate. ivec3 dst_offset{ - 0, 0, dst_first_z + batch_idx * api::utils::div_up_4(out_channels)}; + 0, 0, dst_first_z + batch_idx * utils::div_up_4(out_channels)}; uvec3 global_size{ - api::utils::safe_downcast(dim_at(in_sizes)), - api::utils::safe_downcast(dim_at(in_sizes)), - api::utils::safe_downcast(dst_last_z - dst_first_z + 1)}; + utils::safe_downcast(dim_at(in_sizes)), + utils::safe_downcast(dim_at(in_sizes)), + utils::safe_downcast(dst_last_z - dst_first_z + 1)}; uvec3 local_size = adaptive_work_group_size(global_size); const struct Block final { - api::utils::ivec4 out_sizes; - api::utils::ivec4 in_sizes; + utils::ivec4 out_sizes; + utils::ivec4 in_sizes; int32_t channel_range; int32_t src_channel_offset; int32_t dst_channel_offset; @@ -153,13 +153,13 @@ void add_copy_channel_offset_node( int32_t unused2; } channel_offset_params{ - api::utils::make_whcn_ivec4(out_sizes), - api::utils::make_whcn_ivec4(in_sizes), + utils::make_whcn_ivec4(out_sizes), + utils::make_whcn_ivec4(in_sizes), channel_range, src_channel_offset, dst_channel_offset, 0, - api::utils::make_ivec3(global_size), + utils::make_ivec3(global_size), 0, dst_offset, 0, @@ -192,11 +192,9 @@ void add_copy_offset_node( ValueRef src_offset_ref, ValueRef dst_offset_ref, ValueRef out) { - ivec3 range = api::utils::make_ivec3(*graph.get_int_list(range_ref)); - ivec3 src_offset = - api::utils::make_ivec3(*graph.get_int_list(src_offset_ref)); - ivec3 dst_offset = - api::utils::make_ivec3(*graph.get_int_list(dst_offset_ref)); + ivec3 range = utils::make_ivec3(*graph.get_int_list(range_ref)); + ivec3 src_offset = utils::make_ivec3(*graph.get_int_list(src_offset_ref)); + ivec3 dst_offset = utils::make_ivec3(*graph.get_int_list(dst_offset_ref)); add_copy_offset_node(graph, in, range, src_offset, dst_offset, out); } diff --git a/backends/vulkan/runtime/graph/ops/impl/Copy.h b/backends/vulkan/runtime/graph/ops/impl/Copy.h index 60a58b2fa84..60bb20eedf0 100644 --- a/backends/vulkan/runtime/graph/ops/impl/Copy.h +++ b/backends/vulkan/runtime/graph/ops/impl/Copy.h @@ -24,9 +24,9 @@ namespace vkcompute { void add_copy_offset_node( ComputeGraph& graph, const ValueRef in, - const api::utils::ivec3& range, - const api::utils::ivec3& src_offset, - const api::utils::ivec3& dst_offset, + const utils::ivec3& range, + const utils::ivec3& src_offset, + const utils::ivec3& dst_offset, const ValueRef out); // add_copy_channel_offset_node behaves similar to add_copy_node, except that it diff --git a/backends/vulkan/runtime/graph/ops/impl/IndexSelect.cpp b/backends/vulkan/runtime/graph/ops/impl/IndexSelect.cpp index 23f41d80898..0d69f51cf31 100644 --- a/backends/vulkan/runtime/graph/ops/impl/IndexSelect.cpp +++ b/backends/vulkan/runtime/graph/ops/impl/IndexSelect.cpp @@ -65,7 +65,7 @@ IndexSelectParams create_index_select_params( return {1, 1}; } else if (dim_idx == kBatch4D) { int64_t n_channels = dim_at(in.sizes(), kChannel4D); - int64_t stride = api::utils::div_up_4(n_channels); + int64_t stride = utils::div_up_4(n_channels); return {2, static_cast(stride)}; } else { VK_THROW("Unexpected dim_idx!"); diff --git a/backends/vulkan/runtime/graph/ops/impl/Linear.cpp b/backends/vulkan/runtime/graph/ops/impl/Linear.cpp index a1ea7ef2123..8e38b4b3420 100644 --- a/backends/vulkan/runtime/graph/ops/impl/Linear.cpp +++ b/backends/vulkan/runtime/graph/ops/impl/Linear.cpp @@ -38,18 +38,15 @@ void check_addmm_args( VK_CHECK_COND(graph.memory_layout_of(mat1) == graph.memory_layout_of(out)); - VK_CHECK_COND( - api::utils::val_at(-1, mat1_sizes) == api::utils::val_at(-2, mat2_sizes)); + VK_CHECK_COND(utils::val_at(-1, mat1_sizes) == utils::val_at(-2, mat2_sizes)); - if (api::utils::val_at(-1, self_sizes) != 1) { + if (utils::val_at(-1, self_sizes) != 1) { VK_CHECK_COND( - api::utils::val_at(-1, self_sizes) == - api::utils::val_at(-1, mat2_sizes)); + utils::val_at(-1, self_sizes) == utils::val_at(-1, mat2_sizes)); } - if (api::utils::val_at(-2, self_sizes) != 1) { + if (utils::val_at(-2, self_sizes) != 1) { VK_CHECK_COND( - api::utils::val_at(-2, self_sizes) == - api::utils::val_at(-2, mat1_sizes)); + utils::val_at(-2, self_sizes) == utils::val_at(-2, mat1_sizes)); } } @@ -64,10 +61,9 @@ void resize_addmm_node( bool mat2_is_transposed = graph->get_bool(extra_args[0]); - const int out_cols = api::utils::val_at(-2, mat1->sizes()); - const int out_rows = mat2_is_transposed - ? api::utils::val_at(-2, mat2->sizes()) - : api::utils::val_at(-1, mat2->sizes()); + const int out_cols = utils::val_at(-2, mat1->sizes()); + const int out_rows = mat2_is_transposed ? utils::val_at(-2, mat2->sizes()) + : utils::val_at(-1, mat2->sizes()); std::vector new_out_sizes(3); if (mat1->sizes().size() == 2) { @@ -176,13 +172,13 @@ void add_addmm_optimized_node( add_dtype_suffix(kernel_name, graph.dtype_of(out)); - api::utils::uvec3 global_size; + utils::uvec3 global_size; if (mat1_sizes.at(mat1_dims - 2) < 8) { - global_size = api::utils::divup_vec(graph.image_extents_of(out), {4, 2, 1}); + global_size = utils::divup_vec(graph.image_extents_of(out), {4, 2, 1}); } else { - global_size = api::utils::divup_vec(graph.image_extents_of(out), {4, 4, 1}); + global_size = utils::divup_vec(graph.image_extents_of(out), {4, 4, 1}); } - api::utils::uvec3 local_size = adaptive_work_group_size(global_size); + utils::uvec3 local_size = adaptive_work_group_size(global_size); graph.execute_nodes().emplace_back(new ExecuteNode( graph, diff --git a/backends/vulkan/runtime/graph/ops/impl/MatMul.cpp b/backends/vulkan/runtime/graph/ops/impl/MatMul.cpp index d686b3991ee..be6929c2b65 100644 --- a/backends/vulkan/runtime/graph/ops/impl/MatMul.cpp +++ b/backends/vulkan/runtime/graph/ops/impl/MatMul.cpp @@ -31,8 +31,7 @@ void check_matmul_args( VK_CHECK_COND(graph.memory_layout_of(mat1) == graph.memory_layout_of(out)); - VK_CHECK_COND( - api::utils::val_at(-1, mat1_sizes) == api::utils::val_at(-2, mat2_sizes)); + VK_CHECK_COND(utils::val_at(-1, mat1_sizes) == utils::val_at(-2, mat2_sizes)); } void resize_matmul_node( @@ -45,10 +44,9 @@ void resize_matmul_node( bool mat2_is_transposed = graph->get_bool(extra_args[0]); - const int out_cols = api::utils::val_at(-2, mat1->sizes()); - const int out_rows = mat2_is_transposed - ? api::utils::val_at(-2, mat2->sizes()) - : api::utils::val_at(-1, mat2->sizes()); + const int out_cols = utils::val_at(-2, mat1->sizes()); + const int out_rows = mat2_is_transposed ? utils::val_at(-2, mat2->sizes()) + : utils::val_at(-1, mat2->sizes()); std::vector new_out_sizes(3); if (mat1->sizes().size() == 2) { @@ -141,13 +139,13 @@ void add_matmul_optimized_node( add_dtype_suffix(kernel_name, graph.dtype_of(out)); - api::utils::uvec3 global_size; + utils::uvec3 global_size; if (mat1_sizes.at(mat1_dims - 2) < 8) { - global_size = api::utils::divup_vec(graph.image_extents_of(out), {4, 2, 1}); + global_size = utils::divup_vec(graph.image_extents_of(out), {4, 2, 1}); } else { - global_size = api::utils::divup_vec(graph.image_extents_of(out), {4, 4, 1}); + global_size = utils::divup_vec(graph.image_extents_of(out), {4, 4, 1}); } - api::utils::uvec3 local_size = adaptive_work_group_size(global_size); + utils::uvec3 local_size = adaptive_work_group_size(global_size); graph.execute_nodes().emplace_back(new ExecuteNode( graph, diff --git a/backends/vulkan/runtime/graph/ops/impl/NativeLayerNorm.cpp b/backends/vulkan/runtime/graph/ops/impl/NativeLayerNorm.cpp index 2cf80896209..9d640c08b36 100644 --- a/backends/vulkan/runtime/graph/ops/impl/NativeLayerNorm.cpp +++ b/backends/vulkan/runtime/graph/ops/impl/NativeLayerNorm.cpp @@ -91,8 +91,8 @@ void add_native_layer_norm_node( std::vector in_sizes = t_input->sizes(); - api::utils::uvec3 global_size = t_mean->image_extents(); - api::utils::uvec3 local_size = adaptive_work_group_size(global_size); + utils::uvec3 global_size = t_mean->image_extents(); + utils::uvec3 local_size = adaptive_work_group_size(global_size); std::string kernel_name("native_layer_norm"); kernel_name.reserve(kShaderNameReserve); diff --git a/backends/vulkan/runtime/graph/ops/impl/Permute.cpp b/backends/vulkan/runtime/graph/ops/impl/Permute.cpp index 404639664a6..7b038a02cc6 100644 --- a/backends/vulkan/runtime/graph/ops/impl/Permute.cpp +++ b/backends/vulkan/runtime/graph/ops/impl/Permute.cpp @@ -17,10 +17,10 @@ namespace vkcompute { -using api::utils::ivec2; -using api::utils::ivec3; -using api::utils::ivec4; -using api::utils::uvec4; +using utils::ivec2; +using utils::ivec3; +using utils::ivec4; +using utils::uvec4; namespace { @@ -74,8 +74,8 @@ void add_permute_node( int32_t out_channels = dim_at(t_out->sizes()); int32_t in_channels = dim_at(t_in->sizes()); - int32_t out_c_aligned = api::utils::align_up_4(out_channels); - int32_t in_c_aligned = api::utils::align_up_4(in_channels); + int32_t out_c_aligned = utils::align_up_4(out_channels); + int32_t in_c_aligned = utils::align_up_4(in_channels); const struct Block final { ivec4 out_ndims; diff --git a/backends/vulkan/runtime/graph/ops/impl/Pool.cpp b/backends/vulkan/runtime/graph/ops/impl/Pool.cpp index e5da01564f1..80898d92930 100644 --- a/backends/vulkan/runtime/graph/ops/impl/Pool.cpp +++ b/backends/vulkan/runtime/graph/ops/impl/Pool.cpp @@ -79,8 +79,8 @@ void add_max_pool2d_node( check_pool2d_args(*t_in, *t_out); - api::utils::uvec3 global_size = t_out->image_extents(); - api::utils::uvec3 local_size = adaptive_work_group_size(global_size); + utils::uvec3 global_size = t_out->image_extents(); + utils::uvec3 local_size = adaptive_work_group_size(global_size); std::string kernel_name("max_pool2d"); add_dtype_suffix(kernel_name, *t_out); @@ -155,8 +155,8 @@ void add_avg_pool2d_node( check_pool2d_args(*t_in, *t_out); - api::utils::uvec3 global_size = t_out->image_extents(); - api::utils::uvec3 local_size = adaptive_work_group_size(global_size); + utils::uvec3 global_size = t_out->image_extents(); + utils::uvec3 local_size = adaptive_work_group_size(global_size); std::string kernel_name("avg_pool2d"); add_dtype_suffix(kernel_name, *t_out); diff --git a/backends/vulkan/runtime/graph/ops/impl/QuantizedLinear.cpp b/backends/vulkan/runtime/graph/ops/impl/QuantizedLinear.cpp index a7437b1f1c2..fe15c57b97c 100644 --- a/backends/vulkan/runtime/graph/ops/impl/QuantizedLinear.cpp +++ b/backends/vulkan/runtime/graph/ops/impl/QuantizedLinear.cpp @@ -33,11 +33,9 @@ void check_qlinear_args( VK_CHECK_COND(graph.memory_layout_of(mat1) == graph.memory_layout_of(out)); VK_CHECK_COND( - api::utils::val_at(-1, mat1_sizes) == - api::utils::val_at(-1, qmat2_sizes)); + utils::val_at(-1, mat1_sizes) == utils::val_at(-1, qmat2_sizes)); VK_CHECK_COND( - api::utils::val_at(-1, scales_sizes) == - api::utils::val_at(-2, qmat2_sizes)); + utils::val_at(-1, scales_sizes) == utils::val_at(-2, qmat2_sizes)); } void resize_qlinear_node( @@ -50,8 +48,8 @@ void resize_qlinear_node( vTensorPtr mat1 = graph->get_tensor(args[1].refs[0]); vTensorPtr qmat2 = graph->get_tensor(args[1].refs[1]); - const int out_cols = api::utils::val_at(-2, mat1->sizes()); - const int out_rows = api::utils::val_at(-2, qmat2->sizes()); + const int out_cols = utils::val_at(-2, mat1->sizes()); + const int out_rows = utils::val_at(-2, qmat2->sizes()); std::vector new_out_sizes(3); if (mat1->sizes().size() == 2) { diff --git a/backends/vulkan/runtime/graph/ops/impl/Repeat.cpp b/backends/vulkan/runtime/graph/ops/impl/Repeat.cpp index 55daecfd1ee..c76e1a4b778 100644 --- a/backends/vulkan/runtime/graph/ops/impl/Repeat.cpp +++ b/backends/vulkan/runtime/graph/ops/impl/Repeat.cpp @@ -59,7 +59,7 @@ void add_repeat_channel_node( ValueRef in, int64_t repeat_channel, ValueRef out, - api::utils::ivec3& running_range) { + utils::ivec3& running_range) { vTensorPtr t_in = graph.get_tensor(in); vTensorPtr t_out = graph.get_tensor(out); @@ -69,30 +69,28 @@ void add_repeat_channel_node( const std::vector& in_sizes = t_in->sizes(); - int32_t in_width = - api::utils::safe_downcast(dim_at(in_sizes)); + int32_t in_width = utils::safe_downcast(dim_at(in_sizes)); int32_t in_height = - api::utils::safe_downcast(dim_at(in_sizes)); + utils::safe_downcast(dim_at(in_sizes)); int32_t in_channel = - api::utils::safe_downcast(dim_at(in_sizes)); - int32_t in_batch = - api::utils::safe_downcast(dim_at(in_sizes)); + utils::safe_downcast(dim_at(in_sizes)); + int32_t in_batch = utils::safe_downcast(dim_at(in_sizes)); int32_t out_channel = repeat_channel * in_channel; - api::utils::ivec4 out_whcn_sizes{in_width, in_height, out_channel, in_batch}; + utils::ivec4 out_whcn_sizes{in_width, in_height, out_channel, in_batch}; - api::utils::ivec4 in_whcn_sizes{in_width, in_height, in_channel, in_batch}; + utils::ivec4 in_whcn_sizes{in_width, in_height, in_channel, in_batch}; // Channel packed global work ids running_range.data[2] = - out_whcn_sizes.data[3] * api::utils::div_up_4(out_whcn_sizes.data[2]); - api::utils::uvec3 global_size = api::utils::make_uvec3(running_range); - api::utils::uvec3 local_size = adaptive_work_group_size(global_size); + out_whcn_sizes.data[3] * utils::div_up_4(out_whcn_sizes.data[2]); + utils::uvec3 global_size = utils::make_uvec3(running_range); + utils::uvec3 local_size = adaptive_work_group_size(global_size); const struct Block final { - api::utils::ivec4 out_sizes; - api::utils::ivec4 in_size; + utils::ivec4 out_sizes; + utils::ivec4 in_size; } repeat_channel_args{ out_whcn_sizes, in_whcn_sizes, @@ -132,7 +130,7 @@ void add_repeat_node( // After expanding a dimension, we will update the "running_range" since we // will need to copy the "expanded" area. - api::utils::ivec3 running_range = t_in->texture_limits(); + utils::ivec3 running_range = t_in->texture_limits(); const std::vector& in_sizes = t_in->sizes(); @@ -145,8 +143,8 @@ void add_repeat_node( if (int64_t channel_repeat = dim_at(repeats); channel_repeat == 1) { // If no repeat, short-cut to a direct copy - api::utils::ivec3 src_offset{0, 0, 0}; - api::utils::ivec3 dst_offset{0, 0, 0}; + utils::ivec3 src_offset{0, 0, 0}; + utils::ivec3 dst_offset{0, 0, 0}; add_copy_offset_node(graph, in, running_range, src_offset, dst_offset, out); @@ -157,10 +155,10 @@ void add_repeat_node( // TODO: refactor width, height, and batch into a common helper function. // Width if (int64_t width_repeat = dim_at(repeats); width_repeat > 1) { - api::utils::ivec3 src_offset{0, 0, 0}; + utils::ivec3 src_offset{0, 0, 0}; for (int i = 1; i < width_repeat; ++i) { - api::utils::ivec3 dst_offset{i * dim_at(in_sizes), 0, 0}; + utils::ivec3 dst_offset{i * dim_at(in_sizes), 0, 0}; add_copy_offset_node( graph, out, running_range, src_offset, dst_offset, out); @@ -171,10 +169,10 @@ void add_repeat_node( // Height if (int64_t height_repeat = dim_at(repeats); height_repeat > 1) { - api::utils::ivec3 src_offset{0, 0, 0}; + utils::ivec3 src_offset{0, 0, 0}; for (int i = 1; i < height_repeat; ++i) { - api::utils::ivec3 dst_offset = {0, i * dim_at(in_sizes), 0}; + utils::ivec3 dst_offset = {0, i * dim_at(in_sizes), 0}; add_copy_offset_node( graph, out, running_range, src_offset, dst_offset, out); @@ -185,10 +183,10 @@ void add_repeat_node( // Batch if (int64_t batch_repeat = dim_at(repeats); batch_repeat > 1) { - api::utils::ivec3 src_offset{0, 0, 0}; + utils::ivec3 src_offset{0, 0, 0}; for (int i = 1; i < batch_repeat; ++i) { - api::utils::ivec3 dst_offset = {0, 0, i * running_range.data[2]}; + utils::ivec3 dst_offset = {0, 0, i * running_range.data[2]}; add_copy_offset_node( graph, out, running_range, src_offset, dst_offset, out); diff --git a/backends/vulkan/runtime/graph/ops/impl/Select.cpp b/backends/vulkan/runtime/graph/ops/impl/Select.cpp index 159498edd3a..bb09f807738 100644 --- a/backends/vulkan/runtime/graph/ops/impl/Select.cpp +++ b/backends/vulkan/runtime/graph/ops/impl/Select.cpp @@ -116,8 +116,8 @@ void add_select_int_node( // TODO: num_batches and num_texel_per_batch are provided by // t_out->sizes. Can change the following to reduce params // created. - graph.create_params_buffer(api::utils::make_ivec4( - {index, num_batches, num_texel_per_batch, 0}))}, + graph.create_params_buffer( + utils::make_ivec4({index, num_batches, num_texel_per_batch, 0}))}, // Specialization Constants {})); } diff --git a/backends/vulkan/runtime/graph/ops/impl/Slice.cpp b/backends/vulkan/runtime/graph/ops/impl/Slice.cpp index 1bfeb09bb21..43ed387dac6 100644 --- a/backends/vulkan/runtime/graph/ops/impl/Slice.cpp +++ b/backends/vulkan/runtime/graph/ops/impl/Slice.cpp @@ -114,7 +114,7 @@ void add_slice_tensor_out_node( // Due to channel packing, each batch value is span over stride planes int64_t n_channels = dim_at(in_sizes, kChannel4D); - stride = api::utils::div_up_4(n_channels); + stride = utils::div_up_4(n_channels); } else { VK_THROW("Unexpected ncwh_dim!"); } @@ -123,8 +123,8 @@ void add_slice_tensor_out_node( kernel_name.reserve(kShaderNameReserve); add_dtype_suffix(kernel_name, *t_out); - api::utils::uvec3 global_size = t_out->image_extents(); - api::utils::uvec3 local_size = adaptive_work_group_size(global_size); + utils::uvec3 global_size = t_out->image_extents(); + utils::uvec3 local_size = adaptive_work_group_size(global_size); const struct Block final { int dim; diff --git a/backends/vulkan/runtime/graph/ops/impl/Softmax.cpp b/backends/vulkan/runtime/graph/ops/impl/Softmax.cpp index b72c9f6cd62..6877056966d 100644 --- a/backends/vulkan/runtime/graph/ops/impl/Softmax.cpp +++ b/backends/vulkan/runtime/graph/ops/impl/Softmax.cpp @@ -15,7 +15,7 @@ namespace vkcompute { -using namespace api::utils; +using namespace utils; void resize_softmax_node( ComputeGraph* graph, @@ -66,8 +66,7 @@ void add_softmax_node( // Shader params buffers {t_out->texture_limits_ubo(), t_in->sizes_ubo(), - graph.create_params_buffer( - api::utils::make_ivec2({in_dim, softmax_dim}))}, + graph.create_params_buffer(utils::make_ivec2({in_dim, softmax_dim}))}, // Specialization Constants {}, // Resizing Logic diff --git a/backends/vulkan/runtime/graph/ops/impl/Split.cpp b/backends/vulkan/runtime/graph/ops/impl/Split.cpp index 111e6c1cf04..c77f248f20f 100644 --- a/backends/vulkan/runtime/graph/ops/impl/Split.cpp +++ b/backends/vulkan/runtime/graph/ops/impl/Split.cpp @@ -43,36 +43,36 @@ void add_split_with_sizes_default_node( } if (dim_index == kWidth4D) { - api::utils::ivec3 src_offset = api::utils::make_ivec3({0, 0, 0}, false); - api::utils::ivec3 dst_offset = api::utils::make_ivec3({0, 0, 0}, false); + utils::ivec3 src_offset = utils::make_ivec3({0, 0, 0}, false); + utils::ivec3 dst_offset = utils::make_ivec3({0, 0, 0}, false); for (ValueRef out_ref : *out_list) { // Doesn't need to use split_size since we have already verified that the // output tensor's size matches with the split_size. vTensorPtr t_out = graph.get_tensor(out_ref); - api::utils::ivec3 range = t_out->texture_limits(); + utils::ivec3 range = t_out->texture_limits(); add_copy_offset_node(graph, in, range, src_offset, dst_offset, out_ref); src_offset.data[0] += range.data[0]; } } else if (dim_index == kHeight4D) { - api::utils::ivec3 src_offset = api::utils::make_ivec3({0, 0, 0}, false); - api::utils::ivec3 dst_offset = api::utils::make_ivec3({0, 0, 0}, false); + utils::ivec3 src_offset = utils::make_ivec3({0, 0, 0}, false); + utils::ivec3 dst_offset = utils::make_ivec3({0, 0, 0}, false); for (ValueRef out_ref : *out_list) { vTensorPtr t_out = graph.get_tensor(out_ref); - api::utils::ivec3 range = t_out->texture_limits(); + utils::ivec3 range = t_out->texture_limits(); add_copy_offset_node(graph, in, range, src_offset, dst_offset, out_ref); src_offset.data[1] += range.data[1]; } } else if (dim_index == kBatch4D) { - api::utils::ivec3 src_offset = api::utils::make_ivec3({0, 0, 0}, false); - api::utils::ivec3 dst_offset = api::utils::make_ivec3({0, 0, 0}, false); + utils::ivec3 src_offset = utils::make_ivec3({0, 0, 0}, false); + utils::ivec3 dst_offset = utils::make_ivec3({0, 0, 0}, false); for (ValueRef out_ref : *out_list) { vTensorPtr t_out = graph.get_tensor(out_ref); - api::utils::ivec3 range = t_out->texture_limits(); + utils::ivec3 range = t_out->texture_limits(); add_copy_offset_node(graph, in, range, src_offset, dst_offset, out_ref); src_offset.data[2] += range.data[2]; diff --git a/backends/vulkan/runtime/graph/ops/impl/Upsample.cpp b/backends/vulkan/runtime/graph/ops/impl/Upsample.cpp index ba6b511a305..45135b2018d 100644 --- a/backends/vulkan/runtime/graph/ops/impl/Upsample.cpp +++ b/backends/vulkan/runtime/graph/ops/impl/Upsample.cpp @@ -66,29 +66,28 @@ void add_upsample_nearest2d_node( ValueRef arg_in = prepack_if_tensor_ref(graph, in); vTensorPtr t_in = graph.get_tensor(in); - api::utils::uvec3 input_sizes = t_in->image_extents(); + utils::uvec3 input_sizes = t_in->image_extents(); - api::utils::ivec2 input_size = { - api::utils::safe_downcast(input_sizes.data[0]), - api::utils::safe_downcast(input_sizes.data[1])}; - api::utils::vec2 rev_scales = { - api::utils::safe_downcast(1.0), - api::utils::safe_downcast(1.0)}; + utils::ivec2 input_size = { + utils::safe_downcast(input_sizes.data[0]), + utils::safe_downcast(input_sizes.data[1])}; + utils::vec2 rev_scales = { + utils::safe_downcast(1.0), utils::safe_downcast(1.0)}; // Reverse scale factors that pre-computed before GLSL. if (!graph.val_is_none(output_sizes)) { auto output_size_ref = graph.get_int_list(output_sizes); rev_scales = { - api::utils::safe_downcast( + utils::safe_downcast( (float)input_size.data[0] / output_size_ref->at(1)), - api::utils::safe_downcast( + utils::safe_downcast( (float)input_size.data[1] / output_size_ref->at(0))}; } else { auto scales = graph.get_double_list(scale_factors); rev_scales = { - api::utils::safe_downcast(1.0 / scales->at(1)), - api::utils::safe_downcast(1.0 / scales->at(0))}; + utils::safe_downcast(1.0 / scales->at(1)), + utils::safe_downcast(1.0 / scales->at(0))}; } vTensorPtr t_out = graph.get_tensor(out); diff --git a/backends/vulkan/runtime/graph/ops/impl/utils/DimUtils.h b/backends/vulkan/runtime/graph/ops/impl/utils/DimUtils.h index 5876ed9ac75..fcb67c1505d 100644 --- a/backends/vulkan/runtime/graph/ops/impl/utils/DimUtils.h +++ b/backends/vulkan/runtime/graph/ops/impl/utils/DimUtils.h @@ -74,7 +74,7 @@ inline int32_t dim_at(const std::vector& sizes, DimIndex dim_index) { // Recall that dim_index is a negative index. return dims < -dim_index ? 1 - : api::utils::safe_downcast(sizes[dims + dim_index]); + : utils::safe_downcast(sizes[dims + dim_index]); } template diff --git a/backends/vulkan/runtime/graph/ops/impl/utils/KernelUtils.cpp b/backends/vulkan/runtime/graph/ops/impl/utils/KernelUtils.cpp index f724b2c96a6..c5cef52f7a7 100644 --- a/backends/vulkan/runtime/graph/ops/impl/utils/KernelUtils.cpp +++ b/backends/vulkan/runtime/graph/ops/impl/utils/KernelUtils.cpp @@ -10,11 +10,11 @@ namespace vkcompute { -api::utils::ivec2 make_ivec2_from_list(ComputeGraph& graph, ValueRef vref) { - return api::utils::make_ivec2(*graph.get_int_list(vref), /*reverse = */ true); +utils::ivec2 make_ivec2_from_list(ComputeGraph& graph, ValueRef vref) { + return utils::make_ivec2(*graph.get_int_list(vref), /*reverse = */ true); } -api::utils::ivec2 make_ivec2_kernel_size( +utils::ivec2 make_ivec2_kernel_size( ComputeGraph& graph, const ValueRef weight, const bool kernel_size_only) { @@ -22,7 +22,7 @@ api::utils::ivec2 make_ivec2_kernel_size( return make_ivec2_from_list(graph, weight); } else { const auto weight_sizes = graph.get_tref(weight)->sizes; - return api::utils::make_ivec2({weight_sizes.at(3), weight_sizes.at(2)}); + return utils::make_ivec2({weight_sizes.at(3), weight_sizes.at(2)}); } } @@ -74,10 +74,10 @@ int64_t calc_out_size( std::vector calc_out_sizes_hw( const std::vector& in_sizes, - const api::utils::ivec2& kernel_size, - const api::utils::ivec2& stride, - const api::utils::ivec2& padding, - const api::utils::ivec2& dilation, + const utils::ivec2& kernel_size, + const utils::ivec2& stride, + const utils::ivec2& padding, + const utils::ivec2& dilation, const bool ceil_mode) { const int64_t ndim = in_sizes.size(); std::vector out_sizes(2); @@ -117,11 +117,11 @@ int64_t calc_transpose_out_size( std::vector calc_transpose_out_sizes_hw( const std::vector& in_sizes, - const api::utils::ivec2& kernel_size, - const api::utils::ivec2& stride, - const api::utils::ivec2& padding, - const api::utils::ivec2& dilation, - const api::utils::ivec2& output_padding) { + const utils::ivec2& kernel_size, + const utils::ivec2& stride, + const utils::ivec2& padding, + const utils::ivec2& dilation, + const utils::ivec2& output_padding) { const int64_t ndim = in_sizes.size(); std::vector out_sizes(2); @@ -157,7 +157,7 @@ std::vector calc_out_sizes_hw( const auto stride = make_ivec2_from_list(graph, args[0]); const auto padding = make_ivec2_from_list(graph, args[1]); const auto dilation = args[2] == kDummyValueRef - ? api::utils::ivec2{1, 1} + ? utils::ivec2{1, 1} : make_ivec2_from_list(graph, args[2]); if (transposed) { diff --git a/backends/vulkan/runtime/graph/ops/impl/utils/KernelUtils.h b/backends/vulkan/runtime/graph/ops/impl/utils/KernelUtils.h index fdf8a5017e8..1e8b5b0f7a4 100644 --- a/backends/vulkan/runtime/graph/ops/impl/utils/KernelUtils.h +++ b/backends/vulkan/runtime/graph/ops/impl/utils/KernelUtils.h @@ -26,10 +26,10 @@ struct Kernel1dParams final { }; struct Kernel2dParams final { - api::utils::ivec2 kernel_size; - api::utils::ivec2 stride; - api::utils::ivec2 padding; - api::utils::ivec2 dilation; + utils::ivec2 kernel_size; + utils::ivec2 stride; + utils::ivec2 padding; + utils::ivec2 dilation; }; Kernel2dParams create_kernel2d_params( diff --git a/backends/vulkan/runtime/graph/ops/impl/utils/TensorUtils.cpp b/backends/vulkan/runtime/graph/ops/impl/utils/TensorUtils.cpp index f5e0d2b1713..e8a24bb0cb0 100644 --- a/backends/vulkan/runtime/graph/ops/impl/utils/TensorUtils.cpp +++ b/backends/vulkan/runtime/graph/ops/impl/utils/TensorUtils.cpp @@ -22,8 +22,8 @@ std::vector calculate_broadcasted_output_size( // Match the sizes in reverse because sizes are in NCHW order for (int i = -1; i >= -out_sizes.size(); --i) { - out_sizes.at(out_sizes.size() + i) = std::max( - api::utils::val_at(i, t1.sizes()), api::utils::val_at(i, t2.sizes())); + out_sizes.at(out_sizes.size() + i) = + std::max(utils::val_at(i, t1.sizes()), utils::val_at(i, t2.sizes())); } return out_sizes; @@ -42,8 +42,7 @@ bool check_same_sizes_at( const int64_t d1, const vTensor& t2, const int64_t d2) { - return api::utils::val_at(d1, t1.sizes()) == - api::utils::val_at(d2, t2.sizes()); + return utils::val_at(d1, t1.sizes()) == utils::val_at(d2, t2.sizes()); } bool check_memory_layout_is(const vTensor& t, api::GPUMemoryLayout layout) { @@ -77,21 +76,16 @@ bool is_packed_dim_broadcasted(const vTensor& sndr, const vTensor& rcvr) { // some index, then the value of rcvr is 1 and hence should be broadcasted. switch (sndr.gpu_memory_layout()) { case api::kChannelsPacked: - return api::utils::val_at(-3, sndr.sizes()) > - api::utils::val_at(-3, rcvr.sizes()); + return utils::val_at(-3, sndr.sizes()) > utils::val_at(-3, rcvr.sizes()); case api::kHeightPacked: - return api::utils::val_at(-2, sndr.sizes()) > - api::utils::val_at(-2, rcvr.sizes()); + return utils::val_at(-2, sndr.sizes()) > utils::val_at(-2, rcvr.sizes()); case api::kWidthPacked: - return api::utils::val_at(-1, sndr.sizes()) > - api::utils::val_at(-1, rcvr.sizes()); + return utils::val_at(-1, sndr.sizes()) > utils::val_at(-1, rcvr.sizes()); } } -api::utils::ivec2 create_broadcast_params( - const vTensor& t1, - const vTensor& t2) { - return api::utils::make_ivec2( +utils::ivec2 create_broadcast_params(const vTensor& t1, const vTensor& t2) { + return utils::make_ivec2( {is_packed_dim_broadcasted(t2, t1), is_packed_dim_broadcasted(t1, t2)}); } @@ -99,9 +93,8 @@ api::utils::ivec2 create_broadcast_params( // Work group size calculation functions // -api::utils::uvec3 adaptive_work_group_size( - const api::utils::uvec3& global_work_group) { - api::utils::uvec3 local_group_size = {4, 4, 4}; +utils::uvec3 adaptive_work_group_size(const utils::uvec3& global_work_group) { + utils::uvec3 local_group_size = {4, 4, 4}; if (global_work_group.data[2u] == 1) { if (global_work_group.data[1u] < 8) { local_group_size.data[0u] = 16; diff --git a/backends/vulkan/runtime/graph/ops/impl/utils/TensorUtils.h b/backends/vulkan/runtime/graph/ops/impl/utils/TensorUtils.h index 5454f60a83e..2d0ce242068 100644 --- a/backends/vulkan/runtime/graph/ops/impl/utils/TensorUtils.h +++ b/backends/vulkan/runtime/graph/ops/impl/utils/TensorUtils.h @@ -47,14 +47,13 @@ bool check_same_memory_layout( // Broadcast flag functions // -api::utils::ivec2 create_broadcast_params(const vTensor& t1, const vTensor& t2); +utils::ivec2 create_broadcast_params(const vTensor& t1, const vTensor& t2); // // Work group size calculation functions // -api::utils::uvec3 adaptive_work_group_size( - const api::utils::uvec3& global_work_group); +utils::uvec3 adaptive_work_group_size(const utils::uvec3& global_work_group); // // Tensor dim utilities diff --git a/backends/vulkan/test/utils/test_utils.cpp b/backends/vulkan/test/utils/test_utils.cpp index e2eecc5b835..7e2e689c3f8 100644 --- a/backends/vulkan/test/utils/test_utils.cpp +++ b/backends/vulkan/test/utils/test_utils.cpp @@ -127,7 +127,7 @@ void record_conv2d_prepack_weights_op( api::ShaderInfo shader = VK_KERNEL_FROM_STR(kernel_name); api::ParamsBuffer original_sizes_ubo( - context, api::utils::make_ivec4(original_sizes, /*reverse = */ true)); + context, utils::make_ivec4(original_sizes, /*reverse = */ true)); api::SpecVarList specialization_constants = {}; context->submit_compute_job( diff --git a/backends/vulkan/test/vulkan_compute_api_test.cpp b/backends/vulkan/test/vulkan_compute_api_test.cpp index fcad7184c60..47dcc988e4d 100644 --- a/backends/vulkan/test/vulkan_compute_api_test.cpp +++ b/backends/vulkan/test/vulkan_compute_api_test.cpp @@ -70,14 +70,14 @@ std::vector get_reference_strides( const std::vector& sizes, const api::GPUMemoryLayout layout, const bool texel_strides) { - int64_t C = api::utils::val_at(-3, sizes); - int64_t H = api::utils::val_at(-2, sizes); - int64_t W = api::utils::val_at(-1, sizes); + int64_t C = utils::val_at(-3, sizes); + int64_t H = utils::val_at(-2, sizes); + int64_t W = utils::val_at(-1, sizes); switch (layout) { case api::kWidthPacked: if (texel_strides) { - W = api::utils::div_up(W, INT64_C(4)); + W = utils::div_up(W, INT64_C(4)); } switch (sizes.size()) { case 1: @@ -94,7 +94,7 @@ std::vector get_reference_strides( break; case api::kHeightPacked: if (texel_strides) { - H = api::utils::div_up(H, INT64_C(4)); + H = utils::div_up(H, INT64_C(4)); } switch (sizes.size()) { case 1: @@ -110,7 +110,7 @@ std::vector get_reference_strides( } case api::kChannelsPacked: if (texel_strides) { - C = api::utils::div_up(C, INT64_C(4)); + C = utils::div_up(C, INT64_C(4)); } switch (sizes.size()) { case 1: @@ -218,7 +218,7 @@ TEST_F(VulkanComputeAPITest, spec_var_shader_test) { { api::ParamsBuffer params(api::context(), int32_t(len)); - uint32_t len_div4 = api::utils::div_up(uint32_t(len), uint32_t(4)); + uint32_t len_div4 = utils::div_up(uint32_t(len), uint32_t(4)); api::PipelineBarrier pipeline_barrier{}; api::context()->submit_compute_job( VK_KERNEL(fill_buffer), @@ -251,9 +251,9 @@ TEST_F(VulkanComputeAPITest, update_params_between_submit) { add_dtype_suffix(kernel_name, a); struct Params final { - api::utils::ivec3 size; + utils::ivec3 size; int32_t fill; - api::utils::vec4 values; + utils::vec4 values; }; Params block{ @@ -326,7 +326,7 @@ void test_storage_buffer_type(const size_t len) { api::ParamsBuffer params(api::context(), int32_t(len)); { - uint32_t len_div4 = api::utils::div_up(uint32_t(len), uint32_t(4)); + uint32_t len_div4 = utils::div_up(uint32_t(len), uint32_t(4)); api::PipelineBarrier pipeline_barrier{}; api::SpecVarList specialization_constants = {}; api::context()->submit_compute_job( @@ -421,14 +421,14 @@ TEST_F(VulkanComputeAPITest, buffer_tensor_sanity_check) { !api::context()->adapter_ptr()->has_full_float16_buffers_support()) { continue; } - if (dtype == api::kHalf && api::utils::multiply_integers(sizes) >= 2048) { + if (dtype == api::kHalf && utils::multiply_integers(sizes) >= 2048) { continue; } if (dtype == api::kChar && !api::context()->adapter_ptr()->has_full_int8_buffers_support()) { continue; } - if (dtype == api::kChar && api::utils::multiply_integers(sizes) >= 128) { + if (dtype == api::kChar && utils::multiply_integers(sizes) >= 128) { continue; } for (const auto& layout : @@ -919,9 +919,9 @@ TEST(VulkanComputeGraphTest, test_simple_graph) { } } -#define CREATE_WEIGHT_TENSOR(name, sizes, dtype, val) \ - std::vector data_##name(api::utils::multiply_integers(sizes)); \ - std::fill(data_##name.begin(), data_##name.end(), val); \ +#define CREATE_WEIGHT_TENSOR(name, sizes, dtype, val) \ + std::vector data_##name(utils::multiply_integers(sizes)); \ + std::fill(data_##name.begin(), data_##name.end(), val); \ ValueRef name = graph.add_tensorref(sizes, dtype, data_##name.data()); TEST(VulkanComputeGraphTest, test_simple_prepacked_graph) { @@ -2118,9 +2118,9 @@ void test_max_pool2d( int h_offset = kernel_copy[0] - 1; int w_offset = kernel_copy[1] - 1; - int h_out = api::utils::val_at(-2, t_out->sizes()); - int w_out = api::utils::val_at(-1, t_out->sizes()); - int w_in = api::utils::val_at(-1, t_in->sizes()); + int h_out = utils::val_at(-2, t_out->sizes()); + int w_out = utils::val_at(-1, t_out->sizes()); + int w_in = utils::val_at(-1, t_in->sizes()); for (size_t i = 0; i < h_out; ++i) { for (size_t j = 0; j < w_out; ++j) { size_t idx_out = i * w_out + j; @@ -2153,7 +2153,7 @@ void test_conv2d( api::GPUMemoryLayout::TENSOR_CHANNELS_PACKED); // Create and fill input staging buffer - const int64_t in_numel = api::utils::multiply_integers(original_sizes); + const int64_t in_numel = utils::multiply_integers(original_sizes); api::StorageBuffer staging_buffer_in(api::context(), api::kFloat, in_numel); std::vector data_in(in_numel);