pytorch · junpi3 · Jul 2, 2024 · Jul 2, 2024
diff --git a/backends/vulkan/runtime/api/QueryPool.cpp b/backends/vulkan/runtime/api/QueryPool.cpp
@@ -111,7 +111,7 @@ void QueryPool::shader_profile_begin(
   uint32_t query_idx = write_timestamp(cmd);
 
   ShaderDuration log_entry{
-      api::utils::safe_downcast<uint32_t>(shader_durations_.size()),
+      utils::safe_downcast<uint32_t>(shader_durations_.size()),
       // Execution Properties
       dispatch_id,
       kernel_name,

diff --git a/backends/vulkan/runtime/api/Tensor.cpp b/backends/vulkan/runtime/api/Tensor.cpp
@@ -23,9 +23,8 @@ std::vector<int64_t> calculate_strides(
   size_t ndim = sizes.size();
   std::vector<int64_t> strides(ndim);
 
-  const int64_t last_dim_size = texel_strides
-      ? api::utils::div_up_4(sizes.at(last_dim))
-      : sizes.at(last_dim);
+  const int64_t last_dim_size =
+      texel_strides ? utils::div_up_4(sizes.at(last_dim)) : sizes.at(last_dim);
 
   for (int stride_d = ndim - 1; stride_d >= 0; stride_d--) {
     strides.at(stride_d) = 1;
@@ -51,31 +50,30 @@ std::vector<int64_t> calculate_padded_sizes(
   }
 
   // Tensor sizes will be unsqueezed up to the next multiple of 4
-  const int64_t ndim_up4 = api::utils::align_up_4(ndim);
+  const int64_t ndim_up4 = utils::align_up_4(ndim);
   std::vector<int64_t> padded_sizes(ndim_up4);
   for (int64_t i = 0; i < ndim_up4; ++i) {
-    padded_sizes.at(i) = api::utils::val_at(i - ndim_up4, sizes);
+    padded_sizes.at(i) = utils::val_at(i - ndim_up4, sizes);
   }
 
   // Pad the packed dim to the next multiple of 4.
   const int64_t dim_offset =
       api::to_packed_dim_nchw_offset<int64_t>(memory_layout);
-  const int64_t padded_dim_size = api::utils::val_at(-dim_offset, sizes);
-  padded_sizes.at(ndim_up4 - dim_offset) =
-      api::utils::align_up_4(padded_dim_size);
+  const int64_t padded_dim_size = utils::val_at(-dim_offset, sizes);
+  padded_sizes.at(ndim_up4 - dim_offset) = utils::align_up_4(padded_dim_size);
 
   return padded_sizes;
 }
 
-api::utils::uvec3 calculate_image_extents(
+utils::uvec3 calculate_image_extents(
     const std::vector<int64_t>& padded_sizes,
     const api::GPUMemoryLayout memory_layout) {
   VK_CHECK_COND(padded_sizes.size() == 4);
 
-  uint32_t N = api::utils::safe_downcast<uint32_t>(padded_sizes.at(0));
-  uint32_t C = api::utils::safe_downcast<uint32_t>(padded_sizes.at(1));
-  uint32_t H = api::utils::safe_downcast<uint32_t>(padded_sizes.at(2));
-  uint32_t W = api::utils::safe_downcast<uint32_t>(padded_sizes.at(3));
+  uint32_t N = utils::safe_downcast<uint32_t>(padded_sizes.at(0));
+  uint32_t C = utils::safe_downcast<uint32_t>(padded_sizes.at(1));
+  uint32_t H = utils::safe_downcast<uint32_t>(padded_sizes.at(2));
+  uint32_t W = utils::safe_downcast<uint32_t>(padded_sizes.at(3));
 
   switch (memory_layout) {
     case api::kWidthPacked:
@@ -126,10 +124,10 @@ vTensor::vTensor(
           dtype_,
           allocate_memory) {
   if (storage_type != api::kBuffer) {
-    texture_limits_.limits = api::utils::ivec3{
-        api::utils::safe_downcast<int32_t>(storage_.image_extents_.data[0]),
-        api::utils::safe_downcast<int32_t>(storage_.image_extents_.data[1]),
-        api::utils::safe_downcast<int32_t>(storage_.image_extents_.data[2])};
+    texture_limits_.limits = utils::ivec3{
+        utils::safe_downcast<int32_t>(storage_.image_extents_.data[0]),
+        utils::safe_downcast<int32_t>(storage_.image_extents_.data[1]),
+        utils::safe_downcast<int32_t>(storage_.image_extents_.data[2])};
   }
 
   if (dtype == api::kHalf) {
@@ -172,8 +170,8 @@ api::VulkanBuffer& vTensor::buffer(
 
 const api::BufferBindInfo vTensor::sizes_ubo() {
   if (!sizes_uniform_.buffer()) {
-    sizes_uniform_ = api::ParamsBuffer(
-        storage_.context_, api::utils::make_whcn_ivec4(sizes_));
+    sizes_uniform_ =
+        api::ParamsBuffer(storage_.context_, utils::make_whcn_ivec4(sizes_));
   }
   return api::BufferBindInfo(sizes_uniform_.buffer());
 }
@@ -190,7 +188,7 @@ const api::BufferBindInfo vTensor::texel_strides_ubo() {
   if (!texel_strides_uniform_.buffer()) {
     texel_strides_uniform_ = api::ParamsBuffer(
         storage_.context_,
-        api::utils::make_whcn_ivec4(
+        utils::make_whcn_ivec4(
             calculate_strides(padded_sizes_, memory_layout_)));
   }
   return api::BufferBindInfo(texel_strides_uniform_.buffer());
@@ -243,23 +241,23 @@ void vTensor::update_size_metadata(const std::vector<int64_t>& new_sizes) {
 
   // Calculate the extents of the image texture that would have been required
   // for a tensor of the new sizes.
-  api::utils::uvec3 virtual_extents =
+  utils::uvec3 virtual_extents =
       calculate_image_extents(padded_sizes_, memory_layout_);
 
   // Update the texture limits to reflect the new virtual extents.
-  texture_limits_.limits = api::utils::ivec3{
-      api::utils::safe_downcast<int32_t>(virtual_extents.data[0]),
-      api::utils::safe_downcast<int32_t>(virtual_extents.data[1]),
-      api::utils::safe_downcast<int32_t>(virtual_extents.data[2])};
+  texture_limits_.limits = utils::ivec3{
+      utils::safe_downcast<int32_t>(virtual_extents.data[0]),
+      utils::safe_downcast<int32_t>(virtual_extents.data[1]),
+      utils::safe_downcast<int32_t>(virtual_extents.data[2])};
 
   if (sizes_uniform_.buffer()) {
-    sizes_uniform_.update(api::utils::make_whcn_ivec4(sizes_));
+    sizes_uniform_.update(utils::make_whcn_ivec4(sizes_));
   }
   if (texture_limits_uniform_.buffer()) {
     texture_limits_uniform_.update(texture_limits_);
   }
   if (texel_strides_uniform_.buffer()) {
-    texel_strides_uniform_.update(api::utils::make_whcn_ivec4(
+    texel_strides_uniform_.update(utils::make_whcn_ivec4(
         calculate_strides(padded_sizes_, memory_layout_)));
   }
   if (ntexels_uniform_.buffer()) {
@@ -279,7 +277,7 @@ void vTensor::virtual_resize(const std::vector<int64_t>& new_sizes) {
   if (storage_type() != api::kBuffer) {
     // For texture storage check that the current texture is large enough for
     // the new sizes of the tensor.
-    api::utils::uvec3 virtual_extents =
+    utils::uvec3 virtual_extents =
         calculate_image_extents(padded_sizes_, memory_layout_);
 
     bool valid_resize = virtual_extents.data[0] <= image_extents().data[0];
@@ -302,7 +300,7 @@ void vTensor::virtual_resize(const std::vector<int64_t>& new_sizes) {
 
 api::VulkanImage allocate_image(
     api::Context* const context_ptr,
-    api::utils::uvec3& image_extents,
+    utils::uvec3& image_extents,
     const api::StorageType storage_type,
     const VkFormat image_format,
     const bool allocate_memory) {
@@ -375,7 +373,7 @@ vTensorStorage::vTensorStorage(
     : context_(context),
       storage_type_{storage_type},
       image_extents_(calculate_image_extents(padded_sizes, gpu_memory_layout)),
-      buffer_length_{api::utils::multiply_integers(padded_sizes)},
+      buffer_length_{utils::multiply_integers(padded_sizes)},
       image_(allocate_image(
           context_,
           image_extents_,
@@ -474,7 +472,7 @@ void vTensorStorage::discard_and_reallocate(
       api::to_vkformat(dtype),
       image_owns_memory);
 
-  buffer_length_ = api::utils::multiply_integers(padded_sizes);
+  buffer_length_ = utils::multiply_integers(padded_sizes);
   buffer_ = allocate_buffer(
       context_, buffer_length_, storage_type_, dtype, buffer_owns_memory);
 }

diff --git a/backends/vulkan/runtime/api/Tensor.h b/backends/vulkan/runtime/api/Tensor.h
@@ -54,7 +54,7 @@ std::vector<int64_t> calculate_padded_sizes(
  * Given the padded sizes of a tensor and the GPU memory layout, calculate the
  * 3D image extents required to store the tensor data as an image texture.
  */
-api::utils::uvec3 calculate_image_extents(
+utils::uvec3 calculate_image_extents(
     const std::vector<int64_t>& padded_sizes,
     const api::GPUMemoryLayout memory_layout);
 
@@ -102,7 +102,7 @@ class vTensorStorage final {
   api::StorageType storage_type_;
 
   // Resource sizings
-  api::utils::uvec3 image_extents_{};
+  utils::uvec3 image_extents_{};
   int64_t buffer_length_{};
 
   // GPU Storage
@@ -141,7 +141,7 @@ class vTensor final {
     // Alignment is required to conform with Vulkan specification; a 3 or 4
     // component vector with components of size N must have base alignment of
     // 4N.
-    alignas(16) api::utils::ivec3 limits;
+    alignas(16) utils::ivec3 limits;
   };
 
  public:
@@ -231,7 +231,7 @@ class vTensor final {
     return storage_.storage_type_ == api::kBuffer;
   }
 
-  inline const api::utils::uvec3& image_extents() const {
+  inline const utils::uvec3& image_extents() const {
     return storage_.image_extents_;
   }
 
@@ -291,12 +291,12 @@ class vTensor final {
    */
   const api::BufferBindInfo ntexels_ubo();
 
-  inline const api::utils::ivec3 texture_limits() const {
+  inline const utils::ivec3 texture_limits() const {
     return texture_limits_.limits;
   }
 
   inline size_t numel() const {
-    return api::utils::multiply_integers(sizes());
+    return utils::multiply_integers(sizes());
   }
 
   inline size_t nbytes() const {
@@ -307,15 +307,15 @@ class vTensor final {
    * Returns numel but based on padded_sizes_ instead of sizes_
    */
   inline size_t gpu_numel() const {
-    return api::utils::multiply_integers(padded_sizes_);
+    return utils::multiply_integers(padded_sizes_);
   }
 
   /*
    * Returns the number of texels in the image texture or texel buffer used to
    * store the tensor's data.
    */
   inline int32_t texel_numel() const {
-    return api::utils::safe_downcast<int32_t>(gpu_numel() / 4);
+    return utils::safe_downcast<int32_t>(gpu_numel() / 4);
   }
 
   /*

diff --git a/backends/vulkan/runtime/api/Utils.h b/backends/vulkan/runtime/api/Utils.h
@@ -27,8 +27,6 @@
 #endif //_MSC_VER
 
 namespace vkcompute {
-namespace api {
-
 namespace utils {
 
 //
@@ -299,7 +297,7 @@ inline detail::vec<T, N> divup_vec(
     const detail::vec<T, N>& b) {
   detail::vec<T, N> result;
   for (uint32_t i = 0; i < N; ++i) {
-    result.data[i] = api::utils::div_up(a.data[i], b.data[i]);
+    result.data[i] = utils::div_up(a.data[i], b.data[i]);
   }
   return result;
 }
@@ -462,6 +460,8 @@ inline int64_t multiply_integers(Iter begin, Iter end) {
 
 } // namespace utils
 
+namespace api {
+
 inline VkExtent3D create_extent3d(const utils::uvec3& extents) {
   return VkExtent3D{extents.data[0u], extents.data[1u], extents.data[2u]};
 }

diff --git a/backends/vulkan/runtime/graph/ComputeGraph.cpp b/backends/vulkan/runtime/graph/ComputeGraph.cpp
@@ -106,7 +106,7 @@ api::GPUMemoryLayout ComputeGraph::suggested_memory_layout(
   }
   // For 3 dimensional tensors that only have a channels dimension of 1, still
   // prefer width packed.
-  if (api::utils::val_at(-3, sizes) == 1) {
+  if (utils::val_at(-3, sizes) == 1) {
     return api::kWidthPacked;
   }
   return api::kChannelsPacked;
@@ -312,14 +312,14 @@ void ComputeGraph::update_descriptor_counts(
   }
 }
 
-api::utils::uvec3 ComputeGraph::create_global_wg_size(const ValueRef idx) {
+utils::uvec3 ComputeGraph::create_global_wg_size(const ValueRef idx) {
   if (is_buffer_storage(idx)) {
     return {uint32_t(texel_numel_of(idx)), 1u, 1u};
   }
   return image_extents_of(idx);
 }
 
-api::utils::uvec3 ComputeGraph::create_local_wg_size(const ValueRef idx) {
+utils::uvec3 ComputeGraph::create_local_wg_size(const ValueRef idx) {
   if (config_.enable_local_wg_size_override) {
     return config_.local_wg_size_override;
   }
@@ -328,8 +328,8 @@ api::utils::uvec3 ComputeGraph::create_local_wg_size(const ValueRef idx) {
     return {64u, 1u, 1u};
   }
 
-  const api::utils::uvec3 image_extents = image_extents_of(idx);
-  api::utils::uvec3 local_group_size = {4, 4, 4};
+  const utils::uvec3 image_extents = image_extents_of(idx);
+  utils::uvec3 local_group_size = {4, 4, 4};
 
   if (image_extents.data[2u] == 1) {
     if (image_extents.data[1u] == 1) {

diff --git a/backends/vulkan/runtime/graph/ComputeGraph.h b/backends/vulkan/runtime/graph/ComputeGraph.h
@@ -186,7 +186,7 @@ class ComputeGraph final {
 
   api::ScalarType dtype_of(const ValueRef idx) const;
 
-  inline api::utils::uvec3 image_extents_of(const ValueRef idx) const {
+  inline utils::uvec3 image_extents_of(const ValueRef idx) const {
     return values_.at(idx).toConstTensor().image_extents();
   }
 
@@ -454,7 +454,7 @@ class ComputeGraph final {
    * All other components will be set to 1 (i.e. {ntexels, 1, 1} will be
    * returned).
    */
-  api::utils::uvec3 create_global_wg_size(const ValueRef idx);
+  utils::uvec3 create_global_wg_size(const ValueRef idx);
 
   /*
    * Suggest a local workgroup size for a given `vTensor` value, assuming that
@@ -466,7 +466,7 @@ class ComputeGraph final {
    * Currently, the local workgroup size is hard-coded to contain a total of 64
    * shader invocations. In the future, this value can be configured.
    */
-  api::utils::uvec3 create_local_wg_size(const ValueRef idx);
+  utils::uvec3 create_local_wg_size(const ValueRef idx);
 
   //
   // Input/Output

diff --git a/backends/vulkan/runtime/graph/GraphConfig.cpp b/backends/vulkan/runtime/graph/GraphConfig.cpp
@@ -77,7 +77,7 @@ void GraphConfig::set_memory_layout_override(
 }
 
 void GraphConfig::set_local_wg_size_override(
-    const api::utils::uvec3& local_wg_size) {
+    const utils::uvec3& local_wg_size) {
   enable_local_wg_size_override = true;
   local_wg_size_override = local_wg_size;
 }

diff --git a/backends/vulkan/runtime/graph/GraphConfig.h b/backends/vulkan/runtime/graph/GraphConfig.h
@@ -31,14 +31,14 @@ struct GraphConfig final {
   bool enable_querypool;
 
   bool enable_local_wg_size_override;
-  api::utils::uvec3 local_wg_size_override;
+  utils::uvec3 local_wg_size_override;
 
   // Generate a default graph config with pre-configured settings
   explicit GraphConfig();
 
   void set_storage_type_override(api::StorageType storage_type);
   void set_memory_layout_override(api::GPUMemoryLayout memory_layout);
-  void set_local_wg_size_override(const api::utils::uvec3& local_wg_size);
+  void set_local_wg_size_override(const utils::uvec3& local_wg_size);
 };
 
 } // namespace vkcompute
diff --git a/backends/vulkan/runtime/graph/Logging.h b/backends/vulkan/runtime/graph/Logging.h
@@ -26,20 +26,20 @@ inline std::ostream& operator<<(std::ostream& os, const std::vector<T>& vec) {
   return os; // Return the ostream to allow chaining
 }
 
-inline std::ostream& operator<<(std::ostream& os, const api::utils::uvec3& v) {
-  return api::utils::operator<<(os, v);
+inline std::ostream& operator<<(std::ostream& os, const utils::uvec3& v) {
+  return utils::operator<<(os, v);
 }
 
-inline std::ostream& operator<<(std::ostream& os, const api::utils::uvec4& v) {
-  return api::utils::operator<<(os, v);
+inline std::ostream& operator<<(std::ostream& os, const utils::uvec4& v) {
+  return utils::operator<<(os, v);
 }
 
-inline std::ostream& operator<<(std::ostream& os, const api::utils::ivec3& v) {
-  return api::utils::operator<<(os, v);
+inline std::ostream& operator<<(std::ostream& os, const utils::ivec3& v) {
+  return utils::operator<<(os, v);
 }
 
-inline std::ostream& operator<<(std::ostream& os, const api::utils::ivec4& v) {
-  return api::utils::operator<<(os, v);
+inline std::ostream& operator<<(std::ostream& os, const utils::ivec4& v) {
+  return utils::operator<<(os, v);
 }
 
 template <typename T>

diff --git a/backends/vulkan/runtime/graph/ops/ExecuteNode.cpp b/backends/vulkan/runtime/graph/ops/ExecuteNode.cpp
@@ -17,8 +17,8 @@ namespace vkcompute {
 ExecuteNode::ExecuteNode(
     ComputeGraph& graph,
     const api::ShaderInfo& shader,
-    const api::utils::uvec3& global_workgroup_size,
-    const api::utils::uvec3& local_workgroup_size,
+    const utils::uvec3& global_workgroup_size,
+    const utils::uvec3& local_workgroup_size,
     const std::vector<ArgGroup>& args,
     const api::ParamsBindList& params,
     const api::SpecVarList& spec_vars,