diff --git a/backends/vulkan/runtime/VulkanBackend.cpp b/backends/vulkan/runtime/VulkanBackend.cpp
index e958839f489..a3c268d3fd0 100644
--- a/backends/vulkan/runtime/VulkanBackend.cpp
+++ b/backends/vulkan/runtime/VulkanBackend.cpp
@@ -59,47 +59,47 @@ const uint8_t* get_constant_data_ptr(
   return constant_data + constant_bytes->offset();
 }
 
-api::ScalarType get_scalar_type(const vkgraph::VkDataType& vk_datatype) {
+vkapi::ScalarType get_scalar_type(const vkgraph::VkDataType& vk_datatype) {
   switch (vk_datatype) {
     case vkgraph::VkDataType::BOOL:
-      return api::kBool;
+      return vkapi::kBool;
     case vkgraph::VkDataType::UINT8:
-      return api::kByte;
+      return vkapi::kByte;
     case vkgraph::VkDataType::INT8:
-      return api::kChar;
+      return vkapi::kChar;
     case vkgraph::VkDataType::INT32:
-      return api::kInt;
+      return vkapi::kInt;
     case vkgraph::VkDataType::FLOAT16:
-      return api::kHalf;
+      return vkapi::kHalf;
     case vkgraph::VkDataType::FLOAT32:
-      return api::kFloat;
+      return vkapi::kFloat;
   }
 }
 
-api::StorageType get_storage_type(
+vkapi::StorageType get_storage_type(
     const vkgraph::VkStorageType& vk_storage_type) {
   switch (vk_storage_type) {
     case vkgraph::VkStorageType::BUFFER:
-      return api::kBuffer;
+      return vkapi::kBuffer;
     case vkgraph::VkStorageType::TEXTURE_3D:
-      return api::kTexture3D;
+      return vkapi::kTexture3D;
     case vkgraph::VkStorageType::TEXTURE_2D:
-      return api::kTexture2D;
+      return vkapi::kTexture2D;
     default:
       break;
   }
   VK_THROW("Invalid storage type encountered!");
 }
 
-api::GPUMemoryLayout get_memory_layout(
+vkapi::GPUMemoryLayout get_memory_layout(
     const vkgraph::VkMemoryLayout& vk_memory_layout) {
   switch (vk_memory_layout) {
     case vkgraph::VkMemoryLayout::TENSOR_WIDTH_PACKED:
-      return api::kWidthPacked;
+      return vkapi::kWidthPacked;
     case vkgraph::VkMemoryLayout::TENSOR_HEIGHT_PACKED:
-      return api::kHeightPacked;
+      return vkapi::kHeightPacked;
     case vkgraph::VkMemoryLayout::TENSOR_CHANNELS_PACKED:
-      return api::kChannelsPacked;
+      return vkapi::kChannelsPacked;
     default:
       break;
   }
@@ -115,16 +115,16 @@ GraphConfig get_graph_config(ArrayRef<CompileSpec>& compile_specs) {
     if (strcmp(spec.key, "storage_type_override") == 0) {
       ET_CHECK_MSG(value_size == sizeof(int32_t), "Unexpected value size!");
       int value_as_int = static_cast<int>(getUInt32LE(value_data));
-      api::StorageType storage_type =
-          static_cast<api::StorageType>(value_as_int);
+      vkapi::StorageType storage_type =
+          static_cast<vkapi::StorageType>(value_as_int);
 
       config.set_storage_type_override(storage_type);
     }
     if (strcmp(spec.key, "memory_layout_override") == 0) {
       ET_CHECK_MSG(value_size == sizeof(uint32_t), "Unexpected value size!");
       uint32_t value_as_int = getUInt32LE(value_data);
-      api::GPUMemoryLayout memory_layout =
-          static_cast<api::GPUMemoryLayout>(value_as_int);
+      vkapi::GPUMemoryLayout memory_layout =
+          static_cast<vkapi::GPUMemoryLayout>(value_as_int);
 
       config.set_memory_layout_override(memory_layout);
     }
@@ -171,8 +171,8 @@ class GraphBuilder {
   }
 
   void add_tensor_to_graph(const uint32_t fb_id, VkTensorPtr tensor_fb) {
-    const api::ScalarType& dtype = get_scalar_type(tensor_fb->datatype());
-    api::StorageType storage_type =
+    const vkapi::ScalarType& dtype = get_scalar_type(tensor_fb->datatype());
+    vkapi::StorageType storage_type =
         tensor_fb->storage_type() == vkgraph::VkStorageType::DEFAULT_STORAGE
         ? compute_graph_->suggested_storage_type()
         : get_storage_type(tensor_fb->storage_type());
@@ -180,7 +180,7 @@ class GraphBuilder {
     UIntVector dims_fb = tensor_fb->dims();
     const std::vector<int64_t> dims_vector(dims_fb->cbegin(), dims_fb->cend());
 
-    api::GPUMemoryLayout memory_layout =
+    vkapi::GPUMemoryLayout memory_layout =
         tensor_fb->memory_layout() == vkgraph::VkMemoryLayout::DEFAULT_LAYOUT
         ? compute_graph_->suggested_memory_layout(dims_vector)
         : get_memory_layout(tensor_fb->memory_layout());
diff --git a/backends/vulkan/runtime/api/Context.cpp b/backends/vulkan/runtime/api/Context.cpp
index 18415857495..7e97effc137 100644
--- a/backends/vulkan/runtime/api/Context.cpp
+++ b/backends/vulkan/runtime/api/Context.cpp
@@ -7,7 +7,8 @@
  */
 
 #include <executorch/backends/vulkan/runtime/api/Context.h>
-#include <executorch/backends/vulkan/runtime/api/VkUtils.h>
+
+#include <executorch/backends/vulkan/runtime/api/vk_api/VkUtils.h>
 
 #ifndef VULKAN_DESCRIPTOR_POOL_SIZE
 #define VULKAN_DESCRIPTOR_POOL_SIZE 1024u
@@ -23,7 +24,7 @@ namespace api {
 Context::Context(size_t adapter_i, const ContextConfig& config)
     : config_(config),
       // Important handles
-      adapter_p_(runtime()->get_adapter_p(adapter_i)),
+      adapter_p_(vkapi::runtime()->get_adapter_p(adapter_i)),
       device_(adapter_p_->device_handle()),
       queue_(adapter_p_->request_queue()),
       // Resource pools
@@ -72,8 +73,8 @@ void Context::report_shader_dispatch_start(
         cmd_,
         dispatch_id,
         shader_name,
-        create_extent3d(global_wg_size),
-        create_extent3d(local_wg_size));
+        vkapi::create_extent3d(global_wg_size),
+        vkapi::create_extent3d(local_wg_size));
   }
 }
 
@@ -83,17 +84,17 @@ void Context::report_shader_dispatch_end() {
   }
 }
 
-DescriptorSet Context::get_descriptor_set(
-    const ShaderInfo& shader_descriptor,
+vkapi::DescriptorSet Context::get_descriptor_set(
+    const vkapi::ShaderInfo& shader_descriptor,
     const utils::uvec3& local_workgroup_size,
-    const SpecVarList& additional_constants) {
+    const vkapi::SpecVarList& additional_constants) {
   VkDescriptorSetLayout shader_layout =
       shader_layout_cache().retrieve(shader_descriptor.kernel_layout);
 
   VkPipelineLayout pipeline_layout =
       pipeline_layout_cache().retrieve(shader_layout);
 
-  SpecVarList spec_constants = {
+  vkapi::SpecVarList spec_constants = {
       SV(local_workgroup_size.data[0u]),
       SV(local_workgroup_size.data[1u]),
       SV(local_workgroup_size.data[2u])};
@@ -112,9 +113,9 @@ DescriptorSet Context::get_descriptor_set(
 }
 
 void Context::register_shader_dispatch(
-    const DescriptorSet& descriptors,
-    PipelineBarrier& pipeline_barrier,
-    const ShaderInfo& shader_descriptor,
+    const vkapi::DescriptorSet& descriptors,
+    vkapi::PipelineBarrier& pipeline_barrier,
+    const vkapi::ShaderInfo& shader_descriptor,
     const utils::uvec3& global_workgroup_size) {
   // Adjust the global workgroup size based on the output tile size
   uint32_t global_wg_w = utils::div_up(
@@ -180,12 +181,12 @@ Context* context() {
     try {
       const uint32_t cmd_submit_frequency = 16u;
 
-      const CommandPoolConfig cmd_config{
+      const vkapi::CommandPoolConfig cmd_config{
           32u, // cmdPoolInitialSize
           8u, // cmdPoolBatchSize
       };
 
-      const DescriptorPoolConfig descriptor_pool_config{
+      const vkapi::DescriptorPoolConfig descriptor_pool_config{
           VULKAN_DESCRIPTOR_POOL_SIZE, // descriptorPoolMaxSets
           VULKAN_DESCRIPTOR_POOL_SIZE, // descriptorUniformBufferCount
           VULKAN_DESCRIPTOR_POOL_SIZE, // descriptorStorageBufferCount
@@ -194,7 +195,7 @@ Context* context() {
           32u, // descriptorPileSizes
       };
 
-      const QueryPoolConfig query_pool_config{
+      const vkapi::QueryPoolConfig query_pool_config{
           VULKAN_QUERY_POOL_SIZE, // maxQueryCount
           256u, // initialReserveSize
       };
@@ -206,7 +207,7 @@ Context* context() {
           query_pool_config,
       };
 
-      return new Context(runtime()->default_adapter_i(), config);
+      return new Context(vkapi::runtime()->default_adapter_i(), config);
     } catch (...) {
     }
 
diff --git a/backends/vulkan/runtime/api/Context.h b/backends/vulkan/runtime/api/Context.h
index cf411c86012..530babf08a2 100644
--- a/backends/vulkan/runtime/api/Context.h
+++ b/backends/vulkan/runtime/api/Context.h
@@ -10,23 +10,23 @@
 
 // @lint-ignore-every CLANGTIDY facebook-hte-BadMemberName
 
-#include <executorch/backends/vulkan/runtime/api/Adapter.h>
-#include <executorch/backends/vulkan/runtime/api/Command.h>
-#include <executorch/backends/vulkan/runtime/api/Descriptor.h>
-#include <executorch/backends/vulkan/runtime/api/Fence.h>
-#include <executorch/backends/vulkan/runtime/api/QueryPool.h>
-#include <executorch/backends/vulkan/runtime/api/Runtime.h>
-
 #include <executorch/backends/vulkan/runtime/api/utils/MacroUtils.h>
 
+#include <executorch/backends/vulkan/runtime/api/vk_api/Adapter.h>
+#include <executorch/backends/vulkan/runtime/api/vk_api/Command.h>
+#include <executorch/backends/vulkan/runtime/api/vk_api/Descriptor.h>
+#include <executorch/backends/vulkan/runtime/api/vk_api/Fence.h>
+#include <executorch/backends/vulkan/runtime/api/vk_api/QueryPool.h>
+#include <executorch/backends/vulkan/runtime/api/vk_api/Runtime.h>
+
 namespace vkcompute {
 namespace api {
 
 struct ContextConfig final {
   uint32_t cmd_submit_frequency;
-  CommandPoolConfig cmd_pool_config;
-  DescriptorPoolConfig descriptor_pool_config;
-  QueryPoolConfig query_pool_config;
+  vkapi::CommandPoolConfig cmd_pool_config;
+  vkapi::DescriptorPoolConfig descriptor_pool_config;
+  vkapi::QueryPoolConfig query_pool_config;
 };
 
 //
@@ -54,29 +54,29 @@ class Context final {
   // Config
   ContextConfig config_;
   // Important handles
-  Adapter* adapter_p_;
+  vkapi::Adapter* adapter_p_;
   VkDevice device_;
-  Adapter::Queue queue_;
+  vkapi::Adapter::Queue queue_;
   // Resource Pools
-  CommandPool command_pool_;
-  DescriptorPool descriptor_pool_;
-  FencePool fences_;
+  vkapi::CommandPool command_pool_;
+  vkapi::DescriptorPool descriptor_pool_;
+  vkapi::FencePool fences_;
   // Diagnostics
-  QueryPool querypool_;
+  vkapi::QueryPool querypool_;
   // Command buffers submission
   std::mutex cmd_mutex_;
-  CommandBuffer cmd_;
+  vkapi::CommandBuffer cmd_;
   uint32_t submit_count_;
   // Memory Management
   std::mutex buffer_clearlist_mutex_;
-  std::vector<VulkanBuffer> buffers_to_clear_;
+  std::vector<vkapi::VulkanBuffer> buffers_to_clear_;
   std::mutex image_clearlist_mutex_;
-  std::vector<VulkanImage> images_to_clear_;
+  std::vector<vkapi::VulkanImage> images_to_clear_;
 
  public:
   // Adapter access
 
-  inline Adapter* adapter_ptr() {
+  inline vkapi::Adapter* adapter_ptr() {
     return adapter_p_;
   }
 
@@ -90,35 +90,35 @@ class Context final {
 
   // Device Caches
 
-  inline ShaderLayoutCache& shader_layout_cache() {
+  inline vkapi::ShaderLayoutCache& shader_layout_cache() {
     return adapter_ptr()->shader_layout_cache();
   }
 
-  inline ShaderCache& shader_cache() {
+  inline vkapi::ShaderCache& shader_cache() {
     return adapter_ptr()->shader_cache();
   }
 
-  inline PipelineLayoutCache& pipeline_layout_cache() {
+  inline vkapi::PipelineLayoutCache& pipeline_layout_cache() {
     return adapter_ptr()->pipeline_layout_cache();
   }
 
-  inline ComputePipelineCache& pipeline_cache() {
+  inline vkapi::ComputePipelineCache& pipeline_cache() {
     return adapter_ptr()->compute_pipeline_cache();
   }
 
   // Resource Pools
 
-  inline DescriptorPool& descriptor_pool() {
+  inline vkapi::DescriptorPool& descriptor_pool() {
     return descriptor_pool_;
   }
 
-  inline FencePool& fences() {
+  inline vkapi::FencePool& fences() {
     return fences_;
   }
 
   // Diagnostics
 
-  inline QueryPool& querypool() {
+  inline vkapi::QueryPool& querypool() {
     return querypool_;
   }
 
@@ -155,12 +155,12 @@ class Context final {
 
   // Memory Management
 
-  void register_buffer_cleanup(VulkanBuffer& buffer) {
+  void register_buffer_cleanup(vkapi::VulkanBuffer& buffer) {
     std::lock_guard<std::mutex> bufferlist_lock(buffer_clearlist_mutex_);
     buffers_to_clear_.emplace_back(std::move(buffer));
   }
 
-  void register_image_cleanup(VulkanImage& image) {
+  void register_image_cleanup(vkapi::VulkanImage& image) {
     std::lock_guard<std::mutex> imagelist_lock(image_clearlist_mutex_);
     images_to_clear_.emplace_back(std::move(image));
   }
@@ -178,30 +178,30 @@ class Context final {
     }
   }
 
-  DescriptorSet get_descriptor_set(
-      const ShaderInfo&,
+  vkapi::DescriptorSet get_descriptor_set(
+      const vkapi::ShaderInfo&,
       const utils::uvec3&,
-      const SpecVarList&);
+      const vkapi::SpecVarList&);
 
-  inline DescriptorSet get_descriptor_set(
-      const ShaderInfo& shader_descriptor,
+  inline vkapi::DescriptorSet get_descriptor_set(
+      const vkapi::ShaderInfo& shader_descriptor,
       const utils::uvec3& local_work_group_size) {
     return get_descriptor_set(shader_descriptor, local_work_group_size, {});
   }
 
   void register_shader_dispatch(
-      const DescriptorSet&,
-      PipelineBarrier&,
-      const ShaderInfo&,
+      const vkapi::DescriptorSet&,
+      vkapi::PipelineBarrier&,
+      const vkapi::ShaderInfo&,
       const utils::uvec3&);
 
   template <typename... Arguments>
   bool submit_compute_job(
-      const ShaderInfo&,
-      PipelineBarrier&,
+      const vkapi::ShaderInfo&,
+      vkapi::PipelineBarrier&,
       const utils::uvec3&,
       const utils::uvec3&,
-      const SpecVarList&,
+      const vkapi::SpecVarList&,
       VkFence fence_handle,
       const uint32_t dispatch_id,
       Arguments&&...);
@@ -221,17 +221,21 @@ Context* context();
 
 namespace detail {
 
-inline void arg_is_empty(bool& any_is_empty, const VulkanBuffer& buffer) {
+inline void arg_is_empty(
+    bool& any_is_empty,
+    const vkapi::VulkanBuffer& buffer) {
   // bool(buffer) will evaluate to false if no memory has been allocated
   any_is_empty = any_is_empty || !buffer;
 }
 
-inline void arg_is_empty(bool& any_is_empty, const VulkanImage& image) {
+inline void arg_is_empty(bool& any_is_empty, const vkapi::VulkanImage& image) {
   // bool(image) will evaluate to false if no memory has been allocated
   any_is_empty = any_is_empty || !image;
 }
 
-inline void arg_is_empty(bool& any_is_empty, const BufferBindInfo& bind_info) {
+inline void arg_is_empty(
+    bool& any_is_empty,
+    const vkapi::BufferBindInfo& bind_info) {
   any_is_empty = any_is_empty || (bind_info.handle == VK_NULL_HANDLE);
 }
 
@@ -252,7 +256,7 @@ inline bool any_arg_is_empty(Arguments&&... arguments) {
 
 template <size_t... Indices, typename... Arguments>
 inline void bind(
-    DescriptorSet& descriptor_set,
+    vkapi::DescriptorSet& descriptor_set,
     const std::index_sequence<Indices...>&,
     Arguments&&... arguments) {
   VK_UNUSED const int _[]{
@@ -272,11 +276,11 @@ inline void bind(
  */
 template <typename... Arguments>
 inline bool Context::submit_compute_job(
-    const ShaderInfo& shader,
-    PipelineBarrier& pipeline_barrier,
+    const vkapi::ShaderInfo& shader,
+    vkapi::PipelineBarrier& pipeline_barrier,
     const utils::uvec3& global_work_group,
     const utils::uvec3& local_work_group_size,
-    const SpecVarList& specialization_constants,
+    const vkapi::SpecVarList& specialization_constants,
     VkFence fence_handle,
     const uint32_t dispatch_id,
     Arguments&&... arguments) {
@@ -315,7 +319,7 @@ inline bool Context::submit_compute_job(
       dispatch_id);
 
   // Factor out template parameter independent code to minimize code bloat.
-  DescriptorSet descriptor_set = get_descriptor_set(
+  vkapi::DescriptorSet descriptor_set = get_descriptor_set(
       shader, local_work_group_size, specialization_constants);
 
   detail::bind(
diff --git a/backends/vulkan/runtime/api/ParamsBuffer.cpp b/backends/vulkan/runtime/api/ParamsBuffer.cpp
index 28ac835e1da..bc977ff54a1 100644
--- a/backends/vulkan/runtime/api/ParamsBuffer.cpp
+++ b/backends/vulkan/runtime/api/ParamsBuffer.cpp
@@ -15,10 +15,12 @@ namespace api {
 
 namespace {
 
-void memcpy_to_buffer(const VulkanBuffer& src, VulkanBuffer& dst) {
-  MemoryMap dst_mapping(dst, MemoryAccessType::WRITE);
+void memcpy_to_buffer(
+    const vkapi::VulkanBuffer& src,
+    vkapi::VulkanBuffer& dst) {
+  vkapi::MemoryMap dst_mapping(dst, vkapi::MemoryAccessType::WRITE);
 
-  MemoryMap src_mapping(src, MemoryAccessType::READ);
+  vkapi::MemoryMap src_mapping(src, vkapi::MemoryAccessType::READ);
   src_mapping.invalidate();
 
   void* dst_ptr = dst_mapping.template data<void>();
@@ -46,7 +48,7 @@ ParamsBuffer& ParamsBuffer::operator=(const ParamsBuffer& other) {
 
     // Move vulkan_buffer_ to another VulkanBuffer for cleanup
     if (vulkan_buffer_) {
-      VulkanBuffer temp_buffer(std::move(vulkan_buffer_));
+      vkapi::VulkanBuffer temp_buffer(std::move(vulkan_buffer_));
       context_p_->register_buffer_cleanup(temp_buffer);
     }
     // vulkan_buffer_ should now be empty
diff --git a/backends/vulkan/runtime/api/ParamsBuffer.h b/backends/vulkan/runtime/api/ParamsBuffer.h
index 2cf6452efc8..2a15982dd7f 100644
--- a/backends/vulkan/runtime/api/ParamsBuffer.h
+++ b/backends/vulkan/runtime/api/ParamsBuffer.h
@@ -12,7 +12,7 @@
 
 #include <executorch/backends/vulkan/runtime/api/Context.h>
 
-#include <executorch/backends/vulkan/runtime/api/memory/Buffer.h>
+#include <executorch/backends/vulkan/runtime/api/vk_api/memory/Buffer.h>
 
 namespace vkcompute {
 namespace api {
@@ -21,7 +21,7 @@ class ParamsBuffer final {
  private:
   Context* context_p_;
   size_t nbytes_;
-  VulkanBuffer vulkan_buffer_;
+  vkapi::VulkanBuffer vulkan_buffer_;
 
  public:
   ParamsBuffer() : context_p_{nullptr}, vulkan_buffer_{} {}
@@ -45,7 +45,7 @@ class ParamsBuffer final {
     }
   }
 
-  const VulkanBuffer& buffer() const {
+  const vkapi::VulkanBuffer& buffer() const {
     return vulkan_buffer_;
   }
 
@@ -56,7 +56,7 @@ class ParamsBuffer final {
     }
     // Fill the uniform buffer with data in block
     {
-      MemoryMap mapping(vulkan_buffer_, MemoryAccessType::WRITE);
+      vkapi::MemoryMap mapping(vulkan_buffer_, vkapi::MemoryAccessType::WRITE);
       Block* data_ptr = mapping.template data<Block>();
 
       *data_ptr = block;
diff --git a/backends/vulkan/runtime/api/ShaderRegistry.cpp b/backends/vulkan/runtime/api/ShaderRegistry.cpp
index 0328182fd83..f828e561a25 100644
--- a/backends/vulkan/runtime/api/ShaderRegistry.cpp
+++ b/backends/vulkan/runtime/api/ShaderRegistry.cpp
@@ -21,7 +21,7 @@ bool ShaderRegistry::has_dispatch(const std::string& op_name) {
   return it != registry_.end();
 }
 
-void ShaderRegistry::register_shader(ShaderInfo&& shader_info) {
+void ShaderRegistry::register_shader(vkapi::ShaderInfo&& shader_info) {
   if (has_shader(shader_info.kernel_name)) {
     VK_THROW(
         "Shader with name ", shader_info.kernel_name, "already registered");
@@ -44,7 +44,7 @@ void ShaderRegistry::register_op_dispatch(
   }
 }
 
-const ShaderInfo& ShaderRegistry::get_shader_info(
+const vkapi::ShaderInfo& ShaderRegistry::get_shader_info(
     const std::string& shader_name) {
   const ShaderListing::const_iterator it = listings_.find(shader_name);
 
diff --git a/backends/vulkan/runtime/api/ShaderRegistry.h b/backends/vulkan/runtime/api/ShaderRegistry.h
index a1c17102a7f..2e324c97345 100644
--- a/backends/vulkan/runtime/api/ShaderRegistry.h
+++ b/backends/vulkan/runtime/api/ShaderRegistry.h
@@ -10,7 +10,7 @@
 
 // @lint-ignore-every CLANGTIDY facebook-hte-BadMemberName
 
-#include <executorch/backends/vulkan/runtime/api/Shader.h>
+#include <executorch/backends/vulkan/runtime/api/vk_api/Shader.h>
 
 #include <string>
 #include <unordered_map>
@@ -32,7 +32,7 @@ enum class DispatchKey : int8_t {
 };
 
 class ShaderRegistry final {
-  using ShaderListing = std::unordered_map<std::string, ShaderInfo>;
+  using ShaderListing = std::unordered_map<std::string, vkapi::ShaderInfo>;
   using Dispatcher = std::unordered_map<DispatchKey, std::string>;
   using Registry = std::unordered_map<std::string, Dispatcher>;
 
@@ -54,7 +54,7 @@ class ShaderRegistry final {
   /*
    * Register a ShaderInfo to a given shader name
    */
-  void register_shader(ShaderInfo&& shader_info);
+  void register_shader(vkapi::ShaderInfo&& shader_info);
 
   /*
    * Register a dispatch entry to the given op name
@@ -67,7 +67,7 @@ class ShaderRegistry final {
   /*
    * Given a shader name, return the ShaderInfo which contains the SPIRV binary
    */
-  const ShaderInfo& get_shader_info(const std::string& shader_name);
+  const vkapi::ShaderInfo& get_shader_info(const std::string& shader_name);
 };
 
 class ShaderRegisterInit final {
diff --git a/backends/vulkan/runtime/api/StorageBuffer.h b/backends/vulkan/runtime/api/StorageBuffer.h
index 4e8128ee7c5..c1b96db4c96 100644
--- a/backends/vulkan/runtime/api/StorageBuffer.h
+++ b/backends/vulkan/runtime/api/StorageBuffer.h
@@ -12,7 +12,7 @@
 
 #include <executorch/backends/vulkan/runtime/api/Context.h>
 
-#include <executorch/backends/vulkan/runtime/api/memory/Buffer.h>
+#include <executorch/backends/vulkan/runtime/api/vk_api/memory/Buffer.h>
 
 namespace vkcompute {
 namespace api {
@@ -20,15 +20,15 @@ namespace api {
 class StorageBuffer final {
  private:
   Context* context_p_;
-  ScalarType dtype_;
+  vkapi::ScalarType dtype_;
   size_t numel_;
   size_t nbytes_;
-  VulkanBuffer vulkan_buffer_;
+  vkapi::VulkanBuffer vulkan_buffer_;
 
  public:
   StorageBuffer(
       Context* context_p,
-      const ScalarType dtype,
+      const vkapi::ScalarType dtype,
       const size_t numel,
       const bool gpuonly = false)
       : context_p_(context_p),
@@ -49,11 +49,11 @@ class StorageBuffer final {
     context_p_->register_buffer_cleanup(vulkan_buffer_);
   }
 
-  inline ScalarType dtype() {
+  inline vkapi::ScalarType dtype() {
     return dtype_;
   }
 
-  inline VulkanBuffer& buffer() {
+  inline vkapi::VulkanBuffer& buffer() {
     return vulkan_buffer_;
   }
 
diff --git a/backends/vulkan/runtime/api/Tensor.cpp b/backends/vulkan/runtime/api/Tensor.cpp
index e74a7abaec8..194c6fee5eb 100644
--- a/backends/vulkan/runtime/api/Tensor.cpp
+++ b/backends/vulkan/runtime/api/Tensor.cpp
@@ -7,16 +7,18 @@
  */
 
 #include <executorch/backends/vulkan/runtime/api/Tensor.h>
-#include <executorch/backends/vulkan/runtime/api/VkUtils.h>
+
+#include <executorch/backends/vulkan/runtime/api/vk_api/VkUtils.h>
 
 namespace vkcompute {
 namespace api {
 
 std::vector<int64_t> calculate_strides(
     const std::vector<int64_t>& sizes,
-    const GPUMemoryLayout memory_layout,
+    const vkapi::GPUMemoryLayout memory_layout,
     const bool texel_strides) {
-  const int64_t dim_offset = to_packed_dim_nchw_offset<int64_t>(memory_layout);
+  const int64_t dim_offset =
+      vkapi::to_packed_dim_nchw_offset<int64_t>(memory_layout);
   const int64_t last_dim = sizes.size() - dim_offset;
   VK_CHECK_COND(last_dim >= 0);
 
@@ -43,7 +45,7 @@ std::vector<int64_t> calculate_strides(
 
 std::vector<int64_t> calculate_padded_sizes(
     const std::vector<int64_t>& sizes,
-    const GPUMemoryLayout memory_layout) {
+    const vkapi::GPUMemoryLayout memory_layout) {
   int64_t ndim = sizes.size();
   if (ndim == 0) {
     ndim = 1;
@@ -57,7 +59,8 @@ std::vector<int64_t> calculate_padded_sizes(
   }
 
   // Pad the packed dim to the next multiple of 4.
-  const int64_t dim_offset = to_packed_dim_nchw_offset<int64_t>(memory_layout);
+  const int64_t dim_offset =
+      vkapi::to_packed_dim_nchw_offset<int64_t>(memory_layout);
   const int64_t padded_dim_size = utils::val_at(-dim_offset, sizes);
   padded_sizes.at(ndim_up4 - dim_offset) = utils::align_up_4(padded_dim_size);
 
@@ -66,7 +69,7 @@ std::vector<int64_t> calculate_padded_sizes(
 
 utils::uvec3 calculate_image_extents(
     const std::vector<int64_t>& padded_sizes,
-    const GPUMemoryLayout memory_layout) {
+    const vkapi::GPUMemoryLayout memory_layout) {
   VK_CHECK_COND(padded_sizes.size() == 4);
 
   uint32_t N = utils::safe_downcast<uint32_t>(padded_sizes.at(0));
@@ -75,15 +78,15 @@ utils::uvec3 calculate_image_extents(
   uint32_t W = utils::safe_downcast<uint32_t>(padded_sizes.at(3));
 
   switch (memory_layout) {
-    case kWidthPacked:
+    case vkapi::kWidthPacked:
       VK_CHECK_COND(W % 4 == 0);
       W /= 4;
       break;
-    case kHeightPacked:
+    case vkapi::kHeightPacked:
       VK_CHECK_COND(H % 4 == 0);
       H /= 4;
       break;
-    case kChannelsPacked:
+    case vkapi::kChannelsPacked:
       VK_CHECK_COND(C % 4 == 0);
       C /= 4;
       break;
@@ -99,9 +102,9 @@ utils::uvec3 calculate_image_extents(
 vTensor::vTensor(
     Context* const context,
     const std::vector<int64_t>& sizes,
-    const ScalarType dtype,
-    const StorageType storage_type,
-    const GPUMemoryLayout memory_layout,
+    const vkapi::ScalarType dtype,
+    const vkapi::StorageType storage_type,
+    const vkapi::GPUMemoryLayout memory_layout,
     const bool allocate_memory)
     : dtype_(dtype),
       memory_layout_(memory_layout),
@@ -122,14 +125,14 @@ vTensor::vTensor(
           padded_sizes_,
           dtype_,
           allocate_memory) {
-  if (storage_type != kBuffer) {
+  if (storage_type != vkapi::kBuffer) {
     texture_limits_.limits = utils::ivec3{
         utils::safe_downcast<int32_t>(storage_.image_extents_.data[0]),
         utils::safe_downcast<int32_t>(storage_.image_extents_.data[1]),
         utils::safe_downcast<int32_t>(storage_.image_extents_.data[2])};
   }
 
-  if (dtype == kHalf) {
+  if (dtype == vkapi::kHalf) {
     VK_CHECK_COND(
         api::context()->adapter_ptr()->has_16bit_storage(),
         "Half dtype is only available if the physical device supports float16 "
@@ -137,74 +140,74 @@ vTensor::vTensor(
   }
 }
 
-VulkanImage& vTensor::image(
-    PipelineBarrier& pipeline_barrier,
-    const PipelineStageFlags stage) & {
-  storage_.transition(pipeline_barrier, stage, MemoryAccessType::READ);
+vkapi::VulkanImage& vTensor::image(
+    vkapi::PipelineBarrier& pipeline_barrier,
+    const vkapi::PipelineStageFlags stage) & {
+  storage_.transition(pipeline_barrier, stage, vkapi::MemoryAccessType::READ);
   return storage_.image_;
 }
 
-VulkanImage& vTensor::image(
-    PipelineBarrier& pipeline_barrier,
-    const PipelineStageFlags stage,
-    const MemoryAccessFlags access) & {
+vkapi::VulkanImage& vTensor::image(
+    vkapi::PipelineBarrier& pipeline_barrier,
+    const vkapi::PipelineStageFlags stage,
+    const vkapi::MemoryAccessFlags access) & {
   storage_.transition(pipeline_barrier, stage, access);
   return storage_.image_;
 }
 
-VulkanBuffer& vTensor::buffer(
-    PipelineBarrier& pipeline_barrier,
-    const PipelineStageFlags stage) & {
-  storage_.transition(pipeline_barrier, stage, MemoryAccessType::READ);
+vkapi::VulkanBuffer& vTensor::buffer(
+    vkapi::PipelineBarrier& pipeline_barrier,
+    const vkapi::PipelineStageFlags stage) & {
+  storage_.transition(pipeline_barrier, stage, vkapi::MemoryAccessType::READ);
   return storage_.buffer_;
 }
 
-VulkanBuffer& vTensor::buffer(
-    PipelineBarrier& pipeline_barrier,
-    const PipelineStageFlags stage,
-    const MemoryAccessFlags access) & {
+vkapi::VulkanBuffer& vTensor::buffer(
+    vkapi::PipelineBarrier& pipeline_barrier,
+    const vkapi::PipelineStageFlags stage,
+    const vkapi::MemoryAccessFlags access) & {
   storage_.transition(pipeline_barrier, stage, access);
   return storage_.buffer_;
 }
 
-const BufferBindInfo vTensor::sizes_ubo() {
+const vkapi::BufferBindInfo vTensor::sizes_ubo() {
   if (!sizes_uniform_.buffer()) {
     sizes_uniform_ =
         ParamsBuffer(storage_.context_, utils::make_whcn_ivec4(sizes_));
   }
-  return BufferBindInfo(sizes_uniform_.buffer());
+  return vkapi::BufferBindInfo(sizes_uniform_.buffer());
 }
 
-const BufferBindInfo vTensor::texture_limits_ubo() {
+const vkapi::BufferBindInfo vTensor::texture_limits_ubo() {
   if (!texture_limits_uniform_.buffer()) {
     texture_limits_uniform_ = ParamsBuffer(storage_.context_, texture_limits_);
   }
-  return BufferBindInfo(texture_limits_uniform_.buffer());
+  return vkapi::BufferBindInfo(texture_limits_uniform_.buffer());
 }
 
-const BufferBindInfo vTensor::texel_strides_ubo() {
+const vkapi::BufferBindInfo vTensor::texel_strides_ubo() {
   if (!texel_strides_uniform_.buffer()) {
     texel_strides_uniform_ = ParamsBuffer(
         storage_.context_,
         utils::make_whcn_ivec4(
             calculate_strides(padded_sizes_, memory_layout_)));
   }
-  return BufferBindInfo(texel_strides_uniform_.buffer());
+  return vkapi::BufferBindInfo(texel_strides_uniform_.buffer());
 }
 
-const BufferBindInfo vTensor::ntexels_ubo() {
+const vkapi::BufferBindInfo vTensor::ntexels_ubo() {
   if (!ntexels_uniform_.buffer()) {
     ntexels_uniform_ = ParamsBuffer(storage_.context_, texel_numel());
   }
-  return BufferBindInfo(ntexels_uniform_.buffer());
+  return vkapi::BufferBindInfo(ntexels_uniform_.buffer());
 }
 
 VmaAllocationCreateInfo vTensor::get_allocation_create_info() const {
   switch (storage_type()) {
-    case kBuffer:
+    case vkapi::kBuffer:
       return storage_.buffer_.allocation_create_info();
-    case kTexture2D:
-    case kTexture3D:
+    case vkapi::kTexture2D:
+    case vkapi::kTexture3D:
       return storage_.image_.allocation_create_info();
   }
   return {};
@@ -212,22 +215,22 @@ VmaAllocationCreateInfo vTensor::get_allocation_create_info() const {
 
 VkMemoryRequirements vTensor::get_memory_requirements() const {
   switch (storage_type()) {
-    case kBuffer:
+    case vkapi::kBuffer:
       return storage_.buffer_.get_memory_requirements();
-    case kTexture2D:
-    case kTexture3D:
+    case vkapi::kTexture2D:
+    case vkapi::kTexture3D:
       return storage_.image_.get_memory_requirements();
   }
   return {};
 }
 
-void vTensor::bind_allocation(const Allocation& allocation) {
+void vTensor::bind_allocation(const vkapi::Allocation& allocation) {
   switch (storage_type()) {
-    case kBuffer:
+    case vkapi::kBuffer:
       storage_.buffer_.bind_allocation(allocation);
       break;
-    case kTexture2D:
-    case kTexture3D:
+    case vkapi::kTexture2D:
+    case vkapi::kTexture3D:
       storage_.image_.bind_allocation(allocation);
       break;
   }
@@ -272,7 +275,7 @@ void vTensor::reallocate(const std::vector<int64_t>& new_sizes) {
 }
 
 void vTensor::virtual_resize(const std::vector<int64_t>& new_sizes) {
-  if (storage_type() != kBuffer) {
+  if (storage_type() != vkapi::kBuffer) {
     // For texture storage check that the current texture is large enough for
     // the new sizes of the tensor.
     utils::uvec3 virtual_extents =
@@ -296,15 +299,15 @@ void vTensor::virtual_resize(const std::vector<int64_t>& new_sizes) {
 // vTensorStorage
 //
 
-VulkanImage allocate_image(
+vkapi::VulkanImage allocate_image(
     Context* const context_ptr,
     utils::uvec3& image_extents,
-    const StorageType storage_type,
+    const vkapi::StorageType storage_type,
     const VkFormat image_format,
     const bool allocate_memory) {
-  Adapter* adapter_ptr = context_ptr->adapter_ptr();
+  vkapi::Adapter* adapter_ptr = context_ptr->adapter_ptr();
 
-  ImageSampler::Properties sampler_props{
+  vkapi::ImageSampler::Properties sampler_props{
       VK_FILTER_NEAREST,
       VK_SAMPLER_MIPMAP_MODE_NEAREST,
       VK_SAMPLER_ADDRESS_MODE_REPEAT,
@@ -315,23 +318,23 @@ VulkanImage allocate_image(
   VkImageViewType image_view_type;
 
   switch (storage_type) {
-    case kTexture3D:
+    case vkapi::kTexture3D:
       image_type = VK_IMAGE_TYPE_3D;
       image_view_type = VK_IMAGE_VIEW_TYPE_3D;
       break;
-    case kTexture2D:
+    case vkapi::kTexture2D:
       image_type = VK_IMAGE_TYPE_2D;
       image_view_type = VK_IMAGE_VIEW_TYPE_2D;
       break;
     default:
       // Return an empty VulkanImage by default
-      return VulkanImage();
+      return vkapi::VulkanImage();
   }
 
   VkSampler sampler = adapter_ptr->sampler_cache().retrieve(sampler_props);
 
   return adapter_ptr->vma().create_image(
-      create_extent3d(image_extents),
+      vkapi::create_extent3d(image_extents),
       image_format,
       image_type,
       image_view_type,
@@ -341,20 +344,20 @@ VulkanImage allocate_image(
       /*allocate_memory = */ allocate_memory);
 }
 
-VulkanBuffer allocate_buffer(
+vkapi::VulkanBuffer allocate_buffer(
     Context* const context_ptr,
     const int64_t numel,
-    const StorageType storage_type,
-    const ScalarType dtype,
+    const vkapi::StorageType storage_type,
+    const vkapi::ScalarType dtype,
     const bool allocate_memory) {
-  Adapter* adapter_ptr = context_ptr->adapter_ptr();
+  vkapi::Adapter* adapter_ptr = context_ptr->adapter_ptr();
 
   switch (storage_type) {
-    case kBuffer:
+    case vkapi::kBuffer:
       break;
     default:
       // Return an empty VulkanBuffer if Buffer storage is not used
-      return VulkanBuffer();
+      return vkapi::VulkanBuffer();
   }
 
   return adapter_ptr->vma().create_storage_buffer(
@@ -363,10 +366,10 @@ VulkanBuffer allocate_buffer(
 
 vTensorStorage::vTensorStorage(
     Context* const context,
-    const StorageType storage_type,
-    const GPUMemoryLayout gpu_memory_layout,
+    const vkapi::StorageType storage_type,
+    const vkapi::GPUMemoryLayout gpu_memory_layout,
     const std::vector<int64_t>& padded_sizes,
-    const ScalarType dtype,
+    const vkapi::ScalarType dtype,
     const bool allocate_memory)
     : context_(context),
       storage_type_{storage_type},
@@ -400,31 +403,31 @@ void vTensorStorage::flush() {
 }
 
 void vTensorStorage::transition(
-    PipelineBarrier& pipeline_barrier,
-    const PipelineStageFlags cur_stage,
-    const MemoryAccessFlags cur_access) {
+    vkapi::PipelineBarrier& pipeline_barrier,
+    const vkapi::PipelineStageFlags cur_stage,
+    const vkapi::MemoryAccessFlags cur_access) {
   // Get last stage access
-  PipelineStageFlags prev_stage = last_access_.stage;
-  MemoryAccessFlags prev_access = last_access_.access;
+  vkapi::PipelineStageFlags prev_stage = last_access_.stage;
+  vkapi::MemoryAccessFlags prev_access = last_access_.access;
 
-  const bool prev_written = (prev_access & MemoryAccessType::WRITE) != 0;
+  const bool prev_written = (prev_access & vkapi::MemoryAccessType::WRITE) != 0;
 
   VkImageLayout cur_layout = VK_IMAGE_LAYOUT_UNDEFINED;
   VkImageLayout new_layout = VK_IMAGE_LAYOUT_UNDEFINED;
   bool layout_changed = false;
   if (image_) {
     cur_layout = image_.layout();
-    new_layout = vk_layout(cur_stage, cur_access);
+    new_layout = vkapi::vk_layout(cur_stage, cur_access);
 
     layout_changed = cur_layout != new_layout;
   }
 
   if (prev_written || layout_changed) {
-    VkPipelineStageFlags src_stage = vk_stage(prev_stage);
+    VkPipelineStageFlags src_stage = vkapi::vk_stage(prev_stage);
     if (0u == src_stage) {
       src_stage = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT;
     }
-    VkPipelineStageFlags dst_stage = vk_stage(cur_stage);
+    VkPipelineStageFlags dst_stage = vkapi::vk_stage(cur_stage);
     if (0u == dst_stage) {
       dst_stage = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT;
     }
@@ -434,8 +437,8 @@ void vTensorStorage::transition(
 
     if (image_) {
       pipeline_barrier.images.emplace_back(
-          vk_access(prev_stage, prev_access),
-          vk_access(cur_stage, cur_access),
+          vkapi::vk_access(prev_stage, prev_access),
+          vkapi::vk_access(cur_stage, cur_access),
           cur_layout,
           new_layout,
           image_);
@@ -443,8 +446,8 @@ void vTensorStorage::transition(
       image_.set_layout(new_layout);
     } else if (buffer_) {
       pipeline_barrier.buffers.emplace_back(
-          vk_access(prev_stage, prev_access),
-          vk_access(cur_stage, cur_access),
+          vkapi::vk_access(prev_stage, prev_access),
+          vkapi::vk_access(cur_stage, cur_access),
           buffer_);
     }
   }
@@ -455,8 +458,8 @@ void vTensorStorage::transition(
 
 void vTensorStorage::discard_and_reallocate(
     const std::vector<int64_t>& padded_sizes,
-    const GPUMemoryLayout gpu_memory_layout,
-    const ScalarType dtype) {
+    const vkapi::GPUMemoryLayout gpu_memory_layout,
+    const vkapi::ScalarType dtype) {
   const bool image_owns_memory = image_.owns_memory();
   const bool buffer_owns_memory = buffer_.owns_memory();
 
diff --git a/backends/vulkan/runtime/api/Tensor.h b/backends/vulkan/runtime/api/Tensor.h
index 327216205b5..18c9cde936b 100644
--- a/backends/vulkan/runtime/api/Tensor.h
+++ b/backends/vulkan/runtime/api/Tensor.h
@@ -12,7 +12,8 @@
 
 #include <executorch/backends/vulkan/runtime/api/Context.h>
 #include <executorch/backends/vulkan/runtime/api/ParamsBuffer.h>
-#include <executorch/backends/vulkan/runtime/api/Types.h>
+
+#include <executorch/backends/vulkan/runtime/api/vk_api/Types.h>
 
 namespace vkcompute {
 namespace api {
@@ -30,7 +31,7 @@ namespace api {
  */
 std::vector<int64_t> calculate_strides(
     const std::vector<int64_t>& sizes,
-    const GPUMemoryLayout memory_layout,
+    const vkapi::GPUMemoryLayout memory_layout,
     const bool texel_strides = true);
 
 /*
@@ -45,11 +46,12 @@ std::vector<int64_t> calculate_strides(
  *   1. The dimensionality of the tensor will be padded to a multiple of 4.
  *   2. The size of the packed dimension will be padded to a multiple of 4.
  *
- * The "packed dimension" is determined based on the GPUMemoryLayout argument.
+ * The "packed dimension" is determined based on the vkapi::GPUMemoryLayout
+ * argument.
  */
 std::vector<int64_t> calculate_padded_sizes(
     const std::vector<int64_t>& sizes,
-    const GPUMemoryLayout memory_layout);
+    const vkapi::GPUMemoryLayout memory_layout);
 
 /*
  * Given the padded sizes of a tensor and the GPU memory layout, calculate the
@@ -57,16 +59,19 @@ std::vector<int64_t> calculate_padded_sizes(
  */
 utils::uvec3 calculate_image_extents(
     const std::vector<int64_t>& padded_sizes,
-    const GPUMemoryLayout memory_layout);
+    const vkapi::GPUMemoryLayout memory_layout);
 
 struct LastAccess {
-  PipelineStageFlags stage;
-  MemoryAccessFlags access;
+  vkapi::PipelineStageFlags stage;
+  vkapi::MemoryAccessFlags access;
 
   LastAccess()
-      : stage{PipelineStage::NO_STAGE}, access{MemoryAccessType::NONE} {}
+      : stage{vkapi::PipelineStage::NO_STAGE},
+        access{vkapi::MemoryAccessType::NONE} {}
 
-  LastAccess(PipelineStageFlags stage_flags, MemoryAccessFlags access_flags)
+  LastAccess(
+      vkapi::PipelineStageFlags stage_flags,
+      vkapi::MemoryAccessFlags access_flags)
       : stage{stage_flags}, access{access_flags} {}
 };
 
@@ -77,10 +82,10 @@ class vTensorStorage final {
 
   vTensorStorage(
       Context* context,
-      const StorageType storage_type,
-      const GPUMemoryLayout gpu_memory_layout,
+      const vkapi::StorageType storage_type,
+      const vkapi::GPUMemoryLayout gpu_memory_layout,
       const std::vector<int64_t>& sizes,
-      const ScalarType dtype,
+      const vkapi::ScalarType dtype,
       const bool allocate_memory = true);
 
   vTensorStorage(const vTensorStorage& other) = delete;
@@ -97,15 +102,15 @@ class vTensorStorage final {
   // Context
   Context* context_{};
 
-  StorageType storage_type_;
+  vkapi::StorageType storage_type_;
 
   // Resource sizings
   utils::uvec3 image_extents_{};
   int64_t buffer_length_{};
 
   // GPU Storage
-  mutable VulkanImage image_;
-  mutable VulkanBuffer buffer_;
+  mutable vkapi::VulkanImage image_;
+  mutable vkapi::VulkanBuffer buffer_;
 
   // Last Access - used to insert memory barriers
   LastAccess last_access_;
@@ -116,9 +121,9 @@ class vTensorStorage final {
 
   // Memory barrier insertion
   void transition(
-      PipelineBarrier&,
-      const PipelineStageFlags,
-      const MemoryAccessFlags);
+      vkapi::PipelineBarrier&,
+      const vkapi::PipelineStageFlags,
+      const vkapi::MemoryAccessFlags);
 
   // Validation
   void verify() const;
@@ -130,8 +135,8 @@ class vTensorStorage final {
 
   void discard_and_reallocate(
       const std::vector<int64_t>& padded_sizes,
-      const GPUMemoryLayout gpu_memory_layout,
-      const ScalarType dtype);
+      const vkapi::GPUMemoryLayout gpu_memory_layout,
+      const vkapi::ScalarType dtype);
 };
 
 class vTensor final {
@@ -146,9 +151,9 @@ class vTensor final {
   explicit vTensor(
       Context* context,
       const std::vector<int64_t>& sizes,
-      const ScalarType dtype,
-      const StorageType storage_type = kTexture3D,
-      const GPUMemoryLayout memory_layout = kChannelsPacked,
+      const vkapi::ScalarType dtype,
+      const vkapi::StorageType storage_type = vkapi::kTexture3D,
+      const vkapi::GPUMemoryLayout memory_layout = vkapi::kChannelsPacked,
       const bool allocate_memory = true);
 
   vTensor(const vTensor& other) = delete;
@@ -158,8 +163,8 @@ class vTensor final {
   vTensor& operator=(vTensor&& other) = default;
 
  private:
-  ScalarType dtype_;
-  GPUMemoryLayout memory_layout_;
+  vkapi::ScalarType dtype_;
+  vkapi::GPUMemoryLayout memory_layout_;
 
   // sizes of the tensor in NCHW dimension order
   std::vector<int64_t> sizes_;
@@ -191,34 +196,42 @@ class vTensor final {
    Texture Access
   */
 
-  inline VulkanImage& image() const& {
+  inline vkapi::VulkanImage& image() const& {
     return storage_.image_;
   }
 
-  VulkanImage& image(PipelineBarrier&, const PipelineStageFlags) &;
+  vkapi::VulkanImage& image(
+      vkapi::PipelineBarrier&,
+      const vkapi::PipelineStageFlags) &;
 
-  VulkanImage&
-  image(PipelineBarrier&, const PipelineStageFlags, const MemoryAccessFlags) &;
+  vkapi::VulkanImage& image(
+      vkapi::PipelineBarrier&,
+      const vkapi::PipelineStageFlags,
+      const vkapi::MemoryAccessFlags) &;
 
-  inline VulkanBuffer& buffer() const& {
+  inline vkapi::VulkanBuffer& buffer() const& {
     return storage_.buffer_;
   }
 
-  VulkanBuffer& buffer(PipelineBarrier&, const PipelineStageFlags) &;
+  vkapi::VulkanBuffer& buffer(
+      vkapi::PipelineBarrier&,
+      const vkapi::PipelineStageFlags) &;
 
-  VulkanBuffer&
-  buffer(PipelineBarrier&, const PipelineStageFlags, const MemoryAccessFlags) &;
+  vkapi::VulkanBuffer& buffer(
+      vkapi::PipelineBarrier&,
+      const vkapi::PipelineStageFlags,
+      const vkapi::MemoryAccessFlags) &;
 
   /*
     Metadata
   */
 
-  inline StorageType storage_type() const {
+  inline vkapi::StorageType storage_type() const {
     return storage_.storage_type_;
   }
 
   inline bool has_buffer_storage() const {
-    return storage_.storage_type_ == kBuffer;
+    return storage_.storage_type_ == vkapi::kBuffer;
   }
 
   inline const utils::uvec3& image_extents() const {
@@ -226,13 +239,13 @@ class vTensor final {
   }
 
   /*
-   * Extract an `ScalarType` from the TensorOptions member
+   * Extract an `vkapi::ScalarType` from the TensorOptions member
    */
-  inline ScalarType dtype() const {
+  inline vkapi::ScalarType dtype() const {
     return dtype_;
   }
 
-  inline GPUMemoryLayout gpu_memory_layout() const {
+  inline vkapi::GPUMemoryLayout gpu_memory_layout() const {
     return memory_layout_;
   }
 
@@ -257,7 +270,7 @@ class vTensor final {
    * Note that dimensions that are not present in the tensor's sizes are set to
    * a size of 1.
    */
-  const BufferBindInfo sizes_ubo();
+  const vkapi::BufferBindInfo sizes_ubo();
 
   /*
    * Returns a GPU buffer containing the virtual image extents of the tensor.
@@ -268,18 +281,18 @@ class vTensor final {
    *
    * This buffer should only be used to
    */
-  const BufferBindInfo texture_limits_ubo();
+  const vkapi::BufferBindInfo texture_limits_ubo();
 
   /*
    * Returns the strides of the texel buffer used to store the tensor, as
    * calculated by calculate_strides().
    */
-  const BufferBindInfo texel_strides_ubo();
+  const vkapi::BufferBindInfo texel_strides_ubo();
 
   /*
    * Returns the number of texels in the texel buffer used to store the tensor.
    */
-  const BufferBindInfo ntexels_ubo();
+  const vkapi::BufferBindInfo ntexels_ubo();
 
   inline const utils::ivec3 texture_limits() const {
     return texture_limits_.limits;
@@ -328,7 +341,7 @@ class vTensor final {
   /*
    * Binds the underlying resource to the given memory allocation
    */
-  void bind_allocation(const Allocation& allocation);
+  void bind_allocation(const vkapi::Allocation& allocation);
 
  private:
   /*
diff --git a/backends/vulkan/runtime/api/api.h b/backends/vulkan/runtime/api/api.h
index f5d4771976b..74b8355162c 100644
--- a/backends/vulkan/runtime/api/api.h
+++ b/backends/vulkan/runtime/api/api.h
@@ -8,22 +8,23 @@
 
 #pragma once
 
-#include <executorch/backends/vulkan/runtime/api/Adapter.h>
-#include <executorch/backends/vulkan/runtime/api/Command.h>
 #include <executorch/backends/vulkan/runtime/api/Context.h>
-#include <executorch/backends/vulkan/runtime/api/Descriptor.h>
-#include <executorch/backends/vulkan/runtime/api/Fence.h>
 #include <executorch/backends/vulkan/runtime/api/ParamsBuffer.h>
-#include <executorch/backends/vulkan/runtime/api/Pipeline.h>
-#include <executorch/backends/vulkan/runtime/api/Runtime.h>
-#include <executorch/backends/vulkan/runtime/api/Shader.h>
 #include <executorch/backends/vulkan/runtime/api/ShaderRegistry.h>
 #include <executorch/backends/vulkan/runtime/api/StorageBuffer.h>
 #include <executorch/backends/vulkan/runtime/api/Tensor.h>
 
 #include <executorch/backends/vulkan/runtime/api/utils/VecUtils.h>
 
-#include <executorch/backends/vulkan/runtime/api/memory/Allocation.h>
-#include <executorch/backends/vulkan/runtime/api/memory/Allocator.h>
-#include <executorch/backends/vulkan/runtime/api/memory/Buffer.h>
-#include <executorch/backends/vulkan/runtime/api/memory/Image.h>
+#include <executorch/backends/vulkan/runtime/api/vk_api/Adapter.h>
+#include <executorch/backends/vulkan/runtime/api/vk_api/Command.h>
+#include <executorch/backends/vulkan/runtime/api/vk_api/Descriptor.h>
+#include <executorch/backends/vulkan/runtime/api/vk_api/Fence.h>
+#include <executorch/backends/vulkan/runtime/api/vk_api/Pipeline.h>
+#include <executorch/backends/vulkan/runtime/api/vk_api/Runtime.h>
+#include <executorch/backends/vulkan/runtime/api/vk_api/Shader.h>
+
+#include <executorch/backends/vulkan/runtime/api/vk_api/memory/Allocation.h>
+#include <executorch/backends/vulkan/runtime/api/vk_api/memory/Allocator.h>
+#include <executorch/backends/vulkan/runtime/api/vk_api/memory/Buffer.h>
+#include <executorch/backends/vulkan/runtime/api/vk_api/memory/Image.h>
diff --git a/backends/vulkan/runtime/api/utils/VecUtils.h b/backends/vulkan/runtime/api/utils/VecUtils.h
index 0d5598490bf..7bbd0ba61ec 100644
--- a/backends/vulkan/runtime/api/utils/VecUtils.h
+++ b/backends/vulkan/runtime/api/utils/VecUtils.h
@@ -13,9 +13,9 @@
 #include <numeric>
 #include <type_traits>
 
-#include <executorch/backends/vulkan/runtime/api/vk_api.h>
+#include <executorch/backends/vulkan/runtime/api/vk_api/vk_api.h>
 
-#include <executorch/backends/vulkan/runtime/api/Exception.h>
+#include <executorch/backends/vulkan/runtime/api/vk_api/Exception.h>
 
 namespace vkcompute {
 namespace utils {
diff --git a/backends/vulkan/runtime/api/Adapter.cpp b/backends/vulkan/runtime/api/vk_api/Adapter.cpp
similarity index 99%
rename from backends/vulkan/runtime/api/Adapter.cpp
rename to backends/vulkan/runtime/api/vk_api/Adapter.cpp
index 96591064559..741a62c7a26 100644
--- a/backends/vulkan/runtime/api/Adapter.cpp
+++ b/backends/vulkan/runtime/api/vk_api/Adapter.cpp
@@ -8,7 +8,7 @@
 
 // @lint-ignore-every CLANGTIDY clang-diagnostic-missing-field-initializers
 
-#include <executorch/backends/vulkan/runtime/api/Adapter.h>
+#include <executorch/backends/vulkan/runtime/api/vk_api/Adapter.h>
 
 #include <bitset>
 #include <cstring>
@@ -17,7 +17,7 @@
 #include <utility>
 
 namespace vkcompute {
-namespace api {
+namespace vkapi {
 
 PhysicalDevice::PhysicalDevice(VkPhysicalDevice physical_device_handle)
     : handle(physical_device_handle),
@@ -470,5 +470,5 @@ std::ostream& operator<<(std::ostream& os, const Adapter& adapter) {
   return os;
 }
 
-} // namespace api
+} // namespace vkapi
 } // namespace vkcompute
diff --git a/backends/vulkan/runtime/api/Adapter.h b/backends/vulkan/runtime/api/vk_api/Adapter.h
similarity index 95%
rename from backends/vulkan/runtime/api/Adapter.h
rename to backends/vulkan/runtime/api/vk_api/Adapter.h
index 7d64cb7bcfd..3bd56543b5e 100644
--- a/backends/vulkan/runtime/api/Adapter.h
+++ b/backends/vulkan/runtime/api/vk_api/Adapter.h
@@ -10,20 +10,21 @@
 
 // @lint-ignore-every CLANGTIDY facebook-hte-BadMemberName
 
-#include <executorch/backends/vulkan/runtime/api/vk_api.h>
+#include <executorch/backends/vulkan/runtime/api/vk_api/vk_api.h>
 
-#include <executorch/backends/vulkan/runtime/api/Pipeline.h>
-#include <executorch/backends/vulkan/runtime/api/Shader.h>
 #include <executorch/backends/vulkan/runtime/api/utils/VecUtils.h>
 
-#include <executorch/backends/vulkan/runtime/api/memory/Allocator.h>
+#include <executorch/backends/vulkan/runtime/api/vk_api/Pipeline.h>
+#include <executorch/backends/vulkan/runtime/api/vk_api/Shader.h>
+
+#include <executorch/backends/vulkan/runtime/api/vk_api/memory/Allocator.h>
 
 #include <array>
 #include <mutex>
 #include <ostream>
 
 namespace vkcompute {
-namespace api {
+namespace vkapi {
 
 struct PhysicalDevice final {
   // Handle
@@ -238,5 +239,5 @@ class Adapter final {
   friend std::ostream& operator<<(std::ostream&, const Adapter&);
 };
 
-} // namespace api
+} // namespace vkapi
 } // namespace vkcompute
diff --git a/backends/vulkan/runtime/api/Command.cpp b/backends/vulkan/runtime/api/vk_api/Command.cpp
similarity index 98%
rename from backends/vulkan/runtime/api/Command.cpp
rename to backends/vulkan/runtime/api/vk_api/Command.cpp
index 47b640417f5..05746d33141 100644
--- a/backends/vulkan/runtime/api/Command.cpp
+++ b/backends/vulkan/runtime/api/vk_api/Command.cpp
@@ -6,13 +6,13 @@
  * LICENSE file in the root directory of this source tree.
  */
 
-#include <executorch/backends/vulkan/runtime/api/Adapter.h>
-#include <executorch/backends/vulkan/runtime/api/Command.h>
+#include <executorch/backends/vulkan/runtime/api/vk_api/Adapter.h>
+#include <executorch/backends/vulkan/runtime/api/vk_api/Command.h>
 
 #include <mutex>
 
 namespace vkcompute {
-namespace api {
+namespace vkapi {
 
 //
 // CommandBuffer
@@ -299,5 +299,5 @@ void CommandPool::allocate_new_batch(const uint32_t count) {
       device_, &allocate_info, buffers_.data() + in_use_));
 }
 
-} // namespace api
+} // namespace vkapi
 } // namespace vkcompute
diff --git a/backends/vulkan/runtime/api/Command.h b/backends/vulkan/runtime/api/vk_api/Command.h
similarity index 88%
rename from backends/vulkan/runtime/api/Command.h
rename to backends/vulkan/runtime/api/vk_api/Command.h
index 1d60afcfa5f..7f9f399e372 100644
--- a/backends/vulkan/runtime/api/Command.h
+++ b/backends/vulkan/runtime/api/vk_api/Command.h
@@ -10,18 +10,19 @@
 
 // @lint-ignore-every CLANGTIDY facebook-hte-BadMemberName
 
-#include <executorch/backends/vulkan/runtime/api/vk_api.h>
+#include <executorch/backends/vulkan/runtime/api/vk_api/vk_api.h>
 
-#include <executorch/backends/vulkan/runtime/api/Descriptor.h>
-#include <executorch/backends/vulkan/runtime/api/Pipeline.h>
-#include <executorch/backends/vulkan/runtime/api/Shader.h>
 #include <executorch/backends/vulkan/runtime/api/utils/VecUtils.h>
 
-#include <executorch/backends/vulkan/runtime/api/memory/Buffer.h>
-#include <executorch/backends/vulkan/runtime/api/memory/Image.h>
+#include <executorch/backends/vulkan/runtime/api/vk_api/Descriptor.h>
+#include <executorch/backends/vulkan/runtime/api/vk_api/Pipeline.h>
+#include <executorch/backends/vulkan/runtime/api/vk_api/Shader.h>
+
+#include <executorch/backends/vulkan/runtime/api/vk_api/memory/Buffer.h>
+#include <executorch/backends/vulkan/runtime/api/vk_api/memory/Image.h>
 
 namespace vkcompute {
-namespace api {
+namespace vkapi {
 
 class CommandBuffer final {
  public:
@@ -138,5 +139,5 @@ class CommandPool final {
   void allocate_new_batch(const uint32_t);
 };
 
-} // namespace api
+} // namespace vkapi
 } // namespace vkcompute
diff --git a/backends/vulkan/runtime/api/Descriptor.cpp b/backends/vulkan/runtime/api/vk_api/Descriptor.cpp
similarity index 98%
rename from backends/vulkan/runtime/api/Descriptor.cpp
rename to backends/vulkan/runtime/api/vk_api/Descriptor.cpp
index 82204b5f329..0e6e294c8cb 100644
--- a/backends/vulkan/runtime/api/Descriptor.cpp
+++ b/backends/vulkan/runtime/api/vk_api/Descriptor.cpp
@@ -6,7 +6,7 @@
  * LICENSE file in the root directory of this source tree.
  */
 
-#include <executorch/backends/vulkan/runtime/api/Descriptor.h>
+#include <executorch/backends/vulkan/runtime/api/vk_api/Descriptor.h>
 
 #include <executorch/backends/vulkan/runtime/api/utils/VecUtils.h>
 
@@ -14,7 +14,7 @@
 #include <utility>
 
 namespace vkcompute {
-namespace api {
+namespace vkapi {
 
 //
 // BufferBindInfo
@@ -330,5 +330,5 @@ void DescriptorPool::flush() {
   }
 }
 
-} // namespace api
+} // namespace vkapi
 } // namespace vkcompute
diff --git a/backends/vulkan/runtime/api/Descriptor.h b/backends/vulkan/runtime/api/vk_api/Descriptor.h
similarity index 92%
rename from backends/vulkan/runtime/api/Descriptor.h
rename to backends/vulkan/runtime/api/vk_api/Descriptor.h
index 9b872780e00..13fff7d37dd 100644
--- a/backends/vulkan/runtime/api/Descriptor.h
+++ b/backends/vulkan/runtime/api/vk_api/Descriptor.h
@@ -10,17 +10,17 @@
 
 // @lint-ignore-every CLANGTIDY facebook-hte-BadMemberName
 
-#include <executorch/backends/vulkan/runtime/api/vk_api.h>
+#include <executorch/backends/vulkan/runtime/api/vk_api/vk_api.h>
 
-#include <executorch/backends/vulkan/runtime/api/Shader.h>
+#include <executorch/backends/vulkan/runtime/api/vk_api/Shader.h>
 
-#include <executorch/backends/vulkan/runtime/api/memory/Buffer.h>
-#include <executorch/backends/vulkan/runtime/api/memory/Image.h>
+#include <executorch/backends/vulkan/runtime/api/vk_api/memory/Buffer.h>
+#include <executorch/backends/vulkan/runtime/api/vk_api/memory/Image.h>
 
 #include <unordered_map>
 
 namespace vkcompute {
-namespace api {
+namespace vkapi {
 
 /*
  * Stores the binding information of a Vulkan Buffer so that the buffer can be
@@ -161,5 +161,5 @@ class DescriptorPool final {
   void flush();
 };
 
-} // namespace api
+} // namespace vkapi
 } // namespace vkcompute
diff --git a/backends/vulkan/runtime/api/Exception.cpp b/backends/vulkan/runtime/api/vk_api/Exception.cpp
similarity index 95%
rename from backends/vulkan/runtime/api/Exception.cpp
rename to backends/vulkan/runtime/api/vk_api/Exception.cpp
index 2aa1a14809a..a85a17a0ed6 100644
--- a/backends/vulkan/runtime/api/Exception.cpp
+++ b/backends/vulkan/runtime/api/vk_api/Exception.cpp
@@ -6,12 +6,12 @@
  * LICENSE file in the root directory of this source tree.
  */
 
-#include <executorch/backends/vulkan/runtime/api/Exception.h>
+#include <executorch/backends/vulkan/runtime/api/vk_api/Exception.h>
 
 #include <sstream>
 
 namespace vkcompute {
-namespace api {
+namespace vkapi {
 
 #define VK_RESULT_CASE(code) \
   case code:                 \
@@ -77,5 +77,5 @@ Error::Error(SourceLocation source_location, const char* cond, std::string msg)
   what_ = oss.str();
 }
 
-} // namespace api
+} // namespace vkapi
 } // namespace vkcompute
diff --git a/backends/vulkan/runtime/api/Exception.h b/backends/vulkan/runtime/api/vk_api/Exception.h
similarity index 89%
rename from backends/vulkan/runtime/api/Exception.h
rename to backends/vulkan/runtime/api/vk_api/Exception.h
index eb3f6c46994..b164194e758 100644
--- a/backends/vulkan/runtime/api/Exception.h
+++ b/backends/vulkan/runtime/api/vk_api/Exception.h
@@ -9,7 +9,7 @@
 #pragma once
 // @lint-ignore-every CLANGTIDY facebook-hte-BadMemberName
 
-#include <executorch/backends/vulkan/runtime/api/vk_api.h>
+#include <executorch/backends/vulkan/runtime/api/vk_api/vk_api.h>
 
 #include <executorch/backends/vulkan/runtime/api/utils/StringUtils.h>
 
@@ -22,7 +22,7 @@
   do {                                                                      \
     const VkResult result = (function);                                     \
     if (VK_SUCCESS != result) {                                             \
-      throw ::vkcompute::api::Error(                                        \
+      throw ::vkcompute::vkapi::Error(                                      \
           {__func__, __FILE__, static_cast<uint32_t>(__LINE__)},            \
           ::vkcompute::utils::concat_str(#function, " returned ", result)); \
     }                                                                       \
@@ -31,7 +31,7 @@
 #define VK_CHECK_COND(cond, ...)                                 \
   do {                                                           \
     if (!(cond)) {                                               \
-      throw ::vkcompute::api::Error(                             \
+      throw ::vkcompute::vkapi::Error(                           \
           {__func__, __FILE__, static_cast<uint32_t>(__LINE__)}, \
           #cond,                                                 \
           ::vkcompute::utils::concat_str(__VA_ARGS__));          \
@@ -40,13 +40,13 @@
 
 #define VK_THROW(...)                                          \
   do {                                                         \
-    throw ::vkcompute::api::Error(                             \
+    throw ::vkcompute::vkapi::Error(                           \
         {__func__, __FILE__, static_cast<uint32_t>(__LINE__)}, \
         ::vkcompute::utils::concat_str(__VA_ARGS__));          \
   } while (false)
 
 namespace vkcompute {
-namespace api {
+namespace vkapi {
 
 std::ostream& operator<<(std::ostream& out, const VkResult loc);
 
@@ -78,5 +78,5 @@ class Error : public std::exception {
   }
 };
 
-} // namespace api
+} // namespace vkapi
 } // namespace vkcompute
diff --git a/backends/vulkan/runtime/api/Fence.cpp b/backends/vulkan/runtime/api/vk_api/Fence.cpp
similarity index 95%
rename from backends/vulkan/runtime/api/Fence.cpp
rename to backends/vulkan/runtime/api/vk_api/Fence.cpp
index 6253a5e13e1..2e2ab0f53ac 100644
--- a/backends/vulkan/runtime/api/Fence.cpp
+++ b/backends/vulkan/runtime/api/vk_api/Fence.cpp
@@ -6,10 +6,10 @@
  * LICENSE file in the root directory of this source tree.
  */
 
-#include <executorch/backends/vulkan/runtime/api/Fence.h>
+#include <executorch/backends/vulkan/runtime/api/vk_api/Fence.h>
 
 namespace vkcompute {
-namespace api {
+namespace vkapi {
 
 VulkanFence::VulkanFence()
     : device_(VK_NULL_HANDLE), handle_(VK_NULL_HANDLE), waiting_(false) {}
@@ -72,5 +72,5 @@ void VulkanFence::wait() {
   }
 }
 
-} // namespace api
+} // namespace vkapi
 } // namespace vkcompute
diff --git a/backends/vulkan/runtime/api/Fence.h b/backends/vulkan/runtime/api/vk_api/Fence.h
similarity index 92%
rename from backends/vulkan/runtime/api/Fence.h
rename to backends/vulkan/runtime/api/vk_api/Fence.h
index 613a24aaec5..a505541b358 100644
--- a/backends/vulkan/runtime/api/Fence.h
+++ b/backends/vulkan/runtime/api/vk_api/Fence.h
@@ -10,14 +10,14 @@
 
 // @lint-ignore-every CLANGTIDY facebook-hte-BadMemberName
 
-#include <executorch/backends/vulkan/runtime/api/vk_api.h>
+#include <executorch/backends/vulkan/runtime/api/vk_api/vk_api.h>
 
-#include <executorch/backends/vulkan/runtime/api/Exception.h>
+#include <executorch/backends/vulkan/runtime/api/vk_api/Exception.h>
 
 #include <stack>
 
 namespace vkcompute {
-namespace api {
+namespace vkapi {
 
 class VulkanFence final {
  public:
@@ -94,5 +94,5 @@ struct FencePool final {
   }
 };
 
-} // namespace api
+} // namespace vkapi
 } // namespace vkcompute
diff --git a/backends/vulkan/runtime/api/Pipeline.cpp b/backends/vulkan/runtime/api/vk_api/Pipeline.cpp
similarity index 99%
rename from backends/vulkan/runtime/api/Pipeline.cpp
rename to backends/vulkan/runtime/api/vk_api/Pipeline.cpp
index 06b22a98283..17a2f6988de 100644
--- a/backends/vulkan/runtime/api/Pipeline.cpp
+++ b/backends/vulkan/runtime/api/vk_api/Pipeline.cpp
@@ -6,12 +6,12 @@
  * LICENSE file in the root directory of this source tree.
  */
 
-#include <executorch/backends/vulkan/runtime/api/Pipeline.h>
+#include <executorch/backends/vulkan/runtime/api/vk_api/Pipeline.h>
 
 #include <fstream>
 
 namespace vkcompute {
-namespace api {
+namespace vkapi {
 
 //
 // Utility Functions
@@ -464,5 +464,5 @@ void ComputePipelineCache::save_cache() {
   file.write(buffer.data(), buffer.size());
 }
 
-} // namespace api
+} // namespace vkapi
 } // namespace vkcompute
diff --git a/backends/vulkan/runtime/api/Pipeline.h b/backends/vulkan/runtime/api/vk_api/Pipeline.h
similarity index 95%
rename from backends/vulkan/runtime/api/Pipeline.h
rename to backends/vulkan/runtime/api/vk_api/Pipeline.h
index 02ca5542b2d..6bf30571c8d 100644
--- a/backends/vulkan/runtime/api/Pipeline.h
+++ b/backends/vulkan/runtime/api/vk_api/Pipeline.h
@@ -10,20 +10,20 @@
 
 // @lint-ignore-every CLANGTIDY facebook-hte-BadMemberName
 
-#include <executorch/backends/vulkan/runtime/api/vk_api.h>
+#include <executorch/backends/vulkan/runtime/api/vk_api/vk_api.h>
 
-#include <executorch/backends/vulkan/runtime/api/Shader.h>
+#include <executorch/backends/vulkan/runtime/api/vk_api/Shader.h>
 
-#include <executorch/backends/vulkan/runtime/api/memory/Buffer.h>
-#include <executorch/backends/vulkan/runtime/api/memory/Image.h>
+#include <executorch/backends/vulkan/runtime/api/vk_api/memory/Buffer.h>
+#include <executorch/backends/vulkan/runtime/api/vk_api/memory/Image.h>
 
 #include <mutex>
 #include <unordered_map>
 
-#define SV(x) ::vkcompute::api::SpecVar(x)
+#define SV(x) ::vkcompute::vkapi::SpecVar(x)
 
 namespace vkcompute {
-namespace api {
+namespace vkapi {
 
 struct SpecVar final {
   enum class Type : uint8_t {
@@ -291,5 +291,5 @@ class ComputePipelineCache final {
 // Impl
 //
 
-} // namespace api
+} // namespace vkapi
 } // namespace vkcompute
diff --git a/backends/vulkan/runtime/api/QueryPool.cpp b/backends/vulkan/runtime/api/vk_api/QueryPool.cpp
similarity index 98%
rename from backends/vulkan/runtime/api/QueryPool.cpp
rename to backends/vulkan/runtime/api/vk_api/QueryPool.cpp
index b351f189dc1..ba2647d48df 100644
--- a/backends/vulkan/runtime/api/QueryPool.cpp
+++ b/backends/vulkan/runtime/api/vk_api/QueryPool.cpp
@@ -8,16 +8,17 @@
 
 // @lint-ignore-every CLANGTIDY facebook-hte-BadImplicitCast
 
-#include <executorch/backends/vulkan/runtime/api/QueryPool.h>
 #include <executorch/backends/vulkan/runtime/api/utils/VecUtils.h>
 
+#include <executorch/backends/vulkan/runtime/api/vk_api/QueryPool.h>
+
 #include <cmath>
 #include <iomanip>
 #include <iostream>
 #include <utility>
 
 namespace vkcompute {
-namespace api {
+namespace vkapi {
 
 namespace {
 
@@ -247,5 +248,5 @@ unsigned long QueryPool::get_total_shader_ns(std::string kernel_name) {
   }
   return 0;
 }
-} // namespace api
+} // namespace vkapi
 } // namespace vkcompute
diff --git a/backends/vulkan/runtime/api/QueryPool.h b/backends/vulkan/runtime/api/vk_api/QueryPool.h
similarity index 87%
rename from backends/vulkan/runtime/api/QueryPool.h
rename to backends/vulkan/runtime/api/vk_api/QueryPool.h
index 5b0b3a11862..9ad680cadf2 100644
--- a/backends/vulkan/runtime/api/QueryPool.h
+++ b/backends/vulkan/runtime/api/vk_api/QueryPool.h
@@ -13,18 +13,18 @@
 #include <cstdint>
 #include <functional>
 
-#include <executorch/backends/vulkan/runtime/api/vk_api.h>
+#include <executorch/backends/vulkan/runtime/api/vk_api/vk_api.h>
 
-#include <executorch/backends/vulkan/runtime/api/Adapter.h>
-#include <executorch/backends/vulkan/runtime/api/Command.h>
-#include <executorch/backends/vulkan/runtime/api/Pipeline.h>
+#include <executorch/backends/vulkan/runtime/api/vk_api/Adapter.h>
+#include <executorch/backends/vulkan/runtime/api/vk_api/Command.h>
+#include <executorch/backends/vulkan/runtime/api/vk_api/Pipeline.h>
 
 #ifndef VULKAN_QUERY_POOL_SIZE
 #define VULKAN_QUERY_POOL_SIZE 4096u
 #endif
 
 namespace vkcompute {
-namespace api {
+namespace vkapi {
 
 struct QueryPoolConfig final {
   uint32_t max_query_count = VULKAN_QUERY_POOL_SIZE;
@@ -108,5 +108,5 @@ class QueryPool final {
   }
 };
 
-} // namespace api
+} // namespace vkapi
 } // namespace vkcompute
diff --git a/backends/vulkan/runtime/api/Runtime.cpp b/backends/vulkan/runtime/api/vk_api/Runtime.cpp
similarity index 98%
rename from backends/vulkan/runtime/api/Runtime.cpp
rename to backends/vulkan/runtime/api/vk_api/Runtime.cpp
index 432af326a53..5ac788c1433 100644
--- a/backends/vulkan/runtime/api/Runtime.cpp
+++ b/backends/vulkan/runtime/api/vk_api/Runtime.cpp
@@ -10,11 +10,11 @@
 #include <iostream>
 #include <sstream>
 
-#include <executorch/backends/vulkan/runtime/api/Adapter.h>
-#include <executorch/backends/vulkan/runtime/api/Runtime.h>
+#include <executorch/backends/vulkan/runtime/api/vk_api/Adapter.h>
+#include <executorch/backends/vulkan/runtime/api/vk_api/Runtime.h>
 
 namespace vkcompute {
-namespace api {
+namespace vkapi {
 
 #define PRINT_CASE(name)       \
   case MemoryAccessType::name: \
@@ -378,5 +378,5 @@ Runtime* runtime() {
   return p_runtime.get();
 }
 
-} // namespace api
+} // namespace vkapi
 } // namespace vkcompute
diff --git a/backends/vulkan/runtime/api/Runtime.h b/backends/vulkan/runtime/api/vk_api/Runtime.h
similarity index 94%
rename from backends/vulkan/runtime/api/Runtime.h
rename to backends/vulkan/runtime/api/vk_api/Runtime.h
index e4cb6922ad8..15e8aca8100 100644
--- a/backends/vulkan/runtime/api/Runtime.h
+++ b/backends/vulkan/runtime/api/vk_api/Runtime.h
@@ -13,12 +13,12 @@
 #include <functional>
 #include <memory>
 
-#include <executorch/backends/vulkan/runtime/api/vk_api.h>
+#include <executorch/backends/vulkan/runtime/api/vk_api/vk_api.h>
 
-#include <executorch/backends/vulkan/runtime/api/Adapter.h>
+#include <executorch/backends/vulkan/runtime/api/vk_api/Adapter.h>
 
 namespace vkcompute {
-namespace api {
+namespace vkapi {
 
 //
 // A Vulkan Runtime initializes a Vulkan instance and decouples the concept of
@@ -104,5 +104,5 @@ class Runtime final {
 // a static local variable.
 Runtime* runtime();
 
-} // namespace api
+} // namespace vkapi
 } // namespace vkcompute
diff --git a/backends/vulkan/runtime/api/Shader.cpp b/backends/vulkan/runtime/api/vk_api/Shader.cpp
similarity index 97%
rename from backends/vulkan/runtime/api/Shader.cpp
rename to backends/vulkan/runtime/api/vk_api/Shader.cpp
index 731a79b1f7f..87bf5eca520 100644
--- a/backends/vulkan/runtime/api/Shader.cpp
+++ b/backends/vulkan/runtime/api/vk_api/Shader.cpp
@@ -8,10 +8,10 @@
 
 #include <utility>
 
-#include <executorch/backends/vulkan/runtime/api/Shader.h>
+#include <executorch/backends/vulkan/runtime/api/vk_api/Shader.h>
 
 namespace vkcompute {
-namespace api {
+namespace vkapi {
 
 //
 // ShaderInfo
@@ -210,5 +210,5 @@ void ShaderCache::purge() {
   cache_.clear();
 }
 
-} // namespace api
+} // namespace vkapi
 } // namespace vkcompute
diff --git a/backends/vulkan/runtime/api/Shader.h b/backends/vulkan/runtime/api/vk_api/Shader.h
similarity index 96%
rename from backends/vulkan/runtime/api/Shader.h
rename to backends/vulkan/runtime/api/vk_api/Shader.h
index eb727d6428b..f6c70735e4c 100644
--- a/backends/vulkan/runtime/api/Shader.h
+++ b/backends/vulkan/runtime/api/vk_api/Shader.h
@@ -10,17 +10,17 @@
 
 // @lint-ignore-every CLANGTIDY facebook-hte-BadMemberName
 
-#include <executorch/backends/vulkan/runtime/api/vk_api.h>
-
-#include <executorch/backends/vulkan/runtime/api/Types.h>
+#include <executorch/backends/vulkan/runtime/api/vk_api/vk_api.h>
 
 #include <executorch/backends/vulkan/runtime/api/utils/VecUtils.h>
 
+#include <executorch/backends/vulkan/runtime/api/vk_api/Types.h>
+
 #include <mutex>
 #include <unordered_map>
 
 namespace vkcompute {
-namespace api {
+namespace vkapi {
 
 class ShaderLayout final {
  public:
@@ -183,7 +183,7 @@ class ShaderCache final {
   void purge();
 };
 
-} // namespace api
+} // namespace vkapi
 } // namespace vkcompute
 
 inline bool operator==(
diff --git a/backends/vulkan/runtime/api/Types.h b/backends/vulkan/runtime/api/vk_api/Types.h
similarity index 95%
rename from backends/vulkan/runtime/api/Types.h
rename to backends/vulkan/runtime/api/vk_api/Types.h
index f58edff5b9e..ae52ddfd25c 100644
--- a/backends/vulkan/runtime/api/Types.h
+++ b/backends/vulkan/runtime/api/vk_api/Types.h
@@ -13,9 +13,9 @@
 #include <cstddef>
 #include <cstdint>
 
-#include <executorch/backends/vulkan/runtime/api/vk_api.h>
+#include <executorch/backends/vulkan/runtime/api/vk_api/vk_api.h>
 
-#include <executorch/backends/vulkan/runtime/api/Exception.h>
+#include <executorch/backends/vulkan/runtime/api/vk_api/Exception.h>
 
 #ifdef USE_VULKAN_FP16_INFERENCE
 #define VK_FORMAT_FLOAT4 VK_FORMAT_R16G16B16A16_SFLOAT
@@ -35,7 +35,7 @@
   _(int32_t, VK_FORMAT_R32G32B32A32_SINT, QInt32)
 
 namespace vkcompute {
-namespace api {
+namespace vkapi {
 
 //
 // Scalar Types
@@ -138,10 +138,10 @@ inline std::ostream& operator<<(std::ostream& os, const ScalarType dtype) {
 template <ScalarType N>
 struct ScalarTypeToCType;
 
-#define SPECIALIZE_ScalarTypeToCType(ctype, vkformat, scalar_type)      \
-  template <>                                                           \
-  struct ScalarTypeToCType<::vkcompute::api::ScalarType::scalar_type> { \
-    using type = ctype;                                                 \
+#define SPECIALIZE_ScalarTypeToCType(ctype, vkformat, scalar_type)        \
+  template <>                                                             \
+  struct ScalarTypeToCType<::vkcompute::vkapi::ScalarType::scalar_type> { \
+    using type = ctype;                                                   \
   };
 
 VK_FORALL_SCALAR_TYPES(SPECIALIZE_ScalarTypeToCType)
@@ -211,5 +211,5 @@ T to_packed_dim_nchw_offset(const GPUMemoryLayout layout) {
   return static_cast<T>(layout) + 1;
 }
 
-} // namespace api
+} // namespace vkapi
 } // namespace vkcompute
diff --git a/backends/vulkan/runtime/api/VkUtils.h b/backends/vulkan/runtime/api/vk_api/VkUtils.h
similarity index 80%
rename from backends/vulkan/runtime/api/VkUtils.h
rename to backends/vulkan/runtime/api/vk_api/VkUtils.h
index b0e6a820423..7850a7359e2 100644
--- a/backends/vulkan/runtime/api/VkUtils.h
+++ b/backends/vulkan/runtime/api/vk_api/VkUtils.h
@@ -8,14 +8,14 @@
 
 #pragma once
 
-#include <executorch/backends/vulkan/runtime/api/vk_api.h>
+#include <executorch/backends/vulkan/runtime/api/vk_api/vk_api.h>
 
 namespace vkcompute {
-namespace api {
+namespace vkapi {
 
 inline VkExtent3D create_extent3d(const utils::uvec3& extents) {
   return VkExtent3D{extents.data[0u], extents.data[1u], extents.data[2u]};
 }
 
-} // namespace api
+} // namespace vkapi
 } // namespace vkcompute
diff --git a/backends/vulkan/runtime/api/memory/Allocation.cpp b/backends/vulkan/runtime/api/vk_api/memory/Allocation.cpp
similarity index 94%
rename from backends/vulkan/runtime/api/memory/Allocation.cpp
rename to backends/vulkan/runtime/api/vk_api/memory/Allocation.cpp
index 9bde2ac744d..2a0ae035ac0 100644
--- a/backends/vulkan/runtime/api/memory/Allocation.cpp
+++ b/backends/vulkan/runtime/api/vk_api/memory/Allocation.cpp
@@ -6,7 +6,7 @@
  * LICENSE file in the root directory of this source tree.
  */
 
-#include <executorch/backends/vulkan/runtime/api/memory/Allocation.h>
+#include <executorch/backends/vulkan/runtime/api/vk_api/memory/Allocation.h>
 
 #define PRINT_FIELD(struct, field) #field << ": " << struct.field << std::endl
 
@@ -23,7 +23,7 @@ std::ostream& operator<<(std::ostream& out, VmaTotalStatistics stats) {
 #undef PRINT_FIELD
 
 namespace vkcompute {
-namespace api {
+namespace vkapi {
 
 Allocation::Allocation()
     : memory_requirements{},
@@ -70,5 +70,5 @@ Allocation::~Allocation() {
   }
 }
 
-} // namespace api
+} // namespace vkapi
 } // namespace vkcompute
diff --git a/backends/vulkan/runtime/api/memory/Allocation.h b/backends/vulkan/runtime/api/vk_api/memory/Allocation.h
similarity index 82%
rename from backends/vulkan/runtime/api/memory/Allocation.h
rename to backends/vulkan/runtime/api/vk_api/memory/Allocation.h
index b93556bd501..059b73b24ed 100644
--- a/backends/vulkan/runtime/api/memory/Allocation.h
+++ b/backends/vulkan/runtime/api/vk_api/memory/Allocation.h
@@ -10,18 +10,18 @@
 
 // @lint-ignore-every CLANGTIDY facebook-hte-BadMemberName
 
-#include <executorch/backends/vulkan/runtime/api/vk_api.h>
+#include <executorch/backends/vulkan/runtime/api/vk_api/vk_api.h>
 
-#include <executorch/backends/vulkan/runtime/api/Exception.h>
+#include <executorch/backends/vulkan/runtime/api/vk_api/Exception.h>
 
-#include <executorch/backends/vulkan/runtime/api/memory/vma_api.h>
+#include <executorch/backends/vulkan/runtime/api/vk_api/memory/vma_api.h>
 
 #include <ostream>
 
 std::ostream& operator<<(std::ostream& out, VmaTotalStatistics stats);
 
 namespace vkcompute {
-namespace api {
+namespace vkapi {
 
 struct Allocation final {
   explicit Allocation();
@@ -52,5 +52,5 @@ struct Allocation final {
   }
 };
 
-} // namespace api
+} // namespace vkapi
 } // namespace vkcompute
diff --git a/backends/vulkan/runtime/api/memory/Allocator.cpp b/backends/vulkan/runtime/api/vk_api/memory/Allocator.cpp
similarity index 98%
rename from backends/vulkan/runtime/api/memory/Allocator.cpp
rename to backends/vulkan/runtime/api/vk_api/memory/Allocator.cpp
index 5749ecd0714..e78872d760c 100644
--- a/backends/vulkan/runtime/api/memory/Allocator.cpp
+++ b/backends/vulkan/runtime/api/vk_api/memory/Allocator.cpp
@@ -6,10 +6,10 @@
  * LICENSE file in the root directory of this source tree.
  */
 
-#include <executorch/backends/vulkan/runtime/api/memory/Allocator.h>
+#include <executorch/backends/vulkan/runtime/api/vk_api/memory/Allocator.h>
 
 namespace vkcompute {
-namespace api {
+namespace vkapi {
 
 Allocator::Allocator(
     VkInstance instance,
@@ -186,5 +186,5 @@ VulkanBuffer Allocator::create_uniform_buffer(const VkDeviceSize size) {
   return uniform_buffer;
 }
 
-} // namespace api
+} // namespace vkapi
 } // namespace vkcompute
diff --git a/backends/vulkan/runtime/api/memory/Allocator.h b/backends/vulkan/runtime/api/vk_api/memory/Allocator.h
similarity index 86%
rename from backends/vulkan/runtime/api/memory/Allocator.h
rename to backends/vulkan/runtime/api/vk_api/memory/Allocator.h
index 5567047d7ee..1db7f65c73d 100644
--- a/backends/vulkan/runtime/api/memory/Allocator.h
+++ b/backends/vulkan/runtime/api/vk_api/memory/Allocator.h
@@ -10,18 +10,18 @@
 
 // @lint-ignore-every CLANGTIDY facebook-hte-BadMemberName
 
-#include <executorch/backends/vulkan/runtime/api/vk_api.h>
+#include <executorch/backends/vulkan/runtime/api/vk_api/vk_api.h>
 
-#include <executorch/backends/vulkan/runtime/api/memory/vma_api.h>
+#include <executorch/backends/vulkan/runtime/api/utils/VecUtils.h>
 
-#include <executorch/backends/vulkan/runtime/api/memory/Allocation.h>
-#include <executorch/backends/vulkan/runtime/api/memory/Buffer.h>
-#include <executorch/backends/vulkan/runtime/api/memory/Image.h>
+#include <executorch/backends/vulkan/runtime/api/vk_api/memory/vma_api.h>
 
-#include <executorch/backends/vulkan/runtime/api/utils/VecUtils.h>
+#include <executorch/backends/vulkan/runtime/api/vk_api/memory/Allocation.h>
+#include <executorch/backends/vulkan/runtime/api/vk_api/memory/Buffer.h>
+#include <executorch/backends/vulkan/runtime/api/vk_api/memory/Image.h>
 
 namespace vkcompute {
-namespace api {
+namespace vkapi {
 
 constexpr VmaAllocationCreateFlags DEFAULT_ALLOCATION_STRATEGY =
     VMA_ALLOCATION_CREATE_STRATEGY_MIN_MEMORY_BIT;
@@ -106,5 +106,5 @@ inline VulkanBuffer Allocator::create_params_buffer(const Block& block) {
   return uniform_buffer;
 }
 
-} // namespace api
+} // namespace vkapi
 } // namespace vkcompute
diff --git a/backends/vulkan/runtime/api/memory/Buffer.cpp b/backends/vulkan/runtime/api/vk_api/memory/Buffer.cpp
similarity index 98%
rename from backends/vulkan/runtime/api/memory/Buffer.cpp
rename to backends/vulkan/runtime/api/vk_api/memory/Buffer.cpp
index 0d4d075cdb7..5b11413bf2d 100644
--- a/backends/vulkan/runtime/api/memory/Buffer.cpp
+++ b/backends/vulkan/runtime/api/vk_api/memory/Buffer.cpp
@@ -6,10 +6,10 @@
  * LICENSE file in the root directory of this source tree.
  */
 
-#include <executorch/backends/vulkan/runtime/api/memory/Buffer.h>
+#include <executorch/backends/vulkan/runtime/api/vk_api/memory/Buffer.h>
 
 namespace vkcompute {
-namespace api {
+namespace vkapi {
 
 //
 // VulkanBuffer
@@ -192,5 +192,5 @@ BufferMemoryBarrier::BufferMemoryBarrier(
           buffer.buffer_properties_.mem_range, // size
       } {}
 
-} // namespace api
+} // namespace vkapi
 } // namespace vkcompute
diff --git a/backends/vulkan/runtime/api/memory/Buffer.h b/backends/vulkan/runtime/api/vk_api/memory/Buffer.h
similarity index 93%
rename from backends/vulkan/runtime/api/memory/Buffer.h
rename to backends/vulkan/runtime/api/vk_api/memory/Buffer.h
index e80b2395c22..41e93ba7546 100644
--- a/backends/vulkan/runtime/api/memory/Buffer.h
+++ b/backends/vulkan/runtime/api/vk_api/memory/Buffer.h
@@ -10,16 +10,16 @@
 
 // @lint-ignore-every CLANGTIDY facebook-hte-BadMemberName
 
-#include <executorch/backends/vulkan/runtime/api/vk_api.h>
+#include <executorch/backends/vulkan/runtime/api/vk_api/vk_api.h>
 
 #include <executorch/backends/vulkan/runtime/api/utils/VecUtils.h>
 
-#include <executorch/backends/vulkan/runtime/api/memory/vma_api.h>
+#include <executorch/backends/vulkan/runtime/api/vk_api/memory/vma_api.h>
 
-#include <executorch/backends/vulkan/runtime/api/memory/Allocation.h>
+#include <executorch/backends/vulkan/runtime/api/vk_api/memory/Allocation.h>
 
 namespace vkcompute {
-namespace api {
+namespace vkapi {
 
 using MemoryAccessFlags = uint8_t;
 
@@ -170,5 +170,5 @@ struct BufferMemoryBarrier final {
       const VulkanBuffer& buffer);
 };
 
-} // namespace api
+} // namespace vkapi
 } // namespace vkcompute
diff --git a/backends/vulkan/runtime/api/memory/Image.cpp b/backends/vulkan/runtime/api/vk_api/memory/Image.cpp
similarity index 98%
rename from backends/vulkan/runtime/api/memory/Image.cpp
rename to backends/vulkan/runtime/api/vk_api/memory/Image.cpp
index 60276eda560..081ef5944f5 100644
--- a/backends/vulkan/runtime/api/memory/Image.cpp
+++ b/backends/vulkan/runtime/api/vk_api/memory/Image.cpp
@@ -6,10 +6,10 @@
  * LICENSE file in the root directory of this source tree.
  */
 
-#include <executorch/backends/vulkan/runtime/api/memory/Image.h>
+#include <executorch/backends/vulkan/runtime/api/vk_api/memory/Image.h>
 
 namespace vkcompute {
-namespace api {
+namespace vkapi {
 
 //
 // ImageSampler
@@ -335,5 +335,5 @@ void SamplerCache::purge() {
   cache_.clear();
 }
 
-} // namespace api
+} // namespace vkapi
 } // namespace vkcompute
diff --git a/backends/vulkan/runtime/api/memory/Image.h b/backends/vulkan/runtime/api/vk_api/memory/Image.h
similarity index 95%
rename from backends/vulkan/runtime/api/memory/Image.h
rename to backends/vulkan/runtime/api/vk_api/memory/Image.h
index 8a7cf6081ac..59634bb5573 100644
--- a/backends/vulkan/runtime/api/memory/Image.h
+++ b/backends/vulkan/runtime/api/vk_api/memory/Image.h
@@ -10,19 +10,19 @@
 
 // @lint-ignore-every CLANGTIDY facebook-hte-BadMemberName
 
-#include <executorch/backends/vulkan/runtime/api/vk_api.h>
+#include <executorch/backends/vulkan/runtime/api/vk_api/vk_api.h>
 
 #include <executorch/backends/vulkan/runtime/api/utils/VecUtils.h>
 
-#include <executorch/backends/vulkan/runtime/api/memory/vma_api.h>
+#include <executorch/backends/vulkan/runtime/api/vk_api/memory/vma_api.h>
 
-#include <executorch/backends/vulkan/runtime/api/memory/Allocation.h>
+#include <executorch/backends/vulkan/runtime/api/vk_api/memory/Allocation.h>
 
 #include <mutex>
 #include <unordered_map>
 
 namespace vkcompute {
-namespace api {
+namespace vkapi {
 
 class ImageSampler final {
  public:
@@ -249,5 +249,5 @@ class SamplerCache final {
   void purge();
 };
 
-} // namespace api
+} // namespace vkapi
 } // namespace vkcompute
diff --git a/backends/vulkan/runtime/api/memory/vma_api.cpp b/backends/vulkan/runtime/api/vk_api/memory/vma_api.cpp
similarity index 77%
rename from backends/vulkan/runtime/api/memory/vma_api.cpp
rename to backends/vulkan/runtime/api/vk_api/memory/vma_api.cpp
index d1180305fea..2312e76df02 100644
--- a/backends/vulkan/runtime/api/memory/vma_api.cpp
+++ b/backends/vulkan/runtime/api/vk_api/memory/vma_api.cpp
@@ -7,4 +7,4 @@
  */
 
 #define VMA_IMPLEMENTATION
-#include <executorch/backends/vulkan/runtime/api/memory/vma_api.h>
+#include <executorch/backends/vulkan/runtime/api/vk_api/memory/vma_api.h>
diff --git a/backends/vulkan/runtime/api/memory/vma_api.h b/backends/vulkan/runtime/api/vk_api/memory/vma_api.h
similarity index 100%
rename from backends/vulkan/runtime/api/memory/vma_api.h
rename to backends/vulkan/runtime/api/vk_api/memory/vma_api.h
diff --git a/backends/vulkan/runtime/api/vk_api.h b/backends/vulkan/runtime/api/vk_api/vk_api.h
similarity index 100%
rename from backends/vulkan/runtime/api/vk_api.h
rename to backends/vulkan/runtime/api/vk_api/vk_api.h
diff --git a/backends/vulkan/runtime/gen_vulkan_spv.py b/backends/vulkan/runtime/gen_vulkan_spv.py
index 4d8605d7d33..5661aed4c8b 100644
--- a/backends/vulkan/runtime/gen_vulkan_spv.py
+++ b/backends/vulkan/runtime/gen_vulkan_spv.py
@@ -842,7 +842,7 @@ def generateShaderInfoStr(shader_info: ShaderInfo, name: str, sizeBytes: int) ->
     ]
 
     shader_info_str = textwrap.indent(
-        "api::shader_registry().register_shader(\n  api::ShaderInfo(\n{args}));\n".format(
+        "api::shader_registry().register_shader(\n  vkapi::ShaderInfo(\n{args}));\n".format(
             args=textwrap.indent(",\n".join(shader_info_args), "     "),
         ),
         "    ",
diff --git a/backends/vulkan/runtime/graph/ComputeGraph.cpp b/backends/vulkan/runtime/graph/ComputeGraph.cpp
index 3a7b1183a93..c5ba9320368 100644
--- a/backends/vulkan/runtime/graph/ComputeGraph.cpp
+++ b/backends/vulkan/runtime/graph/ComputeGraph.cpp
@@ -55,7 +55,7 @@ ComputeGraph::ComputeGraph(GraphConfig config)
       prepack_descriptor_counts_{},
       execute_descriptor_counts_{},
       context_{new api::Context(
-          api::runtime()->default_adapter_i(),
+          vkapi::runtime()->default_adapter_i(),
           config_.context_config)},
       shared_objects_{},
       values_{},
@@ -89,27 +89,27 @@ ComputeGraph::~ComputeGraph() {
   context_->flush();
 }
 
-api::StorageType ComputeGraph::suggested_storage_type() {
+vkapi::StorageType ComputeGraph::suggested_storage_type() {
   if (config_.enable_storage_type_override) {
     return config_.storage_type_override;
   }
-  return api::kTexture3D;
+  return vkapi::kTexture3D;
 }
 
-api::GPUMemoryLayout ComputeGraph::suggested_memory_layout(
+vkapi::GPUMemoryLayout ComputeGraph::suggested_memory_layout(
     const std::vector<int64_t>& sizes) {
   if (config_.enable_memory_layout_override) {
     return config_.memory_layout_override;
   }
   if (sizes.size() < 3) {
-    return api::kWidthPacked;
+    return vkapi::kWidthPacked;
   }
   // For 3 dimensional tensors that only have a channels dimension of 1, still
   // prefer width packed.
   if (utils::val_at(-3, sizes) == 1) {
-    return api::kWidthPacked;
+    return vkapi::kWidthPacked;
   }
-  return api::kChannelsPacked;
+  return vkapi::kChannelsPacked;
 }
 
 void ComputeGraph::check_no_active_value_ptrs() {
@@ -131,7 +131,7 @@ std::vector<int64_t> ComputeGraph::sizes_of(const ValueRef idx) const {
   VK_THROW("Could not get sizes of value with type ", val.type());
 }
 
-api::ScalarType ComputeGraph::dtype_of(const ValueRef idx) const {
+vkapi::ScalarType ComputeGraph::dtype_of(const ValueRef idx) const {
   const Value& val = values_.at(idx);
   if (val.isTensor()) {
     return val.toConstTensor().dtype();
@@ -143,9 +143,9 @@ api::ScalarType ComputeGraph::dtype_of(const ValueRef idx) const {
 
 ValueRef ComputeGraph::add_tensor(
     const std::vector<int64_t>& sizes,
-    const api::ScalarType dtype,
-    const api::StorageType storage_type,
-    const api::GPUMemoryLayout memory_layout,
+    const vkapi::ScalarType dtype,
+    const vkapi::StorageType storage_type,
+    const vkapi::GPUMemoryLayout memory_layout,
     const int64_t shared_object_idx) {
   bool allocate_memory = shared_object_idx < 0;
 
@@ -162,8 +162,8 @@ ValueRef ComputeGraph::add_tensor(
 
 ValueRef ComputeGraph::add_tensor(
     const std::vector<int64_t>& sizes,
-    const api::ScalarType dtype,
-    const api::StorageType storage_type,
+    const vkapi::ScalarType dtype,
+    const vkapi::StorageType storage_type,
     const int64_t shared_object_idx) {
   return add_tensor(
       sizes,
@@ -175,8 +175,8 @@ ValueRef ComputeGraph::add_tensor(
 
 ValueRef ComputeGraph::add_tensor(
     const std::vector<int64_t>& sizes,
-    const api::ScalarType dtype,
-    const api::GPUMemoryLayout memory_layout,
+    const vkapi::ScalarType dtype,
+    const vkapi::GPUMemoryLayout memory_layout,
     const int64_t shared_object_idx) {
   return add_tensor(
       sizes, dtype, suggested_storage_type(), memory_layout, shared_object_idx);
@@ -184,20 +184,20 @@ ValueRef ComputeGraph::add_tensor(
 
 ValueRef ComputeGraph::add_tensor_like(
     const ValueRef idx,
-    const api::StorageType storage_type,
-    const api::GPUMemoryLayout memory_layout) {
+    const vkapi::StorageType storage_type,
+    const vkapi::GPUMemoryLayout memory_layout) {
   return add_tensor(sizes_of(idx), dtype_of(idx), storage_type, memory_layout);
 }
 
 ValueRef ComputeGraph::add_tensor_like(
     const ValueRef idx,
-    const api::GPUMemoryLayout memory_layout) {
+    const vkapi::GPUMemoryLayout memory_layout) {
   return add_tensor(sizes_of(idx), dtype_of(idx), memory_layout);
 }
 
 ValueRef ComputeGraph::add_tensor(
     const std::vector<int64_t>& sizes,
-    const api::ScalarType dtype,
+    const vkapi::ScalarType dtype,
     const int64_t shared_object_idx) {
   return add_tensor(
       sizes, dtype, suggested_memory_layout(sizes), shared_object_idx);
@@ -205,7 +205,7 @@ ValueRef ComputeGraph::add_tensor(
 
 ValueRef ComputeGraph::add_tensorref(
     const std::vector<int64_t>& sizes,
-    const api::ScalarType dtype,
+    const vkapi::ScalarType dtype,
     const void* const data) {
   ValueRef idx(static_cast<int>(values_.size()));
   check_no_active_value_ptrs();
@@ -214,7 +214,7 @@ ValueRef ComputeGraph::add_tensorref(
 }
 
 ValueRef ComputeGraph::add_staging(
-    const api::ScalarType dtype,
+    const vkapi::ScalarType dtype,
     const size_t numel) {
   ValueRef idx(static_cast<int>(values_.size()));
   check_no_active_value_ptrs();
@@ -247,7 +247,7 @@ ValueRef ComputeGraph::set_input_tensor(
     const ValueRef idx,
     const bool use_staging) {
   if (use_staging) {
-    api::ScalarType dtype = get_tensor(idx)->dtype();
+    vkapi::ScalarType dtype = get_tensor(idx)->dtype();
     size_t gpu_numel = get_tensor(idx)->gpu_numel();
     ValueRef staging_idx = add_staging(dtype, gpu_numel);
     add_staging_to_tensor_node(*this, staging_idx, idx);
@@ -262,7 +262,7 @@ ValueRef ComputeGraph::set_output_tensor(
     const ValueRef idx,
     const bool use_staging) {
   if (use_staging) {
-    api::ScalarType dtype = get_tensor(idx)->dtype();
+    vkapi::ScalarType dtype = get_tensor(idx)->dtype();
     size_t gpu_numel = get_tensor(idx)->gpu_numel();
     ValueRef staging_idx = add_staging(dtype, gpu_numel);
     // We only run this when the tensor is non-empty.  When the underlying
@@ -286,9 +286,9 @@ SharedObject& ComputeGraph::get_shared_object(const int64_t idx) {
 }
 
 void ComputeGraph::update_descriptor_counts(
-    const api::ShaderInfo& shader_info,
+    const vkapi::ShaderInfo& shader_info,
     bool execute) {
-  api::DescriptorPoolConfig* config =
+  vkapi::DescriptorPoolConfig* config =
       execute ? &execute_descriptor_counts_ : &prepack_descriptor_counts_;
 
   config->descriptor_pool_max_sets += 1;
@@ -354,7 +354,7 @@ void ComputeGraph::copy_into_staging(
     const void* data,
     const size_t numel) {
   StagingPtr staging = get_staging(idx);
-  size_t nbytes = numel * api::element_size(staging->dtype());
+  size_t nbytes = numel * vkapi::element_size(staging->dtype());
   copy_ptr_to_staging(data, *staging, nbytes);
 }
 
@@ -363,7 +363,7 @@ void ComputeGraph::copy_from_staging(
     void* data,
     const size_t numel) {
   StagingPtr staging = get_staging(idx);
-  size_t nbytes = numel * api::element_size(staging->dtype());
+  size_t nbytes = numel * vkapi::element_size(staging->dtype());
   copy_staging_to_ptr(*staging, data, nbytes);
 }
 
@@ -376,7 +376,7 @@ void ComputeGraph::prepare() {
       config_.descriptor_pool_safety_factor))
 
   uint32_t max_sets = MERGE_FIELD(descriptor_pool_max_sets);
-  api::DescriptorPoolConfig config{
+  vkapi::DescriptorPoolConfig config{
       max_sets,
       std::max(MERGE_FIELD(descriptor_uniform_buffer_count), max_sets),
       std::max(MERGE_FIELD(descriptor_storage_buffer_count), max_sets),
@@ -403,7 +403,7 @@ void ComputeGraph::encode_prepack() {
 
 void ComputeGraph::prepack() const {
   // Submit and execute the command buffer
-  api::VulkanFence fence = context_->fences().get_fence();
+  vkapi::VulkanFence fence = context_->fences().get_fence();
   context_->submit_cmd_to_gpu(fence.get_submit_handle(), /*final_use = */ true);
   fence.wait();
 
@@ -427,7 +427,7 @@ void ComputeGraph::encode_execute() {
 }
 
 void ComputeGraph::execute() const {
-  api::VulkanFence fence = context_->fences().get_fence();
+  vkapi::VulkanFence fence = context_->fences().get_fence();
   context_->submit_cmd_to_gpu(fence.get_submit_handle());
   fence.wait();
 }
diff --git a/backends/vulkan/runtime/graph/ComputeGraph.h b/backends/vulkan/runtime/graph/ComputeGraph.h
index 3c8e6b88333..bdee9eb8cb0 100644
--- a/backends/vulkan/runtime/graph/ComputeGraph.h
+++ b/backends/vulkan/runtime/graph/ComputeGraph.h
@@ -89,8 +89,8 @@ class ComputeGraph final {
 
  private:
   GraphConfig config_;
-  api::DescriptorPoolConfig prepack_descriptor_counts_;
-  api::DescriptorPoolConfig execute_descriptor_counts_;
+  vkapi::DescriptorPoolConfig prepack_descriptor_counts_;
+  vkapi::DescriptorPoolConfig execute_descriptor_counts_;
 
   std::unique_ptr<api::Context> context_;
   std::vector<SharedObject> shared_objects_;
@@ -184,7 +184,7 @@ class ComputeGraph final {
 
   std::vector<int64_t> sizes_of(const ValueRef idx) const;
 
-  api::ScalarType dtype_of(const ValueRef idx) const;
+  vkapi::ScalarType dtype_of(const ValueRef idx) const;
 
   inline utils::uvec3 image_extents_of(const ValueRef idx) const {
     return values_.at(idx).toConstTensor().image_extents();
@@ -194,7 +194,7 @@ class ComputeGraph final {
     return values_.at(idx).toConstTensor().texel_numel();
   }
 
-  inline api::StorageType storage_type_of(const ValueRef idx) const {
+  inline vkapi::StorageType storage_type_of(const ValueRef idx) const {
     return values_.at(idx).toConstTensor().storage_type();
   }
 
@@ -202,7 +202,7 @@ class ComputeGraph final {
     return values_.at(idx).toConstTensor().has_buffer_storage();
   }
 
-  inline api::GPUMemoryLayout memory_layout_of(const ValueRef idx) const {
+  inline vkapi::GPUMemoryLayout memory_layout_of(const ValueRef idx) const {
     return values_.at(idx).toConstTensor().gpu_memory_layout();
   }
 
@@ -210,19 +210,19 @@ class ComputeGraph final {
     return values_.at(idx).toConstTensor().packed_dim_whcn_idx();
   }
 
-  inline api::BufferBindInfo sizes_ubo(const ValueRef idx) {
+  inline vkapi::BufferBindInfo sizes_ubo(const ValueRef idx) {
     return values_.at(idx).toTensor().sizes_ubo();
   }
 
-  inline api::BufferBindInfo texture_limits_ubo(const ValueRef idx) {
+  inline vkapi::BufferBindInfo texture_limits_ubo(const ValueRef idx) {
     return values_.at(idx).toTensor().texture_limits_ubo();
   }
 
-  inline api::BufferBindInfo texel_strides_ubo(const ValueRef idx) {
+  inline vkapi::BufferBindInfo texel_strides_ubo(const ValueRef idx) {
     return values_.at(idx).toTensor().texel_strides_ubo();
   }
 
-  inline api::BufferBindInfo ntexels_ubo(const ValueRef idx) {
+  inline vkapi::BufferBindInfo ntexels_ubo(const ValueRef idx) {
     return values_.at(idx).toTensor().ntexels_ubo();
   }
 
@@ -268,7 +268,7 @@ class ComputeGraph final {
    * buffer storage, and others with texture storage. Current only texture
    * storage is supported.
    */
-  api::StorageType suggested_storage_type();
+  vkapi::StorageType suggested_storage_type();
 
   /*
    * Returns a suggested memory layout (i.e. channels, width, or height packed)
@@ -278,7 +278,7 @@ class ComputeGraph final {
    * The suggested memory layout is determined based on the sizes of the tensor,
    * unless a memory layout override is defined in the graph configuration.
    */
-  api::GPUMemoryLayout suggested_memory_layout(
+  vkapi::GPUMemoryLayout suggested_memory_layout(
       const std::vector<int64_t>& sizes);
 
   //
@@ -296,9 +296,9 @@ class ComputeGraph final {
    */
   ValueRef add_tensor(
       const std::vector<int64_t>& sizes,
-      const api::ScalarType dtype,
-      const api::StorageType storage_type,
-      const api::GPUMemoryLayout memory_layout,
+      const vkapi::ScalarType dtype,
+      const vkapi::StorageType storage_type,
+      const vkapi::GPUMemoryLayout memory_layout,
       const int64_t shared_object_idx = -1);
 
   /*
@@ -307,8 +307,8 @@ class ComputeGraph final {
    */
   ValueRef add_tensor(
       const std::vector<int64_t>& sizes,
-      const api::ScalarType dtype,
-      const api::StorageType storage_type,
+      const vkapi::ScalarType dtype,
+      const vkapi::StorageType storage_type,
       const int64_t shared_object_idx = -1);
 
   /*
@@ -317,8 +317,8 @@ class ComputeGraph final {
    */
   ValueRef add_tensor(
       const std::vector<int64_t>& sizes,
-      const api::ScalarType dtype,
-      const api::GPUMemoryLayout memory_layout,
+      const vkapi::ScalarType dtype,
+      const vkapi::GPUMemoryLayout memory_layout,
       const int64_t shared_object_idx = -1);
 
   /*
@@ -328,7 +328,7 @@ class ComputeGraph final {
    */
   ValueRef add_tensor(
       const std::vector<int64_t>& sizes,
-      const api::ScalarType dtype,
+      const vkapi::ScalarType dtype,
       const int64_t shared_object_idx = -1);
 
   /*
@@ -336,8 +336,8 @@ class ComputeGraph final {
    */
   ValueRef add_tensor_like(
       const ValueRef vref,
-      const api::StorageType storage_type,
-      const api::GPUMemoryLayout memory_layout);
+      const vkapi::StorageType storage_type,
+      const vkapi::GPUMemoryLayout memory_layout);
 
   /*
    * Add a `api::vTensor` value to the graph with the properties of `vref`. The
@@ -345,7 +345,7 @@ class ComputeGraph final {
    */
   ValueRef add_tensor_like(
       const ValueRef vref,
-      const api::GPUMemoryLayout memory_layout);
+      const vkapi::GPUMemoryLayout memory_layout);
 
   /*
    * Add a `TensorRef` value to the graph with the specific properties. A
@@ -354,7 +354,7 @@ class ComputeGraph final {
    */
   ValueRef add_tensorref(
       const std::vector<int64_t>& sizes,
-      const api::ScalarType dtype,
+      const vkapi::ScalarType dtype,
       const void* const data);
 
   /*
@@ -362,7 +362,7 @@ class ComputeGraph final {
    * use memory that is visible to both the CPU and GPU, and therefore is used
    * as a intermediary when transferring data between the CPU and GPU.
    */
-  ValueRef add_staging(const api::ScalarType dtype, const size_t numel);
+  ValueRef add_staging(const vkapi::ScalarType dtype, const size_t numel);
 
   ValueRef add_none();
 
@@ -382,9 +382,9 @@ class ComputeGraph final {
   ValueRef set_output_tensor(const ValueRef idx, const bool use_staging = true);
 
   template <typename Block>
-  const api::BufferBindInfo create_params_buffer(const Block& data) {
+  const vkapi::BufferBindInfo create_params_buffer(const Block& data) {
     param_ubos_.emplace_back(api::ParamsBuffer(context_.get(), data));
-    return api::BufferBindInfo(param_ubos_.back().buffer());
+    return vkapi::BufferBindInfo(param_ubos_.back().buffer());
   }
 
   /*
@@ -392,7 +392,7 @@ class ComputeGraph final {
    */
   inline IOValueRef add_input_tensor(
       const std::vector<int64_t>& sizes,
-      const api::ScalarType dtype,
+      const vkapi::ScalarType dtype,
       const int64_t shared_object_idx = -1) {
     ValueRef t = add_tensor(sizes, dtype, shared_object_idx);
     ValueRef staging = set_input_tensor(t);
@@ -405,8 +405,8 @@ class ComputeGraph final {
    */
   inline IOValueRef add_input_tensor(
       const std::vector<int64_t>& sizes,
-      const api::ScalarType dtype,
-      const api::GPUMemoryLayout memory_layout,
+      const vkapi::ScalarType dtype,
+      const vkapi::GPUMemoryLayout memory_layout,
       const int64_t shared_object_idx = -1) {
     ValueRef t = add_tensor(sizes, dtype, memory_layout, shared_object_idx);
     ValueRef staging = set_input_tensor(t);
@@ -419,8 +419,8 @@ class ComputeGraph final {
    */
   inline IOValueRef add_input_tensor(
       const std::vector<int64_t>& sizes,
-      const api::ScalarType dtype,
-      const api::StorageType storage_type,
+      const vkapi::ScalarType dtype,
+      const vkapi::StorageType storage_type,
       const int64_t shared_object_idx = -1) {
     ValueRef t = add_tensor(sizes, dtype, storage_type, shared_object_idx);
     ValueRef staging = set_input_tensor(t);
@@ -434,7 +434,7 @@ class ComputeGraph final {
   //
 
   void update_descriptor_counts(
-      const api::ShaderInfo& shader_info,
+      const vkapi::ShaderInfo& shader_info,
       bool execute);
 
   void prepare();
diff --git a/backends/vulkan/runtime/graph/GraphConfig.cpp b/backends/vulkan/runtime/graph/GraphConfig.cpp
index 242817f56e4..e0ff6284cca 100644
--- a/backends/vulkan/runtime/graph/GraphConfig.cpp
+++ b/backends/vulkan/runtime/graph/GraphConfig.cpp
@@ -15,7 +15,7 @@ GraphConfig::GraphConfig() {
   const uint32_t cmd_submit_frequency = UINT32_MAX;
 
   // Only one command buffer will be encoded at a time
-  const api::CommandPoolConfig cmd_config{
+  const vkapi::CommandPoolConfig cmd_config{
       1u, // cmd_pool_initial_size
       1u, // cmd_pool_batch_size
   };
@@ -24,7 +24,7 @@ GraphConfig::GraphConfig() {
   // tally up the number of descriptor sets needed while building the graph and
   // trigger descriptor pool initialization with exact sizes before encoding the
   // command buffer.
-  const api::DescriptorPoolConfig descriptor_pool_config{
+  const vkapi::DescriptorPoolConfig descriptor_pool_config{
       0u, // descriptor_pool_max_sets
       0u, // descriptor_uniform_buffer_count
       0u, // descriptor_storage_buffer_count
@@ -33,7 +33,7 @@ GraphConfig::GraphConfig() {
       0u, // descriptor_pile_sizes
   };
 
-  const api::QueryPoolConfig query_pool_config{};
+  const vkapi::QueryPoolConfig query_pool_config{};
 
   context_config = {
       cmd_submit_frequency,
@@ -49,13 +49,13 @@ GraphConfig::GraphConfig() {
   // For now, force kTexture3D storage as we are still developing shader support
   // for buffer storage type.
   enable_storage_type_override = true;
-  storage_type_override = api::kTexture3D;
+  storage_type_override = vkapi::kTexture3D;
 
   // For now, force kWidthPacked memory layout by default as we are still
   // developing support for other memory layouts. In the future memory layout
   // settings will be serialized as part of the graph.
   enable_memory_layout_override = true;
-  memory_layout_override = api::kWidthPacked;
+  memory_layout_override = vkapi::kWidthPacked;
 
   // QueryPool objects are used to measure execution times of individual shader
   // dispatches. By default, this functionality is disabled.
@@ -65,13 +65,13 @@ GraphConfig::GraphConfig() {
   local_wg_size_override = {};
 }
 
-void GraphConfig::set_storage_type_override(api::StorageType storage_type) {
+void GraphConfig::set_storage_type_override(vkapi::StorageType storage_type) {
   enable_storage_type_override = true;
   storage_type_override = storage_type;
 }
 
 void GraphConfig::set_memory_layout_override(
-    api::GPUMemoryLayout memory_layout) {
+    vkapi::GPUMemoryLayout memory_layout) {
   enable_memory_layout_override = true;
   memory_layout_override = memory_layout;
 }
diff --git a/backends/vulkan/runtime/graph/GraphConfig.h b/backends/vulkan/runtime/graph/GraphConfig.h
index 7c732329f26..fbd9f863744 100644
--- a/backends/vulkan/runtime/graph/GraphConfig.h
+++ b/backends/vulkan/runtime/graph/GraphConfig.h
@@ -23,10 +23,10 @@ struct GraphConfig final {
   float descriptor_pool_safety_factor;
 
   bool enable_storage_type_override;
-  api::StorageType storage_type_override;
+  vkapi::StorageType storage_type_override;
 
   bool enable_memory_layout_override;
-  api::GPUMemoryLayout memory_layout_override;
+  vkapi::GPUMemoryLayout memory_layout_override;
 
   bool enable_querypool;
 
@@ -36,8 +36,8 @@ struct GraphConfig final {
   // Generate a default graph config with pre-configured settings
   explicit GraphConfig();
 
-  void set_storage_type_override(api::StorageType storage_type);
-  void set_memory_layout_override(api::GPUMemoryLayout memory_layout);
+  void set_storage_type_override(vkapi::StorageType storage_type);
+  void set_memory_layout_override(vkapi::GPUMemoryLayout memory_layout);
   void set_local_wg_size_override(const utils::uvec3& local_wg_size);
 };
 
diff --git a/backends/vulkan/runtime/graph/Logging.cpp b/backends/vulkan/runtime/graph/Logging.cpp
index 6ae1fa9b147..2e4833bfc64 100644
--- a/backends/vulkan/runtime/graph/Logging.cpp
+++ b/backends/vulkan/runtime/graph/Logging.cpp
@@ -150,7 +150,7 @@ void ComputeGraph::print_readable() {
 
     std::stringstream read_s;
     for (const ArgGroup& arg_group : node->args_) {
-      if (arg_group.access != api::MemoryAccessType::READ) {
+      if (arg_group.access != vkapi::MemoryAccessType::READ) {
         continue;
       }
       read_s << arg_group.refs;
@@ -159,7 +159,7 @@ void ComputeGraph::print_readable() {
 
     std::stringstream write_s;
     for (const ArgGroup& arg_group : node->args_) {
-      if (arg_group.access != api::MemoryAccessType::WRITE) {
+      if (arg_group.access != vkapi::MemoryAccessType::WRITE) {
         continue;
       }
       write_s << arg_group.refs;
diff --git a/backends/vulkan/runtime/graph/containers/Constant.cpp b/backends/vulkan/runtime/graph/containers/Constant.cpp
index c6260bf66a1..cb43295a42a 100644
--- a/backends/vulkan/runtime/graph/containers/Constant.cpp
+++ b/backends/vulkan/runtime/graph/containers/Constant.cpp
@@ -12,7 +12,7 @@ namespace vkcompute {
 
 TensorRef::TensorRef(
     const std::vector<int64_t>& t_sizes,
-    api::ScalarType t_dtype,
+    vkapi::ScalarType t_dtype,
     const void* const t_data)
     : sizes{}, dtype{t_dtype}, data{t_data} {
   size_t ndim = t_sizes.size();
diff --git a/backends/vulkan/runtime/graph/containers/Constant.h b/backends/vulkan/runtime/graph/containers/Constant.h
index a2ce20bad85..9aa3716e28d 100644
--- a/backends/vulkan/runtime/graph/containers/Constant.h
+++ b/backends/vulkan/runtime/graph/containers/Constant.h
@@ -21,12 +21,12 @@ namespace vkcompute {
  */
 struct TensorRef final {
   std::vector<int64_t> sizes;
-  api::ScalarType dtype;
+  vkapi::ScalarType dtype;
   const void* data;
 
   explicit TensorRef(
       const std::vector<int64_t>& t_sizes,
-      api::ScalarType t_dtype,
+      vkapi::ScalarType t_dtype,
       const void* const t_data);
 };
 
diff --git a/backends/vulkan/runtime/graph/containers/SharedObject.cpp b/backends/vulkan/runtime/graph/containers/SharedObject.cpp
index cbc526700c3..0d8b77a5b74 100644
--- a/backends/vulkan/runtime/graph/containers/SharedObject.cpp
+++ b/backends/vulkan/runtime/graph/containers/SharedObject.cpp
@@ -35,7 +35,8 @@ void SharedObject::add_user(ComputeGraph* const graph, const ValueRef idx) {
   VmaAllocationCreateFlags clear_mask = ~VMA_ALLOCATION_CREATE_STRATEGY_MASK;
   VmaAllocationCreateFlags create_flags = create_info.flags & clear_mask;
   // Use the default allocation strategy
-  aggregate_create_info.flags = create_flags | api::DEFAULT_ALLOCATION_STRATEGY;
+  aggregate_create_info.flags =
+      create_flags | vkapi::DEFAULT_ALLOCATION_STRATEGY;
 
   // Set the usage flag if it is currently not set
   if (aggregate_create_info.usage == VMA_MEMORY_USAGE_UNKNOWN) {
diff --git a/backends/vulkan/runtime/graph/containers/SharedObject.h b/backends/vulkan/runtime/graph/containers/SharedObject.h
index 09509ad45b9..9b08fc5efa6 100644
--- a/backends/vulkan/runtime/graph/containers/SharedObject.h
+++ b/backends/vulkan/runtime/graph/containers/SharedObject.h
@@ -12,7 +12,8 @@
 
 #include <executorch/backends/vulkan/runtime/api/Context.h>
 #include <executorch/backends/vulkan/runtime/api/Tensor.h>
-#include <executorch/backends/vulkan/runtime/api/Types.h>
+
+#include <executorch/backends/vulkan/runtime/api/vk_api/Types.h>
 
 #include <executorch/backends/vulkan/runtime/graph/GraphConfig.h>
 
@@ -30,7 +31,7 @@ struct SharedObject {
   VkMemoryRequirements aggregate_memory_requirements;
   VmaAllocationCreateInfo aggregate_create_info;
   std::vector<ValueRef> users;
-  api::Allocation allocation;
+  vkapi::Allocation allocation;
 
   void add_user(ComputeGraph* const graph, const ValueRef idx);
   void allocate(ComputeGraph* const graph);
diff --git a/backends/vulkan/runtime/graph/ops/ExecuteNode.cpp b/backends/vulkan/runtime/graph/ops/ExecuteNode.cpp
index 18e6f7ea3c8..3b2a826f87f 100644
--- a/backends/vulkan/runtime/graph/ops/ExecuteNode.cpp
+++ b/backends/vulkan/runtime/graph/ops/ExecuteNode.cpp
@@ -16,12 +16,12 @@ namespace vkcompute {
 
 ExecuteNode::ExecuteNode(
     ComputeGraph& graph,
-    const api::ShaderInfo& shader,
+    const vkapi::ShaderInfo& shader,
     const utils::uvec3& global_workgroup_size,
     const utils::uvec3& local_workgroup_size,
     const std::vector<ArgGroup>& args,
-    const api::ParamsBindList& params,
-    const api::SpecVarList& spec_vars,
+    const vkapi::ParamsBindList& params,
+    const vkapi::SpecVarList& spec_vars,
     const ResizeFunction& resize_fn,
     const std::vector<ValueRef>& resize_args)
     : shader_(shader),
@@ -37,7 +37,7 @@ ExecuteNode::ExecuteNode(
 
 void ExecuteNode::encode(ComputeGraph* graph) {
   api::Context* const context = graph->context();
-  api::PipelineBarrier pipeline_barrier{};
+  vkapi::PipelineBarrier pipeline_barrier{};
 
   std::unique_lock<std::mutex> cmd_lock = context->dispatch_lock();
 
@@ -47,7 +47,7 @@ void ExecuteNode::encode(ComputeGraph* graph) {
       local_workgroup_size_,
       node_id_);
 
-  api::DescriptorSet descriptor_set =
+  vkapi::DescriptorSet descriptor_set =
       context->get_descriptor_set(shader_, local_workgroup_size_, spec_vars_);
 
   uint32_t idx = 0;
diff --git a/backends/vulkan/runtime/graph/ops/ExecuteNode.h b/backends/vulkan/runtime/graph/ops/ExecuteNode.h
index 763d37cf81a..1fff14e020e 100644
--- a/backends/vulkan/runtime/graph/ops/ExecuteNode.h
+++ b/backends/vulkan/runtime/graph/ops/ExecuteNode.h
@@ -21,16 +21,16 @@ class ComputeGraph;
  * access permission.
  */
 struct ArgGroup {
-  ArgGroup(const ValueRef ref, const api::MemoryAccessType access)
+  ArgGroup(const ValueRef ref, const vkapi::MemoryAccessType access)
       : refs{ref}, access(access) {}
 
   ArgGroup(
       const std::vector<ValueRef>& refs,
-      const api::MemoryAccessType access)
+      const vkapi::MemoryAccessType access)
       : refs(refs), access(access) {}
 
   const std::vector<ValueRef> refs;
-  const api::MemoryAccessType access;
+  const vkapi::MemoryAccessType access;
 };
 
 /*
@@ -50,12 +50,12 @@ class ExecuteNode final {
 
   ExecuteNode(
       ComputeGraph& graph,
-      const api::ShaderInfo& shader,
+      const vkapi::ShaderInfo& shader,
       const utils::uvec3& global_workgroup_size,
       const utils::uvec3& local_workgroup_size,
       const std::vector<ArgGroup>& args,
-      const api::ParamsBindList& params,
-      const api::SpecVarList& spec_vars = {},
+      const vkapi::ParamsBindList& params,
+      const vkapi::SpecVarList& spec_vars = {},
       const ResizeFunction& resize_fn = nullptr,
       const std::vector<ValueRef>& resize_args = {});
 
@@ -75,12 +75,12 @@ class ExecuteNode final {
 
  protected:
   uint32_t node_id_;
-  const api::ShaderInfo shader_;
+  const vkapi::ShaderInfo shader_;
   const utils::uvec3 global_workgroup_size_;
   const utils::uvec3 local_workgroup_size_;
   const std::vector<ArgGroup> args_;
-  const api::ParamsBindList params_;
-  const api::SpecVarList spec_vars_;
+  const vkapi::ParamsBindList params_;
+  const vkapi::SpecVarList spec_vars_;
   const ResizeFunction resize_fn_;
   const std::vector<ValueRef> resize_args_;
 };
diff --git a/backends/vulkan/runtime/graph/ops/PrepackNode.cpp b/backends/vulkan/runtime/graph/ops/PrepackNode.cpp
index 5fe606eb031..b77c62920dd 100644
--- a/backends/vulkan/runtime/graph/ops/PrepackNode.cpp
+++ b/backends/vulkan/runtime/graph/ops/PrepackNode.cpp
@@ -16,7 +16,7 @@
 
 namespace vkcompute {
 
-api::ShaderInfo get_noop_shader(ComputeGraph& graph, const ValueRef packed) {
+vkapi::ShaderInfo get_noop_shader(ComputeGraph& graph, const ValueRef packed) {
   std::string noop_shader_name("no_op");
   vTensorPtr t_packed = graph.get_tensor(packed);
   add_dtype_suffix(noop_shader_name, *t_packed);
@@ -26,13 +26,13 @@ api::ShaderInfo get_noop_shader(ComputeGraph& graph, const ValueRef packed) {
 
 PrepackNode::PrepackNode(
     ComputeGraph& graph,
-    const api::ShaderInfo& shader,
+    const vkapi::ShaderInfo& shader,
     const utils::uvec3& global_workgroup_size,
     const utils::uvec3& local_workgroup_size,
     const ValueRef tref,
     const ValueRef packed,
-    const api::ParamsBindList& params,
-    const api::SpecVarList& spec_vars)
+    const vkapi::ParamsBindList& params,
+    const vkapi::SpecVarList& spec_vars)
     : shader_(shader),
       noop_shader_(get_noop_shader(graph, packed)),
       global_workgroup_size_(global_workgroup_size),
@@ -49,11 +49,11 @@ api::StorageBuffer PrepackNode::create_staging_buffer(ComputeGraph* graph) {
   vTensorPtr packed = graph->get_tensor(packed_);
 
   // If no TensorRef is provided, create a staging buffer of zeros according to
-  // the api::vTensor metadata.
+  // the vkapi::vTensor metadata.
   if (graph->val_is_none(tref_)) {
     size_t numel = utils::multiply_integers(packed->sizes());
     api::StorageBuffer staging(graph->context(), packed->dtype(), numel);
-    size_t nbytes = numel * api::element_size(packed->dtype());
+    size_t nbytes = numel * vkapi::element_size(packed->dtype());
     set_staging_zeros(staging, nbytes);
     return staging;
   }
@@ -61,7 +61,7 @@ api::StorageBuffer PrepackNode::create_staging_buffer(ComputeGraph* graph) {
   TensorRefPtr tref = graph->get_tref(tref_);
   size_t numel = utils::multiply_integers(tref->sizes);
   api::StorageBuffer staging(graph->context(), tref->dtype, numel);
-  size_t nbytes = numel * api::element_size(tref->dtype);
+  size_t nbytes = numel * vkapi::element_size(tref->dtype);
   copy_ptr_to_staging(tref->data, staging, nbytes);
   return staging;
 }
@@ -75,15 +75,15 @@ void PrepackNode::encode(ComputeGraph* graph) {
   std::unique_lock<std::mutex> cmd_lock = context->dispatch_lock();
 
   {
-    api::PipelineBarrier pipeline_barrier{};
-    api::DescriptorSet descriptor_set =
+    vkapi::PipelineBarrier pipeline_barrier{};
+    vkapi::DescriptorSet descriptor_set =
         context->get_descriptor_set(shader_, local_workgroup_size_, spec_vars_);
 
     uint32_t idx = 0;
     bind_tensor_to_descriptor_set(
         *packed,
         pipeline_barrier,
-        api::MemoryAccessType::WRITE,
+        vkapi::MemoryAccessType::WRITE,
         descriptor_set,
         idx++);
     bind_staging_to_descriptor_set(staging, descriptor_set, idx++);
@@ -98,14 +98,14 @@ void PrepackNode::encode(ComputeGraph* graph) {
   // READ_ONLY_OPTIMAL. This ensures that future uses of the tensor will be
   // bound with the correct image layout.
   {
-    api::PipelineBarrier pipeline_barrier{};
-    api::DescriptorSet descriptor_set =
+    vkapi::PipelineBarrier pipeline_barrier{};
+    vkapi::DescriptorSet descriptor_set =
         context->get_descriptor_set(noop_shader_, {1, 1, 1});
 
     bind_tensor_to_descriptor_set(
         *packed,
         pipeline_barrier,
-        api::MemoryAccessType::READ,
+        vkapi::MemoryAccessType::READ,
         descriptor_set,
         0);
 
diff --git a/backends/vulkan/runtime/graph/ops/PrepackNode.h b/backends/vulkan/runtime/graph/ops/PrepackNode.h
index 0b0c32fa876..c3ac8b963fd 100644
--- a/backends/vulkan/runtime/graph/ops/PrepackNode.h
+++ b/backends/vulkan/runtime/graph/ops/PrepackNode.h
@@ -28,13 +28,13 @@ class PrepackNode final {
  public:
   PrepackNode(
       ComputeGraph& graph,
-      const api::ShaderInfo& shader,
+      const vkapi::ShaderInfo& shader,
       const utils::uvec3& global_workgroup_size,
       const utils::uvec3& local_workgroup_size,
       const ValueRef tref,
       const ValueRef packed,
-      const api::ParamsBindList& params,
-      const api::SpecVarList& spec_vars = {});
+      const vkapi::ParamsBindList& params,
+      const vkapi::SpecVarList& spec_vars = {});
 
   ~PrepackNode() = default;
 
@@ -46,14 +46,14 @@ class PrepackNode final {
 
  protected:
   uint32_t node_id_;
-  const api::ShaderInfo shader_;
-  api::ShaderInfo noop_shader_;
+  const vkapi::ShaderInfo shader_;
+  vkapi::ShaderInfo noop_shader_;
   const utils::uvec3 global_workgroup_size_;
   const utils::uvec3 local_workgroup_size_;
   const ValueRef tref_;
   const ValueRef packed_;
-  const api::ParamsBindList params_;
-  const api::SpecVarList spec_vars_;
+  const vkapi::ParamsBindList params_;
+  const vkapi::SpecVarList spec_vars_;
 
  private:
   api::StorageBuffer create_staging_buffer(ComputeGraph* graph);
diff --git a/backends/vulkan/runtime/graph/ops/impl/Arange.cpp b/backends/vulkan/runtime/graph/ops/impl/Arange.cpp
index 696898816a0..136aacf0e15 100644
--- a/backends/vulkan/runtime/graph/ops/impl/Arange.cpp
+++ b/backends/vulkan/runtime/graph/ops/impl/Arange.cpp
@@ -94,7 +94,7 @@ void add_arange_node(
       graph.create_global_wg_size(out),
       graph.create_local_wg_size(out),
       // Inputs and Outputs
-      {{out, api::MemoryAccessType::WRITE}},
+      {{out, vkapi::MemoryAccessType::WRITE}},
       // Shader params buffers
       {t_out->sizes_ubo(),
        graph.create_params_buffer(start_val),
diff --git a/backends/vulkan/runtime/graph/ops/impl/BatchNorm.cpp b/backends/vulkan/runtime/graph/ops/impl/BatchNorm.cpp
index 67f43882302..58d16f073d8 100644
--- a/backends/vulkan/runtime/graph/ops/impl/BatchNorm.cpp
+++ b/backends/vulkan/runtime/graph/ops/impl/BatchNorm.cpp
@@ -33,7 +33,7 @@ ValueRef prepack_arg(
   // batch_norm's param are broadcasted on the channel dimension.
   // In this implementation, we pack the weights along the x dimension, and
   // in the shader, we lookup using the along the x.
-  return prepack_if_tensor_ref(graph, arg_ref, api::kWidthPacked);
+  return prepack_if_tensor_ref(graph, arg_ref, vkapi::kWidthPacked);
 }
 
 void add_native_batch_norm_node(
@@ -85,9 +85,9 @@ void add_native_batch_norm_node(
       VK_KERNEL_FROM_STR(kernel_name),
       graph.create_global_wg_size(out_ref),
       graph.create_local_wg_size(out_ref),
-      {{out_ref, api::MemoryAccessType::WRITE},
+      {{out_ref, vkapi::MemoryAccessType::WRITE},
        {{in_ref, arg_weight, arg_bias, arg_mean, arg_var},
-        api::MemoryAccessType::READ}},
+        vkapi::MemoryAccessType::READ}},
       {t_out->texture_limits_ubo(),
        graph.create_params_buffer(epsilon),
        graph.create_params_buffer(num_texel_per_batch)}));
diff --git a/backends/vulkan/runtime/graph/ops/impl/BinaryOp.cpp b/backends/vulkan/runtime/graph/ops/impl/BinaryOp.cpp
index 044e8f4e8ab..4478c94fa5c 100644
--- a/backends/vulkan/runtime/graph/ops/impl/BinaryOp.cpp
+++ b/backends/vulkan/runtime/graph/ops/impl/BinaryOp.cpp
@@ -81,8 +81,8 @@ void add_binary_op_node(
       graph.create_global_wg_size(out),
       graph.create_local_wg_size(out),
       // Inputs and Outputs
-      {{out, api::MemoryAccessType::WRITE},
-       {{arg1, arg2}, api::MemoryAccessType::READ}},
+      {{out, vkapi::MemoryAccessType::WRITE},
+       {{arg1, arg2}, vkapi::MemoryAccessType::READ}},
       // Shader params buffers
       {t_out->sizes_ubo(),
        t_in1->sizes_ubo(),
diff --git a/backends/vulkan/runtime/graph/ops/impl/Cat.cpp b/backends/vulkan/runtime/graph/ops/impl/Cat.cpp
index a2697af27f8..ff1657f338a 100644
--- a/backends/vulkan/runtime/graph/ops/impl/Cat.cpp
+++ b/backends/vulkan/runtime/graph/ops/impl/Cat.cpp
@@ -25,7 +25,7 @@ void add_cat_default_node(
 
   for (ValueRef input_ref : *input_list) {
     vTensorPtr t_in = graph.get_tensor(input_ref);
-    VK_CHECK_COND(check_memory_layout_is(*t_in, api::kChannelsPacked));
+    VK_CHECK_COND(check_memory_layout_is(*t_in, vkapi::kChannelsPacked));
   }
 
   int64_t dim = graph.extract_scalar<int64_t>(dim_ref);
diff --git a/backends/vulkan/runtime/graph/ops/impl/Clone.cpp b/backends/vulkan/runtime/graph/ops/impl/Clone.cpp
index 5a8a6a157b1..cef751bc7c8 100644
--- a/backends/vulkan/runtime/graph/ops/impl/Clone.cpp
+++ b/backends/vulkan/runtime/graph/ops/impl/Clone.cpp
@@ -30,7 +30,8 @@ void add_clone_node(
       VK_KERNEL_FROM_STR(kernel_name),
       graph.create_global_wg_size(out),
       graph.create_local_wg_size(out),
-      {{out, api::MemoryAccessType::WRITE}, {in, api::MemoryAccessType::READ}},
+      {{out, vkapi::MemoryAccessType::WRITE},
+       {in, vkapi::MemoryAccessType::READ}},
       {t_out->texture_limits_ubo()}));
 }
 
diff --git a/backends/vulkan/runtime/graph/ops/impl/Convolution.cpp b/backends/vulkan/runtime/graph/ops/impl/Convolution.cpp
index ecf53780772..ffbbffa46af 100644
--- a/backends/vulkan/runtime/graph/ops/impl/Convolution.cpp
+++ b/backends/vulkan/runtime/graph/ops/impl/Convolution.cpp
@@ -88,8 +88,8 @@ ValueRef prepack_biases(
     const ValueRef vref,
     const ValueRef weight,
     const bool transposed,
-    const api::StorageType storage_type,
-    const api::GPUMemoryLayout memory_layout) {
+    const vkapi::StorageType storage_type,
+    const vkapi::GPUMemoryLayout memory_layout) {
   auto sizes = graph.sizes_of(weight);
   const int64_t out_channels = transposed ? sizes.at(1) : sizes.at(0);
 
@@ -97,7 +97,7 @@ ValueRef prepack_biases(
       {out_channels}, graph.dtype_of(weight), storage_type, memory_layout);
   vTensorPtr t = graph.get_tensor(v);
 
-  api::ShaderInfo shader = get_nchw_to_tensor_shader(*t);
+  vkapi::ShaderInfo shader = get_nchw_to_tensor_shader(*t);
 
   graph.prepack_nodes().emplace_back(new PrepackNode(
       graph,
@@ -120,7 +120,7 @@ enum class Conv2dMethod : uint8_t {
   Transposed,
 };
 
-api::ShaderInfo get_conv2d_shader(
+vkapi::ShaderInfo get_conv2d_shader(
     ComputeGraph& graph,
     const api::vTensor& t_out,
     const bool prepack_weights,
@@ -196,10 +196,13 @@ ValueRef prepack_weights(
   const auto final_sizes = get_final_sizes(original_sizes, method);
 
   ValueRef v = graph.add_tensor(
-      final_sizes, graph.dtype_of(vref), api::kTexture2D, api::kChannelsPacked);
+      final_sizes,
+      graph.dtype_of(vref),
+      vkapi::kTexture2D,
+      vkapi::kChannelsPacked);
   vTensorPtr t = graph.get_tensor(v);
 
-  api::ShaderInfo shader =
+  vkapi::ShaderInfo shader =
       get_conv2d_shader(graph, *t, /*prepack_weights = */ true, method, vref);
 
   graph.prepack_nodes().emplace_back(new PrepackNode(
@@ -219,8 +222,8 @@ ValueRef prepack_weights(
 }
 
 void check_conv_args(const api::vTensor& in, const api::vTensor& out) {
-  VK_CHECK_COND(check_memory_layout_is(in, api::kChannelsPacked));
-  VK_CHECK_COND(check_memory_layout_is(out, api::kChannelsPacked));
+  VK_CHECK_COND(check_memory_layout_is(in, vkapi::kChannelsPacked));
+  VK_CHECK_COND(check_memory_layout_is(out, vkapi::kChannelsPacked));
 }
 
 struct Conv2dParams final {
@@ -340,8 +343,8 @@ void add_conv2d_node(
       bias,
       weight,
       transposed_val,
-      /* storage_type = */ api::kTexture2D,
-      /* memory_layout = */ api::kWidthPacked);
+      /* storage_type = */ vkapi::kTexture2D,
+      /* memory_layout = */ vkapi::kWidthPacked);
 
   vTensorPtr t_in = graph.get_tensor(arg_in);
   vTensorPtr t_out = graph.get_tensor(out);
@@ -364,7 +367,7 @@ void add_conv2d_node(
 
   check_conv2d_params(kernel_params, transposed_val);
 
-  api::ShaderInfo shader = get_conv2d_shader(
+  vkapi::ShaderInfo shader = get_conv2d_shader(
       graph, *t_out, /*prepack_weights = */ false, method, weight, clamp_out);
 
   graph.execute_nodes().emplace_back(new ExecuteNode(
@@ -373,8 +376,8 @@ void add_conv2d_node(
       create_conv2d_global_wg_size(graph, method, out),
       graph.create_local_wg_size(out),
       // Inputs and Outputs
-      {{out, api::MemoryAccessType::WRITE},
-       {{arg_in, arg_weight, arg_bias}, api::MemoryAccessType::READ}},
+      {{out, vkapi::MemoryAccessType::WRITE},
+       {{arg_in, arg_weight, arg_bias}, vkapi::MemoryAccessType::READ}},
       // Shader params buffers
       {
           t_out->texture_limits_ubo(),
@@ -404,14 +407,15 @@ void add_conv1d_node(
     const ValueRef out,
     const bool clamp_out) {
   ValueRef arg_in = prepack_if_tensor_ref(graph, in);
-  ValueRef arg_weight = prepack_if_tensor_ref(graph, weight, api::kWidthPacked);
+  ValueRef arg_weight =
+      prepack_if_tensor_ref(graph, weight, vkapi::kWidthPacked);
   ValueRef arg_bias = prepack_biases(
       graph,
       bias,
       weight,
       /*transposed = */ false,
-      /*storage_type = */ api::kTexture3D,
-      /*memory_layout = */ api::kChannelsPacked);
+      /*storage_type = */ vkapi::kTexture3D,
+      /*memory_layout = */ vkapi::kChannelsPacked);
 
   float out_min_val = 0.0f;
   float out_max_val = 0.0f;
@@ -470,8 +474,8 @@ void add_conv1d_node(
       global_size,
       local_size,
       // Inputs and Outputs
-      {{out, api::MemoryAccessType::WRITE},
-       {{arg_in, arg_weight, arg_bias}, api::MemoryAccessType::READ}},
+      {{out, vkapi::MemoryAccessType::WRITE},
+       {{arg_in, arg_weight, arg_bias}, vkapi::MemoryAccessType::READ}},
       // Shader params buffers
       {
           t_out->texture_limits_ubo(),
diff --git a/backends/vulkan/runtime/graph/ops/impl/Copy.cpp b/backends/vulkan/runtime/graph/ops/impl/Copy.cpp
index 3bd8c2c6666..527cf6c6a81 100644
--- a/backends/vulkan/runtime/graph/ops/impl/Copy.cpp
+++ b/backends/vulkan/runtime/graph/ops/impl/Copy.cpp
@@ -57,8 +57,8 @@ void add_copy_offset_node(
       graph.create_local_wg_size(out),
       // Inputs and Outputs
       {
-          {out, api::MemoryAccessType::WRITE},
-          {in, api::MemoryAccessType::READ},
+          {out, vkapi::MemoryAccessType::WRITE},
+          {in, vkapi::MemoryAccessType::READ},
       },
       // Parameter buffers
       {graph.create_params_buffer(offset_params)},
@@ -80,8 +80,8 @@ void add_copy_channel_offset_node(
   std::vector<int64_t> in_sizes = t_in->sizes();
   std::vector<int64_t> out_sizes = t_out->sizes();
 
-  VK_CHECK_COND(check_memory_layout_is(*t_in, api::kChannelsPacked));
-  VK_CHECK_COND(check_memory_layout_is(*t_out, api::kChannelsPacked));
+  VK_CHECK_COND(check_memory_layout_is(*t_in, vkapi::kChannelsPacked));
+  VK_CHECK_COND(check_memory_layout_is(*t_out, vkapi::kChannelsPacked));
 
   // NOTE: This function should be able to support 1d and 2d tensors when
   // range=1, src_offset=dst_offset=1.
@@ -174,9 +174,9 @@ void add_copy_channel_offset_node(
         local_size,
         // Inputs and Outputs
         {
-            {out, api::MemoryAccessType::WRITE},
-            {out, api::MemoryAccessType::READ},
-            {in, api::MemoryAccessType::READ},
+            {out, vkapi::MemoryAccessType::WRITE},
+            {out, vkapi::MemoryAccessType::READ},
+            {in, vkapi::MemoryAccessType::READ},
         },
         // Parameter buffers
         {graph.create_params_buffer(channel_offset_params)},
diff --git a/backends/vulkan/runtime/graph/ops/impl/Embedding.cpp b/backends/vulkan/runtime/graph/ops/impl/Embedding.cpp
index 55e019f689a..445a5b6bc2a 100644
--- a/backends/vulkan/runtime/graph/ops/impl/Embedding.cpp
+++ b/backends/vulkan/runtime/graph/ops/impl/Embedding.cpp
@@ -21,9 +21,9 @@ void check_embedding_args(
     const api::vTensor& weight,
     const api::vTensor& in,
     const api::vTensor& out) {
-  VK_CHECK_COND(check_memory_layout_is(weight, api::kChannelsPacked));
-  VK_CHECK_COND(check_memory_layout_is(in, api::kChannelsPacked));
-  VK_CHECK_COND(check_memory_layout_is(out, api::kChannelsPacked));
+  VK_CHECK_COND(check_memory_layout_is(weight, vkapi::kChannelsPacked));
+  VK_CHECK_COND(check_memory_layout_is(in, vkapi::kChannelsPacked));
+  VK_CHECK_COND(check_memory_layout_is(out, vkapi::kChannelsPacked));
 }
 
 void add_embedding_node(
@@ -46,8 +46,8 @@ void add_embedding_node(
       VK_KERNEL_FROM_STR(kernel_name),
       graph.create_global_wg_size(out),
       graph.create_local_wg_size(out),
-      {{out, api::MemoryAccessType::WRITE},
-       {{in, weight}, api::MemoryAccessType::READ}},
+      {{out, vkapi::MemoryAccessType::WRITE},
+       {{in, weight}, vkapi::MemoryAccessType::READ}},
       {t_out->sizes_ubo()}));
 }
 
diff --git a/backends/vulkan/runtime/graph/ops/impl/Full.cpp b/backends/vulkan/runtime/graph/ops/impl/Full.cpp
index 86be4eaa9fa..157515e6e0a 100644
--- a/backends/vulkan/runtime/graph/ops/impl/Full.cpp
+++ b/backends/vulkan/runtime/graph/ops/impl/Full.cpp
@@ -50,7 +50,7 @@ void add_full_node(
       graph.create_global_wg_size(out),
       graph.create_local_wg_size(out),
       // Inputs and Outputs
-      {{out, api::MemoryAccessType::WRITE}},
+      {{out, vkapi::MemoryAccessType::WRITE}},
       // Shader params buffers
       {t_out->sizes_ubo(), graph.create_params_buffer(fill_value_val)},
       // Specialization Constants
diff --git a/backends/vulkan/runtime/graph/ops/impl/IndexSelect.cpp b/backends/vulkan/runtime/graph/ops/impl/IndexSelect.cpp
index f024a952e73..5909f9f202a 100644
--- a/backends/vulkan/runtime/graph/ops/impl/IndexSelect.cpp
+++ b/backends/vulkan/runtime/graph/ops/impl/IndexSelect.cpp
@@ -21,9 +21,9 @@ void check_index_select_args(
     const api::vTensor& in,
     const api::vTensor& idx,
     const api::vTensor& out) {
-  VK_CHECK_COND(check_memory_layout_is(in, api::kChannelsPacked));
-  VK_CHECK_COND(check_memory_layout_is(idx, api::kChannelsPacked));
-  VK_CHECK_COND(check_memory_layout_is(out, api::kChannelsPacked));
+  VK_CHECK_COND(check_memory_layout_is(in, vkapi::kChannelsPacked));
+  VK_CHECK_COND(check_memory_layout_is(idx, vkapi::kChannelsPacked));
+  VK_CHECK_COND(check_memory_layout_is(out, vkapi::kChannelsPacked));
 }
 
 void add_index_select_channel_node(
@@ -46,8 +46,8 @@ void add_index_select_channel_node(
       VK_KERNEL_FROM_STR(kernel_name),
       graph.create_global_wg_size(out),
       graph.create_local_wg_size(out),
-      {{out, api::MemoryAccessType::WRITE},
-       {{in, idx}, api::MemoryAccessType::READ}},
+      {{out, vkapi::MemoryAccessType::WRITE},
+       {{in, idx}, vkapi::MemoryAccessType::READ}},
       {t_out->sizes_ubo(), t_in->sizes_ubo()}));
 }
 
@@ -95,8 +95,8 @@ void add_index_select_node(
       VK_KERNEL_FROM_STR(kernel_name),
       graph.create_global_wg_size(out),
       graph.create_local_wg_size(out),
-      {{out, api::MemoryAccessType::WRITE},
-       {{in, idx}, api::MemoryAccessType::READ}},
+      {{out, vkapi::MemoryAccessType::WRITE},
+       {{in, idx}, vkapi::MemoryAccessType::READ}},
       {t_out->sizes_ubo(), graph.create_params_buffer(params)}));
 }
 
diff --git a/backends/vulkan/runtime/graph/ops/impl/Linear.cpp b/backends/vulkan/runtime/graph/ops/impl/Linear.cpp
index 8e38b4b3420..c6ca4233fd0 100644
--- a/backends/vulkan/runtime/graph/ops/impl/Linear.cpp
+++ b/backends/vulkan/runtime/graph/ops/impl/Linear.cpp
@@ -94,8 +94,8 @@ void add_addmm_naive_node(
     const ValueRef out,
     const Params& params,
     const ValueRef mat2_is_transposed) {
-  ValueRef self = prepack_if_tensor_ref(graph, self_data, api::kWidthPacked);
-  ValueRef mat2 = prepack_if_tensor_ref(graph, mat2_data, api::kHeightPacked);
+  ValueRef self = prepack_if_tensor_ref(graph, self_data, vkapi::kWidthPacked);
+  ValueRef mat2 = prepack_if_tensor_ref(graph, mat2_data, vkapi::kHeightPacked);
 
   std::string kernel_name =
       graph.get_bool(mat2_is_transposed) ? "linear_naive" : "addmm_naive";
@@ -110,8 +110,8 @@ void add_addmm_naive_node(
       graph.create_global_wg_size(out),
       graph.create_local_wg_size(out),
       // Inputs and Outputs
-      {{out, api::MemoryAccessType::WRITE},
-       {{mat1, mat2, self}, api::MemoryAccessType::READ}},
+      {{out, vkapi::MemoryAccessType::WRITE},
+       {{mat1, mat2, self}, vkapi::MemoryAccessType::READ}},
       // Shader params buffers
       {
           graph.texture_limits_ubo(out),
@@ -136,11 +136,12 @@ void add_addmm_optimized_node(
     const ValueRef out,
     const Params& params,
     const ValueRef mat2_is_transposed) {
-  ValueRef self = prepack_if_tensor_ref(graph, self_data, api::kChannelsPacked);
-  ValueRef mat2 = prepack_if_tensor_ref(graph, mat2_data, api::kHeightPacked);
+  ValueRef self =
+      prepack_if_tensor_ref(graph, self_data, vkapi::kChannelsPacked);
+  ValueRef mat2 = prepack_if_tensor_ref(graph, mat2_data, vkapi::kHeightPacked);
 
   // Ensure mat1 is width packed
-  ValueRef mat1_W_packed = graph.add_tensor_like(mat1, api::kWidthPacked);
+  ValueRef mat1_W_packed = graph.add_tensor_like(mat1, vkapi::kWidthPacked);
   auto viewFn = VK_GET_OP_FN("aten.view_copy.default");
   viewFn(graph, {mat1, graph.add_none(), mat1_W_packed});
 
@@ -148,8 +149,8 @@ void add_addmm_optimized_node(
 
   // Ensure mat2 is height packed
   ValueRef mat2_packed = mat2;
-  const api::GPUMemoryLayout mat2_layout =
-      mat2_is_transposed_val ? api::kWidthPacked : api::kHeightPacked;
+  const vkapi::GPUMemoryLayout mat2_layout =
+      mat2_is_transposed_val ? vkapi::kWidthPacked : vkapi::kHeightPacked;
   if (graph.memory_layout_of(mat2) != mat2_layout) {
     mat2_packed = graph.add_tensor_like(mat2, mat2_layout);
     viewFn(graph, {mat2, graph.add_none(), mat2_packed});
@@ -186,8 +187,8 @@ void add_addmm_optimized_node(
       global_size,
       local_size,
       // Inputs and Outputs
-      {{out, api::MemoryAccessType::WRITE},
-       {{mat1_W_packed, mat2_packed, self}, api::MemoryAccessType::READ}},
+      {{out, vkapi::MemoryAccessType::WRITE},
+       {{mat1_W_packed, mat2_packed, self}, vkapi::MemoryAccessType::READ}},
       // Shader params buffers
       {
           graph.texture_limits_ubo(out),
@@ -223,10 +224,10 @@ void add_addmm_node(
   }
 
   Params params = {alpha_val, beta_val};
-  if (graph.memory_layout_of(mat1) == api::kChannelsPacked) {
+  if (graph.memory_layout_of(mat1) == vkapi::kChannelsPacked) {
     add_addmm_optimized_node(
         graph, self, mat1, mat2, beta, alpha, out, params, mat2_is_transposed);
-  } else if (graph.memory_layout_of(mat1) == api::kWidthPacked) {
+  } else if (graph.memory_layout_of(mat1) == vkapi::kWidthPacked) {
     add_addmm_naive_node(
         graph, self, mat1, mat2, beta, alpha, out, params, mat2_is_transposed);
   } else {
@@ -254,7 +255,7 @@ void linear(ComputeGraph& graph, const std::vector<ValueRef>& args) {
   ValueRef bias = args.at(2);
   ValueRef out = args.at(3);
   ValueRef weight =
-      prepack_if_tensor_ref(graph, weight_data, api::kWidthPacked);
+      prepack_if_tensor_ref(graph, weight_data, vkapi::kWidthPacked);
   ValueRef mat2_is_transposed = graph.add_scalar(true);
   if (graph.val_is_none(bias)) {
     return add_matmul_node(graph, input, weight, out, mat2_is_transposed);
diff --git a/backends/vulkan/runtime/graph/ops/impl/MatMul.cpp b/backends/vulkan/runtime/graph/ops/impl/MatMul.cpp
index be6929c2b65..6cdffc85722 100644
--- a/backends/vulkan/runtime/graph/ops/impl/MatMul.cpp
+++ b/backends/vulkan/runtime/graph/ops/impl/MatMul.cpp
@@ -68,7 +68,7 @@ void add_matmul_naive_node(
     const ValueRef mat2_data,
     const ValueRef out,
     const ValueRef mat2_is_transposed) {
-  ValueRef mat2 = prepack_if_tensor_ref(graph, mat2_data, api::kHeightPacked);
+  ValueRef mat2 = prepack_if_tensor_ref(graph, mat2_data, vkapi::kHeightPacked);
 
   std::string kernel_name = graph.get_bool(mat2_is_transposed)
       ? "matmul_transposed_naive"
@@ -84,8 +84,8 @@ void add_matmul_naive_node(
       graph.create_global_wg_size(out),
       graph.create_local_wg_size(out),
       // Inputs and Outputs
-      {{out, api::MemoryAccessType::WRITE},
-       {{mat1, mat2}, api::MemoryAccessType::READ}},
+      {{out, vkapi::MemoryAccessType::WRITE},
+       {{mat1, mat2}, vkapi::MemoryAccessType::READ}},
       // Shader params buffers
       {
           graph.texture_limits_ubo(out),
@@ -104,10 +104,10 @@ void add_matmul_optimized_node(
     const ValueRef mat2_data,
     const ValueRef out,
     const ValueRef mat2_is_transposed) {
-  ValueRef mat2 = prepack_if_tensor_ref(graph, mat2_data, api::kHeightPacked);
+  ValueRef mat2 = prepack_if_tensor_ref(graph, mat2_data, vkapi::kHeightPacked);
 
   // Ensure mat1 is width packed
-  ValueRef mat1_W_packed = graph.add_tensor_like(mat1, api::kWidthPacked);
+  ValueRef mat1_W_packed = graph.add_tensor_like(mat1, vkapi::kWidthPacked);
   auto viewFn = VK_GET_OP_FN("aten.view_copy.default");
   viewFn(graph, {mat1, graph.add_none(), mat1_W_packed});
 
@@ -115,8 +115,8 @@ void add_matmul_optimized_node(
 
   // Ensure mat2 to height packed
   ValueRef mat2_packed = mat2;
-  const api::GPUMemoryLayout mat2_layout =
-      mat2_is_transposed_val ? api::kWidthPacked : api::kHeightPacked;
+  const vkapi::GPUMemoryLayout mat2_layout =
+      mat2_is_transposed_val ? vkapi::kWidthPacked : vkapi::kHeightPacked;
   if (graph.memory_layout_of(mat2) != mat2_layout) {
     mat2_packed = graph.add_tensor_like(mat2, mat2_layout);
     viewFn(graph, {mat2, graph.add_none(), mat2_packed});
@@ -153,8 +153,8 @@ void add_matmul_optimized_node(
       global_size,
       local_size,
       // Inputs and Outputs
-      {{out, api::MemoryAccessType::WRITE},
-       {{mat1_W_packed, mat2_packed}, api::MemoryAccessType::READ}},
+      {{out, vkapi::MemoryAccessType::WRITE},
+       {{mat1_W_packed, mat2_packed}, vkapi::MemoryAccessType::READ}},
       // Shader params buffers
       {
           graph.texture_limits_ubo(out),
@@ -174,9 +174,9 @@ void add_matmul_node(
     const ValueRef mat2_data,
     const ValueRef out,
     const ValueRef mat2_is_transposed) {
-  if (graph.memory_layout_of(mat1) == api::kChannelsPacked) {
+  if (graph.memory_layout_of(mat1) == vkapi::kChannelsPacked) {
     add_matmul_optimized_node(graph, mat1, mat2_data, out, mat2_is_transposed);
-  } else if (graph.memory_layout_of(mat1) == api::kWidthPacked) {
+  } else if (graph.memory_layout_of(mat1) == vkapi::kWidthPacked) {
     add_matmul_naive_node(graph, mat1, mat2_data, out, mat2_is_transposed);
   } else {
     VK_THROW("Input should be channel packed or width packed.");
diff --git a/backends/vulkan/runtime/graph/ops/impl/NativeLayerNorm.cpp b/backends/vulkan/runtime/graph/ops/impl/NativeLayerNorm.cpp
index b543256cdaa..17f00d5e829 100644
--- a/backends/vulkan/runtime/graph/ops/impl/NativeLayerNorm.cpp
+++ b/backends/vulkan/runtime/graph/ops/impl/NativeLayerNorm.cpp
@@ -49,8 +49,8 @@ void resize_native_layer_norm_node(
 }
 
 void check_args(const api::vTensor& in, const api::vTensor& out) {
-  VK_CHECK_COND(check_memory_layout_is(in, api::kChannelsPacked));
-  VK_CHECK_COND(check_memory_layout_is(out, api::kChannelsPacked));
+  VK_CHECK_COND(check_memory_layout_is(in, vkapi::kChannelsPacked));
+  VK_CHECK_COND(check_memory_layout_is(out, vkapi::kChannelsPacked));
 }
 
 void add_native_layer_norm_node(
@@ -106,8 +106,8 @@ void add_native_layer_norm_node(
       local_size,
       // Inputs and Outputs
       {{{out_val->at(0), out_val->at(1), out_val->at(2)},
-        api::MemoryAccessType::WRITE},
-       {{arg_in, arg_weight, arg_bias}, api::MemoryAccessType::READ}},
+        vkapi::MemoryAccessType::WRITE},
+       {{arg_in, arg_weight, arg_bias}, vkapi::MemoryAccessType::READ}},
       // Shader params buffers
       {t_out->texture_limits_ubo(),
        t_out->sizes_ubo(),
diff --git a/backends/vulkan/runtime/graph/ops/impl/Pad.cpp b/backends/vulkan/runtime/graph/ops/impl/Pad.cpp
index 626a64f4dcf..3bdf8d789d1 100644
--- a/backends/vulkan/runtime/graph/ops/impl/Pad.cpp
+++ b/backends/vulkan/runtime/graph/ops/impl/Pad.cpp
@@ -84,7 +84,8 @@ void add_constant_pad_nd_node(
       graph.create_global_wg_size(out),
       graph.create_local_wg_size(out),
       // Inputs and Outputs
-      {{out, api::MemoryAccessType::WRITE}, {in, api::MemoryAccessType::READ}},
+      {{out, vkapi::MemoryAccessType::WRITE},
+       {in, vkapi::MemoryAccessType::READ}},
       // Shader params buffers
       {t_out->sizes_ubo(),
        t_in->sizes_ubo(),
diff --git a/backends/vulkan/runtime/graph/ops/impl/Permute.cpp b/backends/vulkan/runtime/graph/ops/impl/Permute.cpp
index 9855ec85d70..9627f9716e2 100644
--- a/backends/vulkan/runtime/graph/ops/impl/Permute.cpp
+++ b/backends/vulkan/runtime/graph/ops/impl/Permute.cpp
@@ -28,8 +28,8 @@ void check_args(
     const api::vTensor& in,
     const std::vector<int64_t>& permute_dims,
     const api::vTensor& out) {
-  VK_CHECK_COND(check_memory_layout_is(in, api::kChannelsPacked));
-  VK_CHECK_COND(check_memory_layout_is(out, api::kChannelsPacked));
+  VK_CHECK_COND(check_memory_layout_is(in, vkapi::kChannelsPacked));
+  VK_CHECK_COND(check_memory_layout_is(out, vkapi::kChannelsPacked));
 
   // This implementation doesn't not requires the input tensor to have the same
   // dim size as the argument. The code will work as long as the input tensor's
@@ -90,7 +90,8 @@ void add_permute_node(
       VK_KERNEL_FROM_STR(kernel_name),
       graph.create_global_wg_size(out),
       graph.create_local_wg_size(out),
-      {{out, api::MemoryAccessType::WRITE}, {in, api::MemoryAccessType::READ}},
+      {{out, vkapi::MemoryAccessType::WRITE},
+       {in, vkapi::MemoryAccessType::READ}},
       {t_out->texture_limits_ubo(),
        t_out->sizes_ubo(),
        graph.create_params_buffer(params)},
diff --git a/backends/vulkan/runtime/graph/ops/impl/Pool.cpp b/backends/vulkan/runtime/graph/ops/impl/Pool.cpp
index b549d9a71a6..b204ac3f57f 100644
--- a/backends/vulkan/runtime/graph/ops/impl/Pool.cpp
+++ b/backends/vulkan/runtime/graph/ops/impl/Pool.cpp
@@ -18,8 +18,8 @@
 namespace vkcompute {
 
 void check_pool2d_args(const api::vTensor& in, const api::vTensor& out) {
-  VK_CHECK_COND(check_memory_layout_is(in, api::kChannelsPacked));
-  VK_CHECK_COND(check_memory_layout_is(out, api::kChannelsPacked));
+  VK_CHECK_COND(check_memory_layout_is(in, vkapi::kChannelsPacked));
+  VK_CHECK_COND(check_memory_layout_is(out, vkapi::kChannelsPacked));
 }
 
 void resize_pool2d_node(
@@ -99,8 +99,8 @@ void add_max_pool2d_node(
       global_size,
       local_size,
       // Inputs and Outputs
-      {{{out_val->at(0), out_val->at(1)}, api::MemoryAccessType::WRITE},
-       {arg, api::MemoryAccessType::READ}},
+      {{{out_val->at(0), out_val->at(1)}, vkapi::MemoryAccessType::WRITE},
+       {arg, vkapi::MemoryAccessType::READ}},
       // Shader params buffers
       {
           t_out->texture_limits_ubo(),
@@ -173,7 +173,8 @@ void add_avg_pool2d_node(
       global_size,
       local_size,
       // Inputs and Outputs
-      {{out, api::MemoryAccessType::WRITE}, {arg, api::MemoryAccessType::READ}},
+      {{out, vkapi::MemoryAccessType::WRITE},
+       {arg, vkapi::MemoryAccessType::READ}},
       // Shader params buffers
       {t_out->texture_limits_ubo(),
        t_in->sizes_ubo(),
diff --git a/backends/vulkan/runtime/graph/ops/impl/QuantizedLinear.cpp b/backends/vulkan/runtime/graph/ops/impl/QuantizedLinear.cpp
index fe15c57b97c..f828da427ab 100644
--- a/backends/vulkan/runtime/graph/ops/impl/QuantizedLinear.cpp
+++ b/backends/vulkan/runtime/graph/ops/impl/QuantizedLinear.cpp
@@ -72,9 +72,9 @@ void add_q_8w_linear_node(
     const ValueRef scales_data,
     const ValueRef out) {
   ValueRef q_mat2 =
-      prepack_if_tensor_ref(graph, q_mat2_data, api::kWidthPacked);
+      prepack_if_tensor_ref(graph, q_mat2_data, vkapi::kWidthPacked);
   ValueRef scales =
-      prepack_if_tensor_ref(graph, scales_data, api::kWidthPacked);
+      prepack_if_tensor_ref(graph, scales_data, vkapi::kWidthPacked);
 
   std::string kernel_name = "q_8w_linear";
   kernel_name.reserve(kShaderNameReserve);
@@ -83,7 +83,7 @@ void add_q_8w_linear_node(
   add_dtype_suffix(kernel_name, graph.dtype_of(out));
   add_storage_type_suffix(kernel_name, graph.storage_type_of(out));
 
-  api::ParamsBindList ubos({});
+  vkapi::ParamsBindList ubos({});
   if (graph.is_buffer_storage(out)) {
     ubos.append(
         {graph.sizes_ubo(out),
@@ -103,8 +103,8 @@ void add_q_8w_linear_node(
       graph.create_global_wg_size(out),
       graph.create_local_wg_size(out),
       // Inputs and Outputs
-      {{out, api::MemoryAccessType::WRITE},
-       {{mat1, q_mat2, scales}, api::MemoryAccessType::READ}},
+      {{out, vkapi::MemoryAccessType::WRITE},
+       {{mat1, q_mat2, scales}, vkapi::MemoryAccessType::READ}},
       // Shader params buffers
       ubos,
       // Specialization Constants
diff --git a/backends/vulkan/runtime/graph/ops/impl/Repeat.cpp b/backends/vulkan/runtime/graph/ops/impl/Repeat.cpp
index e93e9068a68..7ca6234b8ba 100644
--- a/backends/vulkan/runtime/graph/ops/impl/Repeat.cpp
+++ b/backends/vulkan/runtime/graph/ops/impl/Repeat.cpp
@@ -23,8 +23,8 @@ void check_args(
     const api::vTensor& in,
     const std::vector<int64_t>& repeats,
     const api::vTensor& out) {
-  VK_CHECK_COND(check_memory_layout_is(in, api::kChannelsPacked));
-  VK_CHECK_COND(check_memory_layout_is(out, api::kChannelsPacked));
+  VK_CHECK_COND(check_memory_layout_is(in, vkapi::kChannelsPacked));
+  VK_CHECK_COND(check_memory_layout_is(out, vkapi::kChannelsPacked));
 
   int64_t in_dim = in.dim();
   VK_CHECK_COND(
@@ -104,7 +104,8 @@ void add_repeat_channel_node(
       global_size,
       local_size,
       // Inputs and Outputs
-      {{out, api::MemoryAccessType::WRITE}, {in, api::MemoryAccessType::READ}},
+      {{out, vkapi::MemoryAccessType::WRITE},
+       {in, vkapi::MemoryAccessType::READ}},
       // Parameter buffers
       {graph.create_params_buffer(repeat_channel_args)},
       // Specialization Constants
diff --git a/backends/vulkan/runtime/graph/ops/impl/Select.cpp b/backends/vulkan/runtime/graph/ops/impl/Select.cpp
index c4b1cd6ed08..b17d4128dac 100644
--- a/backends/vulkan/runtime/graph/ops/impl/Select.cpp
+++ b/backends/vulkan/runtime/graph/ops/impl/Select.cpp
@@ -22,8 +22,8 @@ void check_args(
     int64_t dim,
     int64_t index,
     const api::vTensor& t_out) {
-  VK_CHECK_COND(check_memory_layout_is(t_in, api::kChannelsPacked));
-  VK_CHECK_COND(check_memory_layout_is(t_out, api::kChannelsPacked));
+  VK_CHECK_COND(check_memory_layout_is(t_in, vkapi::kChannelsPacked));
+  VK_CHECK_COND(check_memory_layout_is(t_out, vkapi::kChannelsPacked));
 
   const int64_t in_dim = t_in.dim();
   VK_CHECK_COND(
@@ -109,7 +109,8 @@ void add_select_int_node(
       graph.create_global_wg_size(out),
       graph.create_local_wg_size(out),
       // Inputs and Outputs
-      {{out, api::MemoryAccessType::WRITE}, {in, api::MemoryAccessType::READ}},
+      {{out, vkapi::MemoryAccessType::WRITE},
+       {in, vkapi::MemoryAccessType::READ}},
       // Parameter buffers
       {t_out->texture_limits_ubo(),
        t_out->sizes_ubo(),
diff --git a/backends/vulkan/runtime/graph/ops/impl/Slice.cpp b/backends/vulkan/runtime/graph/ops/impl/Slice.cpp
index 43ed387dac6..48ebf534190 100644
--- a/backends/vulkan/runtime/graph/ops/impl/Slice.cpp
+++ b/backends/vulkan/runtime/graph/ops/impl/Slice.cpp
@@ -42,8 +42,8 @@ void add_slice_tensor_out_node(
   vTensorPtr t_in = graph.get_tensor(in);
   vTensorPtr t_out = graph.get_tensor(out);
 
-  VK_CHECK_COND(check_memory_layout_is(*t_in, api::kChannelsPacked));
-  VK_CHECK_COND(check_memory_layout_is(*t_out, api::kChannelsPacked));
+  VK_CHECK_COND(check_memory_layout_is(*t_in, vkapi::kChannelsPacked));
+  VK_CHECK_COND(check_memory_layout_is(*t_out, vkapi::kChannelsPacked));
 
   // Need normalize the dim
   int64_t dim = graph.extract_scalar<int64_t>(dim_ref);
@@ -93,8 +93,8 @@ void add_slice_tensor_out_node(
         VK_KERNEL_FROM_STR(kernel_name),
         graph.create_global_wg_size(out),
         graph.create_local_wg_size(out),
-        {{out, api::MemoryAccessType::WRITE},
-         {in, api::MemoryAccessType::READ}},
+        {{out, vkapi::MemoryAccessType::WRITE},
+         {in, vkapi::MemoryAccessType::READ}},
         {t_out->sizes_ubo(),
          t_in->sizes_ubo(),
          graph.create_params_buffer(params)}));
@@ -143,8 +143,8 @@ void add_slice_tensor_out_node(
         VK_KERNEL_FROM_STR(kernel_name),
         global_size,
         local_size,
-        {{out, api::MemoryAccessType::WRITE},
-         {in, api::MemoryAccessType::READ}},
+        {{out, vkapi::MemoryAccessType::WRITE},
+         {in, vkapi::MemoryAccessType::READ}},
         {t_out->sizes_ubo(), graph.create_params_buffer(params)}));
   }
 }
diff --git a/backends/vulkan/runtime/graph/ops/impl/Softmax.cpp b/backends/vulkan/runtime/graph/ops/impl/Softmax.cpp
index 6877056966d..fa4d3df944f 100644
--- a/backends/vulkan/runtime/graph/ops/impl/Softmax.cpp
+++ b/backends/vulkan/runtime/graph/ops/impl/Softmax.cpp
@@ -44,7 +44,7 @@ void add_softmax_node(
 
   vTensorPtr t_out = graph.get_tensor(out);
 
-  api::ShaderInfo shader_descriptor;
+  vkapi::ShaderInfo shader_descriptor;
   std::string kernel_name = in_dim - softmax_dim == 3
       ? "softmax_channel"
       : "softmax_batch_height_width";
@@ -61,8 +61,8 @@ void add_softmax_node(
       graph.create_global_wg_size(out),
       graph.create_local_wg_size(out),
       // Inputs and Outputs
-      {{out, api::MemoryAccessType::WRITE},
-       {in_arg, api::MemoryAccessType::READ}},
+      {{out, vkapi::MemoryAccessType::WRITE},
+       {in_arg, vkapi::MemoryAccessType::READ}},
       // Shader params buffers
       {t_out->texture_limits_ubo(),
        t_in->sizes_ubo(),
diff --git a/backends/vulkan/runtime/graph/ops/impl/Split.cpp b/backends/vulkan/runtime/graph/ops/impl/Split.cpp
index c77f248f20f..8c9e0671089 100644
--- a/backends/vulkan/runtime/graph/ops/impl/Split.cpp
+++ b/backends/vulkan/runtime/graph/ops/impl/Split.cpp
@@ -25,7 +25,7 @@ void add_split_with_sizes_default_node(
     ValueRef out_list_ref) {
   vTensorPtr t_in = graph.get_tensor(in);
 
-  VK_CHECK_COND(check_memory_layout_is(*t_in, api::kChannelsPacked));
+  VK_CHECK_COND(check_memory_layout_is(*t_in, vkapi::kChannelsPacked));
 
   ValueListPtr out_list = graph.get_value_list(out_list_ref);
 
@@ -38,7 +38,7 @@ void add_split_with_sizes_default_node(
     ValueRef out_ref = (*out_list)[split_idx];
 
     vTensorPtr t_out = graph.get_tensor(out_ref);
-    VK_CHECK_COND(check_memory_layout_is(*t_out, api::kChannelsPacked));
+    VK_CHECK_COND(check_memory_layout_is(*t_out, vkapi::kChannelsPacked));
     VK_CHECK_COND(dim_at(*t_out, dim_index) == split_size);
   }
 
diff --git a/backends/vulkan/runtime/graph/ops/impl/Staging.cpp b/backends/vulkan/runtime/graph/ops/impl/Staging.cpp
index e52920647d1..438a1cfcf02 100644
--- a/backends/vulkan/runtime/graph/ops/impl/Staging.cpp
+++ b/backends/vulkan/runtime/graph/ops/impl/Staging.cpp
@@ -21,10 +21,10 @@ void add_staging_to_tensor_node(
     const ValueRef out_tensor) {
   VK_CHECK_COND(graph.val_is_staging(in_staging));
 
-  api::ShaderInfo shader =
+  vkapi::ShaderInfo shader =
       get_nchw_to_tensor_shader(*graph.get_tensor(out_tensor));
 
-  api::ParamsBindList ubos({graph.sizes_ubo(out_tensor)});
+  vkapi::ParamsBindList ubos({graph.sizes_ubo(out_tensor)});
   if (graph.is_buffer_storage(out_tensor)) {
     ubos.append({
         graph.texel_strides_ubo(out_tensor),
@@ -38,8 +38,8 @@ void add_staging_to_tensor_node(
       graph.create_global_wg_size(out_tensor),
       graph.create_local_wg_size(out_tensor),
       // Input and Outputs
-      {{out_tensor, api::MemoryAccessType::WRITE},
-       {in_staging, api::MemoryAccessType::READ}},
+      {{out_tensor, vkapi::MemoryAccessType::WRITE},
+       {in_staging, vkapi::MemoryAccessType::READ}},
       // Parameter Buffers
       ubos,
       // Specialization Constants
@@ -55,10 +55,10 @@ void add_tensor_to_staging_node(
     const ValueRef out_staging) {
   VK_CHECK_COND(graph.val_is_staging(out_staging));
 
-  api::ShaderInfo shader =
+  vkapi::ShaderInfo shader =
       get_tensor_to_nchw_shader(*graph.get_tensor(in_tensor));
 
-  api::ParamsBindList ubos({graph.sizes_ubo(in_tensor)});
+  vkapi::ParamsBindList ubos({graph.sizes_ubo(in_tensor)});
   if (graph.is_buffer_storage(in_tensor)) {
     ubos.append({
         graph.texel_strides_ubo(in_tensor),
@@ -72,8 +72,8 @@ void add_tensor_to_staging_node(
       graph.create_global_wg_size(in_tensor),
       graph.create_local_wg_size(in_tensor),
       // Input and Outputs
-      {{in_tensor, api::MemoryAccessType::READ},
-       {out_staging, api::MemoryAccessType::WRITE}},
+      {{in_tensor, vkapi::MemoryAccessType::READ},
+       {out_staging, vkapi::MemoryAccessType::WRITE}},
       // Parameter Buffers
       ubos,
       // Specialization Constants
@@ -83,12 +83,12 @@ void add_tensor_to_staging_node(
 ValueRef prepack(
     ComputeGraph& graph,
     const ValueRef vref,
-    const api::GPUMemoryLayout layout) {
+    const vkapi::GPUMemoryLayout layout) {
   ValueRef v = graph.add_tensor_like(vref, layout);
 
-  api::ShaderInfo shader = get_nchw_to_tensor_shader(*graph.get_tensor(v));
+  vkapi::ShaderInfo shader = get_nchw_to_tensor_shader(*graph.get_tensor(v));
 
-  api::ParamsBindList ubos({graph.sizes_ubo(v)});
+  vkapi::ParamsBindList ubos({graph.sizes_ubo(v)});
   if (graph.is_buffer_storage(v)) {
     ubos.append({
         graph.texel_strides_ubo(v),
@@ -115,7 +115,7 @@ ValueRef prepack(
 ValueRef prepack_if_tensor_ref(
     ComputeGraph& graph,
     const ValueRef v,
-    const api::GPUMemoryLayout layout) {
+    const vkapi::GPUMemoryLayout layout) {
   if (graph.val_is_tref(v)) {
     return prepack(graph, v, layout);
   } else {
@@ -125,7 +125,7 @@ ValueRef prepack_if_tensor_ref(
 
 ValueRef prepack_if_tensor_ref(ComputeGraph& graph, const ValueRef v) {
   if (graph.val_is_tref(v)) {
-    api::GPUMemoryLayout layout =
+    vkapi::GPUMemoryLayout layout =
         graph.suggested_memory_layout(graph.get_tref(v)->sizes);
     return prepack(graph, v, layout);
   } else {
diff --git a/backends/vulkan/runtime/graph/ops/impl/Staging.h b/backends/vulkan/runtime/graph/ops/impl/Staging.h
index 52399656445..dc5e048b387 100644
--- a/backends/vulkan/runtime/graph/ops/impl/Staging.h
+++ b/backends/vulkan/runtime/graph/ops/impl/Staging.h
@@ -27,7 +27,7 @@ void add_tensor_to_staging_node(
 ValueRef prepack_if_tensor_ref(
     ComputeGraph& graph,
     const ValueRef v,
-    const api::GPUMemoryLayout layout);
+    const vkapi::GPUMemoryLayout layout);
 
 ValueRef prepack_if_tensor_ref(ComputeGraph& graph, const ValueRef v);
 
diff --git a/backends/vulkan/runtime/graph/ops/impl/Sum.cpp b/backends/vulkan/runtime/graph/ops/impl/Sum.cpp
index e300438644b..e67905a158f 100644
--- a/backends/vulkan/runtime/graph/ops/impl/Sum.cpp
+++ b/backends/vulkan/runtime/graph/ops/impl/Sum.cpp
@@ -47,8 +47,8 @@ void resize_sum_node(
 }
 
 void check_sum_args(const api::vTensor& in, const api::vTensor& out) {
-  VK_CHECK_COND(check_memory_layout_is(in, api::kChannelsPacked));
-  VK_CHECK_COND(check_memory_layout_is(out, api::kChannelsPacked));
+  VK_CHECK_COND(check_memory_layout_is(in, vkapi::kChannelsPacked));
+  VK_CHECK_COND(check_memory_layout_is(out, vkapi::kChannelsPacked));
 }
 
 void add_sum_dim_node(
@@ -82,7 +82,8 @@ void add_sum_dim_node(
       graph.create_global_wg_size(out),
       graph.create_local_wg_size(out),
       // Inputs and Outputs
-      {{out, api::MemoryAccessType::WRITE}, {arg, api::MemoryAccessType::READ}},
+      {{out, vkapi::MemoryAccessType::WRITE},
+       {arg, vkapi::MemoryAccessType::READ}},
       // Shader params buffers
       {t_out->texture_limits_ubo(),
        graph.create_params_buffer(dim + 4 - in_dim),
@@ -100,10 +101,10 @@ ValueRef add_node(
     const ValueRef input,
     const int dim,
     const bool keepdim,
-    const api::ScalarType dtype = api::kFloat) {
+    const vkapi::ScalarType dtype = vkapi::kFloat) {
   std::vector<int64_t> output_size =
       calc_out_sizes(*(graph.get_tensor(input)), dim, keepdim);
-  return graph.add_tensor(output_size, dtype, api::kChannelsPacked);
+  return graph.add_tensor(output_size, dtype, vkapi::kChannelsPacked);
 }
 
 void add_sum_dim_IntList(
diff --git a/backends/vulkan/runtime/graph/ops/impl/UnaryOp.cpp b/backends/vulkan/runtime/graph/ops/impl/UnaryOp.cpp
index d64f82ee632..4342be7229f 100644
--- a/backends/vulkan/runtime/graph/ops/impl/UnaryOp.cpp
+++ b/backends/vulkan/runtime/graph/ops/impl/UnaryOp.cpp
@@ -42,7 +42,7 @@ void add_unary_op_node(
   add_dtype_suffix(kernel_name, graph.dtype_of(out));
   add_storage_type_suffix(kernel_name, graph.storage_type_of(out));
 
-  api::ParamsBindList ubos({});
+  vkapi::ParamsBindList ubos({});
   if (graph.is_buffer_storage(out)) {
     ubos.append({graph.ntexels_ubo(out)});
   } else {
@@ -57,7 +57,8 @@ void add_unary_op_node(
       graph.create_global_wg_size(out),
       graph.create_local_wg_size(out),
       // Inputs and Outputs
-      {{out, api::MemoryAccessType::WRITE}, {in, api::MemoryAccessType::READ}},
+      {{out, vkapi::MemoryAccessType::WRITE},
+       {in, vkapi::MemoryAccessType::READ}},
       // Shader params buffers
       ubos,
       // Specialization Constants
diff --git a/backends/vulkan/runtime/graph/ops/impl/Upsample.cpp b/backends/vulkan/runtime/graph/ops/impl/Upsample.cpp
index 45135b2018d..29baff4bdee 100644
--- a/backends/vulkan/runtime/graph/ops/impl/Upsample.cpp
+++ b/backends/vulkan/runtime/graph/ops/impl/Upsample.cpp
@@ -102,8 +102,8 @@ void add_upsample_nearest2d_node(
       graph.create_global_wg_size(out),
       graph.create_local_wg_size(out),
       // Inputs and Outputs
-      {{out, api::MemoryAccessType::WRITE},
-       {arg_in, api::MemoryAccessType::READ}},
+      {{out, vkapi::MemoryAccessType::WRITE},
+       {arg_in, vkapi::MemoryAccessType::READ}},
       // Shader params buffers
       {t_out->texture_limits_ubo(),
        graph.create_params_buffer(input_size),
diff --git a/backends/vulkan/runtime/graph/ops/impl/View.cpp b/backends/vulkan/runtime/graph/ops/impl/View.cpp
index bff07b6bb93..507dbdcf8b1 100644
--- a/backends/vulkan/runtime/graph/ops/impl/View.cpp
+++ b/backends/vulkan/runtime/graph/ops/impl/View.cpp
@@ -71,7 +71,8 @@ void add_view_node(
       graph.create_global_wg_size(out),
       graph.create_local_wg_size(out),
       // Inputs and Outputs
-      {{out, api::MemoryAccessType::WRITE}, {in, api::MemoryAccessType::READ}},
+      {{out, vkapi::MemoryAccessType::WRITE},
+       {in, vkapi::MemoryAccessType::READ}},
       // Parameter Buffers
       {t_out->sizes_ubo(), t_in->sizes_ubo()},
       // Specialization Constants
diff --git a/backends/vulkan/runtime/graph/ops/impl/utils/TensorUtils.cpp b/backends/vulkan/runtime/graph/ops/impl/utils/TensorUtils.cpp
index 546bd2044d2..74cfdf8272d 100644
--- a/backends/vulkan/runtime/graph/ops/impl/utils/TensorUtils.cpp
+++ b/backends/vulkan/runtime/graph/ops/impl/utils/TensorUtils.cpp
@@ -47,7 +47,7 @@ bool check_same_sizes_at(
 
 bool check_memory_layout_is(
     const api::vTensor& t,
-    api::GPUMemoryLayout layout) {
+    vkapi::GPUMemoryLayout layout) {
   return t.gpu_memory_layout() == layout;
 }
 
@@ -79,11 +79,11 @@ bool is_packed_dim_broadcasted(
   // We assume that the tensors are broadcastable. If values aren't equal at
   // some index, then the value of rcvr is 1 and hence should be broadcasted.
   switch (sndr.gpu_memory_layout()) {
-    case api::kChannelsPacked:
+    case vkapi::kChannelsPacked:
       return utils::val_at(-3, sndr.sizes()) > utils::val_at(-3, rcvr.sizes());
-    case api::kHeightPacked:
+    case vkapi::kHeightPacked:
       return utils::val_at(-2, sndr.sizes()) > utils::val_at(-2, rcvr.sizes());
-    case api::kWidthPacked:
+    case vkapi::kWidthPacked:
       return utils::val_at(-1, sndr.sizes()) > utils::val_at(-1, rcvr.sizes());
   }
 }
diff --git a/backends/vulkan/runtime/graph/ops/impl/utils/TensorUtils.h b/backends/vulkan/runtime/graph/ops/impl/utils/TensorUtils.h
index 4b8dea1bd22..7eb3c3d7ac5 100644
--- a/backends/vulkan/runtime/graph/ops/impl/utils/TensorUtils.h
+++ b/backends/vulkan/runtime/graph/ops/impl/utils/TensorUtils.h
@@ -34,7 +34,9 @@ bool check_same_sizes_at(
     const api::vTensor& t2,
     int64_t d2);
 
-bool check_memory_layout_is(const api::vTensor& t, api::GPUMemoryLayout layout);
+bool check_memory_layout_is(
+    const api::vTensor& t,
+    vkapi::GPUMemoryLayout layout);
 
 bool check_same_memory_layout(const api::vTensor& t1, const api::vTensor& t2);
 
diff --git a/backends/vulkan/runtime/graph/ops/utils/BindingUtils.cpp b/backends/vulkan/runtime/graph/ops/utils/BindingUtils.cpp
index effd3d9a36e..b0964ace225 100644
--- a/backends/vulkan/runtime/graph/ops/utils/BindingUtils.cpp
+++ b/backends/vulkan/runtime/graph/ops/utils/BindingUtils.cpp
@@ -12,17 +12,17 @@ namespace vkcompute {
 
 void bind_tensor_to_descriptor_set(
     api::vTensor& tensor,
-    api::PipelineBarrier& pipeline_barrier,
-    const api::MemoryAccessType accessType,
-    api::DescriptorSet& descriptor_set,
+    vkapi::PipelineBarrier& pipeline_barrier,
+    const vkapi::MemoryAccessType accessType,
+    vkapi::DescriptorSet& descriptor_set,
     const uint32_t idx) {
   if (tensor.buffer()) {
-    api::VulkanBuffer& buffer = tensor.buffer(
-        pipeline_barrier, api::PipelineStage::COMPUTE, accessType);
+    vkapi::VulkanBuffer& buffer = tensor.buffer(
+        pipeline_barrier, vkapi::PipelineStage::COMPUTE, accessType);
     descriptor_set.bind(idx, buffer);
   } else {
-    api::VulkanImage& image =
-        tensor.image(pipeline_barrier, api::PipelineStage::COMPUTE, accessType);
+    vkapi::VulkanImage& image = tensor.image(
+        pipeline_barrier, vkapi::PipelineStage::COMPUTE, accessType);
     descriptor_set.bind(idx, image);
   }
 }
@@ -30,8 +30,8 @@ void bind_tensor_to_descriptor_set(
 uint32_t bind_values_to_descriptor_set(
     ComputeGraph* graph,
     const std::vector<ArgGroup>& args,
-    api::PipelineBarrier& pipeline_barrier,
-    api::DescriptorSet& descriptor_set,
+    vkapi::PipelineBarrier& pipeline_barrier,
+    vkapi::DescriptorSet& descriptor_set,
     const uint32_t base_idx) {
   uint32_t idx = base_idx;
   for (auto& arg : args) {
@@ -55,8 +55,8 @@ uint32_t bind_values_to_descriptor_set(
 }
 
 uint32_t bind_params_to_descriptor_set(
-    const api::ParamsBindList& params,
-    api::DescriptorSet& descriptor_set,
+    const vkapi::ParamsBindList& params,
+    vkapi::DescriptorSet& descriptor_set,
     const uint32_t base_idx) {
   uint32_t idx = base_idx;
   for (auto& param : params.bind_infos) {
@@ -67,7 +67,7 @@ uint32_t bind_params_to_descriptor_set(
 
 void bind_staging_to_descriptor_set(
     api::StorageBuffer& staging,
-    api::DescriptorSet& descriptor_set,
+    vkapi::DescriptorSet& descriptor_set,
     const uint32_t idx) {
   descriptor_set.bind(idx, staging.buffer());
 }
diff --git a/backends/vulkan/runtime/graph/ops/utils/BindingUtils.h b/backends/vulkan/runtime/graph/ops/utils/BindingUtils.h
index 39666f35f7e..3a7ec029da7 100644
--- a/backends/vulkan/runtime/graph/ops/utils/BindingUtils.h
+++ b/backends/vulkan/runtime/graph/ops/utils/BindingUtils.h
@@ -18,16 +18,16 @@ namespace vkcompute {
 
 void bind_tensor_to_descriptor_set(
     api::vTensor& tensor,
-    api::PipelineBarrier& pipeline_barrier,
-    const api::MemoryAccessType accessType,
-    api::DescriptorSet& descriptor_set,
+    vkapi::PipelineBarrier& pipeline_barrier,
+    const vkapi::MemoryAccessType accessType,
+    vkapi::DescriptorSet& descriptor_set,
     const uint32_t idx);
 
 uint32_t bind_values_to_descriptor_set(
     ComputeGraph* graph,
     const std::vector<ArgGroup>& args,
-    api::PipelineBarrier& pipeline_barrier,
-    api::DescriptorSet& descriptor_set,
+    vkapi::PipelineBarrier& pipeline_barrier,
+    vkapi::DescriptorSet& descriptor_set,
     const uint32_t base_idx);
 
 //
@@ -35,13 +35,13 @@ uint32_t bind_values_to_descriptor_set(
 //
 
 uint32_t bind_params_to_descriptor_set(
-    const api::ParamsBindList& params,
-    api::DescriptorSet& descriptor_set,
+    const vkapi::ParamsBindList& params,
+    vkapi::DescriptorSet& descriptor_set,
     const uint32_t base_idx);
 
 void bind_staging_to_descriptor_set(
     api::StorageBuffer& staging,
-    api::DescriptorSet& descriptor_set,
+    vkapi::DescriptorSet& descriptor_set,
     const uint32_t idx);
 
 } // namespace vkcompute
diff --git a/backends/vulkan/runtime/graph/ops/utils/ShaderNameUtils.cpp b/backends/vulkan/runtime/graph/ops/utils/ShaderNameUtils.cpp
index 291f6b6e60e..7c88300759c 100644
--- a/backends/vulkan/runtime/graph/ops/utils/ShaderNameUtils.cpp
+++ b/backends/vulkan/runtime/graph/ops/utils/ShaderNameUtils.cpp
@@ -12,15 +12,15 @@ namespace vkcompute {
 
 void add_storage_type_suffix(
     std::string& kernel_name,
-    const api::StorageType storage_type) {
+    const vkapi::StorageType storage_type) {
   switch (storage_type) {
-    case api::kBuffer:
+    case vkapi::kBuffer:
       kernel_name += "_buffer";
       break;
-    case api::kTexture3D:
+    case vkapi::kTexture3D:
       kernel_name += "_texture3d";
       break;
-    case api::kTexture2D:
+    case vkapi::kTexture2D:
       kernel_name += "_texture2d";
       break;
   }
@@ -32,19 +32,19 @@ void add_storage_type_suffix(
   return add_storage_type_suffix(kernel_name, tensor.storage_type());
 }
 
-void add_dtype_suffix(std::string& kernel_name, const api::ScalarType dtype) {
+void add_dtype_suffix(std::string& kernel_name, const vkapi::ScalarType dtype) {
   switch (dtype) {
-    case api::kFloat:
+    case vkapi::kFloat:
       kernel_name += "_float";
       break;
-    case api::kHalf:
+    case vkapi::kHalf:
       kernel_name += "_half";
       break;
-    case api::kInt:
+    case vkapi::kInt:
       kernel_name += "_int";
       break;
-    case api::kChar:
-    case api::kQInt8:
+    case vkapi::kChar:
+    case vkapi::kQInt8:
       kernel_name += "_int8";
       break;
     default:
@@ -58,10 +58,10 @@ void add_dtype_suffix(std::string& kernel_name, const api::vTensor& tensor) {
 
 void add_ndim_suffix(std::string& kernel_name, const api::vTensor& tensor) {
   switch (tensor.storage_type()) {
-    case api::kTexture3D:
+    case vkapi::kTexture3D:
       kernel_name += "_3d";
       break;
-    case api::kTexture2D:
+    case vkapi::kTexture2D:
       kernel_name += "_2d";
       break;
     default:
@@ -71,15 +71,15 @@ void add_ndim_suffix(std::string& kernel_name, const api::vTensor& tensor) {
 
 void add_memory_layout_suffix(
     std::string& kernel_name,
-    api::GPUMemoryLayout layout) {
+    vkapi::GPUMemoryLayout layout) {
   switch (layout) {
-    case api::kChannelsPacked:
+    case vkapi::kChannelsPacked:
       kernel_name += "_C_packed";
       break;
-    case api::kHeightPacked:
+    case vkapi::kHeightPacked:
       kernel_name += "_H_packed";
       break;
-    case api::kWidthPacked:
+    case vkapi::kWidthPacked:
       kernel_name += "_W_packed";
       break;
     default:
diff --git a/backends/vulkan/runtime/graph/ops/utils/ShaderNameUtils.h b/backends/vulkan/runtime/graph/ops/utils/ShaderNameUtils.h
index c04a1d0e206..7925b09a4c0 100644
--- a/backends/vulkan/runtime/graph/ops/utils/ShaderNameUtils.h
+++ b/backends/vulkan/runtime/graph/ops/utils/ShaderNameUtils.h
@@ -18,12 +18,12 @@ constexpr size_t kShaderNameReserve = 64u;
 
 void add_storage_type_suffix(
     std::string& kernel_name,
-    const api::StorageType storage_type);
+    const vkapi::StorageType storage_type);
 void add_storage_type_suffix(
     std::string& kernel_name,
     const api::vTensor& tensor);
 
-void add_dtype_suffix(std::string& kernel_name, const api::ScalarType dtype);
+void add_dtype_suffix(std::string& kernel_name, const vkapi::ScalarType dtype);
 void add_dtype_suffix(std::string& kernel_name, const api::vTensor& tensor);
 
 void add_ndim_suffix(std::string& kernel_name, const size_t ndim);
@@ -31,7 +31,7 @@ void add_ndim_suffix(std::string& kernel_name, const api::vTensor& tensor);
 
 void add_memory_layout_suffix(
     std::string& kernel_name,
-    const api::GPUMemoryLayout layout);
+    const vkapi::GPUMemoryLayout layout);
 void add_memory_layout_suffix(
     std::string& kernel_name,
     const api::vTensor& tensor);
diff --git a/backends/vulkan/runtime/graph/ops/utils/StagingUtils.cpp b/backends/vulkan/runtime/graph/ops/utils/StagingUtils.cpp
index 6fe618f70a8..d681618d9d4 100644
--- a/backends/vulkan/runtime/graph/ops/utils/StagingUtils.cpp
+++ b/backends/vulkan/runtime/graph/ops/utils/StagingUtils.cpp
@@ -20,7 +20,7 @@ namespace vkcompute {
 template <typename T>
 void memcpy_to_mapping_impl(
     const void* src,
-    api::MemoryMap& dst_mapping,
+    vkapi::MemoryMap& dst_mapping,
     const size_t nbytes) {
   T* data_ptr = dst_mapping.template data<T>();
   memcpy(data_ptr, reinterpret_cast<const T*>(src), nbytes);
@@ -28,7 +28,7 @@ void memcpy_to_mapping_impl(
 
 template <typename T>
 void memcpy_from_mapping_impl(
-    api::MemoryMap& src_mapping,
+    vkapi::MemoryMap& src_mapping,
     void* dst,
     const size_t nbytes) {
   T* data_ptr = src_mapping.template data<T>();
@@ -37,11 +37,11 @@ void memcpy_from_mapping_impl(
 
 void memcpy_to_mapping(
     const void* src,
-    api::MemoryMap& dst_mapping,
+    vkapi::MemoryMap& dst_mapping,
     const size_t nbytes,
-    const api::ScalarType dtype) {
+    const vkapi::ScalarType dtype) {
 #define DTYPE_CASE(ctype, vkformat, name)                    \
-  case api::ScalarType::name:                                \
+  case vkapi::ScalarType::name:                              \
     memcpy_to_mapping_impl<ctype>(src, dst_mapping, nbytes); \
     break;
 
@@ -54,12 +54,12 @@ void memcpy_to_mapping(
 }
 
 void memcpy_from_mapping(
-    api::MemoryMap& src_mapping,
+    vkapi::MemoryMap& src_mapping,
     void* dst,
     const size_t nbytes,
-    const api::ScalarType dtype) {
+    const vkapi::ScalarType dtype) {
 #define DTYPE_CASE(ctype, vkformat, name)                      \
-  case api::ScalarType::name:                                  \
+  case vkapi::ScalarType::name:                                \
     memcpy_from_mapping_impl<ctype>(src_mapping, dst, nbytes); \
     break;
 
@@ -75,7 +75,7 @@ void copy_ptr_to_staging(
     const void* src,
     api::StorageBuffer& staging,
     const size_t nbytes) {
-  api::MemoryMap mapping(staging.buffer(), api::MemoryAccessType::WRITE);
+  vkapi::MemoryMap mapping(staging.buffer(), vkapi::MemoryAccessType::WRITE);
   mapping.invalidate();
   memcpy_to_mapping(src, mapping, nbytes, staging.dtype());
 }
@@ -84,18 +84,18 @@ void copy_staging_to_ptr(
     api::StorageBuffer& staging,
     void* dst,
     const size_t nbytes) {
-  api::MemoryMap mapping(staging.buffer(), api::MemoryAccessType::READ);
+  vkapi::MemoryMap mapping(staging.buffer(), vkapi::MemoryAccessType::READ);
   mapping.invalidate();
   memcpy_from_mapping(mapping, dst, nbytes, staging.dtype());
 }
 
 void set_staging_zeros(api::StorageBuffer& staging, const size_t nbytes) {
-  api::MemoryMap mapping(staging.buffer(), api::MemoryAccessType::WRITE);
+  vkapi::MemoryMap mapping(staging.buffer(), vkapi::MemoryAccessType::WRITE);
   uint8_t* data_ptr = mapping.template data<uint8_t>();
   memset(data_ptr, 0, staging.nbytes());
 }
 
-api::ShaderInfo get_nchw_to_tensor_shader(const api::vTensor& v_dst) {
+vkapi::ShaderInfo get_nchw_to_tensor_shader(const api::vTensor& v_dst) {
   std::string kernel_name;
   kernel_name.reserve(kShaderNameReserve);
 
@@ -106,7 +106,7 @@ api::ShaderInfo get_nchw_to_tensor_shader(const api::vTensor& v_dst) {
   return VK_KERNEL_FROM_STR(kernel_name);
 }
 
-api::ShaderInfo get_tensor_to_nchw_shader(const api::vTensor& v_src) {
+vkapi::ShaderInfo get_tensor_to_nchw_shader(const api::vTensor& v_src) {
   std::string kernel_name;
   kernel_name.reserve(kShaderNameReserve);
 
diff --git a/backends/vulkan/runtime/graph/ops/utils/StagingUtils.h b/backends/vulkan/runtime/graph/ops/utils/StagingUtils.h
index d5996809c16..dfe86a9e263 100644
--- a/backends/vulkan/runtime/graph/ops/utils/StagingUtils.h
+++ b/backends/vulkan/runtime/graph/ops/utils/StagingUtils.h
@@ -31,7 +31,7 @@ void set_staging_zeros(api::StorageBuffer& staging, const size_t nbytes);
 // Functions to get shaders
 //
 
-api::ShaderInfo get_nchw_to_tensor_shader(const api::vTensor& v_dst);
-api::ShaderInfo get_tensor_to_nchw_shader(const api::vTensor& v_src);
+vkapi::ShaderInfo get_nchw_to_tensor_shader(const api::vTensor& v_dst);
+vkapi::ShaderInfo get_tensor_to_nchw_shader(const api::vTensor& v_src);
 
 } // namespace vkcompute
diff --git a/backends/vulkan/serialization/schema.fbs b/backends/vulkan/serialization/schema.fbs
index 986a3280570..018e31b7598 100644
--- a/backends/vulkan/serialization/schema.fbs
+++ b/backends/vulkan/serialization/schema.fbs
@@ -32,7 +32,7 @@ enum VkStorageType : ubyte {
 
 // Describes how memory should be laid out in GPU memory. See the GPUMemoryLayout
 // enum class in PyTorch Vulkan for more details. The int values assigned to each
-// entry must match the corresponding entry in api::GPUMemoryLayout.
+// entry must match the corresponding entry in vkapi::GPUMemoryLayout.
 enum VkMemoryLayout : ubyte {
   TENSOR_WIDTH_PACKED = 0,
   TENSOR_HEIGHT_PACKED = 1,
diff --git a/backends/vulkan/test/op_tests/cases.py b/backends/vulkan/test/op_tests/cases.py
index f50e44be72e..adff032399e 100644
--- a/backends/vulkan/test/op_tests/cases.py
+++ b/backends/vulkan/test/op_tests/cases.py
@@ -52,8 +52,8 @@ def get_binary_elementwise_inputs():
         ]
     )
     test_suite.layouts = [
-        "api::kWidthPacked",
-        "api::kChannelsPacked",
+        "vkapi::kWidthPacked",
+        "vkapi::kChannelsPacked",
     ]
     return test_suite
 
@@ -71,8 +71,8 @@ def get_mm_inputs():
     # ATen matmul doesn't support half
     test_suite.dtypes = ["at::kFloat"]
     test_suite.layouts = [
-        "api::kWidthPacked",
-        "api::kChannelsPacked",
+        "vkapi::kWidthPacked",
+        "vkapi::kChannelsPacked",
     ]
     return test_suite
 
@@ -90,8 +90,8 @@ def get_bmm_inputs():
     # ATen matmul doesn't support half
     test_suite.dtypes = ["at::kFloat"]
     test_suite.layouts = [
-        "api::kWidthPacked",
-        "api::kChannelsPacked",
+        "vkapi::kWidthPacked",
+        "vkapi::kChannelsPacked",
     ]
     return test_suite
 
@@ -112,8 +112,8 @@ def get_addmm_inputs():
     # ATen matmul doesn't support half
     test_suite.dtypes = ["at::kFloat"]
     test_suite.layouts = [
-        "api::kWidthPacked",
-        "api::kChannelsPacked",
+        "vkapi::kWidthPacked",
+        "vkapi::kChannelsPacked",
     ]
     return test_suite
 
@@ -137,8 +137,8 @@ def get_linear_inputs():
     test_suite = VkTestSuite(inputs_list)
     test_suite.dtypes = ["at::kFloat"]
     test_suite.layouts = [
-        "api::kWidthPacked",
-        "api::kChannelsPacked",
+        "vkapi::kWidthPacked",
+        "vkapi::kChannelsPacked",
     ]
     return test_suite
 
@@ -151,8 +151,8 @@ def get_weight_int8pack_mm_inputs():
 
     test_suite = VkTestSuite(inputs_list)
     test_suite.dtypes = ["at::kFloat", "at::kHalf"]
-    test_suite.layouts = ["api::kWidthPacked"]
-    test_suite.storage_types = ["api::kTexture3D", "api::kBuffer"]
+    test_suite.layouts = ["vkapi::kWidthPacked"]
+    test_suite.storage_types = ["vkapi::kTexture3D", "vkapi::kBuffer"]
     test_suite.prepacked_args = ["mat2"]
 
     test_suite.arg_dtype["mat2"] = "at::kChar"
@@ -432,7 +432,7 @@ def get_permute_inputs():
         ]
     )
 
-    test_suite.layouts = ["api::kChannelsPacked"]
+    test_suite.layouts = ["vkapi::kChannelsPacked"]
     return test_suite
 
 
@@ -457,9 +457,9 @@ def get_view_inputs():
         ]
     )
     test_suite.layouts = [
-        "api::kWidthPacked",
-        "api::kHeightPacked",
-        "api::kChannelsPacked",
+        "vkapi::kWidthPacked",
+        "vkapi::kHeightPacked",
+        "vkapi::kChannelsPacked",
     ]
     return test_suite
 
@@ -542,7 +542,7 @@ def get_slice_inputs():
     test_suite = VkTestSuite([tuple(tc) for tc in test_cases])
 
     test_suite.dtypes = ["at::kFloat", "at::kHalf"]
-    test_suite.layouts = ["api::kChannelsPacked"]
+    test_suite.layouts = ["vkapi::kChannelsPacked"]
     test_suite.data_gen = "make_seq_tensor"
     return test_suite
 
@@ -567,7 +567,7 @@ def get_index_select_inputs():
     test_suite = VkTestSuite([tuple(tc) for tc in test_cases])
 
     test_suite.dtypes = ["at::kFloat"]
-    test_suite.layouts = ["api::kChannelsPacked"]
+    test_suite.layouts = ["vkapi::kChannelsPacked"]
     return test_suite
 
 
@@ -587,7 +587,7 @@ def get_embedding_inputs():
     test_suite = VkTestSuite([tuple(tc) + (-1, "false", "false") for tc in test_cases])
 
     test_suite.dtypes = ["at::kFloat"]
-    test_suite.layouts = ["api::kChannelsPacked"]
+    test_suite.layouts = ["vkapi::kChannelsPacked"]
     return test_suite
 
 
@@ -612,7 +612,7 @@ def get_unsqueeze_inputs():
         ]
     )
     test_suite.layouts = [
-        "api::kChannelsPacked",
+        "vkapi::kChannelsPacked",
     ]
     test_suite.data_gen = "make_seq_tensor"
     return test_suite
@@ -636,7 +636,7 @@ def get_clone_inputs():
         ]
     )
     test_suite.layouts = [
-        "api::kChannelsPacked",
+        "vkapi::kChannelsPacked",
     ]
     test_suite.data_gen = "make_seq_tensor"
     return test_suite
@@ -680,7 +680,7 @@ def get_repeat_inputs():
         ]
     )
     test_suite.layouts = [
-        "api::kChannelsPacked",
+        "vkapi::kChannelsPacked",
     ]
     test_suite.data_gen = "make_seq_tensor"
     test_suite.dtypes = ["at::kFloat"]
@@ -742,7 +742,7 @@ def get_cat_inputs():
         ]
     )
     test_suite.layouts = [
-        "api::kChannelsPacked",
+        "vkapi::kChannelsPacked",
     ]
     test_suite.data_gen = "make_seq_tensor"
     test_suite.dtypes = ["at::kFloat"]
@@ -778,7 +778,7 @@ def get_split_with_sizes_inputs():
     test_suite = VkTestSuite([tuple(tc) for tc in test_cases])
 
     test_suite.layouts = [
-        "api::kChannelsPacked",
+        "vkapi::kChannelsPacked",
     ]
     test_suite.data_gen = "make_seq_tensor"
     test_suite.dtypes = ["at::kFloat"]
@@ -829,7 +829,7 @@ def get_split_tensor_inputs():
     )
 
     test_suite.layouts = [
-        "api::kChannelsPacked",
+        "vkapi::kChannelsPacked",
     ]
     test_suite.data_gen = "make_seq_tensor"
     test_suite.dtypes = ["at::kFloat"]
@@ -863,7 +863,7 @@ def get_softmax_inputs():
         ]
     )
     test_suite.layouts = [
-        "api::kChannelsPacked",
+        "vkapi::kChannelsPacked",
     ]
     return test_suite
 
@@ -888,7 +888,7 @@ def get_unary_ops_inputs():
             (S1, S2, S2, M2),
         ]
     )
-    test_suite.storage_types = ["api::kTexture3D", "api::kBuffer"]
+    test_suite.storage_types = ["vkapi::kTexture3D", "vkapi::kBuffer"]
     test_suite.atol = "1e-4"
     test_suite.rtol = "1e-4"
     return test_suite
@@ -1002,7 +1002,7 @@ def get_arange_inputs():
     )
 
     test_suite.layouts = [
-        "api::kChannelsPacked",
+        "vkapi::kChannelsPacked",
     ]
     return test_suite
 
diff --git a/backends/vulkan/test/op_tests/utils/codegen.py b/backends/vulkan/test/op_tests/utils/codegen.py
index 2f3e85d29af..a258b150f03 100644
--- a/backends/vulkan/test/op_tests/utils/codegen.py
+++ b/backends/vulkan/test/op_tests/utils/codegen.py
@@ -51,8 +51,8 @@
 class VkTestSuite(TestSuite):
     def __init__(self, input_cases: List[Any]):
         super().__init__(input_cases)
-        self.storage_types: List[str] = ["api::kTexture3D"]
-        self.layouts: List[str] = ["api::kChannelsPacked"]
+        self.storage_types: List[str] = ["vkapi::kTexture3D"]
+        self.layouts: List[str] = ["vkapi::kChannelsPacked"]
         self.data_gen: str = "make_rand_tensor"
 
 
@@ -618,7 +618,7 @@ def gen_op_check_fn(self) -> str:
 ##################################
 
 test_fixture_template = """
-class GeneratedOpsTest_{op_name} : public ::testing::TestWithParam< ::std::tuple<at::ScalarType, api::StorageType, api::GPUMemoryLayout>> {{
+class GeneratedOpsTest_{op_name} : public ::testing::TestWithParam< ::std::tuple<at::ScalarType, vkapi::StorageType, vkapi::GPUMemoryLayout>> {{
  protected:
   ComputeGraph* graph;
   at::ScalarType test_dtype = at::kFloat;
@@ -627,8 +627,8 @@ class GeneratedOpsTest_{op_name} : public ::testing::TestWithParam< ::std::tuple
 
   void SetUp() override {{
     GraphConfig config;
-    api::StorageType default_storage_type;
-    api::GPUMemoryLayout default_memory_layout;
+    vkapi::StorageType default_storage_type;
+    vkapi::GPUMemoryLayout default_memory_layout;
     std::tie(test_dtype, default_storage_type, default_memory_layout) = GetParam();
     config.set_storage_type_override(default_storage_type);
     config.set_memory_layout_override(default_memory_layout);
@@ -705,18 +705,18 @@ def gen_parameterization(self) -> str:
 using namespace vkcompute;
 using TensorOptions = at::TensorOptions;
 
-api::ScalarType from_at_scalartype(c10::ScalarType at_scalartype) {
+vkapi::ScalarType from_at_scalartype(c10::ScalarType at_scalartype) {
   switch (at_scalartype) {
     case c10::kFloat:
-      return api::kFloat;
+      return vkapi::kFloat;
     case c10::kHalf:
-      return api::kHalf;
+      return vkapi::kHalf;
     case c10::kInt:
-      return api::kInt;
+      return vkapi::kInt;
     case c10::kLong:
-      return api::kInt;
+      return vkapi::kInt;
     case c10::kChar:
-      return api::kChar;
+      return vkapi::kChar;
     default:
       VK_THROW("Unsupported at::ScalarType!");
   }
diff --git a/backends/vulkan/test/utils/test_utils.cpp b/backends/vulkan/test/utils/test_utils.cpp
index 1fdbb82c0d1..95d40e317ca 100644
--- a/backends/vulkan/test/utils/test_utils.cpp
+++ b/backends/vulkan/test/utils/test_utils.cpp
@@ -20,10 +20,11 @@
 
 void record_nchw_to_buffer_op(
     api::Context* const context,
-    api::VulkanBuffer& src_buffer,
+    vkapi::VulkanBuffer& src_buffer,
     api::vTensor& v_dst) {
-  api::PipelineBarrier pipeline_barrier{};
-  api::SpecVarList specialization_constants = {SV(v_dst.packed_dim_whcn_idx())};
+  vkapi::PipelineBarrier pipeline_barrier{};
+  vkapi::SpecVarList specialization_constants = {
+      SV(v_dst.packed_dim_whcn_idx())};
 
   context->submit_compute_job(
       get_nchw_to_tensor_shader(v_dst),
@@ -35,8 +36,8 @@ void record_nchw_to_buffer_op(
       0,
       v_dst.buffer(
           pipeline_barrier,
-          api::PipelineStage::COMPUTE,
-          api::MemoryAccessType::WRITE),
+          vkapi::PipelineStage::COMPUTE,
+          vkapi::MemoryAccessType::WRITE),
       src_buffer,
       v_dst.sizes_ubo(),
       v_dst.texel_strides_ubo(),
@@ -46,9 +47,10 @@ void record_nchw_to_buffer_op(
 void record_buffer_to_nchw_op(
     api::Context* const context,
     api::vTensor& v_src,
-    api::VulkanBuffer& dst_buffer) {
-  api::PipelineBarrier pipeline_barrier{};
-  api::SpecVarList specialization_constants = {SV(v_src.packed_dim_whcn_idx())};
+    vkapi::VulkanBuffer& dst_buffer) {
+  vkapi::PipelineBarrier pipeline_barrier{};
+  vkapi::SpecVarList specialization_constants = {
+      SV(v_src.packed_dim_whcn_idx())};
 
   context->submit_compute_job(
       get_tensor_to_nchw_shader(v_src),
@@ -58,7 +60,7 @@ void record_buffer_to_nchw_op(
       specialization_constants,
       VK_NULL_HANDLE,
       0,
-      v_src.buffer(pipeline_barrier, api::PipelineStage::COMPUTE),
+      v_src.buffer(pipeline_barrier, vkapi::PipelineStage::COMPUTE),
       dst_buffer,
       v_src.sizes_ubo(),
       v_src.texel_strides_ubo(),
@@ -67,10 +69,11 @@ void record_buffer_to_nchw_op(
 
 void record_nchw_to_image_op(
     api::Context* const context,
-    api::VulkanBuffer& src_buffer,
+    vkapi::VulkanBuffer& src_buffer,
     api::vTensor& v_dst) {
-  api::PipelineBarrier pipeline_barrier{};
-  api::SpecVarList specialization_constants = {SV(v_dst.packed_dim_whcn_idx())};
+  vkapi::PipelineBarrier pipeline_barrier{};
+  vkapi::SpecVarList specialization_constants = {
+      SV(v_dst.packed_dim_whcn_idx())};
 
   context->submit_compute_job(
       get_nchw_to_tensor_shader(v_dst),
@@ -82,8 +85,8 @@ void record_nchw_to_image_op(
       0,
       v_dst.image(
           pipeline_barrier,
-          api::PipelineStage::COMPUTE,
-          api::MemoryAccessType::WRITE),
+          vkapi::PipelineStage::COMPUTE,
+          vkapi::MemoryAccessType::WRITE),
       src_buffer,
       v_dst.sizes_ubo());
 }
@@ -91,9 +94,10 @@ void record_nchw_to_image_op(
 void record_image_to_nchw_op(
     api::Context* const context,
     api::vTensor& v_src,
-    api::VulkanBuffer& dst_buffer) {
-  api::PipelineBarrier pipeline_barrier{};
-  api::SpecVarList specialization_constants = {SV(v_src.packed_dim_whcn_idx())};
+    vkapi::VulkanBuffer& dst_buffer) {
+  vkapi::PipelineBarrier pipeline_barrier{};
+  vkapi::SpecVarList specialization_constants = {
+      SV(v_src.packed_dim_whcn_idx())};
 
   context->submit_compute_job(
       get_tensor_to_nchw_shader(v_src),
@@ -103,18 +107,18 @@ void record_image_to_nchw_op(
       specialization_constants,
       VK_NULL_HANDLE,
       0,
-      v_src.image(pipeline_barrier, api::PipelineStage::COMPUTE),
+      v_src.image(pipeline_barrier, vkapi::PipelineStage::COMPUTE),
       dst_buffer,
       v_src.sizes_ubo());
 }
 
 void record_conv2d_prepack_weights_op(
     api::Context* const context,
-    api::VulkanBuffer& src_buffer,
+    vkapi::VulkanBuffer& src_buffer,
     api::vTensor& v_dst,
     const std::vector<int64_t>& original_sizes,
     const bool transposed) {
-  api::PipelineBarrier pipeline_barrier{};
+  vkapi::PipelineBarrier pipeline_barrier{};
 
   std::string kernel_name;
   if (transposed) {
@@ -124,12 +128,12 @@ void record_conv2d_prepack_weights_op(
   }
   kernel_name += "_prepack_weights";
   add_dtype_suffix(kernel_name, v_dst);
-  api::ShaderInfo shader = VK_KERNEL_FROM_STR(kernel_name);
+  vkapi::ShaderInfo shader = VK_KERNEL_FROM_STR(kernel_name);
 
   api::ParamsBuffer original_sizes_ubo(
       context, utils::make_ivec4(original_sizes, /*reverse = */ true));
 
-  api::SpecVarList specialization_constants = {};
+  vkapi::SpecVarList specialization_constants = {};
   context->submit_compute_job(
       shader,
       pipeline_barrier,
@@ -140,8 +144,8 @@ void record_conv2d_prepack_weights_op(
       0,
       v_dst.image(
           pipeline_barrier,
-          api::PipelineStage::COMPUTE,
-          api::MemoryAccessType::WRITE),
+          vkapi::PipelineStage::COMPUTE,
+          vkapi::MemoryAccessType::WRITE),
       src_buffer,
       v_dst.sizes_ubo(),
       original_sizes_ubo.buffer());
@@ -156,8 +160,8 @@ void record_binary_op(
   std::string kernel_name = "binary_" + op_name + "_nobroadcast__test";
   add_dtype_suffix(kernel_name, v_dst);
 
-  api::PipelineBarrier pipeline_barrier{};
-  api::SpecVarList specialization_constants = {};
+  vkapi::PipelineBarrier pipeline_barrier{};
+  vkapi::SpecVarList specialization_constants = {};
   context->submit_compute_job(
       VK_KERNEL_FROM_STR(kernel_name),
       pipeline_barrier,
@@ -168,10 +172,10 @@ void record_binary_op(
       0,
       v_dst.image(
           pipeline_barrier,
-          api::PipelineStage::COMPUTE,
-          api::MemoryAccessType::WRITE),
-      v_in1.image(pipeline_barrier, api::PipelineStage::COMPUTE),
-      v_in2.image(pipeline_barrier, api::PipelineStage::COMPUTE),
+          vkapi::PipelineStage::COMPUTE,
+          vkapi::MemoryAccessType::WRITE),
+      v_in1.image(pipeline_barrier, vkapi::PipelineStage::COMPUTE),
+      v_in2.image(pipeline_barrier, vkapi::PipelineStage::COMPUTE),
       v_dst.sizes_ubo());
 }
 
@@ -200,16 +204,16 @@ void execute_and_check_add(
 void record_index_fill_buffer(api::Context* context, api::vTensor& v_ten) {
   std::string kernel_name("idx_fill_buffer");
   switch (v_ten.dtype()) {
-    case api::kFloat:
+    case vkapi::kFloat:
       kernel_name += "_float";
       break;
-    case api::kHalf:
+    case vkapi::kHalf:
       kernel_name += "_half";
       break;
-    case api::kQInt8:
+    case vkapi::kQInt8:
       kernel_name += "_int8";
       break;
-    case api::kQUInt8:
+    case vkapi::kQUInt8:
       kernel_name += "_uint8";
       break;
     default:
@@ -220,8 +224,8 @@ void record_index_fill_buffer(api::Context* context, api::vTensor& v_ten) {
   api::ParamsBuffer params(api::context(), int32_t(v_ten.numel()));
 
   {
-    api::PipelineBarrier pipeline_barrier{};
-    api::SpecVarList specialization_constants = {};
+    vkapi::PipelineBarrier pipeline_barrier{};
+    vkapi::SpecVarList specialization_constants = {};
     api::context()->submit_compute_job(
         VK_KERNEL_FROM_STR(kernel_name),
         pipeline_barrier,
@@ -232,8 +236,8 @@ void record_index_fill_buffer(api::Context* context, api::vTensor& v_ten) {
         0,
         v_ten.buffer(
             pipeline_barrier,
-            api::PipelineStage::COMPUTE,
-            api::MemoryAccessType::READ),
+            vkapi::PipelineStage::COMPUTE,
+            vkapi::MemoryAccessType::READ),
         params.buffer());
   }
 }
@@ -242,8 +246,8 @@ void record_scalar_add_buffer(
     api::Context* context,
     api::vTensor& v_ten,
     float offset) {
-  api::PipelineBarrier pipeline_barrier{};
-  api::SpecVarList specialization_constants = {SV(offset)};
+  vkapi::PipelineBarrier pipeline_barrier{};
+  vkapi::SpecVarList specialization_constants = {SV(offset)};
   std::string kernel = "scalar_add_buffer";
   add_dtype_suffix(kernel, v_ten);
   api::context()->submit_compute_job(
@@ -256,8 +260,8 @@ void record_scalar_add_buffer(
       0,
       v_ten.buffer(
           pipeline_barrier,
-          api::PipelineStage::COMPUTE,
-          api::MemoryAccessType::READ | api::MemoryAccessType::WRITE),
+          vkapi::PipelineStage::COMPUTE,
+          vkapi::MemoryAccessType::READ | vkapi::MemoryAccessType::WRITE),
       v_ten.ntexels_ubo());
 }
 
@@ -277,7 +281,7 @@ void fill_vtensor(api::vTensor& vten, std::vector<float>& data) {
   api::StorageBuffer staging_buffer(api::context(), vten.dtype(), data.size());
 
 #define CASE(ctype, name)                                          \
-  case api::ScalarType::name: {                                    \
+  case vkapi::ScalarType::name: {                                  \
     std::vector<ctype> data_converted;                             \
     data_converted.resize(data.size());                            \
     for (int i = 0; i < data.size(); ++i) {                        \
@@ -295,7 +299,7 @@ void fill_vtensor(api::vTensor& vten, std::vector<float>& data) {
 
 #undef CASE
 
-  if (vten.storage_type() == api::StorageType::BUFFER) {
+  if (vten.storage_type() == vkapi::StorageType::BUFFER) {
     record_nchw_to_buffer_op(api::context(), staging_buffer.buffer(), vten);
   } else {
     record_nchw_to_image_op(api::context(), staging_buffer.buffer(), vten);
@@ -332,18 +336,18 @@ void extract_vtensor(api::vTensor& vten, std::vector<float>& data) {
   api::StorageBuffer staging_buffer(
       api::context(), vten.dtype(), vten.gpu_numel());
 
-  if (vten.storage_type() == api::StorageType::BUFFER) {
+  if (vten.storage_type() == vkapi::StorageType::BUFFER) {
     record_buffer_to_nchw_op(api::context(), vten, staging_buffer.buffer());
   } else {
     record_image_to_nchw_op(api::context(), vten, staging_buffer.buffer());
   }
 
-  api::VulkanFence fence = api::context()->fences().get_fence();
+  vkapi::VulkanFence fence = api::context()->fences().get_fence();
   api::context()->submit_cmd_to_gpu(fence.get_submit_handle());
   fence.wait();
 
 #define CASE(ctype, name)                                          \
-  case api::ScalarType::name: {                                    \
+  case vkapi::ScalarType::name: {                                  \
     std::vector<ctype> data_converted(data.size());                \
     copy_staging_to_ptr(                                           \
         staging_buffer, data_converted.data(), vten.gpu_nbytes()); \
@@ -366,12 +370,12 @@ void extract_vtensor(api::vTensor& vten, std::vector<float>& data) {
 //
 
 void submit_to_gpu() {
-  api::VulkanFence fence = api::context()->fences().get_fence();
+  vkapi::VulkanFence fence = api::context()->fences().get_fence();
   api::context()->submit_cmd_to_gpu(fence.get_submit_handle());
   fence.wait();
 }
 
-api::Allocation allocate_memory_for(const api::vTensor& vten) {
+vkapi::Allocation allocate_memory_for(const api::vTensor& vten) {
   return api::context()->adapter_ptr()->vma().create_allocation(
       vten.get_memory_requirements(), vten.get_allocation_create_info());
 }
diff --git a/backends/vulkan/test/utils/test_utils.h b/backends/vulkan/test/utils/test_utils.h
index 2d111a83f1a..691904232b1 100644
--- a/backends/vulkan/test/utils/test_utils.h
+++ b/backends/vulkan/test/utils/test_utils.h
@@ -18,33 +18,33 @@
 
 using namespace vkcompute;
 
-#define CREATE_FLOAT_TEXTURE(sizes, allocate_memory) \
-  api::vTensor(                                      \
-      api::context(),                                \
-      sizes,                                         \
-      api::kFloat,                                   \
-      api::StorageType::TEXTURE_3D,                  \
-      api::GPUMemoryLayout::TENSOR_CHANNELS_PACKED,  \
+#define CREATE_FLOAT_TEXTURE(sizes, allocate_memory)  \
+  api::vTensor(                                       \
+      api::context(),                                 \
+      sizes,                                          \
+      vkapi::kFloat,                                  \
+      vkapi::StorageType::TEXTURE_3D,                 \
+      vkapi::GPUMemoryLayout::TENSOR_CHANNELS_PACKED, \
       allocate_memory);
 
 #define CREATE_FLOAT_BUFFER(sizes, allocate_memory) \
   api::vTensor(                                     \
       api::context(),                               \
       sizes,                                        \
-      api::kFloat,                                  \
-      api::StorageType::BUFFER,                     \
-      api::GPUMemoryLayout::TENSOR_WIDTH_PACKED,    \
+      vkapi::kFloat,                                \
+      vkapi::StorageType::BUFFER,                   \
+      vkapi::GPUMemoryLayout::TENSOR_WIDTH_PACKED,  \
       allocate_memory);
 
 #define DEFINE_STAGING_BUFFER_AND_RECORD_TO_GPU_FOR(tensor) \
   api::StorageBuffer staging_buffer_##tensor(               \
-      api::context(), api::kFloat, tensor.gpu_numel());     \
+      api::context(), vkapi::kFloat, tensor.gpu_numel());   \
   record_nchw_to_image_op(                                  \
       api::context(), staging_buffer_##tensor.buffer(), tensor);
 
 #define DEFINE_STAGING_BUFFER_AND_RECORD_FROM_GPU_FOR(tensor) \
   api::StorageBuffer staging_buffer_##tensor(                 \
-      api::context(), api::kFloat, tensor.gpu_numel());       \
+      api::context(), vkapi::kFloat, tensor.gpu_numel());     \
   record_image_to_nchw_op(                                    \
       api::context(), tensor, staging_buffer_##tensor.buffer());
 
@@ -64,27 +64,27 @@ using namespace vkcompute;
 
 void record_nchw_to_buffer_op(
     api::Context* const context,
-    api::VulkanBuffer& src_buffer,
+    vkapi::VulkanBuffer& src_buffer,
     api::vTensor& v_dst);
 
 void record_buffer_to_nchw_op(
     api::Context* const context,
     api::vTensor& v_src,
-    api::VulkanBuffer& dst_buffer);
+    vkapi::VulkanBuffer& dst_buffer);
 
 void record_nchw_to_image_op(
     api::Context* const context,
-    api::VulkanBuffer& src_buffer,
+    vkapi::VulkanBuffer& src_buffer,
     api::vTensor& v_dst);
 
 void record_image_to_nchw_op(
     api::Context* const context,
     api::vTensor& v_src,
-    api::VulkanBuffer& dst_buffer);
+    vkapi::VulkanBuffer& dst_buffer);
 
 void record_conv2d_prepack_weights_op(
     api::Context* const context,
-    api::VulkanBuffer& src_buffer,
+    vkapi::VulkanBuffer& src_buffer,
     api::vTensor& v_dst,
     const std::vector<int64_t>& original_sizes,
     const bool transposed);
@@ -181,7 +181,7 @@ inline int64_t get_buf_idx(
 
 void submit_to_gpu();
 
-api::Allocation allocate_memory_for(const api::vTensor& vten);
+vkapi::Allocation allocate_memory_for(const api::vTensor& vten);
 
 VmaTotalStatistics get_vma_stats();
 
diff --git a/backends/vulkan/test/vulkan_compute_api_test.cpp b/backends/vulkan/test/vulkan_compute_api_test.cpp
index f45dacf3137..688be92b760 100644
--- a/backends/vulkan/test/vulkan_compute_api_test.cpp
+++ b/backends/vulkan/test/vulkan_compute_api_test.cpp
@@ -55,7 +55,7 @@ class VulkanComputeAPITest : public ::testing::Test {
   }
 
   void TearDown() override {
-    api::context()->flush();
+    context()->flush();
 
     // Make sure we are ending with a clean slate
     EXPECT_TRUE(get_vma_allocation_count() == 0);
@@ -63,19 +63,19 @@ class VulkanComputeAPITest : public ::testing::Test {
 };
 
 TEST_F(VulkanComputeAPITest, print_adapter) {
-  std::cout << *(api::context()->adapter_ptr()) << std::endl;
+  std::cout << *(context()->adapter_ptr()) << std::endl;
 }
 
 std::vector<int64_t> get_reference_strides(
     const std::vector<int64_t>& sizes,
-    const api::GPUMemoryLayout layout,
+    const vkapi::GPUMemoryLayout layout,
     const bool texel_strides) {
   int64_t C = utils::val_at(-3, sizes);
   int64_t H = utils::val_at(-2, sizes);
   int64_t W = utils::val_at(-1, sizes);
 
   switch (layout) {
-    case api::kWidthPacked:
+    case vkapi::kWidthPacked:
       if (texel_strides) {
         W = utils::div_up(W, INT64_C(4));
       }
@@ -92,7 +92,7 @@ std::vector<int64_t> get_reference_strides(
           return {};
       }
       break;
-    case api::kHeightPacked:
+    case vkapi::kHeightPacked:
       if (texel_strides) {
         H = utils::div_up(H, INT64_C(4));
       }
@@ -108,7 +108,7 @@ std::vector<int64_t> get_reference_strides(
         default:
           return {};
       }
-    case api::kChannelsPacked:
+    case vkapi::kChannelsPacked:
       if (texel_strides) {
         C = utils::div_up(C, INT64_C(4));
       }
@@ -134,7 +134,7 @@ TEST_F(VulkanComputeAPITest, calculate_tensor_strides_test) {
       continue;
     }
     for (const auto& layout :
-         {api::kWidthPacked, api::kHeightPacked, api::kChannelsPacked}) {
+         {vkapi::kWidthPacked, vkapi::kHeightPacked, vkapi::kChannelsPacked}) {
       // texel_strides = true
       {
         std::vector<int64_t> strides = calculate_strides(sizes, layout);
@@ -157,7 +157,7 @@ TEST_F(VulkanComputeAPITest, calculate_tensor_strides_test) {
 
 TEST_F(VulkanComputeAPITest, retrieve_custom_shader_test) {
   // Try to get shader from custom shader library
-  const api::ShaderInfo& kernel = VK_KERNEL(test_shader);
+  const vkapi::ShaderInfo& kernel = VK_KERNEL(test_shader);
 
   ASSERT_TRUE(kernel.kernel_name == "test_shader");
 }
@@ -168,18 +168,18 @@ TEST_F(VulkanComputeAPITest, spec_var_classes_test) {
   ASSERT_FALSE(SV(15.0f) == SV(15));
   ASSERT_FALSE(SV(1u) == SV(true));
 
-  size_t sv_size = sizeof(api::SpecVar);
+  size_t sv_size = sizeof(vkapi::SpecVar);
 
-  api::SpecVarList spec_vars = {};
+  vkapi::SpecVarList spec_vars = {};
   ASSERT_TRUE(spec_vars.size() == 0);
   spec_vars = {SV(1.1f), SV(32), SV(45)};
   ASSERT_TRUE(spec_vars.size() == 3);
-  api::SpecVarList spec_vars_other = {SV(2.6f), SV(true), SV(78u), SV(5.5f)};
+  vkapi::SpecVarList spec_vars_other = {SV(2.6f), SV(true), SV(78u), SV(5.5f)};
   spec_vars.append(spec_vars_other);
   ASSERT_TRUE(spec_vars.size() == 7);
 
   // Check validity of the data
-  const api::SpecVar* data = spec_vars.data();
+  const vkapi::SpecVar* data = spec_vars.data();
   ASSERT_TRUE(*(reinterpret_cast<const float*>(data + 3)) == 2.6f);
   ASSERT_TRUE(*(reinterpret_cast<const int32_t*>(data + 1)) == 32);
   ASSERT_TRUE(*(reinterpret_cast<const int32_t*>(data + 5)) == 78u);
@@ -199,11 +199,11 @@ TEST_F(VulkanComputeAPITest, spec_var_classes_test) {
   }
 
   // Check copy
-  api::SpecVarList spec_vars_copy(spec_vars);
+  vkapi::SpecVarList spec_vars_copy(spec_vars);
   ASSERT_TRUE(spec_vars_copy.size() == 7);
 
   // Check validity of the copied data
-  const api::SpecVar* copy_data = spec_vars_copy.data();
+  const vkapi::SpecVar* copy_data = spec_vars_copy.data();
   ASSERT_TRUE(*(reinterpret_cast<const bool*>(copy_data + 4)) == true);
   ASSERT_TRUE(*(reinterpret_cast<const int32_t*>(copy_data + 2)) == 45);
   ASSERT_TRUE(*(reinterpret_cast<const float*>(copy_data + 6)) == 5.5f);
@@ -211,16 +211,16 @@ TEST_F(VulkanComputeAPITest, spec_var_classes_test) {
 
 TEST_F(VulkanComputeAPITest, spec_var_shader_test) {
   size_t len = 16;
-  api::StorageBuffer buffer(api::context(), api::kFloat, len);
+  StorageBuffer buffer(context(), vkapi::kFloat, len);
 
   float scale = 3.0f;
   float offset = 1.5f;
 
   {
-    api::ParamsBuffer params(api::context(), int32_t(len));
+    ParamsBuffer params(context(), int32_t(len));
     uint32_t len_div4 = utils::div_up(uint32_t(len), uint32_t(4));
-    api::PipelineBarrier pipeline_barrier{};
-    api::context()->submit_compute_job(
+    vkapi::PipelineBarrier pipeline_barrier{};
+    context()->submit_compute_job(
         VK_KERNEL(fill_buffer),
         pipeline_barrier,
         {64, 1, 1},
@@ -243,9 +243,9 @@ TEST_F(VulkanComputeAPITest, spec_var_shader_test) {
 }
 
 TEST_F(VulkanComputeAPITest, update_params_between_submit) {
-  api::context()->set_cmd(/*reusable = */ true);
+  context()->set_cmd(/*reusable = */ true);
   std::vector<int64_t> sizes = {4, 4, 2};
-  api::vTensor a = CREATE_FLOAT_TEXTURE(sizes, /*allocate_memory = */ true);
+  vTensor a = CREATE_FLOAT_TEXTURE(sizes, /*allocate_memory = */ true);
 
   std::string kernel_name("fill_texture__test");
   add_dtype_suffix(kernel_name, a);
@@ -262,12 +262,12 @@ TEST_F(VulkanComputeAPITest, update_params_between_submit) {
       {5.0, 5.0, 5.0, 5.0},
   };
 
-  api::ParamsBuffer params(api::context(), block);
+  ParamsBuffer params(context(), block);
 
   {
-    api::PipelineBarrier pipeline_barrier{};
-    api::SpecVarList specialization_constants = {};
-    api::context()->submit_compute_job(
+    vkapi::PipelineBarrier pipeline_barrier{};
+    vkapi::SpecVarList specialization_constants = {};
+    context()->submit_compute_job(
         VK_KERNEL_FROM_STR(kernel_name),
         pipeline_barrier,
         {4, 4, 4},
@@ -277,13 +277,13 @@ TEST_F(VulkanComputeAPITest, update_params_between_submit) {
         0,
         a.image(
             pipeline_barrier,
-            api::PipelineStage::COMPUTE,
-            api::MemoryAccessType::WRITE),
+            vkapi::PipelineStage::COMPUTE,
+            vkapi::MemoryAccessType::WRITE),
         params.buffer());
   }
 
-  api::StorageBuffer staging_buffer(api::context(), api::kFloat, a.gpu_numel());
-  record_image_to_nchw_op(api::context(), a, staging_buffer.buffer());
+  StorageBuffer staging_buffer(context(), vkapi::kFloat, a.gpu_numel());
+  record_image_to_nchw_op(context(), a, staging_buffer.buffer());
 
   submit_to_gpu();
   check_staging_buffer(staging_buffer, 5.0f);
@@ -300,22 +300,22 @@ TEST_F(VulkanComputeAPITest, update_params_between_submit) {
   check_staging_buffer(staging_buffer, 4.0f);
 }
 
-template <typename T, api::ScalarType dtype>
+template <typename T, vkapi::ScalarType dtype>
 void test_storage_buffer_type(const size_t len) {
-  api::StorageBuffer buffer(api::context(), dtype, len);
+  StorageBuffer buffer(context(), dtype, len);
 
   std::string kernel_name("idx_fill_buffer");
   switch (dtype) {
-    case api::kFloat:
+    case vkapi::kFloat:
       kernel_name += "_float";
       break;
-    case api::kHalf:
+    case vkapi::kHalf:
       kernel_name += "_half";
       break;
-    case api::kQInt8:
+    case vkapi::kQInt8:
       kernel_name += "_int8";
       break;
-    case api::kQUInt8:
+    case vkapi::kQUInt8:
       kernel_name += "_uint8";
       break;
     default:
@@ -323,13 +323,13 @@ void test_storage_buffer_type(const size_t len) {
       break;
   }
 
-  api::ParamsBuffer params(api::context(), int32_t(len));
+  ParamsBuffer params(context(), int32_t(len));
 
   {
     uint32_t len_div4 = utils::div_up(uint32_t(len), uint32_t(4));
-    api::PipelineBarrier pipeline_barrier{};
-    api::SpecVarList specialization_constants = {};
-    api::context()->submit_compute_job(
+    vkapi::PipelineBarrier pipeline_barrier{};
+    vkapi::SpecVarList specialization_constants = {};
+    context()->submit_compute_job(
         VK_KERNEL_FROM_STR(kernel_name),
         pipeline_barrier,
         {64, 1, 1},
@@ -352,37 +352,37 @@ void test_storage_buffer_type(const size_t len) {
 }
 
 TEST_F(VulkanComputeAPITest, test_buffer_float) {
-  test_storage_buffer_type<float, api::kFloat>(16);
+  test_storage_buffer_type<float, vkapi::kFloat>(16);
 }
 
 TEST_F(VulkanComputeAPITest, test_buffer_float16) {
-  if (!api::context()->adapter_ptr()->has_full_float16_buffers_support()) {
+  if (!context()->adapter_ptr()->has_full_float16_buffers_support()) {
     GTEST_SKIP();
   }
-  test_storage_buffer_type<torch::executor::Half, api::kHalf>(16);
+  test_storage_buffer_type<torch::executor::Half, vkapi::kHalf>(16);
 }
 
 TEST_F(VulkanComputeAPITest, test_buffer_int8) {
-  if (!api::context()->adapter_ptr()->has_full_int8_buffers_support()) {
+  if (!context()->adapter_ptr()->has_full_int8_buffers_support()) {
     GTEST_SKIP();
   }
-  test_storage_buffer_type<int8_t, api::kQInt8>(16);
+  test_storage_buffer_type<int8_t, vkapi::kQInt8>(16);
 }
 
 TEST_F(VulkanComputeAPITest, test_zero_size_tensor) {
   // Simple test that performs a + b -> c
 
   std::vector<int64_t> sizes = {0, 5, 7};
-  api::vTensor a = CREATE_FLOAT_TEXTURE(sizes, /*allocate_memory = */ true);
-  api::vTensor b = CREATE_FLOAT_TEXTURE(sizes, /*allocate_memory = */ true);
-  api::vTensor c = CREATE_FLOAT_TEXTURE(sizes, /*allocate_memory = */ true);
+  vTensor a = CREATE_FLOAT_TEXTURE(sizes, /*allocate_memory = */ true);
+  vTensor b = CREATE_FLOAT_TEXTURE(sizes, /*allocate_memory = */ true);
+  vTensor c = CREATE_FLOAT_TEXTURE(sizes, /*allocate_memory = */ true);
 
   // Fill input tensors
   fill_vtensor(a, 2.5f);
   fill_vtensor(b, 1.5f);
 
   // a + b -> c
-  record_binary_op(api::context(), "add", a, b, c);
+  record_binary_op(context(), "add", a, b, c);
 
   // Extract output tensor
   std::vector<float> data_out = extract_vtensor(c);
@@ -402,10 +402,10 @@ TEST_F(VulkanComputeAPITest, test_zero_size_tensor) {
 }
 
 template <typename T>
-void run_buffer_tensor_sanity_check(api::vTensor& tensor) {
+void run_buffer_tensor_sanity_check(vTensor& tensor) {
   fill_vtensor(tensor, 0.0f, true);
 
-  record_scalar_add_buffer(api::context(), tensor, 2.0f);
+  record_scalar_add_buffer(context(), tensor, 2.0f);
   std::vector<float> data_out = extract_vtensor(tensor);
 
   // Check output
@@ -416,33 +416,34 @@ void run_buffer_tensor_sanity_check(api::vTensor& tensor) {
 
 TEST_F(VulkanComputeAPITest, buffer_tensor_sanity_check) {
   for (const auto& sizes : standard_sizes_to_test) {
-    for (const auto& dtype : {api::kFloat, api::kHalf, api::kChar}) {
-      if (dtype == api::kHalf &&
-          !api::context()->adapter_ptr()->has_full_float16_buffers_support()) {
+    for (const auto& dtype : {vkapi::kFloat, vkapi::kHalf, vkapi::kChar}) {
+      if (dtype == vkapi::kHalf &&
+          !context()->adapter_ptr()->has_full_float16_buffers_support()) {
         continue;
       }
-      if (dtype == api::kHalf && utils::multiply_integers(sizes) >= 2048) {
+      if (dtype == vkapi::kHalf && utils::multiply_integers(sizes) >= 2048) {
         continue;
       }
-      if (dtype == api::kChar &&
-          !api::context()->adapter_ptr()->has_full_int8_buffers_support()) {
+      if (dtype == vkapi::kChar &&
+          !context()->adapter_ptr()->has_full_int8_buffers_support()) {
         continue;
       }
-      if (dtype == api::kChar && utils::multiply_integers(sizes) >= 128) {
+      if (dtype == vkapi::kChar && utils::multiply_integers(sizes) >= 128) {
         continue;
       }
       for (const auto& layout :
-           {api::kWidthPacked, api::kHeightPacked, api::kChannelsPacked}) {
-        api::vTensor a =
-            api::vTensor(api::context(), sizes, dtype, api::kBuffer, layout);
+           {vkapi::kWidthPacked,
+            vkapi::kHeightPacked,
+            vkapi::kChannelsPacked}) {
+        vTensor a = vTensor(context(), sizes, dtype, vkapi::kBuffer, layout);
         switch (dtype) {
-          case api::kFloat:
+          case vkapi::kFloat:
             run_buffer_tensor_sanity_check<float>(a);
             break;
-          case api::kHalf:
+          case vkapi::kHalf:
             run_buffer_tensor_sanity_check<torch::executor::Half>(a);
             break;
-          case api::kChar:
+          case vkapi::kChar:
             run_buffer_tensor_sanity_check<int8_t>(a);
             break;
           default:
@@ -457,16 +458,16 @@ TEST_F(VulkanComputeAPITest, texture_add_sanity_check) {
   // Simple test that performs a + b -> c
 
   std::vector<int64_t> sizes = {4, 4, 1};
-  api::vTensor a = CREATE_FLOAT_TEXTURE(sizes, /*allocate_memory = */ true);
-  api::vTensor b = CREATE_FLOAT_TEXTURE(sizes, /*allocate_memory = */ true);
-  api::vTensor c = CREATE_FLOAT_TEXTURE(sizes, /*allocate_memory = */ true);
+  vTensor a = CREATE_FLOAT_TEXTURE(sizes, /*allocate_memory = */ true);
+  vTensor b = CREATE_FLOAT_TEXTURE(sizes, /*allocate_memory = */ true);
+  vTensor c = CREATE_FLOAT_TEXTURE(sizes, /*allocate_memory = */ true);
 
   // Fill input tensors
   fill_vtensor(a, 2.5f);
   fill_vtensor(b, 1.5f);
 
   // a + b -> c
-  record_binary_op(api::context(), "add", a, b, c);
+  record_binary_op(context(), "add", a, b, c);
 
   // Extract output tensor
   std::vector<float> data_out = extract_vtensor(c);
@@ -482,9 +483,9 @@ TEST_F(VulkanComputeAPITest, texture_deferred_allocation_test) {
   // memory is allocated in a deferred fashion
 
   std::vector<int64_t> sizes = {4, 4, 1};
-  api::vTensor a = CREATE_FLOAT_TEXTURE(sizes, /*allocate_memory = */ false);
-  api::vTensor b = CREATE_FLOAT_TEXTURE(sizes, /*allocate_memory = */ false);
-  api::vTensor c = CREATE_FLOAT_TEXTURE(sizes, /*allocate_memory = */ false);
+  vTensor a = CREATE_FLOAT_TEXTURE(sizes, /*allocate_memory = */ false);
+  vTensor b = CREATE_FLOAT_TEXTURE(sizes, /*allocate_memory = */ false);
+  vTensor c = CREATE_FLOAT_TEXTURE(sizes, /*allocate_memory = */ false);
 
   // No allocations made so far
   EXPECT_TRUE(get_vma_allocation_count() == 0);
@@ -495,11 +496,11 @@ TEST_F(VulkanComputeAPITest, texture_deferred_allocation_test) {
   std::fill(data_b.begin(), data_b.end(), 1.5f);
 
   // Allocate memory at the last possible opportunity
-  api::Allocation a_mem = allocate_memory_for(a);
+  vkapi::Allocation a_mem = allocate_memory_for(a);
   a.image().bind_allocation(a_mem);
-  api::Allocation b_mem = allocate_memory_for(b);
+  vkapi::Allocation b_mem = allocate_memory_for(b);
   b.image().bind_allocation(b_mem);
-  api::Allocation c_mem = allocate_memory_for(c);
+  vkapi::Allocation c_mem = allocate_memory_for(c);
   c.image().bind_allocation(c_mem);
 
   // One allocation for each tensor
@@ -508,7 +509,7 @@ TEST_F(VulkanComputeAPITest, texture_deferred_allocation_test) {
   fill_vtensor(a, data_a);
   fill_vtensor(b, data_b);
 
-  record_binary_op(api::context(), "add", a, b, c);
+  record_binary_op(context(), "add", a, b, c);
 
   std::vector<float> data_c(c.gpu_numel());
   extract_vtensor(c, data_c);
@@ -525,25 +526,25 @@ TEST_F(VulkanComputeAPITest, texture_resource_aliasing_test) {
   // and share memory between tensors whenever possible.
 
   std::vector<int64_t> sizes = {4, 4, 1};
-  api::vTensor a = CREATE_FLOAT_TEXTURE(sizes, /*allocate_memory = */ false);
-  api::vTensor b = CREATE_FLOAT_TEXTURE(sizes, /*allocate_memory = */ false);
-  api::vTensor c = CREATE_FLOAT_TEXTURE(sizes, /*allocate_memory = */ false);
-  api::vTensor d = CREATE_FLOAT_TEXTURE(sizes, /*allocate_memory = */ false);
-  api::vTensor e = CREATE_FLOAT_TEXTURE(sizes, /*allocate_memory = */ false);
+  vTensor a = CREATE_FLOAT_TEXTURE(sizes, /*allocate_memory = */ false);
+  vTensor b = CREATE_FLOAT_TEXTURE(sizes, /*allocate_memory = */ false);
+  vTensor c = CREATE_FLOAT_TEXTURE(sizes, /*allocate_memory = */ false);
+  vTensor d = CREATE_FLOAT_TEXTURE(sizes, /*allocate_memory = */ false);
+  vTensor e = CREATE_FLOAT_TEXTURE(sizes, /*allocate_memory = */ false);
 
   // No allocations made so far
   EXPECT_TRUE(get_vma_allocation_count() == 0);
 
   // a and d can share the same memory allocation
-  api::Allocation a_d_mem = allocate_memory_for(a);
+  vkapi::Allocation a_d_mem = allocate_memory_for(a);
   a.image().bind_allocation(a_d_mem);
   d.image().bind_allocation(a_d_mem);
   // b and e can share the same memory allocation
-  api::Allocation b_e_mem = allocate_memory_for(b);
+  vkapi::Allocation b_e_mem = allocate_memory_for(b);
   b.image().bind_allocation(b_e_mem);
   e.image().bind_allocation(b_e_mem);
   // c must have its own memory allocation
-  api::Allocation c_mem = allocate_memory_for(c);
+  vkapi::Allocation c_mem = allocate_memory_for(c);
   c.image().bind_allocation(c_mem);
 
   // 3 allocations should be made
@@ -562,13 +563,13 @@ TEST_F(VulkanComputeAPITest, texture_resource_aliasing_test) {
   fill_vtensor(b, data_b);
 
   // a + b -> c
-  record_binary_op(api::context(), "add", a, b, c);
+  record_binary_op(context(), "add", a, b, c);
 
   // Now d can be filled with data
   fill_vtensor(d, data_d);
 
   // c + d -> e
-  record_binary_op(api::context(), "add", c, d, e);
+  record_binary_op(context(), "add", c, d, e);
 
   // Extract data from e
   std::vector<float> data_e(e.gpu_numel());
@@ -585,25 +586,25 @@ TEST_F(VulkanComputeAPITest, resource_bind_twice_fails) {
   // fails
 
   std::vector<int64_t> sizes = {4, 4, 1};
-  api::vTensor a = CREATE_FLOAT_TEXTURE(sizes, /*allocate_memory = */ true);
+  vTensor a = CREATE_FLOAT_TEXTURE(sizes, /*allocate_memory = */ true);
 
   // Try to double bind a resource, which should fail
-  api::Allocation a_mem = allocate_memory_for(a);
-  EXPECT_THROW(a.image().bind_allocation(a_mem), api::Error);
+  vkapi::Allocation a_mem = allocate_memory_for(a);
+  EXPECT_THROW(a.image().bind_allocation(a_mem), vkapi::Error);
 }
 
 TEST_F(VulkanComputeAPITest, resource_destructor_non_owning_memory) {
-  // Check that the destructor of a api::vTensor that does not own its memory
+  // Check that the destructor of a vTensor that does not own its memory
   // does not free the memory
 
-  api::Allocation memory;
+  vkapi::Allocation memory;
 
   // Default Allocation constructor should not allocate memory
   EXPECT_TRUE(get_vma_allocation_count() == 0);
 
   std::vector<int64_t> sizes = {4, 4, 1};
   {
-    api::vTensor a = CREATE_FLOAT_TEXTURE(sizes, /*allocate_memory = */ false);
+    vTensor a = CREATE_FLOAT_TEXTURE(sizes, /*allocate_memory = */ false);
 
     memory = allocate_memory_for(a);
     EXPECT_TRUE(get_vma_allocation_count() == 1);
@@ -615,11 +616,11 @@ TEST_F(VulkanComputeAPITest, resource_destructor_non_owning_memory) {
 }
 
 TEST_F(VulkanComputeAPITest, use_non_bound_textures_fails) {
-  // Try to encode a command buffer with a api::vTensor that does not have
+  // Try to encode a command buffer with a vTensor that does not have
   // memory
 
   std::vector<int64_t> sizes = {4, 4, 1};
-  api::vTensor a = CREATE_FLOAT_TEXTURE(sizes, /*allocate_memory = */ false);
+  vTensor a = CREATE_FLOAT_TEXTURE(sizes, /*allocate_memory = */ false);
 
   // No allocations yet
   EXPECT_TRUE(get_vma_allocation_count() == 0);
@@ -627,15 +628,15 @@ TEST_F(VulkanComputeAPITest, use_non_bound_textures_fails) {
   std::vector<float> data_a(a.gpu_numel());
   std::fill(data_a.begin(), data_a.end(), 2.5f);
 
-  // Encoding a command buffer with a api::vTensor without memory should throw
-  EXPECT_THROW(fill_vtensor(a, data_a), api::Error);
+  // Encoding a command buffer with a vTensor without memory should throw
+  EXPECT_THROW(fill_vtensor(a, data_a), vkapi::Error);
 }
 
 TEST_F(VulkanComputeAPITest, tensor_reallocation_test) {
   std::vector<int64_t> sizes = {4, 4, 1};
-  api::vTensor a = CREATE_FLOAT_TEXTURE(sizes, /*allocate_memory = */ true);
-  api::vTensor b = CREATE_FLOAT_TEXTURE(sizes, /*allocate_memory = */ true);
-  api::vTensor c = CREATE_FLOAT_TEXTURE(sizes, /*allocate_memory = */ true);
+  vTensor a = CREATE_FLOAT_TEXTURE(sizes, /*allocate_memory = */ true);
+  vTensor b = CREATE_FLOAT_TEXTURE(sizes, /*allocate_memory = */ true);
+  vTensor c = CREATE_FLOAT_TEXTURE(sizes, /*allocate_memory = */ true);
 
   execute_and_check_add(a, b, c, 3.0f, 5.0f);
 
@@ -646,7 +647,7 @@ TEST_F(VulkanComputeAPITest, tensor_reallocation_test) {
   c.reallocate(new_sizes);
 
   // Flush everything
-  api::context()->flush();
+  context()->flush();
 
   execute_and_check_add(a, b, c, 12.0f, 10.0f);
 }
@@ -655,15 +656,15 @@ TEST_F(
     VulkanComputeAPITest,
     tensor_reallocation_with_deferred_allocation_test) {
   std::vector<int64_t> sizes = {8, 8, 8};
-  api::vTensor a = CREATE_FLOAT_TEXTURE(sizes, /*allocate_memory = */ false);
-  api::vTensor b = CREATE_FLOAT_TEXTURE(sizes, /*allocate_memory = */ false);
-  api::vTensor c = CREATE_FLOAT_TEXTURE(sizes, /*allocate_memory = */ false);
+  vTensor a = CREATE_FLOAT_TEXTURE(sizes, /*allocate_memory = */ false);
+  vTensor b = CREATE_FLOAT_TEXTURE(sizes, /*allocate_memory = */ false);
+  vTensor c = CREATE_FLOAT_TEXTURE(sizes, /*allocate_memory = */ false);
 
-  api::Allocation a_mem = allocate_memory_for(a);
+  vkapi::Allocation a_mem = allocate_memory_for(a);
   a.image().bind_allocation(a_mem);
-  api::Allocation b_mem = allocate_memory_for(b);
+  vkapi::Allocation b_mem = allocate_memory_for(b);
   b.image().bind_allocation(b_mem);
-  api::Allocation c_mem = allocate_memory_for(c);
+  vkapi::Allocation c_mem = allocate_memory_for(c);
   c.image().bind_allocation(c_mem);
 
   execute_and_check_add(a, b, c, 4.0f, 8.0f);
@@ -678,7 +679,7 @@ TEST_F(
     c.reallocate(new_sizes);
 
     // Flush everything
-    api::context()->flush();
+    context()->flush();
 
     a.image().bind_allocation(a_mem);
     b.image().bind_allocation(b_mem);
@@ -690,11 +691,11 @@ TEST_F(
 }
 
 TEST_F(VulkanComputeAPITest, texture_virtual_resize) {
-  api::context()->set_cmd(/*reusable = */ true);
+  context()->set_cmd(/*reusable = */ true);
   std::vector<int64_t> sizes = {8, 12, 12};
-  api::vTensor a = CREATE_FLOAT_TEXTURE(sizes, /*allocate_memory = */ true);
-  api::vTensor b = CREATE_FLOAT_TEXTURE(sizes, /*allocate_memory = */ true);
-  api::vTensor c = CREATE_FLOAT_TEXTURE(sizes, /*allocate_memory = */ true);
+  vTensor a = CREATE_FLOAT_TEXTURE(sizes, /*allocate_memory = */ true);
+  vTensor b = CREATE_FLOAT_TEXTURE(sizes, /*allocate_memory = */ true);
+  vTensor c = CREATE_FLOAT_TEXTURE(sizes, /*allocate_memory = */ true);
 
   DEFINE_STAGING_BUFFER_AND_RECORD_TO_GPU_FOR(a)
   DEFINE_STAGING_BUFFER_AND_RECORD_TO_GPU_FOR(b)
@@ -702,7 +703,7 @@ TEST_F(VulkanComputeAPITest, texture_virtual_resize) {
   fill_staging(staging_buffer_a, 11.5f);
   fill_staging(staging_buffer_b, 12.5f);
 
-  record_binary_op(api::context(), "add", a, b, c);
+  record_binary_op(context(), "add", a, b, c);
 
   DEFINE_STAGING_BUFFER_AND_RECORD_FROM_GPU_FOR(c)
 
@@ -799,12 +800,12 @@ TEST(VulkanComputeGraphTest, test_zero_dim_tensor) {
 
   // Build graph
 
-  IOValueRef a = graph.add_input_tensor(size_big, api::kFloat);
-  IOValueRef b = graph.add_input_tensor(size_small, api::kFloat);
+  IOValueRef a = graph.add_input_tensor(size_big, vkapi::kFloat);
+  IOValueRef b = graph.add_input_tensor(size_small, vkapi::kFloat);
 
   IOValueRef out = {};
 
-  out.value = graph.add_tensor(size_big, api::kFloat);
+  out.value = graph.add_tensor(size_big, vkapi::kFloat);
 
   auto addFn = VK_GET_OP_FN("aten.add.Tensor");
   addFn(graph, {a.value, b.value, kDummyValueRef, out.value});
@@ -843,11 +844,11 @@ TEST(VulkanComputeGraphTest, test_simple_graph_with_buffer) {
 
   // Build graph
 
-  IOValueRef a = graph.add_input_tensor(sizes, api::kFloat, api::kBuffer);
+  IOValueRef a = graph.add_input_tensor(sizes, vkapi::kFloat, vkapi::kBuffer);
 
   IOValueRef out = {};
 
-  out.value = graph.add_tensor(sizes, api::kFloat, api::kBuffer);
+  out.value = graph.add_tensor(sizes, vkapi::kFloat, vkapi::kBuffer);
 
   auto addFn = VK_GET_OP_FN("aten.abs.default");
   addFn(graph, {a.value, out.value, kDummyValueRef, kDummyValueRef});
@@ -885,12 +886,12 @@ TEST(VulkanComputeGraphTest, test_simple_graph) {
 
   // Build graph
 
-  IOValueRef a = graph.add_input_tensor(size_big, api::kFloat);
-  IOValueRef b = graph.add_input_tensor(size_small, api::kFloat);
+  IOValueRef a = graph.add_input_tensor(size_big, vkapi::kFloat);
+  IOValueRef b = graph.add_input_tensor(size_small, vkapi::kFloat);
 
   IOValueRef out = {};
 
-  out.value = graph.add_tensor(size_big, api::kFloat);
+  out.value = graph.add_tensor(size_big, vkapi::kFloat);
 
   auto addFn = VK_GET_OP_FN("aten.add.Tensor");
   addFn(graph, {a.value, b.value, kDummyValueRef, out.value});
@@ -934,15 +935,15 @@ TEST(VulkanComputeGraphTest, test_simple_prepacked_graph) {
   std::vector<int64_t> size_big = {8, 73, 62};
   std::vector<int64_t> size_small = {8, 73, 1};
 
-  CREATE_WEIGHT_TENSOR(w1, size_small, api::kFloat, 3.5f);
-  CREATE_WEIGHT_TENSOR(w2, size_small, api::kFloat, 3.0f);
+  CREATE_WEIGHT_TENSOR(w1, size_small, vkapi::kFloat, 3.5f);
+  CREATE_WEIGHT_TENSOR(w2, size_small, vkapi::kFloat, 3.0f);
 
   // Build graph
 
-  IOValueRef a = graph.add_input_tensor(size_big, api::kFloat);
+  IOValueRef a = graph.add_input_tensor(size_big, vkapi::kFloat);
 
-  ValueRef c = graph.add_tensor(size_big, api::kFloat);
-  ValueRef e = graph.add_tensor(size_big, api::kFloat);
+  ValueRef c = graph.add_tensor(size_big, vkapi::kFloat);
+  ValueRef e = graph.add_tensor(size_big, vkapi::kFloat);
 
   auto addFn = VK_GET_OP_FN("aten.add.Tensor");
   addFn(graph, {a.value, w1, kDummyValueRef, c});
@@ -996,11 +997,11 @@ TEST(VulkanComputeGraphTest, test_simple_shared_objects_with_resize) {
 
   IOValueRef a = graph.add_input_tensor(
       size_big,
-      api::kFloat,
+      vkapi::kFloat,
       /*shared_object_idx = */ 2);
   IOValueRef b = graph.add_input_tensor(
       size_small,
-      api::kFloat,
+      vkapi::kFloat,
       /*shared_object_idx = */ 4);
 
   // +2: t.sizes_ubo() for each staging shader
@@ -1009,7 +1010,7 @@ TEST(VulkanComputeGraphTest, test_simple_shared_objects_with_resize) {
 
   ValueRef c = graph.add_tensor(
       size_big,
-      api::kFloat,
+      vkapi::kFloat,
       /*shared_object_idx = */ 6);
 
   auto addFn = VK_GET_OP_FN("aten.add.Tensor");
@@ -1017,7 +1018,7 @@ TEST(VulkanComputeGraphTest, test_simple_shared_objects_with_resize) {
 
   IOValueRef d = graph.add_input_tensor(
       size_small,
-      api::kFloat,
+      vkapi::kFloat,
       /*shared_object_idx = */ 2);
 
   // +2: alpha UBO, broadcast UBO for arithmetic shader
@@ -1027,7 +1028,7 @@ TEST(VulkanComputeGraphTest, test_simple_shared_objects_with_resize) {
 
   ValueRef e = graph.add_tensor(
       size_big,
-      api::kFloat,
+      vkapi::kFloat,
       /*shared_object_idx = */ 4);
 
   auto mulFn = VK_GET_OP_FN("aten.mul.Tensor");
@@ -1130,10 +1131,10 @@ TEST(VulkanComputeGraphTest, test_large_graph) {
 
   // Build graph
 
-  IOValueRef a = graph.add_input_tensor(size_big, api::kFloat, 2);
-  IOValueRef b = graph.add_input_tensor(size_small, api::kFloat, 4);
+  IOValueRef a = graph.add_input_tensor(size_big, vkapi::kFloat, 2);
+  IOValueRef b = graph.add_input_tensor(size_small, vkapi::kFloat, 4);
 
-  ValueRef c = graph.add_tensor(size_big, api::kFloat, 6);
+  ValueRef c = graph.add_tensor(size_big, vkapi::kFloat, 6);
 
   auto addFn = VK_GET_OP_FN("aten.add.Tensor");
   addFn(graph, {a.value, b.value, kDummyValueRef, c});
@@ -1215,15 +1216,15 @@ TEST(VulkanComputeGraphTest, test_etvk_copy_offset_node) {
   int64_t c = 12;
   int64_t h = 4;
   int64_t w = 8;
-  api::GPUMemoryLayout memory_layout =
-      api::GPUMemoryLayout::TENSOR_CHANNELS_PACKED;
+  vkapi::GPUMemoryLayout memory_layout =
+      vkapi::GPUMemoryLayout::TENSOR_CHANNELS_PACKED;
 
   std::vector<int64_t> size = {n, c, h, w};
 
-  IOValueRef a = graph.add_input_tensor(size, api::kFloat, memory_layout);
+  IOValueRef a = graph.add_input_tensor(size, vkapi::kFloat, memory_layout);
 
   IOValueRef out = {};
-  out.value = graph.add_tensor(size, api::kFloat, memory_layout);
+  out.value = graph.add_tensor(size, vkapi::kFloat, memory_layout);
 
   // Notice that copy_node operates on in texture's x, y, z dimension. In the
   // comment, we provide the cooresponding coordinate in nchw.
@@ -1297,15 +1298,15 @@ TEST(VulkanComputeGraphTest, test_etvk_copy_channel_offset_node) {
   int64_t c = 12;
   int64_t h = 4;
   int64_t w = 8;
-  api::GPUMemoryLayout memory_layout =
-      api::GPUMemoryLayout::TENSOR_CHANNELS_PACKED;
+  vkapi::GPUMemoryLayout memory_layout =
+      vkapi::GPUMemoryLayout::TENSOR_CHANNELS_PACKED;
 
   std::vector<int64_t> size = {n, c, h, w};
 
-  IOValueRef a = graph.add_input_tensor(size, api::kFloat, memory_layout);
+  IOValueRef a = graph.add_input_tensor(size, vkapi::kFloat, memory_layout);
 
   IOValueRef out = {};
-  out.value = graph.add_tensor(size, api::kFloat, memory_layout);
+  out.value = graph.add_tensor(size, vkapi::kFloat, memory_layout);
 
   int64_t src_offset = 2;
   int64_t dst_offset = 3;
@@ -1360,17 +1361,17 @@ TEST(
   int64_t c = 12;
   int64_t h = 4;
   int64_t w = 8;
-  api::GPUMemoryLayout memory_layout =
-      api::GPUMemoryLayout::TENSOR_CHANNELS_PACKED;
+  vkapi::GPUMemoryLayout memory_layout =
+      vkapi::GPUMemoryLayout::TENSOR_CHANNELS_PACKED;
 
   std::vector<int64_t> size = {n, c, h, w};
 
-  IOValueRef zero = graph.add_input_tensor(size, api::kFloat, memory_layout);
-  IOValueRef a = graph.add_input_tensor(size, api::kFloat, memory_layout);
-  IOValueRef b = graph.add_input_tensor(size, api::kFloat, memory_layout);
+  IOValueRef zero = graph.add_input_tensor(size, vkapi::kFloat, memory_layout);
+  IOValueRef a = graph.add_input_tensor(size, vkapi::kFloat, memory_layout);
+  IOValueRef b = graph.add_input_tensor(size, vkapi::kFloat, memory_layout);
 
   IOValueRef out = {};
-  out.value = graph.add_tensor(size, api::kFloat, memory_layout);
+  out.value = graph.add_tensor(size, vkapi::kFloat, memory_layout);
 
   auto copyFn = VK_GET_OP_FN("etvk.copy_channel_offset");
 
@@ -1483,15 +1484,15 @@ TEST(VulkanComputeGraphTest, test_etvk_copy_offset_int_node) {
   int64_t c = 12;
   int64_t h = 4;
   int64_t w = 8;
-  api::GPUMemoryLayout memory_layout =
-      api::GPUMemoryLayout::TENSOR_CHANNELS_PACKED;
+  vkapi::GPUMemoryLayout memory_layout =
+      vkapi::GPUMemoryLayout::TENSOR_CHANNELS_PACKED;
 
   std::vector<int64_t> size = {n, c, h, w};
 
-  IOValueRef a = graph.add_input_tensor(size, api::kInt, memory_layout);
+  IOValueRef a = graph.add_input_tensor(size, vkapi::kInt, memory_layout);
 
   IOValueRef out = {};
-  out.value = graph.add_tensor(size, api::kInt, memory_layout);
+  out.value = graph.add_tensor(size, vkapi::kInt, memory_layout);
 
   // Notice that copy_node operates on in texture's x, y, z dimension. In the
   // comment, we provide the cooresponding coordinate in nchw.
@@ -1565,15 +1566,15 @@ TEST(VulkanComputeGraphTest, test_etvk_copy_channel_offset_int_node) {
   int64_t c = 12;
   int64_t h = 4;
   int64_t w = 8;
-  api::GPUMemoryLayout memory_layout =
-      api::GPUMemoryLayout::TENSOR_CHANNELS_PACKED;
+  vkapi::GPUMemoryLayout memory_layout =
+      vkapi::GPUMemoryLayout::TENSOR_CHANNELS_PACKED;
 
   std::vector<int64_t> size = {n, c, h, w};
 
-  IOValueRef a = graph.add_input_tensor(size, api::kFloat, memory_layout);
+  IOValueRef a = graph.add_input_tensor(size, vkapi::kFloat, memory_layout);
 
   IOValueRef out = {};
-  out.value = graph.add_tensor(size, api::kFloat, memory_layout);
+  out.value = graph.add_tensor(size, vkapi::kFloat, memory_layout);
 
   int64_t src_offset = 2;
   int64_t dst_offset = 3;
@@ -1615,17 +1616,17 @@ TEST(VulkanComputeGraphTest, test_etvk_copy_channel_offset_int_node) {
 }
 
 TEST(VulkanComputeGraphTest, test_view_change_packing) {
-  std::vector<std::pair<api::GPUMemoryLayout, api::GPUMemoryLayout>>
+  std::vector<std::pair<vkapi::GPUMemoryLayout, vkapi::GPUMemoryLayout>>
       layout_pairs = {
-          {kWidthPacked, kChannelsPacked},
-          {kWidthPacked, kHeightPacked},
-          {kWidthPacked, kWidthPacked},
-          {kHeightPacked, kChannelsPacked},
-          {kHeightPacked, kHeightPacked},
-          {kHeightPacked, kHeightPacked},
-          {kChannelsPacked, kChannelsPacked},
-          {kChannelsPacked, kHeightPacked},
-          {kChannelsPacked, kHeightPacked},
+          {vkapi::kWidthPacked, vkapi::kChannelsPacked},
+          {vkapi::kWidthPacked, vkapi::kHeightPacked},
+          {vkapi::kWidthPacked, vkapi::kWidthPacked},
+          {vkapi::kHeightPacked, vkapi::kChannelsPacked},
+          {vkapi::kHeightPacked, vkapi::kHeightPacked},
+          {vkapi::kHeightPacked, vkapi::kHeightPacked},
+          {vkapi::kChannelsPacked, vkapi::kChannelsPacked},
+          {vkapi::kChannelsPacked, vkapi::kHeightPacked},
+          {vkapi::kChannelsPacked, vkapi::kHeightPacked},
       };
 
   int64_t n = 3;
@@ -1639,10 +1640,10 @@ TEST(VulkanComputeGraphTest, test_view_change_packing) {
     ComputeGraph graph(config);
 
     IOValueRef in =
-        graph.add_input_tensor(size, api::kFloat, layout_pair.first);
+        graph.add_input_tensor(size, vkapi::kFloat, layout_pair.first);
 
     IOValueRef out = {};
-    out.value = graph.add_tensor(size, api::kFloat, layout_pair.second);
+    out.value = graph.add_tensor(size, vkapi::kFloat, layout_pair.second);
 
     auto viewFn = VK_GET_OP_FN("aten.view_copy.default");
     viewFn(graph, {in.value, graph.add_none(), out.value});
@@ -1674,7 +1675,7 @@ class VulkanToFromGPUShaderTest : public ::testing::Test {
   }
 
   void TearDown() override {
-    api::context()->flush();
+    context()->flush();
 
     // Make sure we are ending with a clean slate
     EXPECT_TRUE(get_vma_allocation_count() == 0);
@@ -1684,29 +1685,27 @@ class VulkanToFromGPUShaderTest : public ::testing::Test {
 template <typename T>
 void run_from_gpu_test(
     std::vector<int64_t>& sizes,
-    api::GPUMemoryLayout memory_layout =
-        api::GPUMemoryLayout::TENSOR_CHANNELS_PACKED,
-    api::ScalarType dtype = api::kFloat,
-    api::StorageType storage_type = api::StorageType::TEXTURE_3D) {
-  if (dtype == api::kHalf &&
-      !api::context()->adapter_ptr()->has_16bit_storage()) {
+    vkapi::GPUMemoryLayout memory_layout =
+        vkapi::GPUMemoryLayout::TENSOR_CHANNELS_PACKED,
+    vkapi::ScalarType dtype = vkapi::kFloat,
+    vkapi::StorageType storage_type = vkapi::StorageType::TEXTURE_3D) {
+  if (dtype == vkapi::kHalf && !context()->adapter_ptr()->has_16bit_storage()) {
     return;
   }
-  if ((dtype == api::kChar || dtype == api::kQInt8) &&
-      !api::context()->adapter_ptr()->has_full_int8_buffers_support()) {
+  if ((dtype == vkapi::kChar || dtype == vkapi::kQInt8) &&
+      !context()->adapter_ptr()->has_full_int8_buffers_support()) {
     return;
   }
-  api::vTensor vten =
-      api::vTensor(api::context(), sizes, dtype, storage_type, memory_layout);
+  vTensor vten = vTensor(context(), sizes, dtype, storage_type, memory_layout);
 
   std::string kernel_name("idx_fill_texture");
   add_memory_layout_suffix(kernel_name, vten);
   add_dtype_suffix(kernel_name, vten);
 
   {
-    api::PipelineBarrier pipeline_barrier{};
-    api::SpecVarList specialization_constants = {vten.packed_dim_whcn_idx()};
-    api::context()->submit_compute_job(
+    vkapi::PipelineBarrier pipeline_barrier{};
+    vkapi::SpecVarList specialization_constants = {vten.packed_dim_whcn_idx()};
+    context()->submit_compute_job(
         VK_KERNEL_FROM_STR(kernel_name),
         pipeline_barrier,
         vten.image_extents(),
@@ -1716,14 +1715,14 @@ void run_from_gpu_test(
         0,
         vten.image(
             pipeline_barrier,
-            api::PipelineStage::COMPUTE,
-            api::MemoryAccessType::WRITE),
+            vkapi::PipelineStage::COMPUTE,
+            vkapi::MemoryAccessType::WRITE),
         vten.sizes_ubo());
   }
 
-  api::StorageBuffer staging_buffer(api::context(), dtype, vten.gpu_numel());
+  StorageBuffer staging_buffer(context(), dtype, vten.gpu_numel());
 
-  record_image_to_nchw_op(api::context(), vten, staging_buffer.buffer());
+  record_image_to_nchw_op(context(), vten, staging_buffer.buffer());
 
   submit_to_gpu();
 
@@ -1738,24 +1737,22 @@ void run_from_gpu_test(
 template <typename T>
 void run_to_gpu_test(
     std::vector<int64_t>& sizes,
-    api::GPUMemoryLayout memory_layout =
-        api::GPUMemoryLayout::TENSOR_CHANNELS_PACKED,
-    api::ScalarType dtype = api::kFloat,
-    api::StorageType storage_type = api::StorageType::TEXTURE_3D) {
-  if (dtype == api::kHalf &&
-      !api::context()->adapter_ptr()->has_16bit_storage()) {
+    vkapi::GPUMemoryLayout memory_layout =
+        vkapi::GPUMemoryLayout::TENSOR_CHANNELS_PACKED,
+    vkapi::ScalarType dtype = vkapi::kFloat,
+    vkapi::StorageType storage_type = vkapi::StorageType::TEXTURE_3D) {
+  if (dtype == vkapi::kHalf && !context()->adapter_ptr()->has_16bit_storage()) {
     return;
   }
-  if ((dtype == api::kChar || dtype == api::kQInt8) &&
-      !api::context()->adapter_ptr()->has_full_int8_buffers_support()) {
+  if ((dtype == vkapi::kChar || dtype == vkapi::kQInt8) &&
+      !context()->adapter_ptr()->has_full_int8_buffers_support()) {
     return;
   }
 
-  api::vTensor vten =
-      api::vTensor(api::context(), sizes, dtype, storage_type, memory_layout);
+  vTensor vten = vTensor(context(), sizes, dtype, storage_type, memory_layout);
 
   // Create and fill input staging buffer
-  api::StorageBuffer staging_buffer_in(api::context(), dtype, vten.gpu_numel());
+  StorageBuffer staging_buffer_in(context(), dtype, vten.gpu_numel());
 
   std::vector<T> data_in(staging_buffer_in.numel());
   for (int i = 0; i < staging_buffer_in.numel(); i++) {
@@ -1764,12 +1761,11 @@ void run_to_gpu_test(
   copy_ptr_to_staging(data_in.data(), staging_buffer_in, vten.gpu_nbytes());
 
   // Output staging buffer
-  api::StorageBuffer staging_buffer_out(
-      api::context(), dtype, vten.gpu_numel());
+  StorageBuffer staging_buffer_out(context(), dtype, vten.gpu_numel());
 
   // Copy data in and out of the tensor
-  record_nchw_to_image_op(api::context(), staging_buffer_in.buffer(), vten);
-  record_image_to_nchw_op(api::context(), vten, staging_buffer_out.buffer());
+  record_nchw_to_image_op(context(), staging_buffer_in.buffer(), vten);
+  record_image_to_nchw_op(context(), vten, staging_buffer_out.buffer());
 
   // Execute command buffer
   submit_to_gpu();
@@ -1827,21 +1823,21 @@ TEST(VulkanToFromGPUShaderTest, to_gpu_and_from_gpu_test_texture) {
       {7, 1, 6, 3},
   };
 
-#define RUN_TESTS(ctype, dtype)                                    \
-  run_to_gpu_test<ctype>(                                          \
-      sizes, api::GPUMemoryLayout::TENSOR_CHANNELS_PACKED, dtype); \
-  run_to_gpu_test<ctype>(                                          \
-      sizes, api::GPUMemoryLayout::TENSOR_WIDTH_PACKED, dtype);    \
-  run_to_gpu_test<ctype>(                                          \
-      sizes, api::GPUMemoryLayout::TENSOR_HEIGHT_PACKED, dtype);
+#define RUN_TESTS(ctype, dtype)                                      \
+  run_to_gpu_test<ctype>(                                            \
+      sizes, vkapi::GPUMemoryLayout::TENSOR_CHANNELS_PACKED, dtype); \
+  run_to_gpu_test<ctype>(                                            \
+      sizes, vkapi::GPUMemoryLayout::TENSOR_WIDTH_PACKED, dtype);    \
+  run_to_gpu_test<ctype>(                                            \
+      sizes, vkapi::GPUMemoryLayout::TENSOR_HEIGHT_PACKED, dtype);
 
   for (auto& sizes : to_test) {
-    RUN_TESTS(float, api::kFloat)
-    RUN_TESTS(torch::executor::Half, api::kHalf)
+    RUN_TESTS(float, vkapi::kFloat)
+    RUN_TESTS(torch::executor::Half, vkapi::kHalf)
   }
 
   for (auto& sizes : to_test_int8) {
-    RUN_TESTS(int8_t, api::kChar);
+    RUN_TESTS(int8_t, vkapi::kChar);
   }
 
 #undef RUN_TESTS
@@ -1855,8 +1851,8 @@ void test_binary_op(
     std::string op_name,
     std::vector<int64_t> sizes_big,
     std::vector<int64_t> sizes_small,
-    api::ScalarType dtype,
-    api::GPUMemoryLayout memory_layout,
+    vkapi::ScalarType dtype,
+    vkapi::GPUMemoryLayout memory_layout,
     bool prepack = true) {
   GraphConfig config;
   ComputeGraph graph(config);
@@ -1915,21 +1911,21 @@ void test_binary_op(
   }
 }
 
-#define CALL_TEST_FN_FORALL_CONDITIONS(_)                             \
-  _(api::kFloat, api::GPUMemoryLayout::TENSOR_WIDTH_PACKED, false)    \
-  _(api::kFloat, api::GPUMemoryLayout::TENSOR_HEIGHT_PACKED, false)   \
-  _(api::kFloat, api::GPUMemoryLayout::TENSOR_CHANNELS_PACKED, false) \
-  _(api::kFloat, api::GPUMemoryLayout::TENSOR_WIDTH_PACKED, true)     \
-  _(api::kFloat, api::GPUMemoryLayout::TENSOR_HEIGHT_PACKED, true)    \
-  _(api::kFloat, api::GPUMemoryLayout::TENSOR_CHANNELS_PACKED, true)
+#define CALL_TEST_FN_FORALL_CONDITIONS(_)                                 \
+  _(vkapi::kFloat, vkapi::GPUMemoryLayout::TENSOR_WIDTH_PACKED, false)    \
+  _(vkapi::kFloat, vkapi::GPUMemoryLayout::TENSOR_HEIGHT_PACKED, false)   \
+  _(vkapi::kFloat, vkapi::GPUMemoryLayout::TENSOR_CHANNELS_PACKED, false) \
+  _(vkapi::kFloat, vkapi::GPUMemoryLayout::TENSOR_WIDTH_PACKED, true)     \
+  _(vkapi::kFloat, vkapi::GPUMemoryLayout::TENSOR_HEIGHT_PACKED, true)    \
+  _(vkapi::kFloat, vkapi::GPUMemoryLayout::TENSOR_CHANNELS_PACKED, true)
 
-#define CALL_TEST_FN_FOR_W_PACKED(_)                               \
-  _(api::kFloat, api::GPUMemoryLayout::TENSOR_WIDTH_PACKED, false) \
-  _(api::kFloat, api::GPUMemoryLayout::TENSOR_WIDTH_PACKED, true)
+#define CALL_TEST_FN_FOR_W_PACKED(_)                                   \
+  _(vkapi::kFloat, vkapi::GPUMemoryLayout::TENSOR_WIDTH_PACKED, false) \
+  _(vkapi::kFloat, vkapi::GPUMemoryLayout::TENSOR_WIDTH_PACKED, true)
 
-#define CALL_TEST_FN_FOR_C_PACKED(_)                                  \
-  _(api::kFloat, api::GPUMemoryLayout::TENSOR_CHANNELS_PACKED, false) \
-  _(api::kFloat, api::GPUMemoryLayout::TENSOR_CHANNELS_PACKED, true)
+#define CALL_TEST_FN_FOR_C_PACKED(_)                                      \
+  _(vkapi::kFloat, vkapi::GPUMemoryLayout::TENSOR_CHANNELS_PACKED, false) \
+  _(vkapi::kFloat, vkapi::GPUMemoryLayout::TENSOR_CHANNELS_PACKED, true)
 
 TEST(VulkanComputeGraphOpsTest, add_smoke_test) {
 #define RUN_TESTS(dtype, layout, prepack)                                  \
@@ -1952,8 +1948,8 @@ void test_mm(
     int M,
     int K,
     int N,
-    api::ScalarType dtype,
-    api::GPUMemoryLayout memory_layout,
+    vkapi::ScalarType dtype,
+    vkapi::GPUMemoryLayout memory_layout,
     bool prepack = true) {
   GraphConfig config;
   ComputeGraph graph(config);
@@ -2068,13 +2064,13 @@ void test_max_pool2d(
   out_size[w] = in_size[w] - kernel[1] + 1;
 
   IOValueRef in_ioval = graph.add_input_tensor(
-      in_size, api::kFloat, api::GPUMemoryLayout::TENSOR_CHANNELS_PACKED);
+      in_size, vkapi::kFloat, vkapi::GPUMemoryLayout::TENSOR_CHANNELS_PACKED);
   IOValueRef out_ioval;
   out_ioval.value = graph.add_tensor(
-      out_size, api::kFloat, api::GPUMemoryLayout::TENSOR_CHANNELS_PACKED);
+      out_size, vkapi::kFloat, vkapi::GPUMemoryLayout::TENSOR_CHANNELS_PACKED);
   IOValueRef idx_ioval;
   idx_ioval.value = graph.add_tensor(
-      out_size, api::kInt, api::GPUMemoryLayout::TENSOR_CHANNELS_PACKED);
+      out_size, vkapi::kInt, vkapi::GPUMemoryLayout::TENSOR_CHANNELS_PACKED);
   ValueRef out = graph.add_value_list({out_ioval.value, idx_ioval.value});
 
   std::vector<int64_t> kernel_copy(kernel);
@@ -2147,16 +2143,16 @@ void test_conv2d(
     const std::vector<int64_t>& gpu_sizes,
     const bool transposed,
     const std::vector<float>& data_out_expected) {
-  api::vTensor vten = api::vTensor(
-      api::context(),
+  vTensor vten = vTensor(
+      context(),
       gpu_sizes,
-      api::kFloat,
-      api::StorageType::TEXTURE_2D,
-      api::GPUMemoryLayout::TENSOR_CHANNELS_PACKED);
+      vkapi::kFloat,
+      vkapi::StorageType::TEXTURE_2D,
+      vkapi::GPUMemoryLayout::TENSOR_CHANNELS_PACKED);
 
   // Create and fill input staging buffer
   const int64_t in_numel = utils::multiply_integers(original_sizes);
-  api::StorageBuffer staging_buffer_in(api::context(), api::kFloat, in_numel);
+  StorageBuffer staging_buffer_in(context(), vkapi::kFloat, in_numel);
 
   std::vector<float> data_in(in_numel);
   for (int i = 0; i < in_numel; i++) {
@@ -2168,16 +2164,12 @@ void test_conv2d(
   // Output staging buffer
   const int64_t out_numel =
       padded_sizes[0] * padded_sizes[1] * original_sizes[2] * original_sizes[3];
-  api::StorageBuffer staging_buffer_out(api::context(), api::kFloat, out_numel);
+  StorageBuffer staging_buffer_out(context(), vkapi::kFloat, out_numel);
 
   // Copy data in and out of the tensor
   record_conv2d_prepack_weights_op(
-      api::context(),
-      staging_buffer_in.buffer(),
-      vten,
-      original_sizes,
-      transposed);
-  record_image_to_nchw_op(api::context(), vten, staging_buffer_out.buffer());
+      context(), staging_buffer_in.buffer(), vten, original_sizes, transposed);
+  record_image_to_nchw_op(context(), vten, staging_buffer_out.buffer());
 
   // Execute command buffer
   submit_to_gpu();