From 15983bac6e67387232c9051e6876377818c82522 Mon Sep 17 00:00:00 2001 From: Jorge Pineda Date: Wed, 8 May 2024 09:36:11 -0700 Subject: [PATCH] [ET-VK] Use VkPipelineCache file if path is specified ## Context Pipeline creation involves the compilation of shader SPIR-V code into machine-specific code. This makes the application's first model-load via the `Program::load_method()` ET-API very slow, due to the creation of compute pipelines via the `vkCreateComputePipelines()` VK-API. To amortize that cost, Vulkan offers a [Compute Pipeline Cache API](https://docs.vulkan.org/guide/latest/pipeline_cache.html). Following [this Vulkan example](https://github.com/KhronosGroup/Vulkan-Samples/tree/main/samples/performance/pipeline_cache), we can (1) retrieve the compiled machine-specific code and save it to a file, and (2) load it from that file next time. For an internal model executing on a resource-constrained device, this improves model-load time from ~1200ms to ~500ms. ## This change We implement the logic for (2), though this change is a no-op unless you initialize the `pipeline_cache_file_path` manually. The expectation is for the client application to specify the file path of their pipeline cache data if they want to leverage this optimization. Before that's ready, we will A. Expose this file_path config parameter to the ET-API, and B. Demonstrate (1), i.e., how to retrieve the data and save it to a file. 
Differential Revision: [D57085276](https://our.internmc.facebook.com/intern/diff/D57085276/) [ghstack-poisoned] --- backends/vulkan/runtime/api/Adapter.cpp | 5 ++-- backends/vulkan/runtime/api/Adapter.h | 3 +- backends/vulkan/runtime/api/Pipeline.cpp | 36 +++++++++++++++++++++--- backends/vulkan/runtime/api/Pipeline.h | 4 ++- backends/vulkan/runtime/api/Runtime.cpp | 7 ++++- backends/vulkan/runtime/api/Runtime.h | 1 + 6 files changed, 47 insertions(+), 9 deletions(-) diff --git a/backends/vulkan/runtime/api/Adapter.cpp b/backends/vulkan/runtime/api/Adapter.cpp index 5db2642e3ec..ca8bceb5258 100644 --- a/backends/vulkan/runtime/api/Adapter.cpp +++ b/backends/vulkan/runtime/api/Adapter.cpp @@ -292,7 +292,8 @@ DeviceHandle::~DeviceHandle() { Adapter::Adapter( VkInstance instance, PhysicalDevice physical_device, - const uint32_t num_queues) + const uint32_t num_queues, + const std::string& pipeline_cache_file_path) : queue_usage_mutex_{}, physical_device_(std::move(physical_device)), queues_{}, @@ -307,7 +308,7 @@ Adapter::Adapter( shader_layout_cache_(device_.handle_), shader_cache_(device_.handle_), pipeline_layout_cache_(device_.handle_), - compute_pipeline_cache_(device_.handle_), + compute_pipeline_cache_(device_.handle_, pipeline_cache_file_path), sampler_cache_(device_.handle_), vma_(instance_, physical_device_.handle, device_.handle_) {} diff --git a/backends/vulkan/runtime/api/Adapter.h b/backends/vulkan/runtime/api/Adapter.h index b038aea9fa8..d5529774e1f 100644 --- a/backends/vulkan/runtime/api/Adapter.h +++ b/backends/vulkan/runtime/api/Adapter.h @@ -99,7 +99,8 @@ class Adapter final { explicit Adapter( VkInstance instance, PhysicalDevice physical_device, - const uint32_t num_queues); + const uint32_t num_queues, + const std::string& pipeline_cache_file_path); Adapter(const Adapter&) = delete; Adapter& operator=(const Adapter&) = delete; diff --git a/backends/vulkan/runtime/api/Pipeline.cpp b/backends/vulkan/runtime/api/Pipeline.cpp index 
f4be0039e67..6c845a698ef 100644 --- a/backends/vulkan/runtime/api/Pipeline.cpp +++ b/backends/vulkan/runtime/api/Pipeline.cpp @@ -8,6 +8,8 @@ #include +#include + namespace vkcompute { namespace api { @@ -358,23 +360,49 @@ void PipelineLayoutCache::purge() { // ComputePipelineCache // -ComputePipelineCache::ComputePipelineCache(VkDevice device) +ComputePipelineCache::ComputePipelineCache( + VkDevice device, + const std::string& file_path) : cache_mutex_{}, device_(device), pipeline_cache_{VK_NULL_HANDLE}, cache_{} { - const VkPipelineCacheCreateInfo pipeline_cache_create_info{ + VkPipelineCacheCreateInfo pipeline_cache_create_info{}; + + auto buffer = get_cache_data(file_path); + + pipeline_cache_create_info = { VK_STRUCTURE_TYPE_PIPELINE_CACHE_CREATE_INFO, // sType nullptr, // pNext 0u, // flags - 0u, // initialDataSize - nullptr, // pInitialData + buffer.size(), // initialDataSize + buffer.data(), // pInitialData }; VK_CHECK(vkCreatePipelineCache( device, &pipeline_cache_create_info, nullptr, &pipeline_cache_)); } +std::vector ComputePipelineCache::get_cache_data( + const std::string& file_path) { + if (file_path.empty()) { + return {}; + } + + std::ifstream file(file_path, std::ios::binary | std::ios::ate); + if (file.fail()) { + VK_THROW("Failed to open pipeline cache file: " + file_path); + } + auto size = file.tellg(); + file.seekg(0, std::ios::beg); + + std::vector buffer(size); + file.read(buffer.data(), size); + file.close(); + + return buffer; +} + ComputePipelineCache::ComputePipelineCache( ComputePipelineCache&& other) noexcept : cache_mutex_{}, diff --git a/backends/vulkan/runtime/api/Pipeline.h b/backends/vulkan/runtime/api/Pipeline.h index b8c16efd910..e4436fe33b6 100644 --- a/backends/vulkan/runtime/api/Pipeline.h +++ b/backends/vulkan/runtime/api/Pipeline.h @@ -214,7 +214,7 @@ class PipelineLayoutCache final { class ComputePipelineCache final { public: - explicit ComputePipelineCache(VkDevice device); + explicit ComputePipelineCache(VkDevice 
device, const std::string& file_path); ComputePipelineCache(const ComputePipelineCache&) = delete; ComputePipelineCache& operator=(const ComputePipelineCache&) = delete; @@ -264,6 +264,8 @@ class ComputePipelineCache final { }; private: + std::vector get_cache_data(const std::string& file_path); + // Multiple threads could potentially be adding entries into the cache, so use // a mutex to manage access std::mutex cache_mutex_; diff --git a/backends/vulkan/runtime/api/Runtime.cpp b/backends/vulkan/runtime/api/Runtime.cpp index ebed34162f3..b0b67faa11b 100644 --- a/backends/vulkan/runtime/api/Runtime.cpp +++ b/backends/vulkan/runtime/api/Runtime.cpp @@ -253,12 +253,14 @@ std::unique_ptr init_global_vulkan_runtime() { #endif /* VULKAN_DEBUG */ const bool init_default_device = true; const uint32_t num_requested_queues = 1; // TODO: raise this value + const std::string pipeline_cache_file_path = ""; // TODO: expose to client const RuntimeConfiguration default_config{ enable_validation_messages, init_default_device, AdapterSelector::First, num_requested_queues, + pipeline_cache_file_path, }; try { @@ -351,7 +353,10 @@ uint32_t Runtime::create_adapter(const Selector& selector) { // Otherwise, create an adapter for the selected physical device adapter_i = utils::safe_downcast(adapters_.size()); adapters_.emplace_back(new Adapter( - instance_, device_mapping.first, config_.num_requested_queues)); + instance_, + device_mapping.first, + config_.num_requested_queues, + config_.pipeline_cache_file_path)); device_mapping.second = adapter_i; return adapter_i; diff --git a/backends/vulkan/runtime/api/Runtime.h b/backends/vulkan/runtime/api/Runtime.h index 6cfcc0ca03a..bfb533ffc3d 100644 --- a/backends/vulkan/runtime/api/Runtime.h +++ b/backends/vulkan/runtime/api/Runtime.h @@ -39,6 +39,7 @@ struct RuntimeConfiguration final { bool init_default_device; AdapterSelector default_selector; uint32_t num_requested_queues; + std::string pipeline_cache_file_path; }; class Runtime final {