diff --git a/src/runtime/vulkan/vulkan_amdrgp.cc b/src/runtime/vulkan/vulkan_amdrgp.cc new file mode 100644 index 000000000000..54e566410f49 --- /dev/null +++ b/src/runtime/vulkan/vulkan_amdrgp.cc @@ -0,0 +1,53 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +#include "vulkan_device.h" + +namespace tvm { +namespace runtime { +namespace vulkan { + +VulkanStreamProfiler::VulkanStreamProfiler(const VulkanDevice* device) + : device_(device), curr_state_(READY), available_(device->UseDebugUtilsLabel()) {} + +void AmdRgpProfiler::capture() { + if (!available_) { // device has no debug-utils label function table + return; + } + + // READY -> RUNNING: dummy present (AmdFrameEnd, AmdFrameBegin labels) triggers RGP capture + if (curr_state_ == READY) { + VkDebugUtilsLabelEXT frame_end_label = {VK_STRUCTURE_TYPE_DEBUG_UTILS_LABEL_EXT, nullptr, + "AmdFrameEnd", {0.0f, 0.0f, 0.0f, 0.0f}}; + device_->queue_insert_debug_utils_label_functions->vkQueueInsertDebugUtilsLabelEXT( + device_->Queue(), &frame_end_label); + + VkDebugUtilsLabelEXT frame_begin_label = {VK_STRUCTURE_TYPE_DEBUG_UTILS_LABEL_EXT, nullptr, + "AmdFrameBegin", {0.0f, 0.0f, 0.0f, 0.0f}}; + device_->queue_insert_debug_utils_label_functions->vkQueueInsertDebugUtilsLabelEXT( + device_->Queue(), &frame_begin_label); + + // Later capture() calls are no-ops until reset() then ready() restore READY + curr_state_ = RUNNING; + } +} + +} // namespace vulkan +} // namespace runtime +} // namespace tvm diff --git a/src/runtime/vulkan/vulkan_amdrgp.h b/src/runtime/vulkan/vulkan_amdrgp.h new file mode 100644 index 000000000000..aa090eeaa829 --- /dev/null +++ b/src/runtime/vulkan/vulkan_amdrgp.h @@ -0,0 +1,63 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#ifndef TVM_RUNTIME_VULKAN_VULKAN_AMDRGP_H_ +#define TVM_RUNTIME_VULKAN_VULKAN_AMDRGP_H_ + +namespace tvm { +namespace runtime { +namespace vulkan { + +class VulkanDevice; + +class VulkanStreamProfiler { + public: + enum state { READY = 0, RUNNING, RESET }; + + explicit VulkanStreamProfiler(const VulkanDevice* device); + + virtual ~VulkanStreamProfiler() = default; + + virtual void reset() { curr_state_ = RESET; } + + virtual void ready() { + if (curr_state_ == RESET) { + curr_state_ = READY; + } + } + + virtual void capture() = 0; + + protected: + const VulkanDevice* device_; // never deleted by the profiler + state curr_state_; + bool available_; // VulkanDevice::UseDebugUtilsLabel() sampled at construction +}; + +class AmdRgpProfiler : public VulkanStreamProfiler { + public: + explicit AmdRgpProfiler(const VulkanDevice* device) : VulkanStreamProfiler(device) {} + + void capture() override; +}; + +} // namespace vulkan +} // namespace runtime +} // namespace tvm +#endif // TVM_RUNTIME_VULKAN_VULKAN_AMDRGP_H_ diff --git a/src/runtime/vulkan/vulkan_device.cc b/src/runtime/vulkan/vulkan_device.cc index 29908bed8189..7a6b92943c90 100644 --- a/src/runtime/vulkan/vulkan_device.cc +++ b/src/runtime/vulkan/vulkan_device.cc @@ -228,6 +228,12 @@ VulkanGetBufferMemoryRequirements2Functions::VulkanGetBufferMemoryRequirements2F vkGetDeviceProcAddr(device, "vkGetBufferMemoryRequirements2KHR")); } +VulkanQueueInsertDebugUtilsLabelFunctions::VulkanQueueInsertDebugUtilsLabelFunctions( + VkInstance instance) { + vkQueueInsertDebugUtilsLabelEXT = (PFN_vkQueueInsertDebugUtilsLabelEXT)ICHECK_NOTNULL( + vkGetInstanceProcAddr(instance, 
"vkQueueInsertDebugUtilsLabelEXT")); +} + VulkanDevice::VulkanDevice(const VulkanInstance& instance, VkPhysicalDevice phy_device) : physical_device_(phy_device) { queue_family_index = SelectComputeQueueFamily(); @@ -325,6 +331,11 @@ VulkanDevice::VulkanDevice(const VulkanInstance& instance, VkPhysicalDevice phy_ get_buffer_memory_requirements_2_functions = std::make_unique(device_); } + + if (instance.HasExtension("VK_EXT_debug_utils")) { + queue_insert_debug_utils_label_functions = + std::make_unique<VulkanQueueInsertDebugUtilsLabelFunctions>(instance); + } } VulkanDevice::~VulkanDevice() { @@ -363,6 +374,8 @@ void VulkanDevice::do_swap(VulkanDevice&& other) { std::swap(descriptor_template_khr_functions, other.descriptor_template_khr_functions); std::swap(get_buffer_memory_requirements_2_functions, other.get_buffer_memory_requirements_2_functions); + std::swap(queue_insert_debug_utils_label_functions, + other.queue_insert_debug_utils_label_functions); std::swap(compute_mtype_index, other.compute_mtype_index); std::swap(queue, other.queue); std::swap(queue_family_index, other.queue_family_index); diff --git a/src/runtime/vulkan/vulkan_device.h b/src/runtime/vulkan/vulkan_device.h index 3ca2d093bf1d..a1257a732aff 100644 --- a/src/runtime/vulkan/vulkan_device.h +++ b/src/runtime/vulkan/vulkan_device.h @@ -57,6 +57,12 @@ struct VulkanGetBufferMemoryRequirements2Functions { PFN_vkGetBufferMemoryRequirements2KHR vkGetBufferMemoryRequirements2KHR{nullptr}; }; +struct VulkanQueueInsertDebugUtilsLabelFunctions { + explicit VulkanQueueInsertDebugUtilsLabelFunctions(VkInstance instance); + + PFN_vkQueueInsertDebugUtilsLabelEXT vkQueueInsertDebugUtilsLabelEXT{nullptr}; +}; + /*! * \brief Stores the capabilities/limits queried from the physical device. 
* @@ -212,6 +218,8 @@ class VulkanDevice { std::unique_ptr descriptor_template_khr_functions{nullptr}; std::unique_ptr get_buffer_memory_requirements_2_functions{nullptr}; + std::unique_ptr<VulkanQueueInsertDebugUtilsLabelFunctions> + queue_insert_debug_utils_label_functions{nullptr}; // Memory type index for compute uint32_t compute_mtype_index{0}; @@ -220,6 +228,10 @@ class VulkanDevice { bool UseImmediate() const { return descriptor_template_khr_functions != nullptr; } + bool UseDebugUtilsLabel() const { return queue_insert_debug_utils_label_functions != nullptr; } + + VkQueue Queue() const { return queue; } + private: /*! \brief Helper function for move assignment/construction * diff --git a/src/runtime/vulkan/vulkan_device_api.cc b/src/runtime/vulkan/vulkan_device_api.cc index 676f14667d70..93f017a5aa66 100644 --- a/src/runtime/vulkan/vulkan_device_api.cc +++ b/src/runtime/vulkan/vulkan_device_api.cc @@ -367,6 +367,7 @@ void VulkanDeviceAPI::CopyDataFromTo(const void* from, size_t from_offset, void* &copy_info); }); stream.Synchronize(); + stream.ProfilerReset(); if (!device.coherent_staging) { VkMappedMemoryRange mrange; mrange.sType = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE; @@ -413,6 +414,8 @@ void VulkanDeviceAPI::CopyDataFromTo(const void* from, size_t from_offset, void* vkCmdCopyBuffer(state->cmd_buffer_, staging_buffer.vk_buf.buffer, to_buf->buffer, 1, &copy_info); }); + + stream.ProfilerReady(); // TODO(tulloch): should we instead make the staging buffer a property of the // Stream? This would allow us to elide synchronizations here. stream.Synchronize(); diff --git a/src/runtime/vulkan/vulkan_instance.cc b/src/runtime/vulkan/vulkan_instance.cc index b8295d2cd605..a77531a5214f 100644 --- a/src/runtime/vulkan/vulkan_instance.cc +++ b/src/runtime/vulkan/vulkan_instance.cc @@ -59,6 +59,13 @@ VulkanInstance::VulkanInstance() { std::vector required_extensions{}; std::vector optional_extensions{"VK_KHR_get_physical_device_properties2"}; + // Check if RGP support is needed. 
If needed, enable VK_EXT_debug_utils extension for + // inserting debug labels into the queue. + if (support::BoolEnvironmentVar("TVM_USE_AMD_RGP")) { + LOG(INFO) << "Push VK_EXT_debug_utils"; + required_extensions.push_back("VK_EXT_debug_utils"); + } + uint32_t inst_extension_prop_count; VULKAN_CALL( vkEnumerateInstanceExtensionProperties(nullptr, &inst_extension_prop_count, nullptr)); diff --git a/src/runtime/vulkan/vulkan_stream.cc b/src/runtime/vulkan/vulkan_stream.cc index 3eff112a6eea..5cdb5768924b 100644 --- a/src/runtime/vulkan/vulkan_stream.cc +++ b/src/runtime/vulkan/vulkan_stream.cc @@ -19,6 +19,7 @@ #include "vulkan_stream.h" +#include "../../support/utils.h" #include "vulkan_device.h" namespace tvm { @@ -55,11 +56,19 @@ VulkanStream::VulkanStream(const VulkanDevice* device) cb_begin.flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT; cb_begin.pInheritanceInfo = 0; VULKAN_CALL(vkBeginCommandBuffer(state_->cmd_buffer_, &cb_begin)); + + if (support::BoolEnvironmentVar("TVM_USE_AMD_RGP")) { + profiler_ = new AmdRgpProfiler(device_); + } } VulkanStream::~VulkanStream() { vkDestroyFence(*device_, state_->fence_, nullptr); vkDestroyCommandPool(*device_, cmd_pool_, nullptr); + + if (profiler_) { + delete profiler_; // allocated with new in the constructor + } } void VulkanStream::Launch(const std::function& kernel) { @@ -132,6 +141,10 @@ void VulkanStream::Synchronize() { cb_submit.signalSemaphoreCount = 0; cb_submit.pSignalSemaphores = nullptr; + if (profiler_) { + profiler_->capture(); + } + device_->QueueSubmit(cb_submit, state_->fence_); uint64_t timeout = 1UL << 30UL; diff --git a/src/runtime/vulkan/vulkan_stream.h b/src/runtime/vulkan/vulkan_stream.h index fb4e447c15e1..742a66f15dd4 100644 --- a/src/runtime/vulkan/vulkan_stream.h +++ b/src/runtime/vulkan/vulkan_stream.h @@ -25,6 +25,7 @@ #include #include +#include "vulkan_amdrgp.h" #include "vulkan_common.h" namespace tvm { @@ -99,6 +100,20 @@ class VulkanStream { const std::function& deferred_kernel, const VulkanStreamToken& 
deferred_token); + // Forward to the profiler's reset(); does nothing when no profiler was created. + void ProfilerReset() { + if (profiler_) { + profiler_->reset(); + } + } + + // Forward to the profiler's ready(); does nothing when no profiler was created. + void ProfilerReady() { + if (profiler_) { + profiler_->ready(); + } + } + // Synchronize the current stream `state_` with respect to the host. void Synchronize(); @@ -110,6 +125,7 @@ class VulkanStream { std::unordered_map> deferred_tokens_; std::vector> deferred_kernels_; VkCommandPool cmd_pool_; + VulkanStreamProfiler* profiler_ = nullptr; }; } // namespace vulkan diff --git a/src/runtime/vulkan/vulkan_wrapped_func.cc b/src/runtime/vulkan/vulkan_wrapped_func.cc index 0712f723bb64..f06ca5043b01 100644 --- a/src/runtime/vulkan/vulkan_wrapped_func.cc +++ b/src/runtime/vulkan/vulkan_wrapped_func.cc @@ -98,6 +98,15 @@ void VulkanWrappedFunc::operator()(TVMArgs args, TVMRetValue* rv, vkCmdPipelineBarrier(state->cmd_buffer_, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT | VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0, 1, &barrier_info, 0, nullptr, 0, nullptr); + + if (device.UseDebugUtilsLabel()) { + VkDebugUtilsLabelEXT dispatch_label = {VK_STRUCTURE_TYPE_DEBUG_UTILS_LABEL_EXT, + nullptr, + func_name_.c_str(), + {0.0f, 0.0f, 0.0f, 0.0f}}; + device.queue_insert_debug_utils_label_functions->vkQueueInsertDebugUtilsLabelEXT( + device.Queue(), &dispatch_label); + } }); return; } @@ -164,6 +173,15 @@ void VulkanWrappedFunc::operator()(TVMArgs args, TVMRetValue* rv, deferred_token.buffers_[i] = descriptor_buffers[i].buffer; } device.ThreadLocalStream().LaunchDeferred(deferred_initializer, deferred_kernel, deferred_token); + + if (device.UseDebugUtilsLabel()) { + VkDebugUtilsLabelEXT dispatch_label = {VK_STRUCTURE_TYPE_DEBUG_UTILS_LABEL_EXT, + nullptr, + func_name_.c_str(), + {0.0f, 0.0f, 0.0f, 0.0f}}; + device.queue_insert_debug_utils_label_functions->vkQueueInsertDebugUtilsLabelEXT( + device.Queue(), &dispatch_label); + } } VulkanModuleNode::~VulkanModuleNode() {