diff --git a/CMakeLists.txt b/CMakeLists.txt index 40004729a..49b263bfe 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -300,6 +300,15 @@ if(MLLM_BUILD_SDK_C_BINDING) RUNTIME DESTINATION bin) endif() +if(MLLM_TRACY_ENABLE) + install( + TARGETS MllmTracy + EXPORT MllmTargets + LIBRARY DESTINATION lib + ARCHIVE DESTINATION lib + RUNTIME DESTINATION bin) +endif() + install( DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/mllm/ DESTINATION include/mllm diff --git a/mllm/CMakeLists.txt b/mllm/CMakeLists.txt index c75928605..8d0dbca38 100644 --- a/mllm/CMakeLists.txt +++ b/mllm/CMakeLists.txt @@ -80,6 +80,7 @@ endif() if(MLLM_TRACY_ENABLE) add_subdirectory(tracy_perf) + target_link_libraries(MllmRT PUBLIC MllmTracy) endif() # Host backend will be build by default diff --git a/mllm/backends/cpu/CPUAllocator.cpp b/mllm/backends/cpu/CPUAllocator.cpp index 0358b85c3..24d8b6398 100644 --- a/mllm/backends/cpu/CPUAllocator.cpp +++ b/mllm/backends/cpu/CPUAllocator.cpp @@ -3,6 +3,7 @@ #include "mllm/backends/cpu/CPUAllocator.hpp" #include "mllm/backends/cpu/kernels/Kernels.hpp" +#include "mllm/tracy_perf/Tracy.hpp" namespace mllm::cpu { @@ -18,12 +19,20 @@ void align_alloc(void** ptr, size_t required_bytes, size_t align) { *ptr = nullptr; return; } +#if defined(MLLM_TRACY_ENABLE) && MLLM_TRACY_ENABLE == 1 + TracyAlloc(p1, required_bytes + offset); +#endif p2 = (void**)(((size_t)(p1) + offset) & ~(align - 1)); // NOLINT p2[-1] = p1; *ptr = p2; } -void align_free(void* ptr) { free(((void**)ptr)[-1]); } +void align_free(void* ptr) { +#if defined(MLLM_TRACY_ENABLE) && MLLM_TRACY_ENABLE == 1 + TracyFree(((void**)ptr)[-1]); +#endif + free(((void**)ptr)[-1]); +} bool CPUAllocator::alloc(Storage* storage) { void* ptr; diff --git a/mllm/backends/cpu/CPUDispatcher.cpp b/mllm/backends/cpu/CPUDispatcher.cpp index 1316ff258..65c71a47a 100644 --- a/mllm/backends/cpu/CPUDispatcher.cpp +++ b/mllm/backends/cpu/CPUDispatcher.cpp @@ -5,6 +5,7 @@ #include "mllm/engine/Dispatcher.hpp" #include 
"mllm/utils/Common.hpp" #include "mllm/nn/Module.hpp" +#include "mllm/tracy_perf/Tracy.hpp" #ifdef MLLM_PERFETTO_ENABLE #include "mllm/engine/Perf.hpp" @@ -39,6 +40,7 @@ TaskResult::sender_t CPUDispatcher::asyncReceive(const Task::ptr_t& task) { } void CPUDispatcher::process(const Task::ptr_t& task) { + MLLM_TRACY_ZONE_SCOPED; switch (task->type) { case TaskTypes::kExecuteOp: { #ifdef MLLM_PERFETTO_ENABLE diff --git a/mllm/engine/Context.cpp b/mllm/engine/Context.cpp index a16bb4b7d..c4e178b55 100644 --- a/mllm/engine/Context.cpp +++ b/mllm/engine/Context.cpp @@ -7,6 +7,7 @@ #include "mllm/engine/Context.hpp" #include "mllm/engine/SessionTCB.hpp" #include "mllm/engine/DispatcherManager.hpp" +#include "mllm/tracy_perf/Tracy.hpp" namespace mllm { @@ -42,6 +43,7 @@ Backend::ptr_t Context::getBackend(const DeviceTypes& device) { std::vector Context::buildOpAndSubmitTask(OpTypes op_type, const BaseOpOptionsBase& base_options, const std::vector& inputs, DeviceTypes special_device) { + MLLM_TRACY_ZONE_SCOPED; auto device = special_device != kDeviceTypes_End ? 
special_device : inputs[0].device(); // If input device and special device are different, prefer non-CPU device diff --git a/mllm/engine/DispatcherManager.cpp b/mllm/engine/DispatcherManager.cpp index 4e905e935..6ff0e62cf 100644 --- a/mllm/engine/DispatcherManager.cpp +++ b/mllm/engine/DispatcherManager.cpp @@ -5,6 +5,7 @@ #include "exec/static_thread_pool.hpp" #include "mllm/utils/Common.hpp" #include "mllm/engine/Context.hpp" +#include "mllm/tracy_perf/Tracy.hpp" namespace mllm { @@ -14,7 +15,10 @@ DispatcherManager::DispatcherManager(const DispatcherManagerOptions& options) exec::numa_policy numa{exec::no_numa_policy{}}; } -void DispatcherManager::submit(dispatcher_id_t id, const Task::ptr_t& task) { dispatchers_[id]->receive(task); } +void DispatcherManager::submit(dispatcher_id_t id, const Task::ptr_t& task) { + MLLM_TRACY_ZONE_SCOPED; + dispatchers_[id]->receive(task); +} TaskResult::sender_t DispatcherManager::asyncSubmit(dispatcher_id_t id, const Task::ptr_t& task) { return dispatchers_[id]->asyncReceive(task); diff --git a/mllm/engine/MemoryManager.cpp b/mllm/engine/MemoryManager.cpp index 0e02a19a0..5372ef0d7 100644 --- a/mllm/engine/MemoryManager.cpp +++ b/mllm/engine/MemoryManager.cpp @@ -3,6 +3,7 @@ #include "mllm/utils/Common.hpp" #include "mllm/engine/MemoryManager.hpp" +#include "mllm/tracy_perf/Tracy.hpp" #ifdef MLLM_PERFETTO_ENABLE #include "mllm/engine/Perf.hpp" @@ -25,6 +26,7 @@ void MemoryManager::registerAllocator(const DeviceTypes& device, const Allocator } void MemoryManager::alloc(Storage* s) { + MLLM_TRACY_ZONE_SCOPED; auto& allocator = allocators_[s->device_]; auto try_to_alloc_size = allocator->allocSize(s); @@ -58,6 +60,7 @@ void MemoryManager::alloc(Storage* s) { void MemoryManager::alloc(const std::shared_ptr<Storage>& s) { alloc(s.get()); } void MemoryManager::free(Storage* s) { + MLLM_TRACY_ZONE_SCOPED; auto& allocator = allocators_[s->device_]; auto try_to_alloc_size = allocator->allocSize(s); diff --git a/mllm/tracy_perf/CMakeLists.txt 
b/mllm/tracy_perf/CMakeLists.txt index 9e418cbc5..1030682c8 100644 --- a/mllm/tracy_perf/CMakeLists.txt +++ b/mllm/tracy_perf/CMakeLists.txt @@ -1,5 +1,6 @@ if(MLLM_TRACY_ENABLE) add_library(MllmTracy SHARED Tracy.cpp) - target_link_libraries(MllmTracy PUBLIC tracy) + target_link_libraries(MllmTracy PUBLIC Tracy::TracyClient) + target_include_directories(MllmTracy PUBLIC ${MLLM_INCLUDE_DIR}) target_compile_definitions(MllmTracy PUBLIC MLLM_TRACY_ENABLE) endif() diff --git a/mllm/tracy_perf/Tracy.cpp b/mllm/tracy_perf/Tracy.cpp index 948a7a09b..0f86ce06d 100644 --- a/mllm/tracy_perf/Tracy.cpp +++ b/mllm/tracy_perf/Tracy.cpp @@ -1,3 +1,6 @@ +// Copyright (c) MLLM Team. +// Licensed under the MIT License. + #ifdef MLLM_TRACY_ENABLE -#include "tracy/Tracy.cpp" -#endif \ No newline at end of file +#include "mllm/tracy_perf/Tracy.hpp" +#endif diff --git a/mllm/tracy_perf/Tracy.hpp b/mllm/tracy_perf/Tracy.hpp index 72928f8ac..c35e0ab74 100644 --- a/mllm/tracy_perf/Tracy.hpp +++ b/mllm/tracy_perf/Tracy.hpp @@ -1,8 +1,10 @@ -#ifndef MLLM_TRACY_HPP -#define MLLM_TRACY_HPP +// Copyright (c) MLLM Team. +// Licensed under the MIT License. + +#pragma once #ifdef MLLM_TRACY_ENABLE -#include "tracy/Tracy.hpp" +#include <tracy/Tracy.hpp> #define MLLM_TRACY_ZONE_SCOPED ZoneScoped #define MLLM_TRACY_ZONE_SCOPED_NAMED(name) ZoneScopedN(name) #define MLLM_TRACY_FRAME_MARK FrameMark @@ -11,5 +13,3 @@ #define MLLM_TRACY_ZONE_SCOPED_NAMED(name) #define MLLM_TRACY_FRAME_MARK #endif - -#endif // MLLM_TRACY_HPP \ No newline at end of file diff --git a/tasks/build_osx_apple_silicon_accelerate.yaml b/tasks/build_osx_apple_silicon_accelerate.yaml index d76807053..6ea18b1cb 100644 --- a/tasks/build_osx_apple_silicon_accelerate.yaml +++ b/tasks/build_osx_apple_silicon_accelerate.yaml @@ -10,6 +10,7 @@ Tasks: - "-DMLLM_KERNEL_USE_THREADS=ON" - "-DMLLM_KERNEL_THREADS_VENDOR_OPENMP=OFF" - "-DMLLM_KERNEL_THREADS_VENDOR_APPLE_GCD=ON" + - "-DMLLM_TRACY_ENABLE=OFF" - CMakeBuildTask: cmake_cfg_path: "build-osx-accelerate"