diff --git a/examples/models/yolo12/CMakeLists.txt b/examples/models/yolo12/CMakeLists.txt index 60b11685bdf..ed9b8ca5d79 100644 --- a/examples/models/yolo12/CMakeLists.txt +++ b/examples/models/yolo12/CMakeLists.txt @@ -28,6 +28,11 @@ include(${EXECUTORCH_ROOT}/tools/cmake/Utils.cmake) # Let files say "include ". set(_common_include_directories ${EXECUTORCH_ROOT}/..) +#find dependencies +find_package(absl CONFIG REQUIRED PATHS ${EXECUTORCH_ROOT}/cmake-out) +find_package(re2 CONFIG REQUIRED PATHS ${EXECUTORCH_ROOT}/cmake-out) +find_package(tokenizers CONFIG REQUIRED PATHS ${EXECUTORCH_ROOT}/cmake-out) + # find `executorch` libraries Same as for gflags find_package(executorch CONFIG REQUIRED PATHS ${EXECUTORCH_ROOT}/cmake-out) executorch_target_link_options_shared_lib(executorch) @@ -38,21 +43,13 @@ list(APPEND link_libraries portable_ops_lib portable_kernels) executorch_target_link_options_shared_lib(portable_ops_lib) if(USE_XNNPACK_BACKEND) - set(xnnpack_backend_libs xnnpack_backend XNNPACK microkernels-prod) + set(xnnpack_backend_libs xnnpack_backend XNNPACK xnnpack-microkernels-prod) list(APPEND link_libraries ${xnnpack_backend_libs}) executorch_target_link_options_shared_lib(xnnpack_backend) endif() if(USE_OPENVINO_BACKEND) - add_subdirectory(${EXECUTORCH_ROOT}/backends/openvino openvino_backend) - - target_include_directories( - openvino_backend - INTERFACE - ${CMAKE_CURRENT_BINARY_DIR}/../../include - ${CMAKE_CURRENT_BINARY_DIR}/../../include/executorch/runtime/core/portable_type/c10 - ${CMAKE_CURRENT_BINARY_DIR}/../../lib - ) + find_package(OpenVINO REQUIRED) list(APPEND link_libraries openvino_backend) executorch_target_link_options_shared_lib(openvino_backend) endif() diff --git a/examples/models/yolo12/inference.h b/examples/models/yolo12/inference.h index 467ef5ce0ca..b99a330fabf 100644 --- a/examples/models/yolo12/inference.h +++ b/examples/models/yolo12/inference.h @@ -3,6 +3,7 @@ #include #include +#include #include #include @@ -18,6 +19,8 @@ using 
executorch::extension::Module; using executorch::runtime::Error; using executorch::runtime::Result; +std::mutex execute_mutex; + struct Detection { int class_id{0}; std::string className{}; @@ -59,22 +62,24 @@ cv::Mat scale_with_padding( return result; } -std::vector infer_yolo_once( - Module& module, - cv::Mat input, - cv::Size img_dims, - const DetectionConfig yolo_config) { - int pad_x, pad_y; - float scale; - input = scale_with_padding(input, &pad_x, &pad_y, &scale, img_dims); - - cv::Mat blob; +std::shared_ptr prepare_input( + cv::Mat& input, + cv::Mat& blob, + cv::Size img_dims) { + cv::dnn::blobFromImage( input, blob, 1.0 / 255.0, img_dims, cv::Scalar(), true, false); const auto t_input = from_blob( (void*)blob.data, std::vector(blob.size.p, blob.size.p + blob.dims), ScalarType::Float); + return t_input; +} + +executorch::aten::Tensor execute_frame( + Module& module, + std::shared_ptr t_input) { + std::lock_guard lock(execute_mutex); const auto result = module.forward(t_input); ET_CHECK_MSG( @@ -82,9 +87,15 @@ std::vector infer_yolo_once( "Execution of method forward failed with status 0x%" PRIx32, (uint32_t)result.error()); - const auto t = result->at(0).toTensor(); // Using only the 0 output + return result->at(0).toTensor(); // Using only the 0 output // yolov8 has an output of shape (batchSize, 84, 8400) (Num classes + // box[x,y,w,h]) +} + +std::vector process_output( + executorch::aten::Tensor& t, + const DetectionConfig yolo_config, + int pad_x, int pad_y, float scale) { cv::Mat mat_output(t.dim() - 1, t.sizes().data() + 1, CV_32FC1, t.data_ptr()); std::vector class_ids; @@ -148,4 +159,5 @@ std::vector infer_yolo_once( return detections; } + #endif // INFERENCE_H diff --git a/examples/models/yolo12/main.cpp b/examples/models/yolo12/main.cpp index 95ea98d6634..ceb10f9a9a8 100644 --- a/examples/models/yolo12/main.cpp +++ b/examples/models/yolo12/main.cpp @@ -1,4 +1,6 @@ #include "inference.h" +#include +#include #include @@ -97,32 +99,94 @@ int main(int 
argc, char** argv) { unsigned long long iters = 0; // Show progress every 10% unsigned long long progress_bar_tick = std::round(video_lenght / 10); + + struct frame_ctx { + cv::Mat frame; + cv::Mat scaled_input; + cv::Mat blob; + int pad_x; + int pad_y; + float scale; + }; + std::queue ready_q; + std::queue>> scale_q; + std::queue>>> input_q; + std::queue>> execute_q; + std::queue>>> output_q; + const et_timestamp_t before_execute = et_pal_current_ticks(); + size_t frame_queue_size = 2; while (true) { cv::Mat frame; cap >> frame; - - if (frame.empty()) + + if (frame.empty() && ready_q.empty() && scale_q.empty() && input_q.empty() && execute_q.empty() && output_q.empty()) break; - const et_timestamp_t before_execute = et_pal_current_ticks(); - std::vector output = - infer_yolo_once(yolo_module, frame, img_dims, DEFAULT_YOLO_CONFIG); + if (!frame.empty()) { + frame_ctx *new_frame_ctx = new frame_ctx; + new_frame_ctx->frame = frame; + ready_q.push(new_frame_ctx); + } - for (auto& detection : output) { - draw_detection(frame, detection, cv::Scalar(0, 0, 255)); + while (!ready_q.empty() && scale_q.size() < frame_queue_size) { + frame_ctx *scale_f = ready_q.front(); + scale_q.push(std::make_pair(scale_f, std::async(std::launch::async, scale_with_padding, std::ref(scale_f->frame), &(scale_f->pad_x), &(scale_f->pad_y), &(scale_f->scale), img_dims))); + ready_q.pop(); } - const et_timestamp_t after_execute = et_pal_current_ticks(); - time_spent_executing += after_execute - before_execute; - iters++; - - if (!(iters % progress_bar_tick)) { - const int precent_ready = (100 * iters) / video_lenght; - std::cout << iters << " out of " << video_lenght - << " frames are are processed (" << precent_ready << "\%)" - << std::endl; + while (!scale_q.empty() && input_q.size() < frame_queue_size) { + auto status = scale_q.front().second.wait_for(std::chrono::milliseconds(1)); + if (status == std::future_status::ready) { + scale_q.front().first->scaled_input = 
scale_q.front().second.get(); + input_q.push(std::make_pair(scale_q.front().first, std::async(std::launch::async, prepare_input, std::ref(scale_q.front().first->scaled_input), std::ref(scale_q.front().first->blob), img_dims))); + scale_q.pop(); + } else { + break; + } + } + while (!input_q.empty() && execute_q.size() < frame_queue_size) { + auto status = input_q.front().second.wait_for(std::chrono::milliseconds(1)); + if (status == std::future_status::ready) { + std::shared_ptr prepared_input = input_q.front().second.get(); + execute_q.push(std::make_pair(input_q.front().first, std::async(std::launch::async, execute_frame, std::ref(yolo_module), prepared_input))); + input_q.pop(); + } else { + break; + } + } + while (!execute_q.empty() && output_q.size() < frame_queue_size) { + auto status = execute_q.front().second.wait_for(std::chrono::milliseconds(1)); + if (status == std::future_status::ready) { + executorch::aten::Tensor raw_output = execute_q.front().second.get(); + output_q.push(std::make_pair(execute_q.front().first, std::async(std::launch::async, process_output, std::ref(raw_output), DEFAULT_YOLO_CONFIG, execute_q.front().first->pad_x, execute_q.front().first->pad_y, execute_q.front().first->scale))); + execute_q.pop(); + } else { + break; + } + } + while (!output_q.empty()) { + auto status = output_q.front().second.wait_for(std::chrono::milliseconds(1)); + if (status == std::future_status::ready) { + std::vector output = output_q.front().second.get(); + for (auto& detection : output) { + draw_detection(output_q.front().first->frame, detection, cv::Scalar(0, 0, 255)); + } + iters++; + + if (!(iters % progress_bar_tick)) { + const int percent_ready = (100 * iters) / video_lenght; + std::cout << iters << " out of " << video_lenght + << " frames are processed (" << percent_ready << "%)" + << std::endl; + } + video.write(output_q.front().first->frame); + output_q.pop(); + } else { + break; + } } - video.write(frame); } + const et_timestamp_t after_execute 
= et_pal_current_ticks(); + time_spent_executing = after_execute - before_execute; const auto tick_ratio = et_pal_ticks_to_ns_multiplier(); constexpr auto NANOSECONDS_PER_MILLISECOND = 1000000; @@ -165,4 +229,4 @@ void draw_detection( cv::Scalar(0, 0, 0), 2, 0); -} \ No newline at end of file +} diff --git a/examples/models/yolo12/requirements.txt b/examples/models/yolo12/requirements.txt index de537f46170..383cf53aba9 100644 --- a/examples/models/yolo12/requirements.txt +++ b/examples/models/yolo12/requirements.txt @@ -1 +1 @@ -ultralytics==8.3.97 \ No newline at end of file +ultralytics==8.3.196