diff --git a/yolox_ros_cpp/README.md b/yolox_ros_cpp/README.md index c4a0ceb..e9487bf 100644 --- a/yolox_ros_cpp/README.md +++ b/yolox_ros_cpp/README.md @@ -2,6 +2,7 @@ ## Requirements - ROS2 Humble + - ros-humble-generate-parameter-library - OpenCV 4.x - OpenVINO 2021.* - TensorRT 8.x * @@ -60,7 +61,7 @@ cd ~/ros2_ws - Download model using the following script. - https://github.com/PINTO0309/PINTO_model_zoo/blob/main/132_YOLOX/download_nano.sh - `curl -s https://raw.githubusercontent.com/PINTO0309/PINTO_model_zoo/main/132_YOLOX/download_nano.sh | bash` - + - ONNX model copy to weight dir - `cp resouces_new/saved_model_yolox_tiny_480x640/yolox_tiny_480x640.onnx ./src/YOLOX-ROS/weights/onnx/` diff --git a/yolox_ros_cpp/yolox_cpp/CMakeLists.txt b/yolox_ros_cpp/yolox_cpp/CMakeLists.txt index 3639c6b..a4d9b57 100644 --- a/yolox_ros_cpp/yolox_cpp/CMakeLists.txt +++ b/yolox_ros_cpp/yolox_cpp/CMakeLists.txt @@ -46,14 +46,16 @@ endif() if(YOLOX_USE_TENSORRT) find_package(CUDA REQUIRED) - find_library(NVINFER NAMES nvinfer REQUIRED) - find_library(NVINFERPLUGIN NAMES nvinfer_plugin REQUIRED) - find_library(NVPARSERS NAMES nvparsers REQUIRED) - find_library(NVONNXPARSER NAMES nvonnxparser REQUIRED) + if (NOT CUDA_TOOLKIT_ROOT_DIR) + set(CUDA_TOOLKIT_ROOT_DIR /usr/local/cuda) + endif() + find_path(TENSORRT_INCLUDE_DIR NvInfer.h + HINTS ${TENSORRT_ROOT} ${CUDA_TOOLKIT_ROOT_DIR} + PATH_SUFFIXES include) set(ENABLE_TENSORRT ON) set(TARGET_SRC src/yolox_tensorrt.cpp) - set(TARGET_LIBS nvinfer nvinfer_plugin nvparsers nvonnxparser) + set(TARGET_LIBS nvinfer nvinfer_plugin) set(TARGET_DPENDENCIES CUDA) endif() diff --git a/yolox_ros_cpp/yolox_cpp/include/yolox_cpp/coco_names.hpp b/yolox_ros_cpp/yolox_cpp/include/yolox_cpp/coco_names.hpp index 31cb6ec..61d2f03 100644 --- a/yolox_ros_cpp/yolox_cpp/include/yolox_cpp/coco_names.hpp +++ b/yolox_ros_cpp/yolox_cpp/include/yolox_cpp/coco_names.hpp @@ -93,7 +93,7 @@ namespace yolox_cpp{ {0.857, 0.857, 0.857}, {0.000, 0.447, 0.741}, {0.314, 0.717, 0.741}, - {0.50, 0.5, 0} + {0.500, 0.500, 0.000} }; } #endif \ No newline at end of file diff --git a/yolox_ros_cpp/yolox_cpp/include/yolox_cpp/core.hpp b/yolox_ros_cpp/yolox_cpp/include/yolox_cpp/core.hpp index 8debd14..fb68649 100644 --- a/yolox_ros_cpp/yolox_cpp/include/yolox_cpp/core.hpp +++ b/yolox_ros_cpp/yolox_cpp/include/yolox_cpp/core.hpp @@ -8,9 +8,7 @@ namespace yolox_cpp /** * @brief Define names based depends on Unicode path support */ -#define tcout std::cout #define file_name_t std::string -#define imread_t cv::imread struct Object { @@ -24,17 +22,21 @@ namespace yolox_cpp int grid0; int grid1; int stride; + GridAndStride(const int grid0_, const int grid1_, const int stride_) + : grid0(grid0_), grid1(grid1_), stride(stride_) + { + } }; class AbcYoloX { public: AbcYoloX() {} - AbcYoloX(float nms_th = 0.45, float conf_th = 0.3, - std::string model_version = "0.1.1rc0", - int num_classes = 80, bool p6 = false) + AbcYoloX(const float nms_th = 0.45, const float conf_th = 0.3, + const std::string &model_version = "0.1.1rc0", + const int num_classes = 80, const bool p6 = false) : nms_thresh_(nms_th), bbox_conf_thresh_(conf_th), - model_version_(model_version), num_classes_(num_classes), p6_(p6) + num_classes_(num_classes), p6_(p6), model_version_(model_version) { } virtual std::vector inference(const cv::Mat &frame) = 0; @@ -55,10 +57,10 @@ namespace yolox_cpp cv::Mat static_resize(const cv::Mat &img) { - float r = std::min(input_w_ / (img.cols * 1.0), input_h_ / (img.rows * 1.0)); + const float r = std::min(input_w_ / (img.cols * 1.0), input_h_ / (img.rows * 1.0)); // r = std::min(r, 1.0f); - int unpad_w = r * img.cols; - int unpad_h = r * img.rows; + const int unpad_w = r * img.cols; + const int unpad_h = r * img.rows; cv::Mat re(unpad_h, unpad_w, CV_8UC3); cv::resize(img, re, re.size()); cv::Mat out(input_h_, input_w_, CV_8UC3, cv::Scalar(114, 114, 114)); @@ -69,9 +71,9 @@ namespace yolox_cpp // for NCHW void blobFromImage(const cv::Mat &img, float *blob_data) { - size_t channels = 3; - size_t img_h = img.rows; - size_t img_w = img.cols; + const size_t channels = 3; + const size_t img_h = img.rows; + const size_t img_w = img.cols; if (this->model_version_ == "0.1.0") { for (size_t c = 0; c < channels; ++c) @@ -104,9 +106,9 @@ namespace yolox_cpp // for NHWC void blobFromImage_nhwc(const cv::Mat &img, float *blob_data) { - size_t channels = 3; - size_t img_h = img.rows; - size_t img_w = img.cols; + const size_t channels = 3; + const size_t img_h = img.rows; + const size_t img_w = img.cols; if (this->model_version_ == "0.1.0") { for (size_t i = 0; i < img_h * img_w; ++i) @@ -134,19 +136,19 @@ namespace yolox_cpp { for (auto stride : strides) { - int num_grid_w = target_w / stride; - int num_grid_h = target_h / stride; + const int num_grid_w = target_w / stride; + const int num_grid_h = target_h / stride; for (int g1 = 0; g1 < num_grid_h; ++g1) { for (int g0 = 0; g0 < num_grid_w; ++g0) { - grid_strides.push_back((GridAndStride){g0, g1, stride}); + grid_strides.emplace_back(g0, g1, stride); } } } } - void generate_yolox_proposals(const std::vector grid_strides, const float *feat_ptr, const float prob_threshold, std::vector &objects) + void generate_yolox_proposals(const std::vector &grid_strides, const float *feat_ptr, const float prob_threshold, std::vector &objects) { const int num_anchors = grid_strides.size(); @@ -158,13 +160,13 @@ namespace yolox_cpp const int basic_pos = anchor_idx * (num_classes_ + 5); - float box_objectness = feat_ptr[basic_pos + 4]; + const float box_objectness = feat_ptr[basic_pos + 4]; int class_id = 0; float max_class_score = 0.0; for (int class_idx = 0; class_idx < num_classes_; ++class_idx) { - float box_cls_score = feat_ptr[basic_pos + 5 + class_idx]; - float box_prob = box_objectness * box_cls_score; + const float box_cls_score = feat_ptr[basic_pos + 5 + class_idx]; + const float box_prob = box_objectness * box_cls_score; if (box_prob > max_class_score) { class_id = class_idx; @@ -176,12 +178,12 @@ namespace yolox_cpp // yolox/models/yolo_head.py decode logic // outputs[..., :2] = (outputs[..., :2] + grids) * strides // outputs[..., 2:4] = torch.exp(outputs[..., 2:4]) * strides - float x_center = (feat_ptr[basic_pos + 0] + grid0) * stride; - float y_center = (feat_ptr[basic_pos + 1] + grid1) * stride; - float w = exp(feat_ptr[basic_pos + 2]) * stride; - float h = exp(feat_ptr[basic_pos + 3]) * stride; - float x0 = x_center - w * 0.5f; - float y0 = y_center - h * 0.5f; + const float x_center = (feat_ptr[basic_pos + 0] + grid0) * stride; + const float y_center = (feat_ptr[basic_pos + 1] + grid1) * stride; + const float w = exp(feat_ptr[basic_pos + 2]) * stride; + const float h = exp(feat_ptr[basic_pos + 3]) * stride; + const float x0 = x_center - w * 0.5f; + const float y0 = y_center - h * 0.5f; Object obj; obj.rect.x = x0; @@ -197,7 +199,7 @@ namespace yolox_cpp float intersection_area(const Object &a, const Object &b) { - cv::Rect_ inter = a.rect & b.rect; + const cv::Rect_ inter = a.rect & b.rect; return inter.area(); } @@ -260,8 +262,8 @@ namespace yolox_cpp const Object &b = faceobjects[picked[j]]; // intersection over union - float inter_area = intersection_area(a, b); - float union_area = areas[i] + areas[picked[j]] - inter_area; + const float inter_area = intersection_area(a, b); + const float union_area = areas[i] + areas[picked[j]] - inter_area; // float IoU = inter_area / union_area if (inter_area / union_area > nms_threshold) keep = 0; @@ -272,7 +274,7 @@ namespace yolox_cpp } } - void decode_outputs(const float *prob, const std::vector grid_strides, + void decode_outputs(const float *prob, const std::vector &grid_strides, std::vector &objects, const float bbox_conf_thresh, const float scale, const int img_w, const int img_h) { @@ -312,4 +314,4 @@ namespace yolox_cpp } }; } -#endif \ No newline at end of file +#endif diff --git a/yolox_ros_cpp/yolox_cpp/include/yolox_cpp/utils.hpp b/yolox_ros_cpp/yolox_cpp/include/yolox_cpp/utils.hpp index ff45978..335b980 100644 --- a/yolox_ros_cpp/yolox_cpp/include/yolox_cpp/utils.hpp +++ b/yolox_ros_cpp/yolox_cpp/include/yolox_cpp/utils.hpp @@ -8,18 +8,22 @@ #include "core.hpp" #include "coco_names.hpp" -namespace yolox_cpp{ - namespace utils{ +namespace yolox_cpp +{ + namespace utils + { - static std::vector read_class_labels_file(file_name_t file_name) + static std::vector read_class_labels_file(const file_name_t &file_name) { std::vector class_names; std::ifstream ifs(file_name); std::string buff; - if(ifs.fail()){ + if (ifs.fail()) + { return class_names; } - while (getline(ifs, buff)) { + while (getline(ifs, buff)) + { if (buff == "") continue; class_names.push_back(buff); @@ -27,20 +31,21 @@ namespace yolox_cpp{ return class_names; } - static void draw_objects(cv::Mat bgr, const std::vector& objects, const std::vector& class_names=COCO_CLASSES) + static void draw_objects(cv::Mat bgr, const std::vector &objects, const std::vector &class_names = COCO_CLASSES) { - for (size_t i = 0; i < objects.size(); i++) + for (const Object &obj : objects) { - const Object& obj = objects[i]; - - int color_index = obj.label % 80; + const int color_index = obj.label % 80; cv::Scalar color = cv::Scalar(color_list[color_index][0], color_list[color_index][1], color_list[color_index][2]); float c_mean = cv::mean(color)[0]; cv::Scalar txt_color; - if (c_mean > 0.5){ + if (c_mean > 0.5) + { txt_color = cv::Scalar(0, 0, 0); - }else{ + } + else + { txt_color = cv::Scalar(255, 255, 255); } @@ -68,4 +73,4 @@ namespace yolox_cpp{ } } } -#endif \ No newline at end of file +#endif diff --git a/yolox_ros_cpp/yolox_cpp/include/yolox_cpp/yolox_tensorrt.hpp b/yolox_ros_cpp/yolox_cpp/include/yolox_cpp/yolox_tensorrt.hpp index 03962a4..c61b0ae 100644 --- a/yolox_ros_cpp/yolox_cpp/include/yolox_cpp/yolox_tensorrt.hpp +++ b/yolox_ros_cpp/yolox_cpp/include/yolox_cpp/yolox_tensorrt.hpp @@ -36,6 +36,7 @@ namespace yolox_cpp{ YoloXTensorRT(file_name_t path_to_engine, int device=0, float nms_th=0.45, float conf_th=0.3, std::string model_version="0.1.1rc0", int num_classes=80, bool p6=false); + ~YoloXTensorRT(); std::vector inference(const cv::Mat& frame) override; private: @@ -49,6 +50,7 @@ namespace yolox_cpp{ int output_size_; const int inputIndex_ = 0; const int outputIndex_ = 1; + void *inference_buffers_[2]; }; } // namespace yolox_cpp diff --git a/yolox_ros_cpp/yolox_cpp/src/yolox_tensorrt.cpp b/yolox_ros_cpp/yolox_cpp/src/yolox_tensorrt.cpp index 7b86c8c..4dd7151 100644 --- a/yolox_ros_cpp/yolox_cpp/src/yolox_tensorrt.cpp +++ b/yolox_ros_cpp/yolox_cpp/src/yolox_tensorrt.cpp @@ -1,12 +1,13 @@ #include "yolox_cpp/yolox_tensorrt.hpp" -namespace yolox_cpp{ +namespace yolox_cpp +{ YoloXTensorRT::YoloXTensorRT(file_name_t path_to_engine, int device, float nms_th, float conf_th, std::string model_version, int num_classes, bool p6) - :AbcYoloX(nms_th, conf_th, model_version, num_classes, p6), - DEVICE_(device) + : AbcYoloX(nms_th, conf_th, model_version, num_classes, p6), + DEVICE_(device) { cudaSetDevice(this->DEVICE_); // create a model using the API directly and serialize it to a stream @@ -14,7 +15,8 @@ namespace yolox_cpp{ size_t size{0}; std::ifstream file(path_to_engine, std::ios::binary); - if (file.good()) { + if (file.good()) + { file.seekg(0, file.end); size = file.tellg(); file.seekg(0, file.beg); @@ -22,7 +24,9 @@ namespace yolox_cpp{ assert(trtModelStream); file.read(trtModelStream, size); file.close(); - }else{ + } + else + { std::cerr << "invalid arguments path_to_engine: " << path_to_engine << std::endl; return; } @@ -35,28 +39,41 @@ namespace yolox_cpp{ assert(this->context_ != nullptr); delete[] trtModelStream; - auto input_dims = this->engine_->getBindingDimensions(0); + const auto input_name = this->engine_->getIOTensorName(this->inputIndex_); + const auto input_dims = this->engine_->getTensorShape(input_name); this->input_h_ = input_dims.d[2]; this->input_w_ = input_dims.d[3]; std::cout << "INPUT_HEIGHT: " << this->input_h_ << std::endl; std::cout << "INPUT_WIDTH: " << this->input_w_ << std::endl; - auto out_dims = this->engine_->getBindingDimensions(1); + const auto output_name = this->engine_->getIOTensorName(this->outputIndex_); + auto output_dims = this->engine_->getTensorShape(output_name); this->output_size_ = 1; - for(int j=0; joutput_size_ *= out_dims.d[j]; + for (int j = 0; j < output_dims.nbDims; ++j) + { + this->output_size_ *= output_dims.d[j]; } // Pointers to input and output device buffers to pass to engine. // Engine requires exactly IEngine::getNbBindings() number of buffers. - assert(this->engine_->getNbBindings() == 2); + assert(this->engine_->getNbIOTensors() == 2); // In order to bind the buffers, we need to know the names of the input and output tensors. // Note that indices are guaranteed to be less than IEngine::getNbBindings() - assert(this->engine_->getBindingDataType(this->inputIndex_) == nvinfer1::DataType::kFLOAT); - assert(this->engine_->getBindingDataType(this->outputIndex_) == nvinfer1::DataType::kFLOAT); + assert(this->engine_->getTensorDataType(input_name) == nvinfer1::DataType::kFLOAT); + assert(this->engine_->getTensorDataType(output_name) == nvinfer1::DataType::kFLOAT); + + // Create GPU buffers on device + CHECK(cudaMalloc(&this->inference_buffers_[this->inputIndex_], 3 * this->input_h_ * this->input_w_ * sizeof(float))); + CHECK(cudaMalloc(&this->inference_buffers_[this->outputIndex_], this->output_size_ * sizeof(float))); + + assert(this->context_->setInputShape(input_name, input_dims)); + assert(this->context_->allInputDimensionsSpecified()); + + assert(this->context_->setTensorAddress(input_name, this->inference_buffers_[this->inputIndex_])); + assert(this->context_->setTensorAddress(output_name, this->inference_buffers_[this->outputIndex_])); // Prepare GridAndStrides - if(this->p6_) + if (this->p6_) { generate_grids_and_stride(this->input_w_, this->input_h_, this->strides_p6_, this->grid_strides_); } @@ -66,18 +83,24 @@ namespace yolox_cpp{ } } - std::vector YoloXTensorRT::inference(const cv::Mat& frame) + YoloXTensorRT::~YoloXTensorRT() + { + CHECK(cudaFree(inference_buffers_[this->inputIndex_])); + CHECK(cudaFree(inference_buffers_[this->outputIndex_])); + } + + std::vector YoloXTensorRT::inference(const cv::Mat &frame) { // preprocess auto pr_img = static_resize(frame); - float* input_blob = new float[pr_img.total()*3]; + float *input_blob = new float[pr_img.total() * 3]; blobFromImage(pr_img, input_blob); // inference - float* output_blob = new float[this->output_size_]; + float *output_blob = new float[this->output_size_]; this->doInference(input_blob, output_blob); - float scale = std::min(this->input_w_ / (frame.cols*1.0), this->input_h_ / (frame.rows*1.0)); + float scale = std::min(this->input_w_ / (frame.cols * 1.0), this->input_h_ / (frame.rows * 1.0)); std::vector objects; decode_outputs(output_blob, this->grid_strides_, objects, this->bbox_conf_thresh_, scale, frame.cols, frame.rows); @@ -87,31 +110,25 @@ namespace yolox_cpp{ return objects; } - void YoloXTensorRT::doInference(float* input, float* output) + void YoloXTensorRT::doInference(float *input, float *output) { - // Pointers to input and output device buffers to pass to engine. - // Engine requires exactly IEngine::getNbBindings() number of buffers. - void* buffers[2]; - - // Create GPU buffers on device - CHECK(cudaMalloc(&buffers[this->inputIndex_], 3 * this->input_h_ * this->input_w_ * sizeof(float))); - CHECK(cudaMalloc(&buffers[this->outputIndex_], this->output_size_ * sizeof(float))); - // Create stream cudaStream_t stream; CHECK(cudaStreamCreate(&stream)); // DMA input batch data to device, infer on the batch asynchronously, and DMA output back to host - CHECK(cudaMemcpyAsync(buffers[this->inputIndex_], input, 3 * this->input_h_ * this->input_w_ * sizeof(float), cudaMemcpyHostToDevice, stream)); - context_->enqueueV2(buffers, stream, nullptr); - CHECK(cudaMemcpyAsync(output, buffers[this->outputIndex_], this->output_size_ * sizeof(float), cudaMemcpyDeviceToHost, stream)); - cudaStreamSynchronize(stream); - - // Release stream and buffers - cudaStreamDestroy(stream); - CHECK(cudaFree(buffers[0])); - CHECK(cudaFree(buffers[1])); + CHECK(cudaMemcpyAsync(this->inference_buffers_[this->inputIndex_], input, 3 * this->input_h_ * this->input_w_ * sizeof(float), cudaMemcpyHostToDevice, stream)); + + bool success = context_->enqueueV3(stream); + if (!success) + throw std::runtime_error("failed inference"); + + CHECK(cudaMemcpyAsync(output, this->inference_buffers_[this->outputIndex_], this->output_size_ * sizeof(float), cudaMemcpyDeviceToHost, stream)); + + CHECK(cudaStreamSynchronize(stream)); + + // Release stream + CHECK(cudaStreamDestroy(stream)); } } // namespace yolox_cpp - diff --git a/yolox_ros_cpp/yolox_ros_cpp/include/yolox_ros_cpp/yolox_ros_cpp.hpp b/yolox_ros_cpp/yolox_ros_cpp/include/yolox_ros_cpp/yolox_ros_cpp.hpp index 36155a9..98556bc 100644 --- a/yolox_ros_cpp/yolox_ros_cpp/include/yolox_ros_cpp/yolox_ros_cpp.hpp +++ b/yolox_ros_cpp/yolox_ros_cpp/include/yolox_ros_cpp/yolox_ros_cpp.hpp @@ -39,6 +39,6 @@ namespace yolox_ros_cpp{ rclcpp::Publisher::SharedPtr pub_bboxes_; image_transport::Publisher pub_image_; - bboxes_ex_msgs::msg::BoundingBoxes objects_to_bboxes(cv::Mat, std::vector, std_msgs::msg::Header); + bboxes_ex_msgs::msg::BoundingBoxes objects_to_bboxes(const cv::Mat&, const std::vector&, const std_msgs::msg::Header&); }; } diff --git a/yolox_ros_cpp/yolox_ros_cpp/src/yolox_ros_cpp.cpp b/yolox_ros_cpp/yolox_ros_cpp/src/yolox_ros_cpp.cpp index d69edf9..70f6e19 100644 --- a/yolox_ros_cpp/yolox_ros_cpp/src/yolox_ros_cpp.cpp +++ b/yolox_ros_cpp/yolox_ros_cpp/src/yolox_ros_cpp.cpp @@ -131,15 +131,17 @@ namespace yolox_ros_cpp pub_img = cv_bridge::CvImage(img->header, "bgr8", frame).toImageMsg(); this->pub_image_.publish(pub_img); } - bboxes_ex_msgs::msg::BoundingBoxes YoloXNode::objects_to_bboxes(cv::Mat frame, std::vector objects, std_msgs::msg::Header header) + + bboxes_ex_msgs::msg::BoundingBoxes YoloXNode::objects_to_bboxes( + const cv::Mat &frame, const std::vector &objects, const std_msgs::msg::Header &header) { bboxes_ex_msgs::msg::BoundingBoxes boxes; boxes.header = header; - for (auto obj : objects) + for (const auto &obj : objects) { bboxes_ex_msgs::msg::BoundingBox box; box.probability = obj.prob; - box.class_id = yolox_cpp::COCO_CLASSES[obj.label]; + box.class_id = this->class_names_[obj.label]; box.xmin = obj.rect.x; box.ymin = obj.rect.y; box.xmax = (obj.rect.x + obj.rect.width);