diff --git a/.gitignore b/.gitignore
index f49b53b0e1a..72a72c2a28b 100644
--- a/.gitignore
+++ b/.gitignore
@@ -4,14 +4,16 @@ cmake-build-debug
 cmake-build-release
 .vscode
 FastDeploy.cmake
-fastdeploy/core/config.h
 build-debug.sh
 *dist
 fastdeploy.egg-info
 .setuptools-cmake-build
 fastdeploy/version.py
+fastdeploy/core/config.h
+fastdeploy/c_lib_wrap.py
 fastdeploy/LICENSE*
 fastdeploy/ThirdPartyNotices*
 *.so*
 fastdeploy/libs/third_libs
-csrcs/fastdeploy/core/config.h
\ No newline at end of file
+csrcs/fastdeploy/core/config.h
+csrcs/fastdeploy/pybind/main.cc
\ No newline at end of file
diff --git a/csrcs/fastdeploy/backends/paddle/util.cc b/csrcs/fastdeploy/backends/paddle/util.cc
index b2df989d4a0..1ae5b3553e2 100644
--- a/csrcs/fastdeploy/backends/paddle/util.cc
+++ b/csrcs/fastdeploy/backends/paddle/util.cc
@@ -38,7 +38,10 @@ void ShareTensorFromCpu(paddle_infer::Tensor* tensor, FDTensor& fd_tensor) {
 void CopyTensorToCpu(std::unique_ptr<paddle_infer::Tensor>& tensor,
                      FDTensor* fd_tensor) {
   auto fd_dtype = PaddleDataTypeToFD(tensor->type());
-  fd_tensor->Allocate(tensor->shape(), fd_dtype, tensor->name());
+  std::vector<int64_t> shape;
+  auto tmp_shape = tensor->shape();
+  shape.assign(tmp_shape.begin(), tmp_shape.end());
+  fd_tensor->Allocate(shape, fd_dtype, tensor->name());
   if (fd_tensor->dtype == FDDataType::FP32) {
     tensor->CopyToCpu(static_cast<float*>(fd_tensor->MutableData()));
     return;
diff --git a/csrcs/fastdeploy/vision.h b/csrcs/fastdeploy/vision.h
index 4f1d55312a0..5670c1f2b69 100644
--- a/csrcs/fastdeploy/vision.h
+++ b/csrcs/fastdeploy/vision.h
@@ -35,6 +35,7 @@
 #include "fastdeploy/vision/wongkinyiu/scaledyolov4.h"
 #include "fastdeploy/vision/wongkinyiu/yolor.h"
 #include "fastdeploy/vision/wongkinyiu/yolov7.h"
+#include "fastdeploy/vision/zhkkke/modnet.h"
 #endif
 
 #include "fastdeploy/vision/visualize/visualize.h"
diff --git a/csrcs/fastdeploy/vision/common/result.cc b/csrcs/fastdeploy/vision/common/result.cc
index 8c68b57f585..854d6fcab22 100644
--- a/csrcs/fastdeploy/vision/common/result.cc
+++ b/csrcs/fastdeploy/vision/common/result.cc
@@ -225,5 +225,82 @@ std::string FaceRecognitionResult::Str() {
   return out;
 }
 
+MattingResult::MattingResult(const MattingResult& res) {
+  alpha.assign(res.alpha.begin(), res.alpha.end());
+  foreground.assign(res.foreground.begin(), res.foreground.end());
+  shape.assign(res.shape.begin(), res.shape.end());
+  contain_foreground = res.contain_foreground;
+}
+
+void MattingResult::Clear() {
+  std::vector<float>().swap(alpha);
+  std::vector<float>().swap(foreground);
+  std::vector<int64_t>().swap(shape);
+  contain_foreground = false;
+}
+
+void MattingResult::Reserve(int size) {
+  alpha.reserve(size);
+  if (contain_foreground) {
+    FDASSERT((shape.size() == 3),
+             "Please initialize shape (h,w,c) before calling Reserve.");
+    int c = static_cast<int>(shape[2]);
+    foreground.reserve(size * c);
+  }
+}
+
+void MattingResult::Resize(int size) {
+  alpha.resize(size);
+  if (contain_foreground) {
+    FDASSERT((shape.size() == 3),
+             "Please initialize shape (h,w,c) before calling Resize.");
+    int c = static_cast<int>(shape[2]);
+    foreground.resize(size * c);
+  }
+}
+
+std::string MattingResult::Str() {
+  std::string out;
+  out = "MattingResult[";
+  if (contain_foreground) {
+    out += "Foreground(true)";
+  } else {
+    out += "Foreground(false)";
+  }
+  out += ", Alpha(";
+  size_t numel = alpha.size();
+  if (numel == 0) {
+    return out + "[Empty Result]";
+  }
+  // max, min, mean
+  float min_val = alpha.at(0);
+  float max_val = alpha.at(0);
+  float total_val = alpha.at(0);
+  for (size_t i = 1; i < numel; ++i) {
+    float val = alpha.at(i);
+    total_val += val;
+    if (val < min_val) {
+      min_val = val;
+    }
+    if (val > max_val) {
+      max_val = val;
+    }
+  }
+  float mean_val = total_val / static_cast<float>(numel);
+  // shape
+  std::string shape_str = "Shape(";
+  for (size_t i = 0; i < shape.size(); ++i) {
+    if ((i + 1) != shape.size()) {
+      shape_str += std::to_string(shape[i]) + ",";
+    } else {
+      shape_str += std::to_string(shape[i]) + ")";
+    }
+  }
+  out = out + "Numel(" + std::to_string(numel) + "), " + shape_str + ", Min(" +
+        std::to_string(min_val) + "), " + "Max(" + std::to_string(max_val) +
+        "), " + "Mean(" + std::to_string(mean_val) + "))]\n";
+  return out;
+}
+
 }  // namespace vision
 }  // namespace fastdeploy
diff --git a/csrcs/fastdeploy/vision/common/result.h b/csrcs/fastdeploy/vision/common/result.h
index 5d0fd549782..f57178cee79 100644
--- a/csrcs/fastdeploy/vision/common/result.h
+++ b/csrcs/fastdeploy/vision/common/result.h
@@ -23,7 +23,8 @@ enum FASTDEPLOY_DECL ResultType {
   DETECTION,
   SEGMENTATION,
   FACE_DETECTION,
-  FACE_RECOGNITION
+  FACE_RECOGNITION,
+  MATTING
 };
 
 struct FASTDEPLOY_DECL BaseResult {
@@ -119,5 +120,29 @@ struct FASTDEPLOY_DECL FaceRecognitionResult : public BaseResult {
   std::string Str();
 };
 
+struct FASTDEPLOY_DECL MattingResult : public BaseResult {
+  // alpha matte and fgr (predicted foreground: HWC/BGR float32)
+  std::vector<float> alpha;       // h x w
+  std::vector<float> foreground;  // h x w x c (c=3 by default)
+  // height, width, channel for foreground and alpha;
+  // must be (h,w,c) and set up before Reserve and Resize;
+  // c only applies to foreground, when contain_foreground is true.
+  std::vector<int64_t> shape;
+  bool contain_foreground = false;
+
+  ResultType type = ResultType::MATTING;
+
+  MattingResult() {}
+  MattingResult(const MattingResult& res);
+
+  void Clear();
+
+  void Reserve(int size);
+
+  void Resize(int size);
+
+  std::string Str();
+};
+
 }  // namespace vision
 }  // namespace fastdeploy
diff --git a/csrcs/fastdeploy/vision/vision_pybind.cc b/csrcs/fastdeploy/vision/vision_pybind.cc
index 1e171b022ed..dfff80d1fe2 100644
--- a/csrcs/fastdeploy/vision/vision_pybind.cc
+++ b/csrcs/fastdeploy/vision/vision_pybind.cc
@@ -29,6 +29,7 @@ void BindLinzaer(pybind11::module& m);
 void BindBiubug6(pybind11::module& m);
 void BindPpogg(pybind11::module& m);
 void BindDeepInsight(pybind11::module& m);
+void BindZHKKKe(pybind11::module& m);
 #ifdef ENABLE_VISION_VISUALIZE
 void BindVisualize(pybind11::module& m);
 #endif
@@ -74,6 +75,15 @@ void BindVision(pybind11::module& m) {
       .def("__repr__", &vision::FaceRecognitionResult::Str)
       .def("__str__", &vision::FaceRecognitionResult::Str);
 
+  pybind11::class_<vision::MattingResult>(m, "MattingResult")
+      .def(pybind11::init())
+      .def_readwrite("alpha", &vision::MattingResult::alpha)
+      .def_readwrite("foreground", &vision::MattingResult::foreground)
+      .def_readwrite("shape", &vision::MattingResult::shape)
+      .def_readwrite("contain_foreground",
+                     &vision::MattingResult::contain_foreground)
+      .def("__repr__", &vision::MattingResult::Str)
+      .def("__str__", &vision::MattingResult::Str);
+
   BindPPCls(m);
   BindPPDet(m);
   BindPPSeg(m);
@@ -87,6 +97,7 @@ void BindVision(pybind11::module& m) {
   BindBiubug6(m);
   BindPpogg(m);
   BindDeepInsight(m);
+  BindZHKKKe(m);
 #ifdef ENABLE_VISION_VISUALIZE
   BindVisualize(m);
 #endif
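The bindings above expose `MattingResult` to Python with `alpha` and `foreground` as flat lists. As a minimal sketch of how a caller might turn them back into arrays — assuming a `result` obtained from a model's `predict()` call; the helper name and numpy usage are illustrative, not part of this patch:

```python
import numpy as np

def matting_result_to_arrays(result):
    # result.shape is (h, w), plus c when contain_foreground is true.
    h, w = int(result.shape[0]), int(result.shape[1])
    # alpha is stored flat as h * w floats in [0, 1].
    alpha = np.array(result.alpha, dtype=np.float32).reshape(h, w)
    foreground = None
    if result.contain_foreground:
        c = int(result.shape[2])
        # foreground is stored flat as h * w * c floats (HWC, BGR).
        foreground = np.array(result.foreground, dtype=np.float32).reshape(h, w, c)
    return alpha, foreground
```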
diff --git a/csrcs/fastdeploy/vision/visualize/matting_alpha.cc b/csrcs/fastdeploy/vision/visualize/matting_alpha.cc
new file mode 100644
index 00000000000..f1eaa0cfaee
--- /dev/null
+++ b/csrcs/fastdeploy/vision/visualize/matting_alpha.cc
@@ -0,0 +1,123 @@
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifdef ENABLE_VISION_VISUALIZE
+
+#include "fastdeploy/vision/visualize/visualize.h"
+#include "opencv2/highgui.hpp"
+#include "opencv2/imgproc/imgproc.hpp"
+
+namespace fastdeploy {
+namespace vision {
+
+static void RemoveSmallConnectedArea(cv::Mat* alpha_pred,
+                                     float threshold = 0.05f) {
+  // Remove small connected areas and noise with open/close morphology.
+  // The input is assumed to be an alpha matte with values in (0., 1.).
+  cv::Mat gray, binary;
+  (*alpha_pred).convertTo(gray, CV_8UC1, 255.f);
+  // 255 * 0.05 ~ 13
+  unsigned int binary_threshold = static_cast<unsigned int>(255.f * threshold);
+  cv::threshold(gray, binary, binary_threshold, 255, cv::THRESH_BINARY);
+  // morphologyEx with OPEN operation to remove noise first.
+  auto kernel = cv::getStructuringElement(cv::MORPH_ELLIPSE, cv::Size(3, 3),
+                                          cv::Point(-1, -1));
+  cv::morphologyEx(binary, binary, cv::MORPH_OPEN, kernel);
+  // Compute connected components.
+  cv::Mat labels = cv::Mat::zeros((*alpha_pred).size(), CV_32S);
+  cv::Mat stats, centroids;
+  int num_labels =
+      cv::connectedComponentsWithStats(binary, labels, stats, centroids, 8, 4);
+  if (num_labels <= 1) {
+    // no noise, skip.
+    return;
+  }
+  // Find the largest connected area; label 0 is the background.
+  int max_connected_id = 1;  // 1,2,...
+  int max_connected_area = stats.at<int>(max_connected_id, cv::CC_STAT_AREA);
+  for (int i = 1; i < num_labels; ++i) {
+    int tmp_connected_area = stats.at<int>(i, cv::CC_STAT_AREA);
+    if (tmp_connected_area > max_connected_area) {
+      max_connected_area = tmp_connected_area;
+      max_connected_id = i;
+    }
+  }
+  const int h = (*alpha_pred).rows;
+  const int w = (*alpha_pred).cols;
+  // Zero out every pixel outside the largest connected area.
+  for (int i = 0; i < h; ++i) {
+    int* label_row_ptr = labels.ptr<int>(i);
+    float* alpha_row_ptr = (*alpha_pred).ptr<float>(i);
+    for (int j = 0; j < w; ++j) {
+      if (label_row_ptr[j] != max_connected_id) alpha_row_ptr[j] = 0.f;
+    }
+  }
+}
+
+void Visualize::VisMattingAlpha(const cv::Mat& im, const MattingResult& result,
+                                cv::Mat* vis_img,
+                                bool remove_small_connected_area) {
+  // Only the alpha matte needs visualization; fgr (the foreground) is
+  // already an image in its own right.
+  FDASSERT((!im.empty()), "im can't be empty!");
+  FDASSERT((im.channels() == 3), "Only support 3 channels mat!");
+  int out_h = static_cast<int>(result.shape[0]);
+  int out_w = static_cast<int>(result.shape[1]);
+  int height = im.rows;
+  int width = im.cols;
+  // Copy alpha into a local cv::Mat so that resize and other operations
+  // do not modify the caller's data.
+  std::vector<float> alpha_copy;
+  alpha_copy.assign(result.alpha.begin(), result.alpha.end());
+  float* alpha_ptr = static_cast<float*>(alpha_copy.data());
+  cv::Mat alpha(out_h, out_w, CV_32FC1, alpha_ptr);
+  if (remove_small_connected_area) {
+    RemoveSmallConnectedArea(&alpha, 0.05f);
+  }
+  if ((out_h != height) || (out_w != width)) {
+    cv::resize(alpha, alpha, cv::Size(width, height));
+  }
+
+  int vis_h = (*vis_img).rows;
+  int vis_w = (*vis_img).cols;
+
+  if ((vis_h != height) || (vis_w != width)) {
+    // faster than resize
+    (*vis_img) = cv::Mat::zeros(height, width, CV_8UC3);
+  }
+  if ((*vis_img).type() != CV_8UC3) {
+    (*vis_img).convertTo((*vis_img), CV_8UC3);
+  }
+
+  uchar* vis_data = static_cast<uchar*>(vis_img->data);
+  uchar* im_data = static_cast<uchar*>(im.data);
+  float* alpha_data = reinterpret_cast<float*>(alpha.data);
+
+  for (int i = 0; i < height; ++i) {
+    for (int j = 0; j < width; ++j) {
+      float alpha_val = alpha_data[i * width + j];
+      vis_data[i * width * 3 + j * 3 + 0] = cv::saturate_cast<uchar>(
+          static_cast<float>(im_data[i * width * 3 + j * 3 + 0]) * alpha_val +
+          (1.f - alpha_val) * 153.f);
+      vis_data[i * width * 3 + j * 3 + 1] = cv::saturate_cast<uchar>(
+          static_cast<float>(im_data[i * width * 3 + j * 3 + 1]) * alpha_val +
+          (1.f - alpha_val) * 255.f);
+      vis_data[i * width * 3 + j * 3 + 2] = cv::saturate_cast<uchar>(
+          static_cast<float>(im_data[i * width * 3 + j * 3 + 2]) * alpha_val +
+          (1.f - alpha_val) * 120.f);
+    }
+  }
+}
+
+}  // namespace vision
+}  // namespace fastdeploy
+#endif
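For reference, the cleanup that `RemoveSmallConnectedArea` performs maps to roughly the following Python/OpenCV steps — a rough sketch under the same assumption that `alpha` is a float32 matte in [0, 1], not code from this patch:

```python
import cv2
import numpy as np

def remove_small_connected_area(alpha, threshold=0.05):
    # Binarize the matte, as convertTo + cv::threshold do above.
    binary = (alpha > threshold).astype(np.uint8) * 255
    # Opening removes isolated noise before the component analysis.
    kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (3, 3))
    binary = cv2.morphologyEx(binary, cv2.MORPH_OPEN, kernel)
    num, labels, stats, _ = cv2.connectedComponentsWithStats(
        binary, connectivity=8)
    if num <= 1:  # only the background label was found; nothing to remove
        return alpha
    # Keep the largest foreground component (label 0 is the background).
    largest = 1 + int(np.argmax(stats[1:, cv2.CC_STAT_AREA]))
    cleaned = alpha.copy()
    cleaned[labels != largest] = 0.0
    return cleaned
```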
diff --git a/csrcs/fastdeploy/vision/visualize/visualize.h b/csrcs/fastdeploy/vision/visualize/visualize.h
index f0fab5ee912..2ddb896937a 100644
--- a/csrcs/fastdeploy/vision/visualize/visualize.h
+++ b/csrcs/fastdeploy/vision/visualize/visualize.h
@@ -32,6 +32,9 @@ class FASTDEPLOY_DECL Visualize {
   static void VisSegmentation(const cv::Mat& im,
                               const SegmentationResult& result,
                               cv::Mat* vis_img, const int& num_classes = 1000);
+  static void VisMattingAlpha(const cv::Mat& im, const MattingResult& result,
+                              cv::Mat* vis_img,
+                              bool remove_small_connected_area = false);
 };
 
 }  // namespace vision
diff --git a/csrcs/fastdeploy/vision/visualize/visualize_pybind.cc b/csrcs/fastdeploy/vision/visualize/visualize_pybind.cc
index 4e12f55c8c8..a50b9de8c1b 100644
--- a/csrcs/fastdeploy/vision/visualize/visualize_pybind.cc
+++ b/csrcs/fastdeploy/vision/visualize/visualize_pybind.cc
@@ -33,13 +33,23 @@ void BindVisualize(pybind11::module& m) {
             vision::Visualize::VisFaceDetection(&im, result, line_size,
                                                 font_size);
           })
-      .def_static("vis_segmentation", [](pybind11::array& im_data,
-                                         vision::SegmentationResult& result,
-                                         pybind11::array& vis_im_data,
-                                         const int& num_classes) {
-        cv::Mat im = PyArrayToCvMat(im_data);
-        cv::Mat vis_im = PyArrayToCvMat(vis_im_data);
-        vision::Visualize::VisSegmentation(im, result, &vis_im, num_classes);
-      });
+      .def_static(
+          "vis_segmentation",
+          [](pybind11::array& im_data, vision::SegmentationResult& result,
+             pybind11::array& vis_im_data, const int& num_classes) {
+            cv::Mat im = PyArrayToCvMat(im_data);
+            cv::Mat vis_im = PyArrayToCvMat(vis_im_data);
+            vision::Visualize::VisSegmentation(im, result, &vis_im,
+                                               num_classes);
+          })
+      .def_static(
+          "vis_matting_alpha",
+          [](pybind11::array& im_data, vision::MattingResult& result,
+             pybind11::array& vis_im_data, bool remove_small_connected_area) {
+            cv::Mat im = PyArrayToCvMat(im_data);
+            cv::Mat vis_im = PyArrayToCvMat(vis_im_data);
+            vision::Visualize::VisMattingAlpha(im, result, &vis_im,
+                                               remove_small_connected_area);
+          });
 }
 
 }  // namespace fastdeploy
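The per-pixel loop in `VisMattingAlpha` computes, for every channel, `vis = im * alpha + (1 - alpha) * bg` against a fixed BGR background of (153, 255, 120). A vectorized sketch of the same blend — illustrative only, not part of the patch:

```python
import cv2
import numpy as np

def blend_with_background(im, alpha, bg_color=(153, 255, 120)):
    h, w = im.shape[:2]
    if alpha.shape[:2] != (h, w):
        # Match the matte to the image, as the C++ code resizes alpha.
        alpha = cv2.resize(alpha, (w, h))
    a = alpha.astype(np.float32)[..., None]    # h x w x 1
    bg = np.array(bg_color, dtype=np.float32)  # broadcasts over h x w x 3
    vis = im.astype(np.float32) * a + (1.0 - a) * bg
    return np.clip(vis, 0, 255).astype(np.uint8)
```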
diff --git a/csrcs/fastdeploy/vision/zhkkke/modnet.cc b/csrcs/fastdeploy/vision/zhkkke/modnet.cc
new file mode 100644
index 00000000000..2f502a36590
--- /dev/null
+++ b/csrcs/fastdeploy/vision/zhkkke/modnet.cc
@@ -0,0 +1,175 @@
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "fastdeploy/vision/zhkkke/modnet.h"
+#include "fastdeploy/utils/perf.h"
+#include "fastdeploy/vision/utils/utils.h"
+
+namespace fastdeploy {
+
+namespace vision {
+
+namespace zhkkke {
+
+MODNet::MODNet(const std::string& model_file, const std::string& params_file,
+               const RuntimeOption& custom_option,
+               const Frontend& model_format) {
+  if (model_format == Frontend::ONNX) {
+    valid_cpu_backends = {Backend::ORT};  // supported CPU backends
+    valid_gpu_backends = {Backend::ORT, Backend::TRT};  // supported GPU backends
+  } else {
+    valid_cpu_backends = {Backend::PDINFER, Backend::ORT};
+    valid_gpu_backends = {Backend::PDINFER, Backend::ORT, Backend::TRT};
+  }
+  runtime_option = custom_option;
+  runtime_option.model_format = model_format;
+  runtime_option.model_file = model_file;
+  runtime_option.params_file = params_file;
+  initialized = Initialize();
+}
+
+bool MODNet::Initialize() {
+  // parameters for preprocess
+  size = {256, 256};
+  alpha = {1.f / 127.5f, 1.f / 127.5f, 1.f / 127.5f};
+  beta = {-1.f, -1.f, -1.f};  // RGB
+  swap_rb = true;
+
+  if (!InitRuntime()) {
+    FDERROR << "Failed to initialize fastdeploy backend." << std::endl;
+    return false;
+  }
+  return true;
+}
+
+bool MODNet::Preprocess(Mat* mat, FDTensor* output,
+                        std::map<std::string, std::array<int, 2>>* im_info) {
+  // 1. Resize
+  // 2. BGR2RGB
+  // 3. Convert(opencv style) or Normalize
+  // 4. HWC2CHW
+  int resize_w = size[0];
+  int resize_h = size[1];
+  if (resize_h != mat->Height() || resize_w != mat->Width()) {
+    Resize::Run(mat, resize_w, resize_h);
+  }
+  if (swap_rb) {
+    BGR2RGB::Run(mat);
+  }
+
+  Convert::Run(mat, alpha, beta);
+  // Record output shape of preprocessed image
+  (*im_info)["output_shape"] = {mat->Height(), mat->Width()};
+
+  HWC2CHW::Run(mat);
+  Cast::Run(mat, "float");
+
+  mat->ShareWithTensor(output);
+  output->shape.insert(output->shape.begin(), 1);  // reshape to n, c, h, w
+  return true;
+}
+
+bool MODNet::Postprocess(
+    std::vector<FDTensor>& infer_result, MattingResult* result,
+    const std::map<std::string, std::array<int, 2>>& im_info) {
+  FDASSERT((infer_result.size() == 1),
+           "The number of output tensors must be 1 for MODNet.");
+  FDTensor& alpha_tensor = infer_result.at(0);  // (1,h,w,1)
+  FDASSERT((alpha_tensor.shape[0] == 1), "Only support batch = 1 now.");
+  if (alpha_tensor.dtype != FDDataType::FP32) {
+    FDERROR << "Only support post process with float32 data." << std::endl;
+    return false;
+  }
+
+  // First fetch the alpha matte and resize it (via OpenCV).
+  auto iter_ipt = im_info.find("input_shape");
+  auto iter_out = im_info.find("output_shape");
+  FDASSERT(iter_out != im_info.end() && iter_ipt != im_info.end(),
+           "Cannot find input_shape or output_shape from im_info.");
+  int out_h = iter_out->second[0];
+  int out_w = iter_out->second[1];
+  int ipt_h = iter_ipt->second[0];
+  int ipt_w = iter_ipt->second[1];
+
+  // TODO: rewrite with FDTensor or Mat operations; this currently relies
+  // on cv::Mat.
+  float* alpha_ptr = static_cast<float*>(alpha_tensor.Data());
+  cv::Mat alpha_zero_copy_ref(out_h, out_w, CV_32FC1, alpha_ptr);
+  Mat alpha_resized(alpha_zero_copy_ref);  // ref-only, zero copy.
+  if ((out_h != ipt_h) || (out_w != ipt_w)) {
+    // already allocated a new continuous memory after resize.
+    // cv::resize(alpha_resized, alpha_resized, cv::Size(ipt_w, ipt_h));
+    Resize::Run(&alpha_resized, ipt_w, ipt_h, -1, -1);
+  }
+
+  result->Clear();
+  // note: shape must be set up before calling Resize.
+  result->contain_foreground = false;
+  // alpha at the resolution of the original input image
+  result->shape = {static_cast<int64_t>(ipt_h), static_cast<int64_t>(ipt_w)};
+  int numel = ipt_h * ipt_w;
+  int nbytes = numel * sizeof(float);
+  result->Resize(numel);
+  std::memcpy(result->alpha.data(), alpha_resized.GetCpuMat()->data, nbytes);
+  return true;
+}
+
+bool MODNet::Predict(cv::Mat* im, MattingResult* result) {
+#ifdef FASTDEPLOY_DEBUG
+  TIMERECORD_START(0)
+#endif
+
+  Mat mat(*im);
+  std::vector<FDTensor> input_tensors(1);
+
+  std::map<std::string, std::array<int, 2>> im_info;
+  // Record the shape of image and the shape of preprocessed image
+  im_info["input_shape"] = {mat.Height(), mat.Width()};
+  im_info["output_shape"] = {mat.Height(), mat.Width()};
+
+  if (!Preprocess(&mat, &input_tensors[0], &im_info)) {
+    FDERROR << "Failed to preprocess input image." << std::endl;
+    return false;
+  }
+
+#ifdef FASTDEPLOY_DEBUG
+  TIMERECORD_END(0, "Preprocess")
+  TIMERECORD_START(1)
+#endif
+
+  input_tensors[0].name = InputInfoOfRuntime(0).name;
+  std::vector<FDTensor> output_tensors;
+  if (!Infer(input_tensors, &output_tensors)) {
+    FDERROR << "Failed to inference." << std::endl;
+    return false;
+  }
+#ifdef FASTDEPLOY_DEBUG
+  TIMERECORD_END(1, "Inference")
+  TIMERECORD_START(2)
+#endif
+
+  if (!Postprocess(output_tensors, result, im_info)) {
+    FDERROR << "Failed to post process." << std::endl;
+    return false;
+  }
+
+#ifdef FASTDEPLOY_DEBUG
+  TIMERECORD_END(2, "Postprocess")
+#endif
+  return true;
+}
+
+}  // namespace zhkkke
+}  // namespace vision
+}  // namespace fastdeploy
\ No newline at end of file
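`Preprocess` above amounts to the following numpy/OpenCV pipeline — a sketch of the math only, not the FastDeploy operator path it actually runs: resize to `size`, swap B and R, normalize with x' = x * (1/127.5) - 1, then HWC to NCHW float32:

```python
import cv2
import numpy as np

def preprocess(im_bgr, size=(256, 256)):
    x = cv2.resize(im_bgr, size)               # 1. resize to (w, h)
    x = cv2.cvtColor(x, cv2.COLOR_BGR2RGB)     # 2. BGR -> RGB (swap_rb)
    x = x.astype(np.float32) / 127.5 - 1.0     # 3. normalize to [-1, 1]
    x = np.transpose(x, (2, 0, 1))[None, ...]  # 4. HWC -> 1 x C x H x W
    return x
```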
diff --git a/csrcs/fastdeploy/vision/zhkkke/modnet.h b/csrcs/fastdeploy/vision/zhkkke/modnet.h
new file mode 100644
index 00000000000..e9631098bb4
--- /dev/null
+++ b/csrcs/fastdeploy/vision/zhkkke/modnet.h
@@ -0,0 +1,70 @@
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+#include "fastdeploy/fastdeploy_model.h"
+#include "fastdeploy/vision/common/processors/transform.h"
+#include "fastdeploy/vision/common/result.h"
+
+namespace fastdeploy {
+
+namespace vision {
+
+namespace zhkkke {
+
+class FASTDEPLOY_DECL MODNet : public FastDeployModel {
+ public:
+  // When model_format is ONNX, params_file is not needed;
+  // when model_format is Paddle, both model_file and params_file are required.
+  MODNet(const std::string& model_file, const std::string& params_file = "",
+         const RuntimeOption& custom_option = RuntimeOption(),
+         const Frontend& model_format = Frontend::ONNX);
+
+  // Name of the model
+  std::string ModelName() const { return "ZHKKKe/MODNet"; }
+
+  // The attributes below may be modified by users.
+  // tuple of (width, height), default (256, 256)
+  std::vector<int> size;
+  // normalization alpha and beta: x' = x * alpha + beta
+  std::vector<float> alpha;
+  std::vector<float> beta;
+  // whether to swap the B and R channels, such as BGR->RGB; default true.
+  bool swap_rb;
+
+  // Prediction interface, i.e. the entry point users call.
+  // im: input image; currently defined as cv::Mat for all CV models.
+  // result: output struct filled with the prediction.
+  bool Predict(cv::Mat* im, MattingResult* result);
+
+ private:
+  // Initialization, covering the backend and any other setup
+  // required before inference.
+  bool Initialize();
+
+  // Preprocess the input image.
+  // Mat is the data structure defined by FastDeploy;
+  // FDTensor holds the preprocessed data passed to the backend.
+  bool Preprocess(Mat* mat, FDTensor* output,
+                  std::map<std::string, std::array<int, 2>>* im_info);
+
+  // Postprocess the backend inference output for users.
+  // infer_result: output tensors produced by the backend.
+  // result: the final prediction.
+  bool Postprocess(std::vector<FDTensor>& infer_result, MattingResult* result,
+                   const std::map<std::string, std::array<int, 2>>& im_info);
+};
+
+}  // namespace zhkkke
+}  // namespace vision
+}  // namespace fastdeploy
diff --git a/csrcs/fastdeploy/vision/zhkkke/zhkkke_pybind.cc b/csrcs/fastdeploy/vision/zhkkke/zhkkke_pybind.cc
new file mode 100644
index 00000000000..e884a8ee071
--- /dev/null
+++ b/csrcs/fastdeploy/vision/zhkkke/zhkkke_pybind.cc
@@ -0,0 +1,37 @@
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+ +#include "fastdeploy/pybind/main.h" + +namespace fastdeploy { +void BindZHKKKe(pybind11::module& m) { + auto zhkkke_module = m.def_submodule("zhkkke", "https://github.com/ZHKKKe"); + // Bind MODNet + pybind11::class_(zhkkke_module, + "MODNet") + .def(pybind11::init()) + .def("predict", + [](vision::zhkkke::MODNet& self, pybind11::array& data) { + auto mat = PyArrayToCvMat(data); + vision::MattingResult res; + self.Predict(&mat, &res); + return res; + }) + .def_readwrite("size", &vision::zhkkke::MODNet::size) + .def_readwrite("alpha", &vision::zhkkke::MODNet::alpha) + .def_readwrite("beta", &vision::zhkkke::MODNet::beta) + .def_readwrite("swap_rb", &vision::zhkkke::MODNet::swap_rb); +} + +} // namespace fastdeploy diff --git a/examples/vision/zhkkke_modnet.cc b/examples/vision/zhkkke_modnet.cc new file mode 100644 index 00000000000..ee9f8df2571 --- /dev/null +++ b/examples/vision/zhkkke_modnet.cc @@ -0,0 +1,58 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "fastdeploy/vision.h" + +int main() { + namespace vis = fastdeploy::vision; + + std::string model_file = + "../resources/models/modnet_photographic_portrait_matting.onnx"; + std::string img_path = "../resources/images/matting_1.jpg"; + std::string vis_path = "../resources/outputs/zhkkke_modnet_vis_result.jpg"; + + auto model = vis::zhkkke::MODNet(model_file); + if (!model.Initialized()) { + std::cerr << "Init Failed! Model: " << model_file << std::endl; + return -1; + } else { + std::cout << "Init Done! Model:" << model_file << std::endl; + } + model.EnableDebug(); + + // 设置推理size, 必须和模型文件支持的 + model.size = {256, 256}; + + cv::Mat im = cv::imread(img_path); + cv::Mat im_old = im.clone(); + cv::Mat vis_im = im.clone(); + + vis::MattingResult res; + if (!model.Predict(&im, &res)) { + std::cerr << "Prediction Failed." << std::endl; + return -1; + } + std::cout << "Prediction Done!" << std::endl; + + // 输出预测结果 + std::cout << res.Str() << std::endl; + + // 可视化预测结果 + bool remove_small_connected_area = true; + vis::Visualize::VisMattingAlpha(im_old, res, &vis_im, + remove_small_connected_area); + cv::imwrite(vis_path, vis_im); + std::cout << "Detect Done! Saved: " << vis_path << std::endl; + return 0; +} diff --git a/fastdeploy/vision/__init__.py b/fastdeploy/vision/__init__.py index 223b7b1a886..fcf75c293a1 100644 --- a/fastdeploy/vision/__init__.py +++ b/fastdeploy/vision/__init__.py @@ -28,3 +28,4 @@ from . import biubug6 from . import ppogg from . import deepinsight +from . 
diff --git a/fastdeploy/vision/visualize/__init__.py b/fastdeploy/vision/visualize/__init__.py
index 9fac0ab9abb..65607308d26 100644
--- a/fastdeploy/vision/visualize/__init__.py
+++ b/fastdeploy/vision/visualize/__init__.py
@@ -29,3 +29,11 @@ def vis_face_detection(im_data, face_det_result, line_size=1, font_size=0.5):
 def vis_segmentation(im_data, seg_result, vis_im_data, num_classes=1000):
     C.vision.Visualize.vis_segmentation(im_data, seg_result, vis_im_data,
                                         num_classes)
+
+
+def vis_matting_alpha(im_data,
+                      matting_result,
+                      vis_im_data,
+                      remove_small_connected_area=False):
+    C.vision.Visualize.vis_matting_alpha(im_data, matting_result, vis_im_data,
+                                         remove_small_connected_area)
diff --git a/fastdeploy/vision/zhkkke/__init__.py b/fastdeploy/vision/zhkkke/__init__.py
new file mode 100644
index 00000000000..b301777c589
--- /dev/null
+++ b/fastdeploy/vision/zhkkke/__init__.py
@@ -0,0 +1,88 @@
+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import absolute_import
+import logging
+from ... import FastDeployModel, Frontend
+from ... import c_lib_wrap as C
+
+
+class MODNet(FastDeployModel):
+    def __init__(self,
+                 model_file,
+                 params_file="",
+                 runtime_option=None,
+                 model_format=Frontend.ONNX):
+        # Call the base class to initialize the backend option;
+        # the initialized option is stored in self._runtime_option.
+        super(MODNet, self).__init__(runtime_option)
+
+        self._model = C.vision.zhkkke.MODNet(
+            model_file, params_file, self._runtime_option, model_format)
+        # self.initialized indicates whether the model initialized successfully.
+        assert self.initialized, "MODNet initialize failed."
+
+    def predict(self, input_image):
+        return self._model.predict(input_image)
+
+    # Model-specific attribute wrappers, mostly preprocessing-related.
+    # For example, model.size = [256, 256] changes the resize target used
+    # during preprocessing (provided the model supports it).
+    @property
+    def size(self):
+        return self._model.size
+
+    @property
+    def alpha(self):
+        return self._model.alpha
+
+    @property
+    def beta(self):
+        return self._model.beta
+
+    @property
+    def swap_rb(self):
+        return self._model.swap_rb
+
+    @size.setter
+    def size(self, wh):
+        assert isinstance(wh, (list, tuple)),\
+            "The value to set `size` must be type of tuple or list."
+        assert len(wh) == 2,\
+            "The value to set `size` must contain 2 elements meaning [width, height], but now it contains {} elements.".format(
+                len(wh))
+        self._model.size = wh
+
+    @alpha.setter
+    def alpha(self, value):
+        assert isinstance(value, (list, tuple)),\
+            "The value to set `alpha` must be type of tuple or list."
+        assert len(value) == 3,\
+            "The value to set `alpha` must contain 3 elements, one per channel, but now it contains {} elements.".format(
+                len(value))
+        self._model.alpha = value
+
+    @beta.setter
+    def beta(self, value):
+        assert isinstance(value, (list, tuple)),\
+            "The value to set `beta` must be type of tuple or list."
+        assert len(value) == 3,\
+            "The value to set `beta` must contain 3 elements, one per channel, but now it contains {} elements.".format(
+                len(value))
+        self._model.beta = value
+
+    @swap_rb.setter
+    def swap_rb(self, value):
+        assert isinstance(
+            value, bool), "The value to set `swap_rb` must be type of bool."
+        self._model.swap_rb = value
diff --git a/model_zoo/vision/arcface/README.md b/model_zoo/vision/arcface/README.md
index d2932cc1bcf..478b695c1cb 100644
--- a/model_zoo/vision/arcface/README.md
+++ b/model_zoo/vision/arcface/README.md
@@ -31,7 +31,7 @@ fastdeploy supports [insightface](https://github.com/deepinsight/insightface/tree/m
 
 * Export the ONNX file
   ```bash
-  PYTHONPATH=. python ./torch2onnx.py partial_fc/pytorch/ms1mv3_arcface_r100_fp16/backbone.pth --output ms1mv3_arcface_r100.onnx --network r100 --simplify 1
+  PYTHONPATH=. python ./torch2onnx.py ms1mv3_arcface_r100_fp16/backbone.pth --output ms1mv3_arcface_r100.onnx --network r100 --simplify 1
   ```
 * Move the onnx file into the model_zoo/arcface directory
   ```bash
diff --git a/model_zoo/vision/arcface/cpp/README.md b/model_zoo/vision/arcface/cpp/README.md
index 71b58912741..bb7145d328c 100644
--- a/model_zoo/vision/arcface/cpp/README.md
+++ b/model_zoo/vision/arcface/cpp/README.md
@@ -30,7 +30,7 @@ fastdeploy supports [insightface](https://github.com/deepinsight/insightface/tree/m
 
 * Export the ONNX file
   ```bash
-  PYTHONPATH=. python ./torch2onnx.py partial_fc/pytorch/ms1mv3_arcface_r100_fp16/backbone.pth --output ms1mv3_arcface_r100.onnx --network r100 --simplify 1
+  PYTHONPATH=. python ./torch2onnx.py ms1mv3_arcface_r100_fp16/backbone.pth --output ms1mv3_arcface_r100.onnx --network r100 --simplify 1
   ```
 * Move the onnx file into the model_zoo/arcface directory
   ```bash
diff --git a/model_zoo/vision/modnet/README.md b/model_zoo/vision/modnet/README.md
new file mode 100644
index 00000000000..90b3fafdc15
--- /dev/null
+++ b/model_zoo/vision/modnet/README.md
@@ -0,0 +1,67 @@
+# MODNet Deployment Example
+
+## 0. Introduction
+The currently supported model version is [MODNet CommitID:28165a4](https://github.com/ZHKKKe/MODNet/commit/28165a4).
+
+This document explains how to quickly deploy [MODNet](https://github.com/ZHKKKe/MODNet) for inference. The directory is organized as follows:
+
+```
+.
+├── cpp                 # C++ code directory
+│   ├── CMakeLists.txt  # CMakeLists file for building the C++ code
+│   ├── README.md       # C++ build and deployment guide
+│   └── modnet.cc       # C++ example code
+├── api.md              # API reference
+├── README.md           # MODNet deployment guide
+└── modnet.py           # Python example code
+```
+
+## 1. Get the ONNX file
+
+Visit the official [MODNet](https://github.com/ZHKKKe/MODNet) GitHub repository, follow its instructions to download and install the project and the model weights, then use `onnx/export_onnx.py` to produce an `onnx` file.
+
+* Export the ONNX file
+  ```bash
+  python -m onnx.export_onnx \
+      --ckpt-path=pretrained/modnet_photographic_portrait_matting.ckpt \
+      --output-path=pretrained/modnet_photographic_portrait_matting.onnx
+  ```
+* Move the ONNX file into the model_zoo/modnet directory
+  ```bash
+  cp PATH/TO/modnet_photographic_portrait_matting.onnx PATH/TO/model_zoo/vision/modnet/
+  ```
+
+
+## 2. Prepare a test image
+Prepare one test image containing only a portrait, name it matting_1.jpg, and copy it next to the executable, e.g.
+```bash
+matting_1.jpg
+```
+
+## 3. Install FastDeploy
+
+Install FastDeploy with the commands below. Note that `vision-cpu` is installed here; install `vision-gpu` instead if needed.
+```bash
+# Install the fastdeploy-python tool
+pip install fastdeploy-python
+
+# Install the vision-cpu module
+fastdeploy install vision-cpu
+```
+
+## 4. Python deployment
+
+With the MODNet model and the test image in place, run
+```bash
+python modnet.py
+```
+
+After it finishes, the prediction result is printed as follows, and the visualized result is saved to `vis_result.jpg`:
+```
+MattingResult[Foreground(false), Alpha(Numel(65536), Shape(256,256), Min(0.000000), Max(1.000000), Mean(0.464415))]
+```
+
+## 5. Other documents
+
+- [C++ deployment](./cpp/README.md)
+- [MODNet API reference](./api.md)
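Beyond the printed summary, the raw matte is easy to inspect directly. A sketch — assuming the same model and image files prepared in the README above; the output filename is illustrative — that saves `result.alpha` as a grayscale image:

```python
import cv2
import numpy as np
import fastdeploy as fd

model = fd.vision.zhkkke.MODNet("modnet_photographic_portrait_matting.onnx")
im = cv2.imread("./matting_1.jpg")
result = model.predict(im)

# result.alpha is flat; result.shape carries (h, w).
h, w = int(result.shape[0]), int(result.shape[1])
alpha = np.array(result.alpha, dtype=np.float32).reshape(h, w)
cv2.imwrite("alpha_matte.jpg", (alpha * 255).astype(np.uint8))
```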
diff --git a/model_zoo/vision/modnet/api.md b/model_zoo/vision/modnet/api.md
new file mode 100644
index 00000000000..164ef099e3a
--- /dev/null
+++ b/model_zoo/vision/modnet/api.md
@@ -0,0 +1,72 @@
+# MODNet API Reference
+
+## 1. Python API
+
+### 1.1 MODNet class
+
+#### 1.1.1 Initialization
+```python
+fastdeploy.vision.zhkkke.MODNet(model_file, params_file="", runtime_option=None, model_format=fd.Frontend.ONNX)
+```
+Loads and initializes a MODNet model. When model_format is `fd.Frontend.ONNX`, only model_file (e.g. `xxx.onnx`) is required; when model_format is `fd.Frontend.PADDLE`, both model_file and params_file are required.
+
+**Parameters**
+
+> * **model_file**(str): path to the model file
+> * **params_file**(str): path to the parameters file
+> * **runtime_option**(RuntimeOption): backend inference options; None means the default configuration is used
+> * **model_format**(Frontend): model format
+
+#### 1.1.2 predict function
+> ```python
+> MODNet.predict(image_data)
+> ```
+> Prediction interface: takes an image and directly returns the matting result.
+>
+> **Parameters**
+>
+> > * **image_data**(np.ndarray): input data; note it must be in HWC, BGR format
+
+See [modnet.py](./modnet.py) for example code.
+
+
+## 2. C++ API
+
+### 2.1 MODNet class
+#### 2.1.1 Initialization
+```C++
+fastdeploy::vision::zhkkke::MODNet(
+    const std::string& model_file,
+    const std::string& params_file = "",
+    const RuntimeOption& runtime_option = RuntimeOption(),
+    const Frontend& model_format = Frontend::ONNX)
+```
+Loads and initializes a MODNet model. When model_format is `Frontend::ONNX`, only model_file (e.g. `xxx.onnx`) is required; when model_format is `Frontend::PADDLE`, both model_file and params_file are required.
+
+**Parameters**
+
+> * **model_file**(str): path to the model file
+> * **params_file**(str): path to the parameters file
+> * **runtime_option**(RuntimeOption): backend inference options; the default value uses the default configuration
+> * **model_format**(Frontend): model format
+
+#### 2.1.2 Predict function
+> ```C++
+> MODNet::Predict(cv::Mat* im, MattingResult* result)
+> ```
+> Prediction interface: takes an image and directly returns the matting result.
+>
+> **Parameters**
+>
+> > * **im**: input image; note it must be in HWC, BGR format
+> > * **result**: matting result, with the following members
+> >   * alpha: std::vector\<float\>, the alpha matte
+> >   * contain_foreground: bool, whether the output contains a predicted foreground
+> >   * foreground: std::vector\<float\>, the predicted foreground if the model provides one
+> >   * shape: std::vector\<int64_t\>, the dimensions of the output alpha, (h,w); if a foreground is included, the shape is (h,w,c), where c is the number of foreground channels, usually c=3
+
+See [cpp/modnet.cc](cpp/modnet.cc) for example code.
+
+## 3. Other APIs
+
+- [RuntimeOption configuration for model deployment](../../../docs/api/runtime_option.md)
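Putting the Python API documented above together, a minimal end-to-end call might look as follows — a sketch assuming the model and test image from the README are present:

```python
import cv2
import fastdeploy as fd

model = fd.vision.zhkkke.MODNet("modnet_photographic_portrait_matting.onnx")
im = cv2.imread("matting_1.jpg")   # HWC, BGR, as the API requires
result = model.predict(im)         # returns a MattingResult
print(result.contain_foreground)   # False for this export
print(result.shape)                # [h, w] of the returned alpha matte
```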
diff --git a/model_zoo/vision/modnet/cpp/CMakeLists.txt b/model_zoo/vision/modnet/cpp/CMakeLists.txt
new file mode 100644
index 00000000000..9e500debefe
--- /dev/null
+++ b/model_zoo/vision/modnet/cpp/CMakeLists.txt
@@ -0,0 +1,17 @@
+PROJECT(modnet_demo C CXX)
+CMAKE_MINIMUM_REQUIRED(VERSION 3.16)
+
+# On toolchains with the old C++ ABI, enable the line below for a
+# compatibility build.
+# add_definitions(-D_GLIBCXX_USE_CXX11_ABI=0)
+
+# Path to the downloaded and extracted fastdeploy library
+set(FASTDEPLOY_INSTALL_DIR ${PROJECT_SOURCE_DIR}/fastdeploy-linux-x64-0.3.0/)
+
+include(${FASTDEPLOY_INSTALL_DIR}/FastDeploy.cmake)
+
+# Add the FastDeploy header directories
+include_directories(${FASTDEPLOY_INCS})
+
+add_executable(modnet_demo ${PROJECT_SOURCE_DIR}/modnet.cc)
+# Link against the FastDeploy libraries
+target_link_libraries(modnet_demo ${FASTDEPLOY_LIBS})
diff --git a/model_zoo/vision/modnet/cpp/README.md b/model_zoo/vision/modnet/cpp/README.md
new file mode 100644
index 00000000000..f2b2e494990
--- /dev/null
+++ b/model_zoo/vision/modnet/cpp/README.md
@@ -0,0 +1,49 @@
+# Building the MODNet Example
+
+## 0. Introduction
+The currently supported model version is [MODNet CommitID:28165a4](https://github.com/ZHKKKe/MODNet/commit/28165a4).
+
+## 1. Download and extract the inference library
+```bash
+wget https://bj.bcebos.com/paddle2onnx/fastdeploy/fastdeploy-linux-x64-0.3.0.tgz
+tar xvf fastdeploy-linux-x64-0.3.0.tgz
+```
+
+## 2. Build the example code
+```bash
+mkdir build && cd build
+cmake ..
+make -j
+```
+
+## 3. Get the ONNX file
+
+Visit the official [MODNet](https://github.com/ZHKKKe/MODNet) GitHub repository, follow its instructions to download and install the project and the model weights, then use `onnx/export_onnx.py` to produce an `onnx` file.
+
+* Export the ONNX file
+  ```bash
+  python -m onnx.export_onnx \
+      --ckpt-path=pretrained/modnet_photographic_portrait_matting.ckpt \
+      --output-path=pretrained/modnet_photographic_portrait_matting.onnx
+  ```
+* Move the ONNX file into the model_zoo/modnet directory
+  ```bash
+  cp PATH/TO/modnet_photographic_portrait_matting.onnx PATH/TO/model_zoo/vision/modnet/
+  ```
+
+
+## 4. Prepare a test image
+Prepare one test image containing only a portrait, name it matting_1.jpg, and copy it next to the executable, e.g.
+```bash
+matting_1.jpg
+```
+
+## 5. Run
+```bash
+./modnet_demo
+```
+
+After it finishes, the prediction result is printed as follows, and the visualized result is saved to `vis_result.jpg`:
+```
+MattingResult[Foreground(false), Alpha(Numel(65536), Shape(256,256), Min(0.000000), Max(1.000000), Mean(0.464415))]
+```
diff --git a/model_zoo/vision/modnet/cpp/modnet.cc b/model_zoo/vision/modnet/cpp/modnet.cc
new file mode 100644
index 00000000000..b89b2d1dc0b
--- /dev/null
+++ b/model_zoo/vision/modnet/cpp/modnet.cc
@@ -0,0 +1,57 @@
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "fastdeploy/vision.h"
+
+int main() {
+  namespace vis = fastdeploy::vision;
+
+  std::string model_file = "./modnet_photographic_portrait_matting.onnx";
+  std::string img_path = "./matting_1.jpg";
+  std::string vis_path = "./vis_result.jpg";
+
+  auto model = vis::zhkkke::MODNet(model_file);
+  if (!model.Initialized()) {
+    std::cerr << "Init Failed! Model: " << model_file << std::endl;
+    return -1;
+  } else {
+    std::cout << "Init Done! Model: " << model_file << std::endl;
+  }
+  model.EnableDebug();
+
+  // Set the inference size; it must match the model file.
+  model.size = {256, 256};
+
+  cv::Mat im = cv::imread(img_path);
+  cv::Mat im_old = im.clone();
+  cv::Mat vis_im = im.clone();
+
+  vis::MattingResult res;
+  if (!model.Predict(&im, &res)) {
+    std::cerr << "Prediction Failed." << std::endl;
+    return -1;
+  }
+  std::cout << "Prediction Done!" << std::endl;
+
+  // Print the prediction result.
+  std::cout << res.Str() << std::endl;
+
+  // Visualize the prediction result.
+  bool remove_small_connected_area = true;
+  vis::Visualize::VisMattingAlpha(im_old, res, &vis_im,
+                                  remove_small_connected_area);
+  cv::imwrite(vis_path, vis_im);
+  std::cout << "Matting Done! Saved: " << vis_path << std::endl;
+  return 0;
+}
diff --git a/model_zoo/vision/modnet/modnet.py b/model_zoo/vision/modnet/modnet.py
new file mode 100644
index 00000000000..538a667bdf5
--- /dev/null
+++ b/model_zoo/vision/modnet/modnet.py
@@ -0,0 +1,22 @@
+import fastdeploy as fd
+import cv2
+
+# Load the model
+model = fd.vision.zhkkke.MODNet("modnet_photographic_portrait_matting.onnx")
+
+# Set the model input size
+model.size = (256, 256)
+
+# Predict on an image
+im = cv2.imread("./matting_1.jpg")
+im_old = im.copy()
+vis_im = im.copy()
+
+result = model.predict(im)
+# Visualize the result
+fd.vision.visualize.vis_matting_alpha(im_old, result, vis_im, False)
+cv2.imwrite("vis_result.jpg", vis_im)
+
+# Print the prediction result
+print(result)
+print(model.runtime_option)
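As a closing illustration of what the predicted matte enables, a sketch that composites the portrait onto a plain white background — same file assumptions as `modnet.py` above, with an illustrative output filename:

```python
import cv2
import numpy as np
import fastdeploy as fd

model = fd.vision.zhkkke.MODNet("modnet_photographic_portrait_matting.onnx")
im = cv2.imread("./matting_1.jpg")
result = model.predict(im)

h, w = int(result.shape[0]), int(result.shape[1])
alpha = np.array(result.alpha, dtype=np.float32).reshape(h, w, 1)
bg = np.full_like(im, 255)  # plain white background
comp = im.astype(np.float32) * alpha + bg.astype(np.float32) * (1.0 - alpha)
cv2.imwrite("composite.jpg", comp.astype(np.uint8))
```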