diff --git a/FastDeploy.cmake.in b/FastDeploy.cmake.in index 082fa30f30b..ccd1039be29 100644 --- a/FastDeploy.cmake.in +++ b/FastDeploy.cmake.in @@ -95,6 +95,10 @@ endif() if (ENABLE_TEXT) # Add dependency libs later + find_library(FASTER_TOKENIZER_LIB core_tokenizers ${CMAKE_CURRENT_LIST_DIR}/third_libs/install/faster_tokenizer/lib NO_DEFAULT_PATH) + list(APPEND FASTDEPLOY_LIBS ${FASTER_TOKENIZER_LIB}) + list(APPEND FASTDEPLOY_INCS ${CMAKE_CURRENT_LIST_DIR}/third_libs/install/faster_tokenizer/include) + list(APPEND FASTDEPLOY_INCS ${CMAKE_CURRENT_LIST_DIR}/third_libs/install/faster_tokenizer/third_party/include) endif() if(ENABLE_PADDLE_FRONTEND) diff --git a/csrc/fastdeploy/core/config.h.in b/csrc/fastdeploy/core/config.h.in index 7713925867a..b29113f1fdb 100644 --- a/csrc/fastdeploy/core/config.h.in +++ b/csrc/fastdeploy/core/config.h.in @@ -45,6 +45,10 @@ #cmakedefine ENABLE_VISION #endif +#ifndef ENABLE_TEXT +#cmakedefine ENABLE_TEXT +#endif + #ifndef ENABLE_OPENCV_CUDA #cmakedefine ENABLE_OPENCV_CUDA #endif @@ -52,3 +56,7 @@ #ifndef ENABLE_VISION_VISUALIZE #cmakedefine ENABLE_VISION_VISUALIZE #endif + +#ifndef ENABLE_FDTENSOR_FUNC +#cmakedefine ENABLE_FDTENSOR_FUNC +#endif diff --git a/docs/api/vision_results/README.md b/docs/api/vision_results/README.md index 844388cca86..64ea4fc671b 100644 --- a/docs/api/vision_results/README.md +++ b/docs/api/vision_results/README.md @@ -6,5 +6,6 @@ FastDeploy根据视觉模型的任务类型,定义了不同的结构体(`csrcs | :----- | :--- | :---- | :------- | | ClassificationResult | [C++/Python文档](./classification_result.md) | 图像分类返回结果 | ResNet50、MobileNetV3等 | | DetectionResult | [C++/Python文档](./detection_result.md) | 目标检测返回结果 | PPYOLOE、YOLOv7系列模型等 | -| FaceDetectionResult | [C++/Python文档](./face_detection_result.md) | 目标检测返回结果 | PPYOLOE、YOLOv7系列模型等 | -| MattingResult | [C++/Python文档](./matting_result.md) | 目标检测返回结果 | PPYOLOE、YOLOv7系列模型等 | +| FaceDetectionResult | [C++/Python文档](./face_detection_result.md) | 目标检测返回结果 | SCRFD、RetinaFace系列模型等 | +| 
FaceRecognitionResult | [C++/Python文档](./face_recognition_result.md) | 人脸识别返回结果 | ArcFace、CosFace系列模型等 | +| MattingResult | [C++/Python文档](./matting_result.md) | 抠图返回结果 | MODNet系列模型等 | diff --git a/docs/api/vision_results/classification_result.md b/docs/api/vision_results/classification_result.md index 113db39608a..bf94d0ff159 100644 --- a/docs/api/vision_results/classification_result.md +++ b/docs/api/vision_results/classification_result.md @@ -2,7 +2,7 @@ ClassifyResult代码定义在`csrcs/fastdeploy/vision/common/result.h`中,用于表明图像的分类结果和置信度。 -## C++ 结构体 +## C++ 定义 `fastdeploy::vision::ClassifyResult` @@ -20,7 +20,7 @@ struct ClassifyResult { - **Clear()**: 成员函数,用于清除结构体中存储的结果 - **Str()**: 成员函数,将结构体中的信息以字符串形式输出(用于Debug) -## Python结构体 +## Python 定义 `fastdeploy.vision.ClassifyResult` diff --git a/docs/api/vision_results/detection_result.md b/docs/api/vision_results/detection_result.md index e44a27b34c3..a702d49899f 100644 --- a/docs/api/vision_results/detection_result.md +++ b/docs/api/vision_results/detection_result.md @@ -2,7 +2,7 @@ DetectionResult代码定义在`csrcs/fastdeploy/vision/common/result.h`中,用于表明图像检测出来的目标框、目标类别和目标置信度。 -## C++ 结构体 +## C++ 定义 `fastdeploy::vision::DetectionResult` @@ -22,10 +22,10 @@ struct DetectionResult { - **Clear()**: 成员函数,用于清除结构体中存储的结果 - **Str()**: 成员函数,将结构体中的信息以字符串形式输出(用于Debug) -## Python结构体 +## Python 定义 `fastdeploy.vision.DetectionResult` - **boxes**(list of list(float)): 成员变量,表示单张图片检测出来的所有目标框坐标。boxes是一个list,其每个元素为一个长度为4的list, 表示为一个框,每个框以4个float数值依次表示xmin, ymin, xmax, ymax, 即左上角和右下角坐标 - **scores**(list of float): 成员变量,表示单张图片检测出来的所有目标置信度 -- **label_ids(list of int): 成员变量,表示单张图片检测出来的所有目标类别 +- **label_ids**(list of int): 成员变量,表示单张图片检测出来的所有目标类别 diff --git a/docs/api/vision_results/face_detection_result.md b/docs/api/vision_results/face_detection_result.md index 6c9c09f0073..000b42a6be0 100644 --- a/docs/api/vision_results/face_detection_result.md +++ b/docs/api/vision_results/face_detection_result.md @@ -1,8 +1,8 @@ # FaceDetectionResult 人脸检测结果 
-FaceDetectionResult 代码定义在`csrcs/fastdeploy/vision/common/result.h`中,用于表明图像检测出来的目标框、目标类别和目标置信度。 +FaceDetectionResult 代码定义在`csrcs/fastdeploy/vision/common/result.h`中,用于表明人脸检测出来的目标框、人脸landmarks,目标置信度和每张人脸的landmark数量。 -## C++ 结构体 +## C++ 定义 `fastdeploy::vision::FaceDetectionResult` @@ -11,7 +11,6 @@ struct FaceDetectionResult { std::vector<std::array<float, 4>> boxes; std::vector<std::array<float, 2>> landmarks; std::vector<float> scores; - ResultType type = ResultType::FACE_DETECTION; int landmarks_per_face; void Clear(); std::string Str(); @@ -25,10 +24,11 @@ struct FaceDetectionResult { - **Clear()**: 成员函数,用于清除结构体中存储的结果 - **Str()**: 成员函数,将结构体中的信息以字符串形式输出(用于Debug) -## Python结构体 +## Python 定义 `fastdeploy.vision.FaceDetectionResult` - **boxes**(list of list(float)): 成员变量,表示单张图片检测出来的所有目标框坐标。boxes是一个list,其每个元素为一个长度为4的list, 表示为一个框,每个框以4个float数值依次表示xmin, ymin, xmax, ymax, 即左上角和右下角坐标 - **scores**(list of float): 成员变量,表示单张图片检测出来的所有目标置信度 - **landmarks**: 成员变量,表示单张图片检测出来的所有人脸的关键点 +- **landmarks_per_face**: 成员变量,表示每个人脸框中的关键点的数量。 diff --git a/docs/api/vision_results/face_recognition_result.md b/docs/api/vision_results/face_recognition_result.md new file mode 100644 index 00000000000..83160561843 --- /dev/null +++ b/docs/api/vision_results/face_recognition_result.md @@ -0,0 +1,24 @@ +# FaceRecognitionResult 人脸识别结果 + +FaceRecognitionResult 代码定义在`csrcs/fastdeploy/vision/common/result.h`中,用于表明人脸识别模型对图像特征的embedding。 +## C++ 定义 + +`fastdeploy::vision::FaceRecognitionResult` + +``` +struct FaceRecognitionResult { + std::vector<float> embedding; + void Clear(); + std::string Str(); +}; +``` + +- **embedding**: 成员变量,表示人脸识别模型最终提取的特征embedding,可以用来计算人脸之间的特征相似度。 +- **Clear()**: 成员函数,用于清除结构体中存储的结果 +- **Str()**: 成员函数,将结构体中的信息以字符串形式输出(用于Debug) + +## Python 定义 + +`fastdeploy.vision.FaceRecognitionResult` + +- **embedding**: 成员变量,表示人脸识别模型最终提取的特征embedding,可以用来计算人脸之间的特征相似度。 diff --git a/docs/api/vision_results/matting_result.md b/docs/api/vision_results/matting_result.md index 3418400ecaa..67bcbc79d21 100644 --- 
a/docs/api/vision_results/matting_result.md +++ b/docs/api/vision_results/matting_result.md @@ -1,15 +1,15 @@ # MattingResult 抠图结果 -MattingResult 代码定义在`csrcs/fastdeploy/vision/common/result.h`中,用于表明图像检测出来的目标框、目标类别和目标置信度。 +MattingResult 代码定义在`csrcs/fastdeploy/vision/common/result.h`中,用于表明模型预测的alpha透明度的值,预测的前景等。 -## C++ 结构体 +## C++ 定义 `fastdeploy::vision::MattingResult` ``` struct MattingResult { - std::vector alpha; // h x w - std::vector foreground; // h x w x c (c=3 default) + std::vector alpha; + std::vector foreground; std::vector shape; bool contain_foreground = false; void Clear(); @@ -25,7 +25,7 @@ struct MattingResult { - **Str()**: 成员函数,将结构体中的信息以字符串形式输出(用于Debug) -## Python结构体 +## Python 定义 `fastdeploy.vision.MattingResult` diff --git a/docs/compile/prebuilt_libraries.md b/docs/compile/prebuilt_libraries.md index 6cec3721acd..bd58fc4b0b8 100644 --- a/docs/compile/prebuilt_libraries.md +++ b/docs/compile/prebuilt_libraries.md @@ -19,17 +19,17 @@ FastDeploy提供了在Windows/Linux/Mac上的预先编译CPP部署库,开发 ### Windows 10 x64平台 -| 部署库下载地址 | 硬件 | -| :------------- | :--- | -| [comming...] | CPU | -| [comming...] | CPU/GPU | +| 部署库下载地址 | 硬件 | 说明 | +| :------------- | :--- | :--- | +| [fastdeploy-win-x64-0.2.0](https://bj.bcebos.com/paddlehub/fastdeploy/cpp/fastdeploy-win-x64-0.2.0.zip) | CPU | Visual Studio 16 2019 编译产出 | +| [fastdeploy-win-x64-gpu-0.2.0](https://bj.bcebos.com/paddlehub/fastdeploy/cpp/fastdeploy-win-x64-gpu-0.2.0.zip) | CPU/GPU | Visual Studio 16 2019,cuda 11.2, cudnn 8.2编译产出 | ### Linux aarch64平台 | 安装包 | 硬件 | | :---- | :-- | | [comming...] | CPU | -| [comming...] | Jetson | +| [comming...] 
| Jetson | ### Mac OSX平台 diff --git a/docs/compile/prebuilt_wheels.md b/docs/compile/prebuilt_wheels.md index 14ba7d40044..e3ada892e11 100644 --- a/docs/compile/prebuilt_wheels.md +++ b/docs/compile/prebuilt_wheels.md @@ -38,15 +38,20 @@ python -m pip install fastdeploy_python-0.2.0-cp38-cp38-manylinux1_x86_64.whl | CPU 安装包 | 硬件 | Python版本 | | :---- | :-- | :------ | -| [comming...] | CPU | 3.8 | -| [comming...] | CPU | 3.9 | +| [fastdeploy_python-0.2.0-cp38-cp38-win_amd64.whl](https://bj.bcebos.com/paddlehub/fastdeploy/wheels/fastdeploy_python-0.2.0-cp38-cp38-win_amd64.whl) | CPU | 3.8 | +| [fastdeploy_python-0.2.0-cp39-cp39-win_amd64.whl](https://bj.bcebos.com/paddlehub/fastdeploy/wheels/fastdeploy_python-0.2.0-cp39-cp39-win_amd64.whl) | CPU | 3.9 | + +| GPU 安装包 | 硬件 | Python版本 | +| :---- | :-- | :------ | +| [fastdeploy_gpu_python-0.2.0-cp38-cp38-win_amd64.whl](https://bj.bcebos.com/paddlehub/fastdeploy/wheels/fastdeploy_gpu_python-0.2.0-cp38-cp38-win_amd64.whl) | CPU/GPU | 3.8 | +| [fastdeploy_gpu_python-0.2.0-cp39-cp39-win_amd64.whl](https://bj.bcebos.com/paddlehub/fastdeploy/wheels/fastdeploy_gpu_python-0.2.0-cp39-cp39-win_amd64.whl) | CPU/GPU | 3.9 | ### Linux aarch64平台 | 安装包 | 硬件 | Python版本 | | :---- | :-- | :------ | | [comming...] | CPU | 3.7 | -| [comming...] | CPU | 3.8 | +| [comming...] | CPU | 3.8 | | [comming...] | CPU | 3.9 | ### Mac OSX平台 diff --git a/examples/text/information_extraction/ernie/cpp/CMakeLists.txt b/examples/text/information_extraction/ernie/cpp/CMakeLists.txt new file mode 100644 index 00000000000..1189820cb79 --- /dev/null +++ b/examples/text/information_extraction/ernie/cpp/CMakeLists.txt @@ -0,0 +1,25 @@ +# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +PROJECT(infer_demo C CXX) +CMAKE_MINIMUM_REQUIRED (VERSION 3.12) + +option(FASTDEPLOY_INSTALL_DIR "Path of downloaded fastdeploy sdk.") + +include(${FASTDEPLOY_INSTALL_DIR}/FastDeploy.cmake) + +include_directories(${FASTDEPLOY_INCS}) + +add_executable(infer_ernie_demo ${PROJECT_SOURCE_DIR}/infer.cc) +target_link_libraries(infer_ernie_demo ${FASTDEPLOY_LIBS}) diff --git a/examples/text/information_extraction/ernie/cpp/infer.cc b/examples/text/information_extraction/ernie/cpp/infer.cc new file mode 100644 index 00000000000..7f3b9318664 --- /dev/null +++ b/examples/text/information_extraction/ernie/cpp/infer.cc @@ -0,0 +1,182 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+#include <algorithm>
+#include <cstdint>
+#include <fstream>
+#include <iostream>
+#include <sstream>
+#include <string>
+#include <utility>
+#include <vector>
+
+#include "fastdeploy/function/reduce.h"
+#include "fastdeploy/function/softmax.h"
+#include "fastdeploy/text.h"
+#include "tokenizers/ernie_faster_tokenizer.h"
+
+using namespace paddlenlp;
+
+// Loads a whitespace-separated float matrix from `file`, one row per line.
+//
+// Every successfully parsed value is appended to `transitions` in reading
+// order (row-major). `num_tags` receives the number of lines that contained
+// at least one value. If the file cannot be opened, nothing is appended and
+// `num_tags` is set to 0.
+void LoadTransitionFromFile(const std::string& file,
+                            std::vector<float>* transitions, int* num_tags) {
+  std::ifstream fin(file);
+  std::string line;
+  int rows = 0;
+  // Test the stream state *after* each read: checking it before the read
+  // would append one stale value per row and count a phantom row at EOF.
+  while (std::getline(fin, line)) {
+    std::istringstream iss(line);
+    float value = 0.0f;
+    bool has_value = false;
+    while (iss >> value) {
+      transitions->push_back(value);
+      has_value = true;
+    }
+    if (has_value) {
+      ++rows;
+    }
+  }
+  *num_tags = rows;
+}
+
+// Viterbi decoding over per-token tag scores.
+//
+// slot_logits: read as a dense [batch_size, seq_len, num_tags] buffer of T.
+// trans:       read as a dense row-major [num_tags, num_tags] buffer of T,
+//              where entry (i, j) scores moving from tag i to tag j.
+// best_path:   allocated here as INT64 [batch_size, seq_len]; receives the
+//              highest-scoring tag index for every position.
+//
+// Ties are resolved in favour of the lowest tag index.
+template <typename T>
+void ViterbiDecode(const fastdeploy::FDTensor& slot_logits,
+                   const fastdeploy::FDTensor& trans,
+                   fastdeploy::FDTensor* best_path) {
+  const int batch_size = static_cast<int>(slot_logits.shape[0]);
+  const int seq_len = static_cast<int>(slot_logits.shape[1]);
+  const int num_tags = static_cast<int>(slot_logits.shape[2]);
+  best_path->Allocate({batch_size, seq_len}, fastdeploy::FDDataType::INT64);
+  // Nothing to decode; also keeps the `seq_len - 1` index below in range.
+  if (batch_size <= 0 || seq_len <= 0 || num_tags <= 0) {
+    return;
+  }
+
+  const T* slot_logits_ptr = reinterpret_cast<const T*>(slot_logits.Data());
+  const T* trans_ptr = reinterpret_cast<const T*>(trans.Data());
+  int64_t* best_path_ptr = reinterpret_cast<int64_t*>(best_path->Data());
+
+  std::vector<T> scores(num_tags);
+  std::vector<T> next_scores(num_tags);
+  // paths[t - 1][j] is the best previous tag when position t carries tag j.
+  std::vector<std::vector<int>> paths;
+  paths.reserve(seq_len - 1);
+
+  for (int b = 0; b < batch_size; ++b) {
+    const T* curr_slot_logits_ptr =
+        slot_logits_ptr + static_cast<int64_t>(b) * seq_len * num_tags;
+    int64_t* curr_best_path_ptr =
+        best_path_ptr + static_cast<int64_t>(b) * seq_len;
+
+    // Every sequence starts from its own first-token scores; state must not
+    // leak from the previous batch item.
+    std::copy(curr_slot_logits_ptr, curr_slot_logits_ptr + num_tags,
+              scores.begin());
+    paths.clear();
+
+    for (int t = 1; t < seq_len; ++t) {
+      const T* emission = curr_slot_logits_ptr + t * num_tags;
+      std::vector<int> idxs(num_tags);
+      for (int j = 0; j < num_tags; ++j) {
+        // Seed the maximum with the first candidate rather than 0 so that
+        // all-negative scores are handled correctly.
+        int best_prev = 0;
+        T best = scores[0] + trans_ptr[j];
+        for (int i = 1; i < num_tags; ++i) {
+          const T candidate = scores[i] + trans_ptr[i * num_tags + j];
+          if (candidate > best) {
+            best = candidate;
+            best_prev = i;
+          }
+        }
+        next_scores[j] = best + emission[j];
+        idxs[j] = best_prev;
+      }
+      scores.swap(next_scores);
+      paths.push_back(std::move(idxs));
+    }
+
+    // Best final tag, then follow the back-pointers to the first position.
+    int64_t tag = std::max_element(scores.begin(), scores.end()) -
+                  scores.begin();
+    curr_best_path_ptr[seq_len - 1] = tag;
+    for (int t = seq_len - 2; t >= 0; --t) {
+      tag = paths[t][tag];
+      curr_best_path_ptr[t] = tag;
+    }
+  }
+}
+
+int main() {
+  // 1. Define a ernie faster tokenizer
+  faster_tokenizer::tokenizers_impl::ErnieFasterTokenizer tokenizer(
+      "ernie_vocab.txt");
+  std::vector<std::string> strings_list = {
+      "导航去科技园二号楼", "屏幕亮度为我减小一点吧"};
+  std::vector<faster_tokenizer::core::Encoding> encodings;
+  tokenizer.EncodeBatchStrings(strings_list, &encodings);
+  if (encodings.empty()) {
+    std::cerr << "Tokenizer produced no encodings." << std::endl;
+    return -1;
+  }
+  size_t batch_size = strings_list.size();
+  size_t seq_len = encodings[0].GetLen();
+  for (auto&& encoding : encodings) {
+    std::cout << encoding.DebugString() << std::endl;
+  }
+
+  // 2. Initialize runtime
+  fastdeploy::RuntimeOption runtime_option;
+  runtime_option.SetModelPath("nano_static/model.pdmodel",
+                              "nano_static/model.pdiparams");
+  fastdeploy::Runtime runtime;
+  if (!runtime.Init(runtime_option)) {
+    std::cerr << "Failed to initialize runtime." << std::endl;
+    return -1;
+  }
+
+  // 3. Construct input vector
+  // 3.1 Convert encodings to input_ids, token_type_ids
+  // NOTE(review): assumes every encoding in the batch has the same length as
+  // encodings[0], since the inputs are laid out as [batch_size, seq_len].
+  std::vector<int64_t> input_ids, token_type_ids;
+  for (size_t i = 0; i < encodings.size(); ++i) {
+    auto&& curr_input_ids = encodings[i].GetIds();
+    auto&& curr_type_ids = encodings[i].GetTypeIds();
+    input_ids.insert(input_ids.end(), curr_input_ids.begin(),
+                     curr_input_ids.end());
+    token_type_ids.insert(token_type_ids.end(), curr_type_ids.begin(),
+                          curr_type_ids.end());
+  }
+  // 3.2 Set data to input vector
+  const int num_inputs = runtime.NumInputs();
+  void* inputs_ptrs[] = {input_ids.data(), token_type_ids.data()};
+  const int num_input_buffers =
+      static_cast<int>(sizeof(inputs_ptrs) / sizeof(inputs_ptrs[0]));
+  if (num_inputs > num_input_buffers) {
+    std::cerr << "Model expects " << num_inputs << " inputs, but only "
+              << num_input_buffers << " are provided." << std::endl;
+    return -1;
+  }
+  std::vector<fastdeploy::FDTensor> inputs(num_inputs);
+  for (int i = 0; i < num_inputs; ++i) {
+    inputs[i].SetExternalData(
+        {static_cast<int64_t>(batch_size), static_cast<int64_t>(seq_len)},
+        fastdeploy::FDDataType::INT64, inputs_ptrs[i]);
+    inputs[i].name = runtime.GetInputInfo(i).name;
+  }
+
+  // 4. Infer
+  std::vector<fastdeploy::FDTensor> outputs(runtime.NumOutputs());
+  if (!runtime.Infer(inputs, &outputs) || outputs.size() < 3) {
+    std::cerr << "Inference failed or returned fewer than 3 outputs."
+              << std::endl;
+    return -1;
+  }
+
+  // 5. Postprocess
+  fastdeploy::FDTensor domain_probs, intent_probs;
+  fastdeploy::Softmax(outputs[0], &domain_probs);
+  fastdeploy::Softmax(outputs[1], &intent_probs);
+
+  fastdeploy::FDTensor domain_max_probs, intent_max_probs;
+  fastdeploy::Max(domain_probs, &domain_max_probs, {-1}, true);
+  fastdeploy::Max(intent_probs, &intent_max_probs, {-1}, true);
+
+  std::vector<float> transition;
+  int num_tags = 0;
+  LoadTransitionFromFile("joint_transition.txt", &transition, &num_tags);
+  // The decoder indexes the transitions as a square [num_tags, num_tags]
+  // matrix matching the last dimension of the slot logits.
+  if (num_tags <= 0 ||
+      transition.size() != static_cast<size_t>(num_tags) * num_tags ||
+      outputs[2].shape.size() != 3 || outputs[2].shape[2] != num_tags) {
+    std::cerr << "joint_transition.txt is missing or does not match the "
+                 "slot logits."
+              << std::endl;
+    return -1;
+  }
+  fastdeploy::FDTensor trans;
+  trans.SetExternalData({num_tags, num_tags}, fastdeploy::FDDataType::FP32,
+                        transition.data());
+
+  fastdeploy::FDTensor best_path;
+  ViterbiDecode<float>(outputs[2], trans, &best_path);
+
+  // 6. Print result
+  domain_max_probs.PrintInfo();
+  intent_max_probs.PrintInfo();
+
+  batch_size = best_path.shape[0];
+  seq_len = best_path.shape[1];
+  const int64_t* best_path_ptr =
+      reinterpret_cast<const int64_t*>(best_path.Data());
+  for (size_t i = 0; i < batch_size; ++i) {
+    std::cout << "best_path[" << i << "] = ";
+    for (size_t j = 0; j < seq_len; ++j) {
+      std::cout << best_path_ptr[i * seq_len + j] << ", ";
+    }
+    std::cout << std::endl;
+  }
+  best_path.PrintInfo();
+  return 0;
+}
diff --git a/examples/vision/README.md b/examples/vision/README.md
index 9f05d2d7f6d..d95a315d798 100644
--- a/examples/vision/README.md
+++ b/examples/vision/README.md
@@ -8,6 +8,7 @@
 | Segmentation | 语义分割,输入图像,给出图像中每个像素的分类及置信度 | [SegmentationResult](../../docs/api/vision_results/segmentation_result.md) |
 | Classification | 图像分类,输入图像,给出图像的分类结果和置信度 | [ClassifyResult](../../docs/api/vision_results/classification_result.md) |
 | FaceDetection | 人脸检测,输入图像,检测图像中人脸位置,并返回检测框坐标及人脸关键点 | [FaceDetectionResult](../../docs/api/vision_results/face_detection_result.md) |
+| FaceRecognition | 人脸识别,输入图像,返回可用于相似度计算的人脸特征的embedding | [FaceRecognitionResult](../../docs/api/vision_results/face_recognition_result.md) |
 | Matting | 抠图,输入图像,返回图片的前景每个像素点的Alpha值 | [MattingResult](../../docs/api/vision_results/matting_result.md) |
 
 ## FastDeploy API设计