diff --git a/FastDeploy.cmake.in b/FastDeploy.cmake.in
index 082fa30f30b..ccd1039be29 100644
--- a/FastDeploy.cmake.in
+++ b/FastDeploy.cmake.in
@@ -95,6 +95,10 @@ endif()
 
 if (ENABLE_TEXT)
 # Add dependency libs later
+  # Look for core_tokenizers only under third_libs/install/faster_tokenizer/lib
+  # next to this file; NO_DEFAULT_PATH disables CMake's default search paths.
+  find_library(FASTER_TOKENIZER_LIB core_tokenizers ${CMAKE_CURRENT_LIST_DIR}/third_libs/install/faster_tokenizer/lib NO_DEFAULT_PATH)
+  list(APPEND FASTDEPLOY_LIBS ${FASTER_TOKENIZER_LIB})
+  # Expose the tokenizer's own include directory and its third_party/include
+  # directory to consumers through FASTDEPLOY_INCS.
+  list(APPEND FASTDEPLOY_INCS ${CMAKE_CURRENT_LIST_DIR}/third_libs/install/faster_tokenizer/include)
+  list(APPEND FASTDEPLOY_INCS ${CMAKE_CURRENT_LIST_DIR}/third_libs/install/faster_tokenizer/third_party/include)
 endif()
 
 if(ENABLE_PADDLE_FRONTEND)
diff --git a/csrc/fastdeploy/core/config.h.in b/csrc/fastdeploy/core/config.h.in
index 7713925867a..b29113f1fdb 100644
--- a/csrc/fastdeploy/core/config.h.in
+++ b/csrc/fastdeploy/core/config.h.in
@@ -45,6 +45,10 @@
 #cmakedefine ENABLE_VISION
 #endif
 
+// Defined at configure time when the CMake variable ENABLE_TEXT is enabled;
+// the #ifndef guard avoids redefining it if it is already defined.
+#ifndef ENABLE_TEXT
+#cmakedefine ENABLE_TEXT
+#endif
+
 #ifndef ENABLE_OPENCV_CUDA
 #cmakedefine ENABLE_OPENCV_CUDA
 #endif
@@ -52,3 +56,7 @@
 #ifndef ENABLE_VISION_VISUALIZE
 #cmakedefine ENABLE_VISION_VISUALIZE
 #endif
+
+// Defined at configure time when the CMake variable ENABLE_FDTENSOR_FUNC is
+// enabled; the #ifndef guard avoids redefining it if it is already defined.
+#ifndef ENABLE_FDTENSOR_FUNC
+#cmakedefine ENABLE_FDTENSOR_FUNC
+#endif
diff --git a/docs/api/vision_results/README.md b/docs/api/vision_results/README.md
index 844388cca86..64ea4fc671b 100644
--- a/docs/api/vision_results/README.md
+++ b/docs/api/vision_results/README.md
@@ -6,5 +6,6 @@ FastDeploy根据视觉模型的任务类型，定义了不同的结构体(`csrcs
 | :----- | :--- | :---- | :------- |
 | ClassificationResult | [C++/Python文档](./classification_result.md) | 图像分类返回结果 | ResNet50、MobileNetV3等 |
 | DetectionResult | [C++/Python文档](./detection_result.md) | 目标检测返回结果 | PPYOLOE、YOLOv7系列模型等 |
-| FaceDetectionResult | [C++/Python文档](./face_detection_result.md) | 目标检测返回结果 | PPYOLOE、YOLOv7系列模型等 |
-| MattingResult | [C++/Python文档](./matting_result.md) | 目标检测返回结果 | PPYOLOE、YOLOv7系列模型等 |
+| FaceDetectionResult | [C++/Python文档](./face_detection_result.md) | 人脸检测返回结果 | SCRFD、RetinaFace系列模型等 |
+| FaceRecognitionResult | [C++/Python文档](./face_recognition_result.md) | 人脸识别返回结果 | ArcFace、CosFace系列模型等 |
+| MattingResult | [C++/Python文档](./matting_result.md) | 抠图返回结果 | MODNet系列模型等 |
diff --git a/docs/api/vision_results/classification_result.md b/docs/api/vision_results/classification_result.md
index 113db39608a..bf94d0ff159 100644
--- a/docs/api/vision_results/classification_result.md
+++ b/docs/api/vision_results/classification_result.md
@@ -2,7 +2,7 @@
 
 ClassifyResult代码定义在`csrcs/fastdeploy/vision/common/result.h`中，用于表明图像的分类结果和置信度。
 
-## C++ 结构体
+## C++ 定义
 
 `fastdeploy::vision::ClassifyResult`
 
@@ -20,7 +20,7 @@ struct ClassifyResult {
 - **Clear()**: 成员函数，用于清除结构体中存储的结果
 - **Str()**: 成员函数，将结构体中的信息以字符串形式输出（用于Debug）
 
-## Python结构体
+## Python 定义
 
 `fastdeploy.vision.ClassifyResult`
 
diff --git a/docs/api/vision_results/detection_result.md b/docs/api/vision_results/detection_result.md
index e44a27b34c3..a702d49899f 100644
--- a/docs/api/vision_results/detection_result.md
+++ b/docs/api/vision_results/detection_result.md
@@ -2,7 +2,7 @@
 
 DetectionResult代码定义在`csrcs/fastdeploy/vision/common/result.h`中，用于表明图像检测出来的目标框、目标类别和目标置信度。
 
-## C++ 结构体
+## C++ 定义
 
 `fastdeploy::vision::DetectionResult`
 
@@ -22,10 +22,10 @@ struct DetectionResult {
 - **Clear()**: 成员函数，用于清除结构体中存储的结果
 - **Str()**: 成员函数，将结构体中的信息以字符串形式输出（用于Debug）
 
-## Python结构体
+## Python 定义
 
 `fastdeploy.vision.DetectionResult`
 
 - **boxes**(list of list(float)): 成员变量，表示单张图片检测出来的所有目标框坐标。boxes是一个list，其每个元素为一个长度为4的list， 表示为一个框，每个框以4个float数值依次表示xmin, ymin, xmax, ymax， 即左上角和右下角坐标
 - **scores**(list of float): 成员变量，表示单张图片检测出来的所有目标置信度
-- **label_ids(list of int): 成员变量，表示单张图片检测出来的所有目标类别
+- **label_ids**(list of int): 成员变量，表示单张图片检测出来的所有目标类别
diff --git a/docs/api/vision_results/face_detection_result.md b/docs/api/vision_results/face_detection_result.md
index 6c9c09f0073..000b42a6be0 100644
--- a/docs/api/vision_results/face_detection_result.md
+++ b/docs/api/vision_results/face_detection_result.md
@@ -1,8 +1,8 @@
 # FaceDetectionResult 人脸检测结果
 
-FaceDetectionResult 代码定义在`csrcs/fastdeploy/vision/common/result.h`中，用于表明图像检测出来的目标框、目标类别和目标置信度。
+FaceDetectionResult 代码定义在`csrcs/fastdeploy/vision/common/result.h`中，用于表明人脸检测出来的目标框、人脸landmarks、目标置信度和每张人脸的landmark数量。
 
-## C++ 结构体
+## C++ 定义
 
 `fastdeploy::vision::FaceDetectionResult`
 
@@ -11,7 +11,6 @@ struct FaceDetectionResult {
   std::vector<std::array<float, 4>> boxes;
   std::vector<std::array<float, 2>> landmarks;
   std::vector<float> scores;
-  ResultType type = ResultType::FACE_DETECTION;
   int landmarks_per_face;
   void Clear();
   std::string Str();
@@ -25,10 +24,11 @@ struct FaceDetectionResult {
 - **Clear()**: 成员函数，用于清除结构体中存储的结果
 - **Str()**: 成员函数，将结构体中的信息以字符串形式输出（用于Debug）
 
-## Python结构体
+## Python 定义
 
 `fastdeploy.vision.FaceDetectionResult`
 
 - **boxes**(list of list(float)): 成员变量，表示单张图片检测出来的所有目标框坐标。boxes是一个list，其每个元素为一个长度为4的list， 表示为一个框，每个框以4个float数值依次表示xmin, ymin, xmax, ymax， 即左上角和右下角坐标
 - **scores**(list of float): 成员变量，表示单张图片检测出来的所有目标置信度
 - **landmarks**: 成员变量，表示单张图片检测出来的所有人脸的关键点
+- **landmarks_per_face**(int): 成员变量，表示每个人脸框中的关键点的数量
diff --git a/docs/api/vision_results/face_recognition_result.md b/docs/api/vision_results/face_recognition_result.md
new file mode 100644
index 00000000000..83160561843
--- /dev/null
+++ b/docs/api/vision_results/face_recognition_result.md
@@ -0,0 +1,24 @@
+# FaceRecognitionResult 人脸识别结果
+
+FaceRecognitionResult 代码定义在`csrcs/fastdeploy/vision/common/result.h`中，用于表明人脸识别模型对图像特征的embedding。
+## C++ 定义
+
+`fastdeploy::vision::FaceRecognitionResult`
+
+```
+struct FaceRecognitionResult {
+  std::vector<float> embedding;
+  void Clear();
+  std::string Str();
+};
+```
+
+- **embedding**: 成员变量，表示人脸识别模型最终提取的特征embedding，可以用来计算人脸之间的特征相似度。
+- **Clear()**: 成员函数，用于清除结构体中存储的结果
+- **Str()**: 成员函数，将结构体中的信息以字符串形式输出（用于Debug）
+
+## Python 定义
+
+`fastdeploy.vision.FaceRecognitionResult`
+
+- **embedding**: 成员变量，表示人脸识别模型最终提取的特征embedding，可以用来计算人脸之间的特征相似度。
diff --git a/docs/api/vision_results/matting_result.md b/docs/api/vision_results/matting_result.md
index 3418400ecaa..67bcbc79d21 100644
--- a/docs/api/vision_results/matting_result.md
+++ b/docs/api/vision_results/matting_result.md
@@ -1,15 +1,15 @@
 # MattingResult 抠图结果
 
-MattingResult 代码定义在`csrcs/fastdeploy/vision/common/result.h`中，用于表明图像检测出来的目标框、目标类别和目标置信度。
+MattingResult 代码定义在`csrcs/fastdeploy/vision/common/result.h`中，用于表明模型预测的alpha透明度的值，预测的前景等。
 
-## C++ 结构体
+## C++ 定义
 
 `fastdeploy::vision::MattingResult`
 
 ```
 struct MattingResult {
-  std::vector<float> alpha;       // h x w
-  std::vector<float> foreground;  // h x w x c (c=3 default)
+  std::vector<float> alpha;
+  std::vector<float> foreground;
   std::vector<int64_t> shape;
   bool contain_foreground = false;
   void Clear();
@@ -25,7 +25,7 @@ struct MattingResult {
 - **Str()**: 成员函数，将结构体中的信息以字符串形式输出（用于Debug）
 
 
-## Python结构体
+## Python 定义
 
 `fastdeploy.vision.MattingResult`
 
diff --git a/docs/compile/prebuilt_libraries.md b/docs/compile/prebuilt_libraries.md
index 6cec3721acd..bd58fc4b0b8 100644
--- a/docs/compile/prebuilt_libraries.md
+++ b/docs/compile/prebuilt_libraries.md
@@ -19,17 +19,17 @@ FastDeploy提供了在Windows/Linux/Mac上的预先编译CPP部署库，开发
 
 ### Windows 10 x64平台
 
-| 部署库下载地址 | 硬件 |
-| :------------- | :--- |
-| [comming...] | CPU |
-| [comming...] | CPU/GPU |
+| 部署库下载地址 | 硬件 | 说明 |
+| :------------- | :--- | :--- |
+| [fastdeploy-win-x64-0.2.0](https://bj.bcebos.com/paddlehub/fastdeploy/cpp/fastdeploy-win-x64-0.2.0.zip) | CPU | Visual Studio 16 2019 编译产出 |
+| [fastdeploy-win-x64-gpu-0.2.0](https://bj.bcebos.com/paddlehub/fastdeploy/cpp/fastdeploy-win-x64-gpu-0.2.0.zip) | CPU/GPU | Visual Studio 16 2019、CUDA 11.2、cuDNN 8.2 编译产出 |
 
 ### Linux aarch64平台
 
 | 安装包 | 硬件 |
 | :----  | :-- |
 | [comming...] | CPU |
-| [comming...] | Jetson | 
+| [comming...] | Jetson |
 
 ### Mac OSX平台
 
diff --git a/docs/compile/prebuilt_wheels.md b/docs/compile/prebuilt_wheels.md
index 14ba7d40044..e3ada892e11 100644
--- a/docs/compile/prebuilt_wheels.md
+++ b/docs/compile/prebuilt_wheels.md
@@ -38,15 +38,20 @@ python -m pip install fastdeploy_python-0.2.0-cp38-cp38-manylinux1_x86_64.whl
 
 | CPU 安装包 | 硬件 | Python版本 |
 | :----  | :-- | :------ |
-| [comming...] | CPU | 3.8 |
-| [comming...] | CPU | 3.9 |
+| [fastdeploy_python-0.2.0-cp38-cp38-win_amd64.whl](https://bj.bcebos.com/paddlehub/fastdeploy/wheels/fastdeploy_python-0.2.0-cp38-cp38-win_amd64.whl) | CPU | 3.8 |
+| [fastdeploy_python-0.2.0-cp39-cp39-win_amd64.whl](https://bj.bcebos.com/paddlehub/fastdeploy/wheels/fastdeploy_python-0.2.0-cp39-cp39-win_amd64.whl) | CPU | 3.9 |
+
+| GPU 安装包 | 硬件 | Python版本 |
+| :----  | :-- | :------ |
+| [fastdeploy_gpu_python-0.2.0-cp38-cp38-win_amd64.whl](https://bj.bcebos.com/paddlehub/fastdeploy/wheels/fastdeploy_gpu_python-0.2.0-cp38-cp38-win_amd64.whl) | CPU/GPU | 3.8 |
+| [fastdeploy_gpu_python-0.2.0-cp39-cp39-win_amd64.whl](https://bj.bcebos.com/paddlehub/fastdeploy/wheels/fastdeploy_gpu_python-0.2.0-cp39-cp39-win_amd64.whl) | CPU/GPU | 3.9 |
 
 ### Linux aarch64平台
 
 | 安装包 | 硬件 | Python版本 |
 | :----  | :-- | :------ |
 | [comming...] | CPU | 3.7 |
-| [comming...] | CPU | 3.8 | 
+| [comming...] | CPU | 3.8 |
 | [comming...] | CPU | 3.9 |
 
 ### Mac OSX平台
diff --git a/examples/text/information_extraction/ernie/cpp/CMakeLists.txt b/examples/text/information_extraction/ernie/cpp/CMakeLists.txt
new file mode 100644
index 00000000000..1189820cb79
--- /dev/null
+++ b/examples/text/information_extraction/ernie/cpp/CMakeLists.txt
@@ -0,0 +1,25 @@
+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# cmake_minimum_required() must come before project() so that policy settings
+# are established before the project and its languages are configured.
+cmake_minimum_required(VERSION 3.12)
+project(infer_demo C CXX)
+
+# Root directory of the downloaded FastDeploy SDK. This is a path, not an
+# ON/OFF switch, so it is declared as a PATH cache entry rather than option().
+# A value passed with -DFASTDEPLOY_INSTALL_DIR=... on the command line is kept.
+set(FASTDEPLOY_INSTALL_DIR "" CACHE PATH "Path of downloaded fastdeploy sdk.")
+if(NOT FASTDEPLOY_INSTALL_DIR)
+  message(FATAL_ERROR
+          "FASTDEPLOY_INSTALL_DIR is not set. "
+          "Configure with -DFASTDEPLOY_INSTALL_DIR=/path/to/fastdeploy-sdk")
+endif()
+
+# FastDeploy.cmake fills FASTDEPLOY_INCS and FASTDEPLOY_LIBS.
+include(${FASTDEPLOY_INSTALL_DIR}/FastDeploy.cmake)
+
+include_directories(${FASTDEPLOY_INCS})
+
+add_executable(infer_ernie_demo ${PROJECT_SOURCE_DIR}/infer.cc)
+target_link_libraries(infer_ernie_demo ${FASTDEPLOY_LIBS})
diff --git a/examples/text/information_extraction/ernie/cpp/infer.cc b/examples/text/information_extraction/ernie/cpp/infer.cc
new file mode 100644
index 00000000000..7f3b9318664
--- /dev/null
+++ b/examples/text/information_extraction/ernie/cpp/infer.cc
@@ -0,0 +1,182 @@
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+#include <algorithm>
+#include <fstream>
+#include <iostream>
+#include <sstream>
+#include <string>
+#include <vector>
+
+#include "fastdeploy/function/reduce.h"
+#include "fastdeploy/function/softmax.h"
+#include "fastdeploy/text.h"
+#include "tokenizers/ernie_faster_tokenizer.h"
+
+using namespace paddlenlp;
+
+// Reads a whitespace-separated table of floats from `file`.
+//
+// Every value that parses successfully is appended to `*transitions` in file
+// order (line by line, left to right). `*num_tags` receives the number of
+// lines that contributed at least one value. If the file cannot be opened,
+// nothing is appended and `*num_tags` is set to 0.
+void LoadTransitionFromFile(const std::string& file,
+                            std::vector<float>* transitions, int* num_tags) {
+  std::ifstream fin(file);
+  std::string line;
+  int num_rows = 0;
+  // Test the stream state *after* each read: looping on the state before the
+  // read would push one stale value per line and count a phantom read at EOF.
+  while (std::getline(fin, line)) {
+    std::istringstream row(line);
+    float value = 0.0f;
+    bool has_value = false;
+    while (row >> value) {
+      transitions->push_back(value);
+      has_value = true;
+    }
+    // Blank or whitespace-only lines (e.g. a trailing "\r") are not rows.
+    if (has_value) {
+      ++num_rows;
+    }
+  }
+  *num_tags = num_rows;
+}
+
+// Viterbi decoding of the highest-scoring tag sequence for each batch item.
+//
+// slot_logits: read as a dense [batch_size, seq_len, num_tags] tensor of T.
+// trans:       read as a dense row-major [num_tags, num_tags] tensor of T,
+//              where element (i, j) is the score of moving from tag i to
+//              tag j. Its shape is not checked here; the caller must make it
+//              agree with slot_logits' num_tags.
+// best_path:   allocated here as an INT64 [batch_size, seq_len] tensor and
+//              filled with the decoded tag index of every position.
+template <typename T>
+void ViterbiDecode(const fastdeploy::FDTensor& slot_logits,
+                   const fastdeploy::FDTensor& trans,
+                   fastdeploy::FDTensor* best_path) {
+  const int batch_size = slot_logits.shape[0];
+  const int seq_len = slot_logits.shape[1];
+  const int num_tags = slot_logits.shape[2];
+  best_path->Allocate({batch_size, seq_len}, fastdeploy::FDDataType::INT64);
+  // Nothing to decode; also avoids writing to index seq_len - 1 == -1.
+  if (batch_size <= 0 || seq_len <= 0 || num_tags <= 0) {
+    return;
+  }
+
+  const T* slot_logits_ptr = reinterpret_cast<const T*>(slot_logits.Data());
+  const T* trans_ptr = reinterpret_cast<const T*>(trans.Data());
+  int64_t* best_path_ptr = reinterpret_cast<int64_t*>(best_path->Data());
+
+  std::vector<T> scores(num_tags);
+  std::vector<T> next_scores(num_tags);
+  // backpointers[t - 1][j] is the best previous tag when step t carries tag j.
+  // Every entry is overwritten for each batch item, so it is allocated once.
+  std::vector<std::vector<int>> backpointers(seq_len - 1,
+                                             std::vector<int>(num_tags));
+  for (int b = 0; b < batch_size; ++b) {
+    const T* curr_slot_logits_ptr = slot_logits_ptr + b * seq_len * num_tags;
+    int64_t* curr_best_path_ptr = best_path_ptr + b * seq_len;
+    // Each sequence starts from its own step-0 scores; scores must not carry
+    // over from the previous batch item.
+    std::copy(curr_slot_logits_ptr, curr_slot_logits_ptr + num_tags,
+              scores.begin());
+    for (int t = 1; t < seq_len; ++t) {
+      const T* emission = curr_slot_logits_ptr + t * num_tags;
+      for (int j = 0; j < num_tags; ++j) {
+        // Seed the maximum with the first candidate rather than 0 so that
+        // all-negative scores are handled correctly.
+        int best_prev = 0;
+        T best_score = scores[0] + trans_ptr[j] + emission[j];
+        for (int i = 1; i < num_tags; ++i) {
+          // Row-major [num_tags, num_tags] lookup of transition i -> j.
+          const T candidate =
+              scores[i] + trans_ptr[i * num_tags + j] + emission[j];
+          if (candidate > best_score) {
+            best_score = candidate;
+            best_prev = i;
+          }
+        }
+        next_scores[j] = best_score;
+        backpointers[t - 1][j] = best_prev;
+      }
+      scores.swap(next_scores);
+    }
+    // Best final tag, then follow the backpointers to the start.
+    const int last_tag = static_cast<int>(
+        std::max_element(scores.begin(), scores.end()) - scores.begin());
+    curr_best_path_ptr[seq_len - 1] = last_tag;
+    for (int t = seq_len - 2; t >= 0; --t) {
+      const int next_tag = static_cast<int>(curr_best_path_ptr[t + 1]);
+      curr_best_path_ptr[t] = backpointers[t][next_tag];
+    }
+  }
+}
+
+// Demo entry point: tokenizes two sentences, runs the model through the
+// FastDeploy runtime, then prints the max domain/intent probabilities and the
+// Viterbi-decoded tag path. Returns 0 on success and -1 when a precondition
+// checked below does not hold.
+int main() {
+  // 1. Define an ERNIE faster tokenizer
+  faster_tokenizer::tokenizers_impl::ErnieFasterTokenizer tokenizer(
+      "ernie_vocab.txt");
+  std::vector<faster_tokenizer::core::EncodeInput> strings_list = {
+      "导航去科技园二号楼", "屏幕亮度为我减小一点吧"};
+  std::vector<faster_tokenizer::core::Encoding> encodings;
+  tokenizer.EncodeBatchStrings(strings_list, &encodings);
+  if (encodings.empty()) {
+    std::cerr << "Tokenizer produced no encodings." << std::endl;
+    return -1;
+  }
+  size_t batch_size = strings_list.size();
+  // NOTE(review): the input tensors below are described as
+  // {batch_size, seq_len} using the first encoding's length only; confirm
+  // that the tokenizer pads every encoding in the batch to the same length.
+  size_t seq_len = encodings[0].GetLen();
+  for (auto&& encoding : encodings) {
+    std::cout << encoding.DebugString() << std::endl;
+  }
+  // 2. Initialize runtime
+  fastdeploy::RuntimeOption runtime_option;
+  runtime_option.SetModelPath("nano_static/model.pdmodel",
+                              "nano_static/model.pdiparams");
+  fastdeploy::Runtime runtime;
+  runtime.Init(runtime_option);
+
+  // 3. Construct input vector
+  // 3.1 Convert encodings to input_ids, token_type_ids (flattened, one
+  //     encoding after another)
+  std::vector<int64_t> input_ids, token_type_ids;
+  for (auto&& encoding : encodings) {
+    auto&& curr_input_ids = encoding.GetIds();
+    auto&& curr_type_ids = encoding.GetTypeIds();
+    input_ids.insert(input_ids.end(), curr_input_ids.begin(),
+                     curr_input_ids.end());
+    token_type_ids.insert(token_type_ids.end(), curr_type_ids.begin(),
+                          curr_type_ids.end());
+  }
+  // 3.2 Set data to input vector
+  void* inputs_ptrs[] = {input_ids.data(), token_type_ids.data()};
+  const int num_ptrs =
+      static_cast<int>(sizeof(inputs_ptrs) / sizeof(inputs_ptrs[0]));
+  const int num_inputs = runtime.NumInputs();
+  // Only two buffers are prepared; refuse models that would index past them.
+  if (num_inputs > num_ptrs) {
+    std::cerr << "The model expects " << num_inputs
+              << " inputs, but this demo only provides " << num_ptrs << "."
+              << std::endl;
+    return -1;
+  }
+  std::vector<fastdeploy::FDTensor> inputs(num_inputs);
+  for (int i = 0; i < num_inputs; ++i) {
+    inputs[i].SetExternalData({batch_size, seq_len},
+                              fastdeploy::FDDataType::INT64, inputs_ptrs[i]);
+    inputs[i].name = runtime.GetInputInfo(i).name;
+  }
+
+  // 4. Infer
+  std::vector<fastdeploy::FDTensor> outputs(runtime.NumOutputs());
+  runtime.Infer(inputs, &outputs);
+  // outputs[0], outputs[1] and outputs[2] are all read below.
+  if (outputs.size() < 3) {
+    std::cerr << "Expected at least 3 model outputs, got " << outputs.size()
+              << "." << std::endl;
+    return -1;
+  }
+
+  // 5. Postprocess
+  // outputs[0] / outputs[1] are treated as domain / intent logits and
+  // outputs[2] as the per-token slot logits.
+  fastdeploy::FDTensor domain_probs, intent_probs;
+  fastdeploy::Softmax(outputs[0], &domain_probs);
+  fastdeploy::Softmax(outputs[1], &intent_probs);
+
+  fastdeploy::FDTensor domain_max_probs, intent_max_probs;
+  fastdeploy::Max(domain_probs, &domain_max_probs, {-1}, true);
+  fastdeploy::Max(intent_probs, &intent_max_probs, {-1}, true);
+
+  std::vector<float> transition;
+  int num_tags = 0;
+  LoadTransitionFromFile("joint_transition.txt", &transition, &num_tags);
+  // A missing or empty transition file would leave the decoder with no data.
+  if (num_tags <= 0) {
+    std::cerr << "Failed to load transitions from joint_transition.txt."
+              << std::endl;
+    return -1;
+  }
+  fastdeploy::FDTensor trans;
+  trans.SetExternalData({num_tags, num_tags}, fastdeploy::FDDataType::FP32,
+                        transition.data());
+
+  fastdeploy::FDTensor best_path;
+  ViterbiDecode<float>(outputs[2], trans, &best_path);
+  // 6. Print result
+  domain_max_probs.PrintInfo();
+  intent_max_probs.PrintInfo();
+
+  batch_size = best_path.shape[0];
+  seq_len = best_path.shape[1];
+  const int64_t* best_path_ptr =
+      reinterpret_cast<const int64_t*>(best_path.Data());
+  for (size_t i = 0; i < batch_size; ++i) {
+    std::cout << "best_path[" << i << "] = ";
+    for (size_t j = 0; j < seq_len; ++j) {
+      std::cout << best_path_ptr[i * seq_len + j] << ", ";
+    }
+    std::cout << std::endl;
+  }
+  best_path.PrintInfo();
+  return 0;
+}
diff --git a/examples/vision/README.md b/examples/vision/README.md
index 9f05d2d7f6d..d95a315d798 100644
--- a/examples/vision/README.md
+++ b/examples/vision/README.md
@@ -8,6 +8,7 @@
 | Segmentation   | 语义分割，输入图像，给出图像中每个像素的分类及置信度          | [SegmentationResult](../../docs/api/vision_results/segmentation_result.md) |
 | Classification | 图像分类，输入图像，给出图像的分类结果和置信度             | [ClassifyResult](../../docs/api/vision_results/classification_result.md)   |
 | FaceDetection | 人脸检测，输入图像，检测图像中人脸位置，并返回检测框坐标及人脸关键点             | [FaceDetectionResult](../../docs/api/vision_results/face_detection_result.md)   |
+| FaceRecognition | 人脸识别，输入图像，返回可用于相似度计算的人脸特征的embedding            | [FaceRecognitionResult](../../docs/api/vision_results/face_recognition_result.md)   |
 | Matting | 抠图，输入图像，返回图片的前景每个像素点的Alpha值            | [MattingResult](../../docs/api/vision_results/matting_result.md)   |
 ## FastDeploy API设计