diff --git a/csrcs/fastdeploy/core/fd_tensor.cc b/csrcs/fastdeploy/core/fd_tensor.cc
index dbefbd9ecca..c278763cabe 100644
--- a/csrcs/fastdeploy/core/fd_tensor.cc
+++ b/csrcs/fastdeploy/core/fd_tensor.cc
@@ -50,6 +50,13 @@ void* FDTensor::Data() {
   return data.data();
 }
 
+const void* FDTensor::Data() const {
+  if (external_data_ptr != nullptr) {
+    return external_data_ptr;
+  }
+  return data.data();
+}
+
 void FDTensor::SetExternalData(const std::vector<int64_t>& new_shape,
                                const FDDataType& data_type, void* data_buffer) {
   dtype = data_type;
diff --git a/csrcs/fastdeploy/core/fd_tensor.h b/csrcs/fastdeploy/core/fd_tensor.h
index a00ff87fdfc..14c5a1142be 100644
--- a/csrcs/fastdeploy/core/fd_tensor.h
+++ b/csrcs/fastdeploy/core/fd_tensor.h
@@ -54,6 +54,8 @@ struct FASTDEPLOY_DECL FDTensor {
   // will copy to cpu store in `temporary_cpu_buffer`
   void* Data();
 
+  const void* Data() const;
+
   // Set user memory buffer for Tensor, the memory is managed by
   // the user it self, but the Tensor will share the memory with user
   // So take care with the user buffer
@@ -81,4 +83,4 @@ struct FASTDEPLOY_DECL FDTensor {
   explicit FDTensor(const std::string& tensor_name);
 };
 
-} // namespace fastdeploy
+}  // namespace fastdeploy
diff --git a/examples/text/compute.h b/examples/text/compute.h
index dc024a61efc..b279473b75c 100644
--- a/examples/text/compute.h
+++ b/examples/text/compute.h
@@ -225,8 +225,8 @@ void CommonElementwiseBroadcastForward(const FDTensor& x, const FDTensor& y,
                                 y_dims_array.data(), out_dims_array.data(),
                                 max_dim, axis);
 
-  const T* x_data = reinterpret_cast<const T*>(x.data.data());
-  const T* y_data = reinterpret_cast<const T*>(y.data.data());
+  const T* x_data = reinterpret_cast<const T*>(x.Data());
+  const T* y_data = reinterpret_cast<const T*>(y.Data());
   z->Allocate(out_dims_array, TypeToDataType<OutType>::dtype);
   OutType* out_data = reinterpret_cast<OutType*>(z->MutableData());
diff --git a/examples/text/ernie_tokencls.cc b/examples/text/ernie_tokencls.cc
index 924f98b0cf9..4df1f570556 100644
--- a/examples/text/ernie_tokencls.cc
+++ b/examples/text/ernie_tokencls.cc
@@ -12,12 +12,144 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
 #include <iostream>
+#include <algorithm>
+#include <cmath>
+#include <fstream>
+#include <sstream>
 
-#include "compute.h"
 #include "fastdeploy/text.h"
 #include "tokenizers/ernie_faster_tokenizer.h"
 
 using namespace paddlenlp;
+
+void LoadTransitionFromFile(const std::string& file,
+                            std::vector<float>* transitions, int* num_tags) {
+  std::ifstream fin(file);
+  std::string curr_transition;
+  float transition;
+  int i = 0;
+  // Read the transition matrix row by row; `while (iss >> transition)`
+  // consumes every value on the line and stops cleanly at end-of-line.
+  while (std::getline(fin, curr_transition)) {
+    std::istringstream iss(curr_transition);
+    while (iss >> transition) {
+      transitions->push_back(transition);
+    }
+    if (curr_transition != "") {
+      ++i;
+    }
+  }
+  *num_tags = i;
+}
+
+// Only useful for axis = -1
+template <typename T>
+void Softmax(const fastdeploy::FDTensor& input, fastdeploy::FDTensor* output) {
+  // Numerically stable softmax over one row: subtract the row max before
+  // exponentiating.
+  auto softmax_func = [](const T* score_vec, T* softmax_vec, int label_num) {
+    double score_max = *(std::max_element(score_vec, score_vec + label_num));
+    double e_sum = 0;
+    for (int j = 0; j < label_num; j++) {
+      softmax_vec[j] = std::exp(score_vec[j] - score_max);
+      e_sum += softmax_vec[j];
+    }
+    for (int k = 0; k < label_num; k++) {
+      softmax_vec[k] /= e_sum;
+    }
+  };
+
+  std::vector<int64_t> output_shape;
+  for (size_t i = 0; i < input.shape.size(); ++i) {
+    output_shape.push_back(input.shape[i]);
+  }
+  output->Allocate(output_shape, input.dtype);
+  int label_num = output_shape.back();
+  int batch_size = input.Numel() / label_num;
+  int offset = 0;
+  const T* input_ptr = reinterpret_cast<const T*>(input.Data());
+  T* output_ptr = reinterpret_cast<T*>(output->Data());
+  for (int i = 0; i < batch_size; ++i) {
+    softmax_func(input_ptr + offset, output_ptr + offset, label_num);
+    offset += label_num;
+  }
+}
+
+// Only useful for axis = -1
+template <typename T>
+void Max(const fastdeploy::FDTensor& input, fastdeploy::FDTensor* output) {
+  std::vector<int64_t> output_shape;
+  for (size_t i = 0; i < input.shape.size() - 1; ++i) {
+    output_shape.push_back(input.shape[i]);
+  }
+  output_shape.push_back(1);
+  output->Allocate(output_shape, input.dtype);
+  int batch_size = output->Numel();
+  int label_num = input.shape.back();
+  int offset = 0;
+  const T* input_ptr = reinterpret_cast<const T*>(input.Data());
+  T* output_ptr = reinterpret_cast<T*>(output->Data());
+  for (int i = 0; i < batch_size; ++i) {
+    output_ptr[i] =
+        *(std::max_element(input_ptr + offset, input_ptr + offset + label_num));
+    offset += label_num;
+  }
+}
+
+template <typename T>
+void ViterbiDecode(const fastdeploy::FDTensor& slot_logits,
+                   const fastdeploy::FDTensor& trans,
+                   fastdeploy::FDTensor* best_path) {
+  int batch_size = slot_logits.shape[0];
+  int seq_len = slot_logits.shape[1];
+  int num_tags = slot_logits.shape[2];
+  best_path->Allocate({batch_size, seq_len}, fastdeploy::FDDataType::INT64);
+
+  const T* slot_logits_ptr = reinterpret_cast<const T*>(slot_logits.Data());
+  const T* trans_ptr = reinterpret_cast<const T*>(trans.Data());
+  int64_t* best_path_ptr = reinterpret_cast<int64_t*>(best_path->Data());
+  std::vector<T> scores(num_tags);
+  std::vector<std::vector<T>> M(num_tags, std::vector<T>(num_tags));
+  for (int b = 0; b < batch_size; ++b) {
+    std::vector<std::vector<int>> paths;
+    const T* curr_slot_logits_ptr = slot_logits_ptr + b * seq_len * num_tags;
+    int64_t* curr_best_path_ptr = best_path_ptr + b * seq_len;
+    // Initialize the running scores with the step-0 emissions of this sample.
+    std::copy(curr_slot_logits_ptr, curr_slot_logits_ptr + num_tags,
+              scores.begin());
+    for (int t = 1; t < seq_len; t++) {
+      for (int i = 0; i < num_tags; i++) {
+        for (int j = 0; j < num_tags; j++) {
+          // trans has shape {num_tags, num_tags}: trans[i][j] is the score
+          // of moving from tag i to tag j.
+          auto trans_idx = i * num_tags + j;
+          auto slot_logit_idx = t * num_tags + j;
+          M[i][j] = scores[i] + trans_ptr[trans_idx] +
+                    curr_slot_logits_ptr[slot_logit_idx];
+        }
+      }
+      std::vector<int> idxs;
+      for (int i = 0; i < num_tags; i++) {
+        // Seed the running max with the first candidate so that negative
+        // scores are handled correctly.
+        T max = M[0][i];
+        int idx = 0;
+        for (int j = 1; j < num_tags; j++) {
+          if (M[j][i] > max) {
+            max = M[j][i];
+            idx = j;
+          }
+        }
+        scores[i] = max;
+        idxs.push_back(idx);
+      }
+      paths.push_back(idxs);
+    }
+    int scores_max_index = 0;
+    T scores_max = scores[0];
+    for (size_t i = 1; i < scores.size(); i++) {
+      if (scores[i] > scores_max) {
+        scores_max = scores[i];
+        scores_max_index = i;
+      }
+    }
+    curr_best_path_ptr[seq_len - 1] = scores_max_index;
+    // Trace the best path backwards from the last step.
+    for (int i = seq_len - 2; i >= 0; i--) {
+      int index = curr_best_path_ptr[i + 1];
+      curr_best_path_ptr[i] = paths[i][index];
+    }
+  }
+}
+
 int main() {
   // 1. Define a ernie faster tokenizer
   faster_tokenizer::tokenizers_impl::ErnieFasterTokenizer tokenizer(
@@ -39,15 +171,7 @@ int main() {
   runtime.Init(runtime_option);
 
   // 3. Construct input vector
-  std::vector<fastdeploy::FDTensor> inputs(runtime.NumInputs());
-  for (int i = 0; i < runtime.NumInputs(); ++i) {
-    inputs[i].dtype = fastdeploy::FDDataType::INT64;
-    inputs[i].shape = {batch_size, seq_len};
-    inputs[i].name = runtime.GetInputInfo(i).name;
-    inputs[i].data.resize(sizeof(int64_t) * batch_size * seq_len);
-  }
-
-  // Convert encodings to input_ids, token_type_ids
+  // 3.1 Convert encodings to input_ids, token_type_ids
   std::vector<int64_t> input_ids, token_type_ids;
   for (int i = 0; i < encodings.size(); ++i) {
     auto&& curr_input_ids = encodings[i].GetIds();
@@ -57,74 +181,52 @@ int main() {
     token_type_ids.insert(token_type_ids.end(), curr_type_ids.begin(),
                           curr_type_ids.end());
   }
-
-  memcpy(inputs[0].data.data(), input_ids.data(), inputs[0].data.size());
-  memcpy(inputs[1].data.data(), token_type_ids.data(), inputs[1].data.size());
+  // 3.2 Set data for the input tensors
+  std::vector<fastdeploy::FDTensor> inputs(runtime.NumInputs());
+  void* inputs_ptrs[] = {input_ids.data(), token_type_ids.data()};
+  for (int i = 0; i < runtime.NumInputs(); ++i) {
+    inputs[i].SetExternalData({batch_size, seq_len},
+                              fastdeploy::FDDataType::INT64, inputs_ptrs[i]);
+    inputs[i].name = runtime.GetInputInfo(i).name;
+  }
 
   // 4. Infer
   std::vector<fastdeploy::FDTensor> outputs(runtime.NumOutputs());
   runtime.Infer(inputs, &outputs);
 
   // 5. Postprocess
-  // domain_max_value = np.max(domain_logits, axis=1, keepdims=True)
-  // intent_max_value = np.max(intent_logits, axis=1, keepdims=True)
-  fastdeploy::FDTensor domain_max_value, intent_max_value;
-  Eigen::DefaultDevice dev;
-  fastdeploy::ReduceFunctor<float, 2, 1, fastdeploy::MaxFunctor>(
-      dev, outputs[0], &domain_max_value, {1});
-  fastdeploy::ReduceFunctor<float, 2, 1, fastdeploy::MaxFunctor>(
-      dev, outputs[1], &intent_max_value, {1});
-  // domain_exp_data = np.exp(domain_logits - domain_max_value)
-  // intent_exp_data = np.exp(intent_logits - intent_max_value)
-  fastdeploy::FDTensor domain_exp_data, intent_exp_data;
-  // Broadcast and diff
-  fastdeploy::CommonElementwiseBroadcastForward<fastdeploy::SubFunctor<float>,
-                                                float>(
-      outputs[0], domain_max_value, &domain_exp_data,
-      fastdeploy::SubFunctor<float>(), 0);
-  fastdeploy::CommonElementwiseBroadcastForward<fastdeploy::SubFunctor<float>,
-                                                float>(
-      outputs[1], intent_max_value, &intent_exp_data,
-      fastdeploy::SubFunctor<float>(), 0);
-  // domain_exp_data = np.exp(domain_logits - domain_max_value)
-  // intent_exp_data = np.exp(intent_logits - intent_max_value)
-  float* domain_exp_data_ptr = reinterpret_cast<float*>(domain_exp_data.Data());
-  float* intent_exp_data_ptr = reinterpret_cast<float*>(intent_exp_data.Data());
-  auto trans = [](float a) { return std::exp(a); };
-  std::transform(domain_exp_data_ptr,
-                 domain_exp_data_ptr + domain_exp_data.Numel(),
-                 domain_exp_data_ptr, trans);
-  std::transform(intent_exp_data_ptr,
-                 intent_exp_data_ptr + intent_exp_data.Numel(),
-                 intent_exp_data_ptr, trans);
-  // domain_probs = domain_exp_data / np.sum(domain_exp_data, axis=1,
-  // keepdims=True)
-  // intent_probs = intent_exp_data / np.sum(intent_exp_data, axis=1,
-  // keepdims=True)
-  fastdeploy::FDTensor domain_exp_data_sum, intent_exp_data_sum;
-  fastdeploy::ReduceFunctor<float, 2, 1, fastdeploy::SumFunctor>(
-      dev, domain_exp_data, &domain_exp_data_sum, {1});
-  fastdeploy::ReduceFunctor<float, 2, 1, fastdeploy::SumFunctor>(
-      dev, intent_exp_data, &intent_exp_data_sum, {1});
-  fastdeploy::FDTensor domain_probs, intent_probs;
-  fastdeploy::CommonElementwiseBroadcastForward<fastdeploy::DivFunctor<float>,
-                                                float>(
-      domain_exp_data, domain_exp_data_sum, &domain_probs,
-      fastdeploy::DivFunctor<float>(), 0);
-  fastdeploy::CommonElementwiseBroadcastForward<fastdeploy::DivFunctor<float>,
-                                                float>(
-      intent_exp_data, intent_exp_data_sum, &intent_probs,
-      fastdeploy::DivFunctor<float>(), 0);
+  fastdeploy::FDTensor domain_probs, intent_probs;
+  Softmax<float>(outputs[0], &domain_probs);
+  Softmax<float>(outputs[1], &intent_probs);
 
   fastdeploy::FDTensor domain_max_probs, intent_max_probs;
-  fastdeploy::ReduceFunctor<float, 2, 1, fastdeploy::MaxFunctor>(
-      dev, domain_probs, &domain_max_probs, {1});
-  fastdeploy::ReduceFunctor<float, 2, 1, fastdeploy::MaxFunctor>(
-      dev, intent_probs, &intent_max_probs, {1});
+  Max<float>(domain_probs, &domain_max_probs);
+  Max<float>(intent_probs, &intent_max_probs);
+
+  std::vector<float> transition;
+  int num_tags;
+  LoadTransitionFromFile("joint_transition.txt", &transition, &num_tags);
+  fastdeploy::FDTensor trans;
+  trans.SetExternalData({num_tags, num_tags}, fastdeploy::FDDataType::FP32,
+                        transition.data());
+
+  fastdeploy::FDTensor best_path;
+  ViterbiDecode<float>(outputs[2], trans, &best_path);
 
   // 6. Print result
   domain_max_probs.PrintInfo();
   intent_max_probs.PrintInfo();
+
+  batch_size = best_path.shape[0];
+  seq_len = best_path.shape[1];
+  const int64_t* best_path_ptr =
+      reinterpret_cast<const int64_t*>(best_path.Data());
+  for (int i = 0; i < batch_size; ++i) {
+    std::cout << "best_path[" << i << "] = ";
+    for (int j = 0; j < seq_len; ++j) {
+      std::cout << best_path_ptr[i * seq_len + j] << ", ";
+    }
+    std::cout << std::endl;
+  }
+  best_path.PrintInfo();
   return 0;
 }
\ No newline at end of file