Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions csrcs/fastdeploy/core/fd_tensor.cc
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,13 @@ void* FDTensor::Data() {
return data.data();
}

// Read-only access to the tensor buffer. Mirrors the non-const
// overload: a user-supplied external buffer, when present, takes
// precedence over the internally owned `data` storage.
const void* FDTensor::Data() const {
  return (external_data_ptr != nullptr) ? external_data_ptr : data.data();
}

void FDTensor::SetExternalData(const std::vector<int>& new_shape,
const FDDataType& data_type, void* data_buffer) {
dtype = data_type;
Expand Down
4 changes: 3 additions & 1 deletion csrcs/fastdeploy/core/fd_tensor.h
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,8 @@ struct FASTDEPLOY_DECL FDTensor {
// will copy to cpu store in `temporary_cpu_buffer`
void* Data();

const void* Data() const;

// Set user memory buffer for Tensor, the memory is managed by
// the user it self, but the Tensor will share the memory with user
// So take care with the user buffer
Expand Down Expand Up @@ -81,4 +83,4 @@ struct FASTDEPLOY_DECL FDTensor {
explicit FDTensor(const std::string& tensor_name);
};

} // namespace fastdeploy
} // namespace fastdeploy
4 changes: 2 additions & 2 deletions examples/text/compute.h
Original file line number Diff line number Diff line change
Expand Up @@ -225,8 +225,8 @@ void CommonElementwiseBroadcastForward(const FDTensor& x, const FDTensor& y,
y_dims_array.data(), out_dims_array.data(), max_dim,
axis);

const T* x_data = reinterpret_cast<const T*>(x.data.data());
const T* y_data = reinterpret_cast<const T*>(y.data.data());
const T* x_data = reinterpret_cast<const T*>(x.Data());
const T* y_data = reinterpret_cast<const T*>(y.Data());

z->Allocate(out_dims_array, TypeToDataType<OutType>::dtype);
OutType* out_data = reinterpret_cast<T*>(z->MutableData());
Expand Down
234 changes: 168 additions & 66 deletions examples/text/ernie_tokencls.cc
Original file line number Diff line number Diff line change
Expand Up @@ -12,12 +12,144 @@
// See the License for the specific language governing permissions and
// limitations under the License.
#include <algorithm>
#include <cmath>
#include <cstdint>
#include <fstream>
#include <iostream>
#include <limits>
#include <sstream>
#include <string>
#include <vector>

#include "compute.h"
#include "fastdeploy/text.h"
#include "tokenizers/ernie_faster_tokenizer.h"

using namespace paddlenlp;

// Reads a whitespace-separated CRF transition matrix from `file`.
// Each non-empty line is one row of the matrix; all values are appended
// to `transitions` in row-major order, and `num_tags` receives the
// number of non-empty lines (the tag count).
void LoadTransitionFromFile(const std::string& file,
                            std::vector<float>* transitions, int* num_tags) {
  std::ifstream fin(file);
  std::string line;
  int tag_count = 0;
  while (std::getline(fin, line)) {
    std::istringstream iss(line);
    float value;
    // Use the extraction itself as the loop condition. The previous
    // `while (iss)` form appended the last value of every line twice
    // (the failed extraction still reached push_back) and appended a
    // stale value for empty lines.
    while (iss >> value) {
      transitions->push_back(value);
    }
    if (!line.empty()) {
      ++tag_count;
    }
  }
  *num_tags = tag_count;
}

// Row-wise softmax over the innermost axis (only axis = -1 is supported).
// Treats `input` as a [Numel()/last_dim, last_dim] matrix and writes the
// normalized probabilities into `output`, which is (re)allocated here with
// the same shape and dtype as `input`.
template <typename T>
void Softmax(const fastdeploy::FDTensor& input, fastdeploy::FDTensor* output) {
  std::vector<int32_t> out_shape(input.shape.begin(), input.shape.end());
  output->Allocate(out_shape, input.dtype);

  const int label_num = out_shape.back();
  const int num_rows = input.Numel() / label_num;
  const T* src = reinterpret_cast<const T*>(input.Data());
  T* dst = reinterpret_cast<T*>(output->Data());

  for (int row = 0; row < num_rows; ++row) {
    // Subtract the row maximum before exponentiating for numerical
    // stability; accumulate the sum in double like the original.
    const double row_max = *std::max_element(src, src + label_num);
    double exp_sum = 0;
    for (int j = 0; j < label_num; ++j) {
      dst[j] = std::exp(src[j] - row_max);
      exp_sum += dst[j];
    }
    for (int j = 0; j < label_num; ++j) {
      dst[j] /= exp_sum;
    }
    src += label_num;
    dst += label_num;
  }
}

// Row-wise maximum over the innermost axis (only axis = -1 is supported).
// The output shape equals the input shape with its last dimension
// replaced by 1; `output` is (re)allocated here.
template <typename T>
void Max(const fastdeploy::FDTensor& input, fastdeploy::FDTensor* output) {
  std::vector<int32_t> out_shape(input.shape.begin(), input.shape.end() - 1);
  out_shape.push_back(1);
  output->Allocate(out_shape, input.dtype);

  const int label_num = input.shape.back();
  const int num_rows = output->Numel();
  const T* src = reinterpret_cast<const T*>(input.Data());
  T* dst = reinterpret_cast<T*>(output->Data());
  for (int row = 0; row < num_rows; ++row) {
    dst[row] = *std::max_element(src, src + label_num);
    src += label_num;
  }
}

// CRF Viterbi decoding over the tag axis.
//   slot_logits: [batch_size, seq_len, num_tags] emission scores.
//   trans:       [num_tags, num_tags] transition scores (from-tag, to-tag).
//   best_path:   allocated here as [batch_size, seq_len] INT64 tag ids.
template <typename T>
void ViterbiDecode(const fastdeploy::FDTensor& slot_logits,
                   const fastdeploy::FDTensor& trans,
                   fastdeploy::FDTensor* best_path) {
  int batch_size = slot_logits.shape[0];
  int seq_len = slot_logits.shape[1];
  int num_tags = slot_logits.shape[2];
  best_path->Allocate({batch_size, seq_len}, fastdeploy::FDDataType::INT64);

  const T* slot_logits_ptr = reinterpret_cast<const T*>(slot_logits.Data());
  const T* trans_ptr = reinterpret_cast<const T*>(trans.Data());
  int64_t* best_path_ptr = reinterpret_cast<int64_t*>(best_path->Data());

  std::vector<T> scores(num_tags);
  std::vector<std::vector<T>> M(num_tags, std::vector<T>(num_tags));
  for (int b = 0; b < batch_size; ++b) {
    const T* curr_slot_logits_ptr = slot_logits_ptr + b * seq_len * num_tags;
    int64_t* curr_best_path_ptr = best_path_ptr + b * seq_len;
    // BUGFIX: (re)initialize the running scores from THIS batch's t = 0
    // emissions. Previously this was done once before the batch loop, so
    // every batch after the first decoded against stale scores.
    std::copy(curr_slot_logits_ptr, curr_slot_logits_ptr + num_tags,
              scores.begin());
    // paths[t-1][j] = best previous tag when arriving at tag j at step t.
    std::vector<std::vector<int>> paths;
    for (int t = 1; t < seq_len; ++t) {
      for (int i = 0; i < num_tags; ++i) {
        for (int j = 0; j < num_tags; ++j) {
          // BUGFIX: `trans` is a row-major [num_tags, num_tags] matrix,
          // so entry (i, j) is trans_ptr[i * num_tags + j]. The previous
          // index (i * num_tags * num_tags + j * num_tags) read out of
          // bounds for every i > 0.
          M[i][j] = scores[i] + trans_ptr[i * num_tags + j] +
                    curr_slot_logits_ptr[t * num_tags + j];
        }
      }
      std::vector<int> idxs(num_tags);
      for (int j = 0; j < num_tags; ++j) {
        // BUGFIX: start from lowest() rather than 0 so that an
        // all-negative column still selects its true maximum instead of
        // silently falling back to tag 0 with score 0.
        T best = std::numeric_limits<T>::lowest();
        int best_prev = 0;
        for (int i = 0; i < num_tags; ++i) {
          if (M[i][j] > best) {
            best = M[i][j];
            best_prev = i;
          }
        }
        scores[j] = best;  // safe in place: M for this step is complete
        idxs[j] = best_prev;
      }
      paths.push_back(idxs);
    }
    // Terminal step: pick the best final tag (same lowest() fix as above).
    int last_tag = 0;
    T best_final = std::numeric_limits<T>::lowest();
    for (int i = 0; i < num_tags; ++i) {
      if (scores[i] > best_final) {
        best_final = scores[i];
        last_tag = i;
      }
    }
    curr_best_path_ptr[seq_len - 1] = last_tag;
    // Walk the recorded back-pointers from the end of the sequence.
    for (int t = seq_len - 2; t >= 0; --t) {
      last_tag = paths[t][last_tag];
      curr_best_path_ptr[t] = last_tag;
    }
  }
}

int main() {
// 1. Define a ernie faster tokenizer
faster_tokenizer::tokenizers_impl::ErnieFasterTokenizer tokenizer(
Expand All @@ -39,15 +171,7 @@ int main() {
runtime.Init(runtime_option);

// 3. Construct input vector
std::vector<fastdeploy::FDTensor> inputs(runtime.NumInputs());
for (int i = 0; i < runtime.NumInputs(); ++i) {
inputs[i].dtype = fastdeploy::FDDataType::INT64;
inputs[i].shape = {batch_size, seq_len};
inputs[i].name = runtime.GetInputInfo(i).name;
inputs[i].data.resize(sizeof(int64_t) * batch_size * seq_len);
}

// Convert encodings to input_ids, token_type_ids
// 3.1 Convert encodings to input_ids, token_type_ids
std::vector<int64_t> input_ids, token_type_ids;
for (int i = 0; i < encodings.size(); ++i) {
auto&& curr_input_ids = encodings[i].GetIds();
Expand All @@ -57,74 +181,52 @@ int main() {
token_type_ids.insert(token_type_ids.end(), curr_type_ids.begin(),
curr_type_ids.end());
}

memcpy(inputs[0].data.data(), input_ids.data(), inputs[0].data.size());
memcpy(inputs[1].data.data(), token_type_ids.data(), inputs[1].data.size());
// 3.2 Set data to input vector
std::vector<fastdeploy::FDTensor> inputs(runtime.NumInputs());
void* inputs_ptrs[] = {input_ids.data(), token_type_ids.data()};
for (int i = 0; i < runtime.NumInputs(); ++i) {
inputs[i].SetExternalData({batch_size, seq_len},
fastdeploy::FDDataType::INT64, inputs_ptrs[i]);
inputs[i].name = runtime.GetInputInfo(i).name;
}

// 4. Infer
std::vector<fastdeploy::FDTensor> outputs(runtime.NumOutputs());
runtime.Infer(inputs, &outputs);

// 5. Postprocess
// domain_max_value = np.max(domain_logits, axis=1, keepdims=True)
// intent_max_value = np.max(intent_logits, axis=1, keepdims=True)
fastdeploy::FDTensor domain_max_value, intent_max_value;
Eigen::DefaultDevice dev;
fastdeploy::ReduceFunctor<float, 2, 1, fastdeploy::MaxFunctor>(
dev, outputs[0], &domain_max_value, {1});
fastdeploy::ReduceFunctor<float, 2, 1, fastdeploy::MaxFunctor>(
dev, outputs[1], &intent_max_value, {1});
// domain_exp_data = np.exp(domain_logits - domain_max_value)
// intent_exp_data = np.exp(intent_logits - intent_max_value)
fastdeploy::FDTensor domain_exp_data, intent_exp_data;
// Broadcast and diff
fastdeploy::CommonElementwiseBroadcastForward<fastdeploy::SubFunctor<float>,
float>(
outputs[0], domain_max_value, &domain_exp_data,
fastdeploy::SubFunctor<float>(), 0);
fastdeploy::CommonElementwiseBroadcastForward<fastdeploy::SubFunctor<float>,
float>(
outputs[1], intent_max_value, &intent_exp_data,
fastdeploy::SubFunctor<float>(), 0);
// domain_exp_data = np.exp(domain_logits - domain_max_value)
// intent_exp_data = np.exp(intent_logits - intent_max_value)
float* domain_exp_data_ptr = reinterpret_cast<float*>(domain_exp_data.Data());
float* intent_exp_data_ptr = reinterpret_cast<float*>(intent_exp_data.Data());
auto trans = [](float a) { return std::exp(a); };
std::transform(domain_exp_data_ptr,
domain_exp_data_ptr + domain_exp_data.Numel(),
domain_exp_data_ptr, trans);
std::transform(intent_exp_data_ptr,
intent_exp_data_ptr + intent_exp_data.Numel(),
intent_exp_data_ptr, trans);
// domain_probs = domain_exp_data / np.sum(domain_exp_data, axis=1,
// keepdims=True)
// intent_probs = intent_exp_data / np.sum(intent_exp_data, axis=1,
// keepdims=True)
fastdeploy::FDTensor domain_exp_data_sum, intent_exp_data_sum;
fastdeploy::ReduceFunctor<float, 2, 1, fastdeploy::SumFunctor>(
dev, domain_exp_data, &domain_exp_data_sum, {1});
fastdeploy::ReduceFunctor<float, 2, 1, fastdeploy::SumFunctor>(
dev, intent_exp_data, &intent_exp_data_sum, {1});

fastdeploy::FDTensor domain_probs, intent_probs;
fastdeploy::CommonElementwiseBroadcastForward<fastdeploy::DivFunctor<float>,
float>(
domain_exp_data, domain_exp_data_sum, &domain_probs,
fastdeploy::DivFunctor<float>(), 0);
fastdeploy::CommonElementwiseBroadcastForward<fastdeploy::DivFunctor<float>,
float>(
intent_exp_data, intent_exp_data_sum, &intent_probs,
fastdeploy::DivFunctor<float>(), 0);
Softmax<float>(outputs[0], &domain_probs);
Softmax<float>(outputs[1], &intent_probs);

fastdeploy::FDTensor domain_max_probs, intent_max_probs;
fastdeploy::ReduceFunctor<float, 2, 1, fastdeploy::MaxFunctor>(
dev, domain_probs, &domain_max_probs, {1});
fastdeploy::ReduceFunctor<float, 2, 1, fastdeploy::MaxFunctor>(
dev, intent_probs, &intent_max_probs, {1});
Max<float>(domain_probs, &domain_max_probs);
Max<float>(intent_probs, &intent_max_probs);

std::vector<float> transition;
int num_tags;
LoadTransitionFromFile("joint_transition.txt", &transition, &num_tags);
fastdeploy::FDTensor trans;
trans.SetExternalData({num_tags, num_tags}, fastdeploy::FDDataType::FP32,
transition.data());

fastdeploy::FDTensor best_path;
ViterbiDecode<float>(outputs[2], trans, &best_path);
// 6. Print result
domain_max_probs.PrintInfo();
intent_max_probs.PrintInfo();

batch_size = best_path.shape[0];
seq_len = best_path.shape[1];
const int64_t* best_path_ptr =
reinterpret_cast<const int64_t*>(best_path.Data());
for (int i = 0; i < batch_size; ++i) {
std::cout << "best_path[" << i << "] = ";
for (int j = 0; j < seq_len; ++j) {
std::cout << best_path_ptr[i * seq_len + j] << ", ";
}
std::cout << std::endl;
}
best_path.PrintInfo();
return 0;
}