-
Notifications
You must be signed in to change notification settings - Fork 3.8k
Description
I have a model that I tried to deploy using the AOT runtime. The model's final output has type int8, and based on that I allocated a placeholder for the output like this:
int8_t output_data0[] ={0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, };
However, when I look at the C code generated for AOT runtime library, here's what has been generated:
// TVM-generated AOT entry point. It unpacks the packed-call ABI (an array of
// TVMValue plus a parallel array of type codes) into input/output DLTensor
// handles, then invokes each fused operator of the graph in execution order
// through the same packed-call convention.
// NOTE(review): this excerpt is truncated by the reporter (the "..." below and
// the missing final closing brace are elisions in the issue, not real code).
TVM_DLL int32_t tvm__run_func(void* args, void* arg_type_ids, int32_t num_args, void* out_ret_value, void* out_ret_tcode, void* resource_handle) {
// Scratch arrays reused as the argument/type-code stacks for every
// packed call made below.
TVMValue stack[5];
void* stack_tcode = stack;
TVMValue stack1[9];
void* stack_value = stack1;
// Unpack caller-supplied arguments: arg0 = model input, arg1 = model output.
void* arg0 = (((TVMValue*)args)[0].v_handle);
int32_t arg0_code = ((int32_t*)arg_type_ids)[(0)];
void* arg1 = (((TVMValue*)args)[1].v_handle);
int32_t arg1_code = ((int32_t*)arg_type_ids)[(1)];
void* input_0 = arg0;
// output_0 is the user-provided output buffer (an int8_t array per the issue
// report above). Note below that it is also handed to intermediate kernels,
// which is the subject of this bug report.
void* output_0 = arg1;
...
// Wrap the intermediate buffer sid_59 in a stack-allocated DLTensor-sized
// scratch area (only the .data field is populated before each call).
TVMValue stack245[6];
void* sid_59_value1 = stack245;
(((DLTensor*)sid_59_value1)[0].data) = sid_59;
// Look up linked (compiled-in) parameter #60 and bind it to a DLTensor.
TVMValue stack246[1];
void* param_60_array = stack246;
TVMValue stack247[1];
void* ret_value110 = stack247;
TVMValue stack248[1];
void* ret_value111 = stack248;
TVMValue stack249[1];
void* param_60_value = stack249;
(((TVMValue*)param_60_value)[0].v_int64) = 60;
(void)_lookup_linked_param(param_60_value, 0, 0, ret_value111, ret_value110, 0);
(((DLTensor*)param_60_array)[0].data) = (((TVMValue*)ret_value111)[0].v_handle);
// Same lookup for linked parameter #61.
TVMValue stack250[1];
void* param_61_array = stack250;
TVMValue stack251[1];
void* ret_value112 = stack251;
TVMValue stack252[1];
void* ret_value113 = stack252;
TVMValue stack253[1];
void* param_61_value = stack253;
(((TVMValue*)param_61_value)[0].v_int64) = 61;
(void)_lookup_linked_param(param_61_value, 0, 0, ret_value113, ret_value112, 0);
(((DLTensor*)param_61_array)[0].data) = (((TVMValue*)ret_value113)[0].v_handle);
// Call the fused dense kernel. Its 4th argument (index 3) is output_0 — the
// caller's int8 buffer — even though the kernel (see its definition below)
// writes float values through that tensor. This is the reported defect.
// The type code 3 is presumably TVM's array/opaque-handle code — confirm
// against TVMArgTypeCode in tvm/runtime/c_runtime_api.h.
(((TVMValue*)stack_value)[0].v_handle) = sid_59_value1;
((int32_t*)stack_tcode)[(0)] = 3;
(((TVMValue*)stack_value)[1].v_handle) = param_60_array;
((int32_t*)stack_tcode)[(1)] = 3;
(((TVMValue*)stack_value)[2].v_handle) = param_61_array;
((int32_t*)stack_tcode)[(2)] = 3;
(((TVMValue*)stack_value)[3].v_handle) = output_0;
((int32_t*)stack_tcode)[(3)] = 3;
TVMValue ret_val12;
int ret_type_code12;
if (fused_nn_contrib_dense_pack_add_fixed_point_multiply_add_clip_cast_cast_subtract_14669711146056581479_( (TVMValue*) stack_value , (int*) stack_tcode, 4, &ret_val12, &ret_type_code12, NULL) != 0){
return -1;
}
// Softmax stage: reads from output_0 (the previous stage's result) and
// writes into the sid_59 intermediate buffer.
TVMValue stack254[6];
void* sid_59_value2 = stack254;
(((DLTensor*)sid_59_value2)[0].data) = sid_59;
(((TVMValue*)stack_value)[0].v_handle) = output_0;
((int32_t*)stack_tcode)[(0)] = 3;
(((TVMValue*)stack_value)[1].v_handle) = sid_59_value2;
((int32_t*)stack_tcode)[(1)] = 3;
TVMValue ret_val13;
int ret_type_code13;
if (fused_nn_softmax( (TVMValue*) stack_value , (int*) stack_tcode, 2, &ret_val13, &ret_type_code13, NULL) != 0){
return -1;
}
// Final requantize stage: reads sid_59 and writes the real int8 result
// into output_0.
TVMValue stack255[6];
void* sid_59_value3 = stack255;
(((DLTensor*)sid_59_value3)[0].data) = sid_59;
(((TVMValue*)stack_value)[0].v_handle) = sid_59_value3;
((int32_t*)stack_tcode)[(0)] = 3;
(((TVMValue*)stack_value)[1].v_handle) = output_0;
((int32_t*)stack_tcode)[(1)] = 3;
TVMValue ret_val14;
int ret_type_code14;
if (fused_divide_add_round_cast_clip_cast( (TVMValue*) stack_value , (int*) stack_tcode, 2, &ret_val14, &ret_type_code14, NULL) != 0){
return -1;
}
// (Excerpt ends here; the function's closing brace is omitted in the report.)
output_0 is the placeholder for the final output (output_data0) that we passed to the function tvm__run_func, and it has int8 type. However, output_0 is also used by other intermediate functions and interpreted as other types, such as float. For example, the fused_nn_contrib_dense_pack_add_fixed_point_multiply_add_clip_cast_cast_subtract_14669711146056581479_ function is defined here:
// TVM-generated fused kernel: packed dense (matmul) + bias add + fixed-point
// requantize + clip + dequantize. Shapes implied by the loops below:
// a 64-element int16 input vector, a 64x12 int16 packed weight matrix, a
// 12-element int32 bias, and a 12-element float output.
// BUG (as reported): the caller binds arg3 to output_0, the user's 12-byte
// int8_t buffer, but this kernel stores 12 floats (48 bytes) through it,
// overrunning the buffer and clobbering adjacent memory.
TVM_DLL int32_t fused_nn_contrib_dense_pack_add_fixed_point_multiply_add_clip_cast_cast_subtract_14669711146056581479_(void* args, void* arg_type_ids, int32_t num_args, void* out_ret_value, void* out_ret_tcode, void* resource_handle) {
// Unpack the packed-call arguments into DLTensor handles.
void* arg0 = (((TVMValue*)args)[0].v_handle);
int32_t arg0_code = ((int32_t*)arg_type_ids)[(0)];
void* arg1 = (((TVMValue*)args)[1].v_handle);
int32_t arg1_code = ((int32_t*)arg_type_ids)[(1)];
void* arg2 = (((TVMValue*)args)[2].v_handle);
int32_t arg2_code = ((int32_t*)arg_type_ids)[(2)];
void* arg3 = (((TVMValue*)args)[3].v_handle);
int32_t arg3_code = ((int32_t*)arg_type_ids)[(3)];
// arg0 = input activations (read as int16 below).
void* placeholder = (((DLTensor*)arg0)[0].data);
void* arg0_shape = (((DLTensor*)arg0)[0].shape);
void* arg0_strides = (((DLTensor*)arg0)[0].strides);
int32_t dev_id = (((DLTensor*)arg0)[0].device.device_id);
// arg1 = packed weights (read as int16 below).
void* placeholder1 = (((DLTensor*)arg1)[0].data);
void* arg1_shape = (((DLTensor*)arg1)[0].shape);
void* arg1_strides = (((DLTensor*)arg1)[0].strides);
// arg2 = bias (read as int32 below).
void* placeholder2 = (((DLTensor*)arg2)[0].data);
void* arg2_shape = (((DLTensor*)arg2)[0].shape);
void* arg2_strides = (((DLTensor*)arg2)[0].strides);
// arg3 = output tensor; written as float* below even though the caller
// supplies the int8 model-output buffer here — the reported overflow.
void* T_multiply = (((DLTensor*)arg3)[0].data);
void* arg3_shape = (((DLTensor*)arg3)[0].shape);
void* arg3_strides = (((DLTensor*)arg3)[0].strides);
// Compact-tensor checks emitted with their assert bodies compiled out.
if (!(arg0_strides == NULL)) {
}
if (!(arg1_strides == NULL)) {
}
if (!(arg2_strides == NULL)) {
}
if (!(arg3_strides == NULL)) {
}
// 48-byte workspace = 12 int32 accumulators for the matmul.
void* compute_global = TVMBackendAllocWorkspace(1, dev_id, (uint64_t)48, 0, 32);
if (compute_global == NULL) {
return -1;
}
for (int32_t x_c_init = 0; x_c_init < 12; ++x_c_init) {
((int32_t*)compute_global)[(x_c_init)] = 0;
}
// int16 x int16 -> int32 dot product over the 64-element reduction axis.
for (int32_t k_outer = 0; k_outer < 64; ++k_outer) {
for (int32_t x_c = 0; x_c < 12; ++x_c) {
((int32_t*)compute_global)[(x_c)] = (((int32_t*)compute_global)[(x_c)] + (((int32_t)((int16_t*)placeholder)[(k_outer)]) * ((int32_t)((int16_t*)placeholder1)[(((k_outer * 12) + x_c))])));
}
}
for (int32_t ax1_inner_inner = 0; ax1_inner_inner < 12; ++ax1_inner_inner) {
// Requantize: add bias, fixed-point multiply by 1278221461 with a
// rounding right-shift of (7 + 31) bits, then add the zero point 14.
int32_t _1 = ((int32_t)(((((0 != 0) ? (((int64_t)(((int32_t*)compute_global)[(ax1_inner_inner)] + ((int32_t*)placeholder2)[(ax1_inner_inner)])) << ((int64_t)0)) : ((int64_t)(((int32_t*)compute_global)[(ax1_inner_inner)] + ((int32_t*)placeholder2)[(ax1_inner_inner)]))) * (int64_t)1278221461) + ((int64_t)1 << ((int64_t)((7 + 31) - 1)))) >> ((int64_t)(7 + 31)))) + 14;
// Clip to the int8 range [-128, 127]...
int32_t _2 = (_1) < (127) ? (_1) : (127);
// ...then immediately dequantize (subtract zero point 14, scale by
// ~0.1447) and store as FLOAT through T_multiply. This 4-byte-per-element
// store into the caller's 1-byte-per-element output buffer is the bug.
((float*)T_multiply)[(ax1_inner_inner)] = (((float)(((int32_t)((int8_t)((_2) > (-128) ? (_2) : (-128)))) - 14)) * 1.446925e-01f);
}
if (TVMBackendFreeWorkspace(1, dev_id, compute_global) != 0) {
return -1;
}
return 0;
}
Here, T_multiply is output_0, which is interpreted as a float type; its 4-byte-per-element stores overrun the 1-byte-per-element int8 buffer and cause the memory of other variables to be overwritten.
One quick workaround is to allocate the final output buffer using the largest element size used anywhere in the graph (float32/float64) to avoid this problem, but we need a better way to fix it.