Skip to content

[Bug] Inconsistency caused by 65535f16*0f16 after using compute_inline #12377

@cxx122

Description

@cxx122
TENSOR_0 = te.compute([14], lambda rck:te.max_value("float16")*te.min_value("uint16"), name ="TENSOR_1")
TENSOR_1 = te.compute([11], lambda oco:te.max_value("uint16")*TENSOR_0[oco], name ="TENSOR_2")

The TIR program before compute_inline:

@main = primfn(TENSOR_1_1: handle, TENSOR_2_1: handle) -> ()
  attr = {"from_legacy_te_schedule": True, "global_symbol": "main", "tir.noalias": True}
  buffers = {TENSOR_1: Buffer(TENSOR_1_2: Pointer(float16), float16, [14], []),
             TENSOR_2: Buffer(TENSOR_2_2: Pointer(float16), float16, [11], [])}
  buffer_map = {TENSOR_1_1: TENSOR_1, TENSOR_2_1: TENSOR_2}
  preflattened_buffer_map = {TENSOR_1_1: TENSOR_1_3: Buffer(TENSOR_1_2, float16, [14], []), TENSOR_2_1: TENSOR_2_3: Buffer(TENSOR_2_2, float16, [11], [])} {
  for (rck: int32, 0, 11) {
    TENSOR_1[rck] = 0f16
  }
  for (oco: int32, 0, 11) {
    TENSOR_2[oco] = (65535f16*TENSOR_1[oco])
  }
}

The TIR program after compute_inline:

@main = primfn(TENSOR_1_1: handle, TENSOR_2_1: handle) -> ()
  attr = {"from_legacy_te_schedule": True, "global_symbol": "main", "tir.noalias": True}
  buffers = {TENSOR_1: Buffer(TENSOR_1_2: Pointer(float16), float16, [14], []),
             TENSOR_2: Buffer(TENSOR_2_2: Pointer(float16), float16, [11], [])}
  buffer_map = {TENSOR_1_1: TENSOR_1, TENSOR_2_1: TENSOR_2}
  preflattened_buffer_map = {TENSOR_1_1: TENSOR_1_3: Buffer(TENSOR_1_2, float16, [14], []), TENSOR_2_1: TENSOR_2_3: Buffer(TENSOR_2_2, float16, [11], [])} {
  for (oco: int32, 0, 11) {
    TENSOR_2[oco] = 0f16
  }
}

Actual behavior

The two builds disagree. Before inlining, the literal 65535f16 exceeds the largest finite float16 value (65504), so at runtime it becomes +inf and inf * 0f16 produces NaN; after compute_inline, the simplifier constant-folds 65535f16 * 0f16 to 0f16 at compile time. Comparing the two outputs therefore fails:

AssertionError: 
Not equal to tolerance rtol=1e-05, atol=1e-07

x and y nan location mismatch:
 x: array([nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan],
      dtype=float16)
 y: array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.], dtype=float16)

Environment

Operating System: Ubuntu 18.04; TVM version: tag 0.9.0 [d361585]

Steps to reproduce

# Reproduction script for TVM issue #12377: compute_inline changes the
# observable result of 65535f16 * 0f16 (NaN at runtime vs. 0 after folding).
import os
import numpy as np
import tvm
from tvm import te, auto_scheduler, topi
import tvm.testing

# NOTE(review): the name= strings are offset by one from the Python variable
# names (TENSOR_0 is named "TENSOR_1", etc.) — kept verbatim from the report.
# TENSOR_0[i] = max_value(float16) * min_value(uint16); the pre-inline TIR dump
# shows this lowers to the constant 0f16.
TENSOR_0 = te.compute([14], lambda rck:te.max_value("float16")*te.min_value("uint16"), name ="TENSOR_1")
# TENSOR_1[i] = max_value(uint16) * TENSOR_0[i]; the TIR shows this as
# 65535f16 * TENSOR_1[oco]. 65535 is above float16's largest finite value
# (65504), so at runtime the literal presumably becomes +inf and
# inf * 0 yields the NaN seen in the assertion output.
TENSOR_1 = te.compute([11], lambda oco:te.max_value("uint16")*TENSOR_0[oco], name ="TENSOR_2")
s = te.create_schedule(TENSOR_1.op)
tensor_list = [TENSOR_0,TENSOR_1]

dev = tvm.cpu(0)
pre_list = []    # buffers for the build BEFORE compute_inline
after_list = []  # identical copies for the build AFTER compute_inline
for tensor in tensor_list:
    # Use the static extent where available; fall back to 1 for symbolic dims.
    shape = [x.value if 'value' in dir(x) and isinstance(x.value, int) else 1 for x in tensor.shape]
    params = (5*np.random.uniform(size=shape)).astype(tensor.dtype)
    pre_list.append(tvm.nd.array(params.copy(), dev))
    after_list.append(tvm.nd.array(params.copy(), dev))

# Build and run the schedule before inlining (runtime computes NaN).
pre_mod = tvm.lower(s, tensor_list, simple_mode=True)
with tvm.transform.PassContext(opt_level=4):
    f = tvm.build(pre_mod)
f(*pre_list)

# Inline TENSOR_0 into TENSOR_1's computation.
s[TENSOR_0].compute_inline()

# Build and run again; per the second TIR dump the product has been
# constant-folded to 0f16 at compile time, diverging from the first run.
now_mod = tvm.lower(s, tensor_list, simple_mode=True)
with tvm.transform.PassContext(opt_level=4):
    f = tvm.build(now_mod)
f(*after_list)

# The two builds should agree but do not: NaN (pre-inline) vs. 0 (post-inline).
tvm.testing.assert_allclose(pre_list[1].numpy(), after_list[1].numpy(),rtol=1e-5)

Metadata

Metadata

Labels

Type

No type

Projects

No projects

Milestone

No milestone

Relationships

None yet

Development

No branches or pull requests

Issue actions