-
Notifications
You must be signed in to change notification settings - Fork 3.8k
Closed
Labels
Description
TENSOR_0 = te.compute([14], lambda rck:te.max_value("float16")*te.min_value("uint16"), name ="TENSOR_1")
TENSOR_1 = te.compute([11], lambda oco:te.max_value("uint16")*TENSOR_0[oco], name ="TENSOR_2")
The TIR program before compute_inline:
@main = primfn(TENSOR_1_1: handle, TENSOR_2_1: handle) -> ()
attr = {"from_legacy_te_schedule": True, "global_symbol": "main", "tir.noalias": True}
buffers = {TENSOR_1: Buffer(TENSOR_1_2: Pointer(float16), float16, [14], []),
TENSOR_2: Buffer(TENSOR_2_2: Pointer(float16), float16, [11], [])}
buffer_map = {TENSOR_1_1: TENSOR_1, TENSOR_2_1: TENSOR_2}
preflattened_buffer_map = {TENSOR_1_1: TENSOR_1_3: Buffer(TENSOR_1_2, float16, [14], []), TENSOR_2_1: TENSOR_2_3: Buffer(TENSOR_2_2, float16, [11], [])} {
for (rck: int32, 0, 11) {
TENSOR_1[rck] = 0f16
}
for (oco: int32, 0, 11) {
TENSOR_2[oco] = (65535f16*TENSOR_1[oco])
}
}
The TIR program after compute_inline:
@main = primfn(TENSOR_1_1: handle, TENSOR_2_1: handle) -> ()
attr = {"from_legacy_te_schedule": True, "global_symbol": "main", "tir.noalias": True}
buffers = {TENSOR_1: Buffer(TENSOR_1_2: Pointer(float16), float16, [14], []),
TENSOR_2: Buffer(TENSOR_2_2: Pointer(float16), float16, [11], [])}
buffer_map = {TENSOR_1_1: TENSOR_1, TENSOR_2_1: TENSOR_2}
preflattened_buffer_map = {TENSOR_1_1: TENSOR_1_3: Buffer(TENSOR_1_2, float16, [14], []), TENSOR_2_1: TENSOR_2_3: Buffer(TENSOR_2_2, float16, [11], [])} {
for (oco: int32, 0, 11) {
TENSOR_2[oco] = 0f16
}
}
Actual behavior
AssertionError:
Not equal to tolerance rtol=1e-05, atol=1e-07
x and y nan location mismatch:
x: array([nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan],
dtype=float16)
y: array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.], dtype=float16)
Environment
Operating System: Ubuntu 18.04, TVM version: tag 0.9.0 [d361585]
Steps to reproduce
import os
import numpy as np
import tvm
from tvm import te, auto_scheduler, topi
import tvm.testing

# Reproduction script: run the same TE schedule twice — once as written and
# once after s[TENSOR_0].compute_inline() — and compare the outputs.  They are
# expected to agree, but they do not (NaN vs. 0), which is the reported bug.

# max_value("float16") * min_value("uint16") == 65504 * 0; the "before" TIR
# dump above shows this is folded to 0f16 at lowering time.
TENSOR_0 = te.compute([14], lambda rck:te.max_value("float16")*te.min_value("uint16"), name ="TENSOR_1")
# max_value("uint16") == 65535 appears as 65535f16 in the lowered TIR.
# NOTE(review): 65535 is not representable in float16 (max finite is 65504),
# so at runtime this presumably becomes inf, and inf * 0f16 = NaN; after
# compute_inline the whole expression is constant-folded to 0f16 instead,
# hence the mismatch — confirm against the lowering passes.
TENSOR_1 = te.compute([11], lambda oco:te.max_value("uint16")*TENSOR_0[oco], name ="TENSOR_2")
s = te.create_schedule(TENSOR_1.op)
tensor_list = [TENSOR_0,TENSOR_1]
dev = tvm.cpu(0)
pre_list = []    # buffers for the build BEFORE compute_inline
after_list = []  # identical copies for the build AFTER compute_inline
for tensor in tensor_list:
    # Materialize each tensor's static shape; any dim without a concrete
    # integer `value` falls back to 1.
    shape = [x.value if 'value' in dir(x) and isinstance(x.value, int) else 1 for x in tensor.shape]
    # Same random payload for both runs, so any output difference comes from
    # the schedule transform rather than the input data.
    params = (5*np.random.uniform(size=shape)).astype(tensor.dtype)
    pre_list.append(tvm.nd.array(params.copy(), dev))
    after_list.append(tvm.nd.array(params.copy(), dev))
# Build and run the schedule as-is (no inlining).
pre_mod = tvm.lower(s, tensor_list, simple_mode=True)
with tvm.transform.PassContext(opt_level=4):
    f = tvm.build(pre_mod)
f(*pre_list)
# Inline TENSOR_0 into its consumer and rebuild from the SAME schedule object.
s[TENSOR_0].compute_inline()
now_mod = tvm.lower(s, tensor_list, simple_mode=True)
with tvm.transform.PassContext(opt_level=4):
    f = tvm.build(now_mod)
f(*after_list)
# Expected to pass; actually raises AssertionError (NaN vs. 0 — see the
# "Actual behavior" section above).
tvm.testing.assert_allclose(pre_list[1].numpy(), after_list[1].numpy(),rtol=1e-5)