-
Notifications
You must be signed in to change notification settings - Fork 3.8k
Closed
Description
I want to vectorize an axis in a sum reduction. Here is the code:
# Minimal reproducer: row-wise sum of an (n, n) float32 matrix in which the
# inner part of the split reduction axis is vectorized.
import tvm
import numpy as np
n = 64
# Input placeholder of shape (n, n).
A = tvm.placeholder((n, n), name='A', dtype='float32')
# Reduction axis over the range [0, n); it indexes the second dimension of A.
k = tvm.reduce_axis((0, n), "k")
# B[i] = sum over k of A[i, k]. The op is named "C", which is why the lowered
# IR below refers to the output buffer as C rather than B.
B = tvm.compute((n,), lambda i: tvm.sum(A[i, k], axis=k), name="C")
s = tvm.create_schedule(B.op)
# Split the reduction axis with factor 8 and rebind `k` to the first returned
# axis (shown as `k.outer` in the lowered IR); `ki` is the remaining part.
k, ki = s[B].split(k, factor=8)
s[B].vectorize(ki) # vectorize the inner part of the *reduction* axis -- the step under test
# Print the lowered IR so the generated loop nest can be inspected.
print(tvm.lower(s, [A, B], simple_mode=True))
func = tvm.build(s, [A, B], target='llvm')
# Verify correctness: run the built function on random input on CPU device 0;
# the result in `b` is then compared against NumPy's row-wise sum.
ctx = tvm.cpu(0)
a = tvm.nd.array(np.random.uniform(size=(n, n)).astype(A.dtype), ctx=ctx)
# Output buffer, zero-initialized; func writes the reduction result into it.
b = tvm.nd.array(np.zeros(shape=(n,), dtype=B.dtype), ctx=ctx)
func(a, b)
np.testing.assert_allclose(b.asnumpy(), np.sum(a.asnumpy(), axis=1), rtol=1e-4)
TVM did not output any error message, but the answer is wrong. Output:
produce C {
for (i, 0, 64) {
C[i] = 0.000000f
for (k.outer, 0, 8) {
C[x8(i)] = (x8(C[i]) + A[ramp((((i*8) + k.outer)*8), 1, 8)])
}
}
}
...
(mismatch 100.0%)
x: array([ 5.928585, 4.939113, 4.34326 , 3.470788, 2.35553 , 2.628344,
3.791345, 4.616838, 3.214629, 2.659637, 4.374955, 5.651574,
2.995839, 4.34803 , 5.436972, 4.196923, 3.906123, 4.364611,...
y: array([ 33.369698, 33.435944, 32.376495, 33.983963, 33.391079,
26.468966, 31.47113 , 30.853607, 31.724657, 28.669315,
31.928965, 36.953148, 32.466213, 28.177387, 31.322906,...
Metadata
Metadata
Assignees
Labels
No labels