-
Notifications
You must be signed in to change notification settings - Fork 1.1k
Description
The following example produces incorrect results when only one of the producers is scheduled as async():
{
Func producer1, producer2, consumer;
Var x, y;
producer1(x, y) = x + y;
producer2(x, y) = producer1(x, y);
consumer(x, y) = producer2(x, y - 1) + producer2(x, y + 1);
consumer.compute_root();
producer1.store_root().compute_at(consumer, y);//.async();
producer2.store_root().compute_at(consumer, y).async();
consumer.bound(x, 0, 16).bound(y, 0, 16);
Buffer<int> out = consumer.realize(16, 16);
out.for_each_element([&](int x, int y) {
int correct = 2 * (x + y);
if (out(x, y) != correct) {
printf("out(%d, %d) = %d instead of %d\n",
x, y, out(x, y), correct);
exit(-1);
}
});
}
It seems that if producer2 is scheduled with async(), then every other function computed at the same loop level that producer2 depends on (producer1 in this case) must be scheduled with async() as well. If producer1 is also scheduled with async(), everything works correctly.
In the IR, producer2 is correctly moved to a separate fork block, and both producer2 and consumer are protected by semaphores. However, producer1 is in the same block as consumer and is not guarded in any way, so when producer2 (which depends on producer1) runs asynchronously, it may run ahead of producer1, read rows of producer1 that have not been written yet, and produce incorrect results.
allocate producer1[int32 * 16 * 4]
let producer2.folding_semaphore._0 = (halide_semaphore_t *)alloca(16)
halide_semaphore_init(producer2.folding_semaphore._0, 2)
allocate producer2[int32 * 16 * 4]
let producer2.semaphore_0 = (halide_semaphore_t *)alloca(16)
halide_semaphore_init(producer2.semaphore_0, 0)
fork {
for (consumer.s0.y, 0, 16) {
acquire (producer2.folding_semaphore._0, 1) {
let producer2.s0.y.min_1.s = select(0 < consumer.s0.y, 1, -1)
produce producer2 {
consume producer1 {
let t169 = select(0 < consumer.s0.y, 1, 3)
let t168 = consumer.s0.y + producer2.s0.y.min_1.s
for (producer2.s0.y, t168, t169) {
let t170 = (producer2.s0.y % 4)*16
for (producer2.s0.x, 0, 16) {
let t167 = producer2.s0.x + t170
producer2[t167] = producer1[t167]
} // for producer2.s0.x
} // for producer2.s0.y
}
halide_semaphore_release(producer2.semaphore_0, 1)
}
} // acquire
} // for consumer.s0.y
} {
produce consumer {
let t171 = (consumer.min.1*consumer.stride.1) + consumer.min.0
for (consumer.s0.y, 0, 16) {
let producer1.s0.y.min_1.s = select(0 < consumer.s0.y, 1, -1)
produce producer1 {
let t173 = select(0 < consumer.s0.y, 1, 3)
let t172 = consumer.s0.y + producer1.s0.y.min_1.s
for (producer1.s0.y, t172, t173) {
let t174 = (producer1.s0.y % 4)*16
for (producer1.s0.x, 0, 16) {
producer1[producer1.s0.x + t174] = producer1.s0.x + producer1.s0.y
} // for producer1.s0.x
} // for producer1.s0.y
}
acquire (producer2.semaphore_0, 1) {
consume producer2 {
let t175 = ((consumer.s0.y + 3) % 4)*16
let t176 = ((consumer.s0.y + 1) % 4)*16
let t177 = (consumer.s0.y*consumer.stride.1) - t171
for (consumer.s0.x, 0, 16) {
consumer[consumer.s0.x + t177] = producer2[consumer.s0.x + t175] + producer2[consumer.s0.x + t176]
} // for consumer.s0.x
}
} // acquire
halide_semaphore_release(producer2.folding_semaphore._0, 1)
} // for consumer.s0.y
}
}
free producer1
free producer2
}
It seems that either it should be an error to have a schedule like this, or async() should be propagated to all dependencies of the given producer in the pipeline.