Skip to content

Pipeline with two async producers produce incorrect results #7965

@vksnk

Description

@vksnk

The following test produces incorrect results:

{
        Func producer1("producer1"), producer2("producer2"), consumer("consumer");
        Var x, y, xo, yo, xi, yi;

        producer1(x, y) = x + y;
        producer2(x, y) = x * y;
        consumer(x, y) = producer1(x - 1, y - 1) + producer2(x, y) + producer1(x + 1, y + 1);

        consumer
            .compute_root();
        producer1
            .compute_at(consumer, y)
            .store_at(consumer, Var::outermost())
            .fold_storage(y, 4)
            .async();
        producer2
            .compute_at(consumer, y)
            .store_at(consumer, Var::outermost())
            .fold_storage(y, 1)
            .async();

        Buffer<int> out = consumer.realize({128, 128});

        out.for_each_element([&](int x, int y) {
            int correct = 2 * (x + y) + x * y;
            if (out(x, y) != correct) {
                printf("out(%d, %d) = %d instead of %d\n",
                       x, y, out(x, y), correct);
                exit(1);
            }
        });
    }

The final IR looks wrong to me:

 let producer1.folding_semaphore._2 = (struct halide_semaphore_t *)alloca(16)
 halide_semaphore_init(producer1.folding_semaphore._2, 4)
 allocate producer1[int32 * (consumer.extent.0 + 2) * 4]
 let producer1.semaphore_0 = (struct halide_semaphore_t *)alloca(16)
 halide_semaphore_init(producer1.semaphore_0, 0)
 let producer1.semaphore_0_4 = (struct halide_semaphore_t *)alloca(16)
 halide_semaphore_init(producer1.semaphore_0_4, 0)
 let producer2.folding_semaphore._0 = (struct halide_semaphore_t *)alloca(16)
 halide_semaphore_init(producer2.folding_semaphore._0, 1)
 allocate producer2[int32 * consumer.extent.0 * 1]
 let producer2.semaphore_0 = (struct halide_semaphore_t *)alloca(16)
 halide_semaphore_init(producer2.semaphore_0, 0)
 fork {
  let t109 = consumer.min.0 + consumer.min.1
  for (consumer.s0.v1.$n.rebased, 0, consumer.extent.1 + 2) {
   let t104 = select(0 < consumer.s0.v1.$n.rebased, 1, 3)
   acquire (producer1.folding_semaphore._2, t104) {
    produce producer1 {
     let t111 = (((consumer.min.1 + consumer.s0.v1.$n.rebased) + 3) % 4)*(consumer.extent.0 + 2)
     let t110 = consumer.s0.v1.$n.rebased + t109
     for (producer1.s0.v0.rebased, 0, consumer.extent.0 + 2) {
      producer1[producer1.s0.v0.rebased + t111] = (producer1.s0.v0.rebased + t110) + -2
     }
     halide_semaphore_release(producer1.semaphore_0, 1)
     halide_semaphore_release(producer1.semaphore_0_4, 1)
    }
   }
  }
 } {
  for (consumer.s0.v1.$n.rebased, 0, consumer.extent.1 + 2) {
   let t86 = select((consumer.s0.v1.$n.rebased < 2) && (0 < consumer.s0.v1.$n.rebased), 0, 1)
   acquire (producer2.folding_semaphore._0, t86) {
    acquire (producer1.semaphore_0_4, 1) {
     if (2 <= consumer.s0.v1.$n.rebased) {
      consume producer1 {
       produce producer2 {
        let t112 = consumer.min.1 + consumer.s0.v1.$n.rebased
        for (producer2.s0.v0.rebased, 0, consumer.extent.0) {
         producer2[producer2.s0.v0.rebased] = (t112 + -2)*(consumer.min.0 + producer2.s0.v0.rebased)
        }
        halide_semaphore_release(producer2.semaphore_0, 1)
       }
      }
     }
    }
   }
  }
 } {
  produce consumer {
   let t113 = 0 - (consumer.min.1*consumer.stride.1)
   for (consumer.s0.v1.$n.rebased, 0, consumer.extent.1 + 2) {
    acquire (producer1.semaphore_0, 1) {
     if (2 <= consumer.s0.v1.$n.rebased) {
      consume producer1 {
       acquire (producer2.semaphore_0, 1) {
        consume producer2 {
         let t114 = consumer.min.1 + consumer.s0.v1.$n.rebased
         for (consumer.s0.v0.rebased, 0, consumer.extent.0) {
          consumer[(((t114 + -2)*consumer.stride.1) + t113) + consumer.s0.v0.rebased] = producer2[consumer.s0.v0.rebased] + (producer1[((((t114 + 3) % 4)*(consumer.extent.0 + 2)) + consumer.s0.v0.rebased) + 2] + producer1[(((t114 + 1) % 4)*(consumer.extent.0 + 2)) + consumer.s0.v0.rebased])
         }
        }
       }
      }
     }
     halide_semaphore_release(producer2.folding_semaphore._0, 1)
    }
    let t103 = select(consumer.s0.v1.$n.rebased < (consumer.extent.1 + 1), 1, 3)
    halide_semaphore_release(producer1.folding_semaphore._2, t103)
   }
  }
 }
 free producer1
 free producer2
}

For example, producer1 and producer2 seem to be connected via producer1.semaphore_0_4 semaphore, but there is no actual dependency between them.

Metadata

Metadata

Assignees

Labels

No labels
No labels

Type

No type

Projects

No projects

Milestone

No milestone

Relationships

None yet

Development

No branches or pull requests

Issue actions