Skip to content

async() on only one of the functions in a chain of the functions scheduled with compute_at() will produce incorrect results #5175

@vksnk

Description

@vksnk

The following example, in which only one of the producers is scheduled with async(), produces incorrect results:

    {
        // Minimal reproduction: a two-stage producer chain feeding a stencil
        // consumer, where only the second producer is scheduled async().
        Func producer1, producer2, consumer;
        Var x, y;

        // producer1 is a pure function of the coordinates; producer2 is a
        // straight copy of it, so producer2 depends directly on producer1.
        producer1(x, y) = x + y;
        producer2(x, y) = producer1(x, y);
        // The consumer reads producer2 one row above and one row below, so
        // the expected value is (x + y - 1) + (x + y + 1) = 2 * (x + y).
        consumer(x, y) = producer2(x, y - 1) + producer2(x, y + 1);
        
        consumer.compute_root();
        
        // Both producers are stored at root and computed per iteration of
        // consumer's y loop, but only producer2 is marked async().
        // Uncommenting .async() on producer1 makes the results correct.
        producer1.store_root().compute_at(consumer, y);//.async();
        producer2.store_root().compute_at(consumer, y).async();
        // bound(…, 0, 16) on both dimensions matches the 16x16 realization below.
        consumer.bound(x, 0, 16).bound(y, 0, 16);

        Buffer<int> out = consumer.realize(16, 16);

        // Compare every output element with the closed-form expected value;
        // report the first mismatch and exit with a non-zero status.
        out.for_each_element([&](int x, int y) {
            int correct = 2 * (x + y);
            if (out(x, y) != correct) {
                printf("out(%d, %d) = %d instead of %d\n",
                       x, y, out(x, y), correct);
                exit(-1);
            }
        });
    }

It seems that if producer2 is scheduled with async(), then every other function at the same loop level on which producer2 depends (producer1 in this case) should be scheduled with async() as well. If producer1 is also scheduled with async(), everything works correctly.

In the IR, producer2 is correctly moved to a separate fork block, and both producer2 and consumer are protected by semaphores. However, producer1 is in the same block as consumer and is not guarded in any way. As a result, when producer2 (which depends on producer1) is producing asynchronously, it may run ahead of producer1 and produce incorrect results.

 allocate producer1[int32 * 16 * 4]
 let producer2.folding_semaphore._0 = (halide_semaphore_t *)alloca(16)
 halide_semaphore_init(producer2.folding_semaphore._0, 2)                
 allocate producer2[int32 * 16 * 4]
 let producer2.semaphore_0 = (halide_semaphore_t *)alloca(16)
 halide_semaphore_init(producer2.semaphore_0, 0)
 fork {                                 
  for (consumer.s0.y, 0, 16) {
   acquire (producer2.folding_semaphore._0, 1) {
    let producer2.s0.y.min_1.s = select(0 < consumer.s0.y, 1, -1)
    produce producer2 {                                                             
     consume producer1 {          
      let t169 = select(0 < consumer.s0.y, 1, 3)                                                                                                                        
      let t168 = consumer.s0.y + producer2.s0.y.min_1.s
      for (producer2.s0.y, t168, t169) {
       let t170 = (producer2.s0.y % 4)*16
       for (producer2.s0.x, 0, 16) {                                                
        let t167 = producer2.s0.x + t170
        producer2[t167] = producer1[t167]
       } // for producer2.s0.x
      } // for producer2.s0.y
     }         
     halide_semaphore_release(producer2.semaphore_0, 1)
    }
   } // acquire
  } // for consumer.s0.y
 } {
  produce consumer {
   let t171 = (consumer.min.1*consumer.stride.1) + consumer.min.0
   for (consumer.s0.y, 0, 16) {
    let producer1.s0.y.min_1.s = select(0 < consumer.s0.y, 1, -1)
    produce producer1 {
     let t173 = select(0 < consumer.s0.y, 1, 3)
     let t172 = consumer.s0.y + producer1.s0.y.min_1.s
     for (producer1.s0.y, t172, t173) {
      let t174 = (producer1.s0.y % 4)*16
      for (producer1.s0.x, 0, 16) {
       producer1[producer1.s0.x + t174] = producer1.s0.x + producer1.s0.y
      } // for producer1.s0.x
     } // for producer1.s0.y
    }
    acquire (producer2.semaphore_0, 1) {
     consume producer2 {
      let t175 = ((consumer.s0.y + 3) % 4)*16
      let t176 = ((consumer.s0.y + 1) % 4)*16
      let t177 = (consumer.s0.y*consumer.stride.1) - t171
      for (consumer.s0.x, 0, 16) {
       consumer[consumer.s0.x + t177] = producer2[consumer.s0.x + t175] + producer2[consumer.s0.x + t176]
      } // for consumer.s0.x
     }
    } // acquire
    halide_semaphore_release(producer2.folding_semaphore._0, 1)
   } // for consumer.s0.y
  }
 } 
 free producer1
 free producer2
}

It seems that either a schedule like this should be an error, or async() should be propagated to all dependencies of the given producer in the pipeline.

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions