From d45cdcb0e5590063810f521d2b10207dc074c87f Mon Sep 17 00:00:00 2001
From: Jacob Hinkle
Date: Tue, 26 Mar 2024 15:40:55 +0000
Subject: [PATCH] Hack to ensure cp.async is waited before smem reuse

This is a work-around for #2000.
---
 csrc/device_lower/pass/alias_memory.cpp | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

diff --git a/csrc/device_lower/pass/alias_memory.cpp b/csrc/device_lower/pass/alias_memory.cpp
index cb1a2a880c3..c0242abeb10 100644
--- a/csrc/device_lower/pass/alias_memory.cpp
+++ b/csrc/device_lower/pass/alias_memory.cpp
@@ -1971,6 +1971,20 @@ class PromoteReuseSyncModifier : private kir::ExprMutator {
       debug() << "Inserting block sync before position " << position
               << std::endl;
     }
+    {
+      // TODO: This is a temporary HACK to work around
+      // https://github.com/NVIDIA/Fuser/issues/2000
+      // Instead, we should only insert these wait statements when we detect
+      // that there are corresponding unsynced async operations involving the
+      // buffers in question. We should also update dispatch(Expr*) to check
+      // not only hasBlockSync but also check if there already exist AsyncWait
+      // expressions in the interval (or in some cases before the interval but
+      // after the last write?).
+      auto new_async_wait =
+          IrBuilder::create<kir::AsyncWait>(AsyncOpType::CpAsync);
+      registerInsertBefore(expr, new_async_wait);
+    }
+
     auto new_sync = IrBuilder::create<kir::BlockSync>();
     inserted_syncs_.insert(new_sync);
     registerInsertBefore(expr, new_sync);