diff --git a/src/codegen/codegen_cuda.cc b/src/codegen/codegen_cuda.cc index 0ab56a116eab..d4f83ca4487f 100644 --- a/src/codegen/codegen_cuda.cc +++ b/src/codegen/codegen_cuda.cc @@ -225,8 +225,16 @@ void CodeGenCUDA::PrintStorageSync(const Call* op) { std::string ptr = GetUniqueName("pf"); this->stream << "volatile unsigned* " << ptr << " = &" << vid_global_barrier_state_<< ";\n"; + + this->stream << "#if __CUDA_ARCH__ >= 600\n"; + this->PrintIndent(); + this->stream << "atomicAdd_block(&" << vid_global_barrier_expect_ << ", " + << num_blocks << ");\n"; + this->stream << "#else\n"; this->PrintIndent(); - this->stream << vid_global_barrier_expect_ << " += " << num_blocks << ";\n"; + this->stream << "atomicAdd(&" << vid_global_barrier_expect_ << ", " << num_blocks << ");\n"; + this->stream << "#endif\n"; + this->PrintIndent(); this->stream <<"while (" << ptr << "[0] < " << vid_global_barrier_expect_ << ");\n"; this->EndScope(wb);