diff --git a/compiler/rustc_codegen_llvm/src/allocator.rs b/compiler/rustc_codegen_llvm/src/allocator.rs index de0b85ebb63b8..22fe5e120ad0b 100644 --- a/compiler/rustc_codegen_llvm/src/allocator.rs +++ b/compiler/rustc_codegen_llvm/src/allocator.rs @@ -176,6 +176,15 @@ fn create_wrapper_function( None }; + if tcx.sess.target.is_like_gpu { + // GPU target: conservatively mark this function (`llfn`) as convergent + attributes::apply_to_llfn( + llfn, + llvm::AttributePlace::Function, + &[llvm::AttributeKind::Convergent.create_attr(cx.llcx)], + ); + } + let llbb = unsafe { llvm::LLVMAppendBasicBlockInContext(cx.llcx, llfn, c"entry".as_ptr()) }; let mut bx = SBuilder::build(&cx, llbb); diff --git a/compiler/rustc_codegen_llvm/src/declare.rs b/compiler/rustc_codegen_llvm/src/declare.rs index 8f69f176138cf..ec297cd63e717 100644 --- a/compiler/rustc_codegen_llvm/src/declare.rs +++ b/compiler/rustc_codegen_llvm/src/declare.rs @@ -14,7 +14,7 @@ use std::borrow::Borrow; use itertools::Itertools; -use rustc_codegen_ssa::traits::TypeMembershipCodegenMethods; +use rustc_codegen_ssa::traits::{MiscCodegenMethods, TypeMembershipCodegenMethods}; use rustc_data_structures::fx::FxIndexSet; use rustc_middle::ty::{Instance, Ty}; use rustc_sanitizers::{cfi, kcfi}; @@ -70,6 +70,11 @@ pub(crate) fn declare_raw_fn<'ll, 'tcx>( let mut attrs = SmallVec::<[_; 4]>::new(); + if cx.sess().target.is_like_gpu { + // GPU target: conservatively mark every function declared here as convergent + attrs.push(llvm::AttributeKind::Convergent.create_attr(cx.llcx)); + } + if cx.tcx.sess.opts.cg.no_redzone.unwrap_or(cx.tcx.sess.target.disable_redzone) { attrs.push(llvm::AttributeKind::NoRedZone.create_attr(cx.llcx)); } diff --git a/compiler/rustc_codegen_llvm/src/llvm/ffi.rs b/compiler/rustc_codegen_llvm/src/llvm/ffi.rs index 53b9a2bda8944..e9aad63c2d915 100644 --- a/compiler/rustc_codegen_llvm/src/llvm/ffi.rs +++ b/compiler/rustc_codegen_llvm/src/llvm/ffi.rs @@ -292,6 +292,7 @@ pub(crate) enum AttributeKind { CapturesNone = 46, 
SanitizeRealtimeNonblocking = 47, SanitizeRealtimeBlocking = 48, + Convergent = 49, } /// LLVMIntPredicate diff --git a/compiler/rustc_llvm/llvm-wrapper/RustWrapper.cpp b/compiler/rustc_llvm/llvm-wrapper/RustWrapper.cpp index dda06e9b2bf68..30287fced48d5 100644 --- a/compiler/rustc_llvm/llvm-wrapper/RustWrapper.cpp +++ b/compiler/rustc_llvm/llvm-wrapper/RustWrapper.cpp @@ -328,6 +328,7 @@ enum class LLVMRustAttributeKind { CapturesNone = 46, SanitizeRealtimeNonblocking = 47, SanitizeRealtimeBlocking = 48, + Convergent = 49, }; static Attribute::AttrKind fromRust(LLVMRustAttributeKind Kind) { @@ -428,6 +429,8 @@ static Attribute::AttrKind fromRust(LLVMRustAttributeKind Kind) { return Attribute::SanitizeRealtime; case LLVMRustAttributeKind::SanitizeRealtimeBlocking: return Attribute::SanitizeRealtimeBlocking; + case LLVMRustAttributeKind::Convergent: + return Attribute::Convergent; } report_fatal_error("bad LLVMRustAttributeKind"); } diff --git a/compiler/rustc_session/src/session.rs b/compiler/rustc_session/src/session.rs index e52475cdd5bde..a782c1ea2fe7e 100644 --- a/compiler/rustc_session/src/session.rs +++ b/compiler/rustc_session/src/session.rs @@ -586,10 +586,16 @@ impl Session { } pub fn mir_opt_level(&self) -> usize { - self.opts - .unstable_opts - .mir_opt_level - .unwrap_or_else(|| if self.opts.optimize != OptLevel::No { 2 } else { 1 }) + if self.target.is_like_gpu { + // Convergent operations need special care (e.g. they must not be duplicated). + // This is not handled, so GPU targets always use level 0, even if one was requested. + 0 + } else { + self.opts + .unstable_opts + .mir_opt_level + .unwrap_or_else(|| if self.opts.optimize != OptLevel::No { 2 } else { 1 }) + } } /// Calculates the flavor of LTO to use for this compilation. 
diff --git a/tests/codegen-llvm/gpu-convergent.rs b/tests/codegen-llvm/gpu-convergent.rs new file mode 100644 index 0000000000000..bb9271ab69996 --- /dev/null +++ b/tests/codegen-llvm/gpu-convergent.rs @@ -0,0 +1,28 @@ +// Checks that on GPU targets the `convergent` attribute is added to both function +// definitions and declarations; `fun` and `ext` are expected to share one attribute group. + +//@ add-minicore +//@ revisions: amdgpu nvptx +//@ [amdgpu] compile-flags: --crate-type=rlib --target=amdgcn-amd-amdhsa -Ctarget-cpu=gfx900 +//@ [amdgpu] needs-llvm-components: amdgpu +//@ [nvptx] compile-flags: --crate-type=rlib --target=nvptx64-nvidia-cuda +//@ [nvptx] needs-llvm-components: nvptx +#![feature(no_core, lang_items, abi_gpu_kernel)] +#![no_core] + +extern crate minicore; +use minicore::*; + +extern "C" { + fn ext(); +} + +// CHECK: define {{.*}}_kernel void @fun(i32{{.*}}) unnamed_addr #[[ATTR:[0-9]+]] { +// CHECK: declare void @ext() unnamed_addr #[[ATTR]] +// CHECK: attributes #[[ATTR]] = {{.*}} convergent +#[no_mangle] +pub extern "gpu-kernel" fn fun(_: i32) { + unsafe { + ext(); + } +}