From 0900d612df51ec19a95f9d739bc56237f08238a4 Mon Sep 17 00:00:00 2001 From: Flakebi Date: Thu, 4 Dec 2025 02:42:06 +0100 Subject: [PATCH 1/2] Do not run MIR opts for GPUs GPU targets have convergent operations that require careful handling when running optimizations. For example, they must not be duplicated. An example convergent operation is a barrier/syncthreads. We do not want to deal with convergent operations in MIR optimizations, so set the MIR optimization level to 0 on these targets (overriding any explicit -Zmir-opt-level) and skip all optimizations. This affects the amdgpu and nvptx targets. --- compiler/rustc_session/src/session.rs | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/compiler/rustc_session/src/session.rs b/compiler/rustc_session/src/session.rs index e52475cdd5bde..a782c1ea2fe7e 100644 --- a/compiler/rustc_session/src/session.rs +++ b/compiler/rustc_session/src/session.rs @@ -586,10 +586,16 @@ impl Session { } pub fn mir_opt_level(&self) -> usize { - self.opts - .unstable_opts - .mir_opt_level - .unwrap_or_else(|| if self.opts.optimize != OptLevel::No { 2 } else { 1 }) + if self.target.is_like_gpu { + // Special care needs to be taken for convergent operations (i.e. not duplicating them). + // We do not want to handle these, so do not run any optimizations. + 0 + } else { + self.opts + .unstable_opts + .mir_opt_level + .unwrap_or_else(|| if self.opts.optimize != OptLevel::No { 2 } else { 1 }) + } } /// Calculates the flavor of LTO to use for this compilation. From f17636bc1bc10a2a552892241989f2de9fe006b4 Mon Sep 17 00:00:00 2001 From: Flakebi Date: Thu, 4 Dec 2025 10:37:26 +0100 Subject: [PATCH 2/2] Add convergent attribute to funcs for GPU targets On targets with convergent operations, we need to add the convergent attribute to all functions that run convergent operations. Following clang, we can conservatively apply the attribute to all functions when compiling for such a target and rely on LLVM optimizing away the attribute in cases where it is not necessary. 
This affects the amdgpu and nvptx targets. --- compiler/rustc_codegen_llvm/src/allocator.rs | 9 ++++++ compiler/rustc_codegen_llvm/src/declare.rs | 7 ++++- compiler/rustc_codegen_llvm/src/llvm/ffi.rs | 1 + .../rustc_llvm/llvm-wrapper/RustWrapper.cpp | 3 ++ tests/codegen-llvm/gpu-convergent.rs | 28 +++++++++++++++++++ 5 files changed, 47 insertions(+), 1 deletion(-) create mode 100644 tests/codegen-llvm/gpu-convergent.rs diff --git a/compiler/rustc_codegen_llvm/src/allocator.rs b/compiler/rustc_codegen_llvm/src/allocator.rs index de0b85ebb63b8..22fe5e120ad0b 100644 --- a/compiler/rustc_codegen_llvm/src/allocator.rs +++ b/compiler/rustc_codegen_llvm/src/allocator.rs @@ -176,6 +176,15 @@ fn create_wrapper_function( None }; + if tcx.sess.target.is_like_gpu { + // Conservatively apply convergent to all functions + attributes::apply_to_llfn( + llfn, + llvm::AttributePlace::Function, + &[llvm::AttributeKind::Convergent.create_attr(cx.llcx)], + ); + } + let llbb = unsafe { llvm::LLVMAppendBasicBlockInContext(cx.llcx, llfn, c"entry".as_ptr()) }; let mut bx = SBuilder::build(&cx, llbb); diff --git a/compiler/rustc_codegen_llvm/src/declare.rs b/compiler/rustc_codegen_llvm/src/declare.rs index 8f69f176138cf..ec297cd63e717 100644 --- a/compiler/rustc_codegen_llvm/src/declare.rs +++ b/compiler/rustc_codegen_llvm/src/declare.rs @@ -14,7 +14,7 @@ use std::borrow::Borrow; use itertools::Itertools; -use rustc_codegen_ssa::traits::TypeMembershipCodegenMethods; +use rustc_codegen_ssa::traits::{MiscCodegenMethods, TypeMembershipCodegenMethods}; use rustc_data_structures::fx::FxIndexSet; use rustc_middle::ty::{Instance, Ty}; use rustc_sanitizers::{cfi, kcfi}; @@ -70,6 +70,11 @@ pub(crate) fn declare_raw_fn<'ll, 'tcx>( let mut attrs = SmallVec::<[_; 4]>::new(); + if cx.sess().target.is_like_gpu { + // Conservatively apply convergent to all functions + attrs.push(llvm::AttributeKind::Convergent.create_attr(cx.llcx)); + } + if 
cx.tcx.sess.opts.cg.no_redzone.unwrap_or(cx.tcx.sess.target.disable_redzone) { attrs.push(llvm::AttributeKind::NoRedZone.create_attr(cx.llcx)); } diff --git a/compiler/rustc_codegen_llvm/src/llvm/ffi.rs b/compiler/rustc_codegen_llvm/src/llvm/ffi.rs index 53b9a2bda8944..e9aad63c2d915 100644 --- a/compiler/rustc_codegen_llvm/src/llvm/ffi.rs +++ b/compiler/rustc_codegen_llvm/src/llvm/ffi.rs @@ -292,6 +292,7 @@ pub(crate) enum AttributeKind { CapturesNone = 46, SanitizeRealtimeNonblocking = 47, SanitizeRealtimeBlocking = 48, + Convergent = 49, } /// LLVMIntPredicate diff --git a/compiler/rustc_llvm/llvm-wrapper/RustWrapper.cpp b/compiler/rustc_llvm/llvm-wrapper/RustWrapper.cpp index dda06e9b2bf68..30287fced48d5 100644 --- a/compiler/rustc_llvm/llvm-wrapper/RustWrapper.cpp +++ b/compiler/rustc_llvm/llvm-wrapper/RustWrapper.cpp @@ -328,6 +328,7 @@ enum class LLVMRustAttributeKind { CapturesNone = 46, SanitizeRealtimeNonblocking = 47, SanitizeRealtimeBlocking = 48, + Convergent = 49, }; static Attribute::AttrKind fromRust(LLVMRustAttributeKind Kind) { @@ -428,6 +429,8 @@ static Attribute::AttrKind fromRust(LLVMRustAttributeKind Kind) { return Attribute::SanitizeRealtime; case LLVMRustAttributeKind::SanitizeRealtimeBlocking: return Attribute::SanitizeRealtimeBlocking; + case LLVMRustAttributeKind::Convergent: + return Attribute::Convergent; } report_fatal_error("bad LLVMRustAttributeKind"); } diff --git a/tests/codegen-llvm/gpu-convergent.rs b/tests/codegen-llvm/gpu-convergent.rs new file mode 100644 index 0000000000000..bb9271ab69996 --- /dev/null +++ b/tests/codegen-llvm/gpu-convergent.rs @@ -0,0 +1,28 @@ +// Checks that when compiling for GPU targets, the convergent attribute +// is added to function declarations and definitions. 
+ +//@ add-minicore +//@ revisions: amdgpu nvptx +//@ [amdgpu] compile-flags: --crate-type=rlib --target=amdgcn-amd-amdhsa -Ctarget-cpu=gfx900 +//@ [amdgpu] needs-llvm-components: amdgpu +//@ [nvptx] compile-flags: --crate-type=rlib --target=nvptx64-nvidia-cuda +//@ [nvptx] needs-llvm-components: nvptx +#![feature(no_core, lang_items, abi_gpu_kernel)] +#![no_core] + +extern crate minicore; +use minicore::*; + +extern "C" { + fn ext(); +} + +// CHECK: define {{.*}}_kernel void @fun(i32{{.*}}) unnamed_addr #[[ATTR:[0-9]+]] { +// CHECK: declare void @ext() unnamed_addr #[[ATTR]] +// CHECK: attributes #[[ATTR]] = {{.*}} convergent +#[no_mangle] +pub extern "gpu-kernel" fn fun(_: i32) { + unsafe { + ext(); + } +}