From 25234f8652f59a796617c44688e8d4696611c314 Mon Sep 17 00:00:00 2001 From: Guillaume Gomez Date: Mon, 26 Jan 2026 18:24:52 +0100 Subject: [PATCH 1/2] Add two new target-specific intrinsics mapping --- src/intrinsic/llvm.rs | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/intrinsic/llvm.rs b/src/intrinsic/llvm.rs index 10183c5ee52..fbf3050a8a1 100644 --- a/src/intrinsic/llvm.rs +++ b/src/intrinsic/llvm.rs @@ -1061,13 +1061,18 @@ pub fn intrinsic<'gcc, 'tcx>(name: &str, cx: &CodegenCx<'gcc, 'tcx>) -> Function "llvm.x86.xgetbv" => "__builtin_ia32_xgetbv", // NOTE: this doc specifies the equivalent GCC builtins: http://huonw.github.io/llvmint/llvmint/x86/index.html + // FIXME: Should handle other targets than `ia32`. "llvm.sqrt.v2f64" => "__builtin_ia32_sqrtpd", + // FIXME: Should handle other targets than `ia32`. + "llvm.sqrt.v4f32" => "__builtin_ia32_sqrtps", "llvm.sqrt.f32" => { let gcc_name = "__builtin_sqrtf"; let func = cx.context.get_builtin_function(gcc_name); cx.functions.borrow_mut().insert(gcc_name.to_string(), func); return func; } + // FIXME: Should handle other targets than `ia32`. + "llvm.smax.v4i32" => "__builtin_ia32_pmaxsd128", "llvm.x86.avx512.pmul.dq.512" => "__builtin_ia32_pmuldq512_mask", "llvm.x86.avx512.pmulu.dq.512" => "__builtin_ia32_pmuludq512_mask", "llvm.x86.avx512.max.ps.512" => "__builtin_ia32_maxps512_mask", From 436ae1e0a0d48967677bb6bf567d3d7c4697f498 Mon Sep 17 00:00:00 2001 From: Guillaume Gomez Date: Mon, 26 Jan 2026 18:25:25 +0100 Subject: [PATCH 2/2] Add regression test for `llvm.sqrt.v4f64` and `llvm.smax.v4i32` mapping --- tests/run/simd-ffi.rs | 102 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 102 insertions(+) create mode 100644 tests/run/simd-ffi.rs diff --git a/tests/run/simd-ffi.rs b/tests/run/simd-ffi.rs new file mode 100644 index 00000000000..67cc2e5b96e --- /dev/null +++ b/tests/run/simd-ffi.rs @@ -0,0 +1,102 @@ +// Compiler: +// +// Run-time: +// status: 0 + +// FIXME: Remove this test once stops +// ignoring GCC backend. + +#![allow(internal_features, non_camel_case_types)] +// we can compile to a variety of platforms, because we don't need +// cross-compiled standard libraries. +#![feature(no_core, auto_traits)] +#![no_core] +#![feature(repr_simd, simd_ffi, link_llvm_intrinsics, lang_items, rustc_attrs)] + +#[derive(Copy)] +#[repr(simd)] +pub struct f32x4([f32; 4]); + +extern "C" { + #[link_name = "llvm.sqrt.v4f32"] + fn vsqrt(x: f32x4) -> f32x4; +} + +pub fn foo(x: f32x4) -> f32x4 { + unsafe { vsqrt(x) } +} + +#[derive(Copy)] +#[repr(simd)] +pub struct i32x4([i32; 4]); + +extern "C" { + // _mm_sll_epi32 + #[cfg(all(any(target_arch = "x86", target_arch = "x86-64"), target_feature = "sse2"))] + #[link_name = "llvm.x86.sse2.psll.d"] + fn integer(a: i32x4, b: i32x4) -> i32x4; + + // vmaxq_s32 + #[cfg(target_arch = "arm")] + #[link_name = "llvm.arm.neon.vmaxs.v4i32"] + fn integer(a: i32x4, b: i32x4) -> i32x4; + // vmaxq_s32 + #[cfg(target_arch = "aarch64")] + #[link_name = "llvm.aarch64.neon.maxs.v4i32"] + fn integer(a: i32x4, b: i32x4) -> i32x4; + + // Use a generic LLVM intrinsic to do type checking on other platforms + #[cfg(not(any( + all(any(target_arch = "x86", target_arch = "x86-64"), target_feature = "sse2"), + target_arch = "arm", + target_arch = "aarch64" + )))] + #[link_name = "llvm.smax.v4i32"] + fn integer(a: i32x4, b: i32x4) -> i32x4; +} + +pub fn bar(a: i32x4, b: i32x4) -> i32x4 { + unsafe { integer(a, b) } +} + +#[lang = "pointee_sized"] +pub trait PointeeSized {} + +#[lang = "meta_sized"] +pub trait MetaSized: PointeeSized {} + +#[lang = "sized"] +pub trait Sized: MetaSized {} + +#[lang = "copy"] +pub trait Copy {} + +impl Copy for f32 {} +impl Copy for i32 {} +impl Copy for [f32; 4] {} +impl Copy for [i32; 4] {} + +pub mod marker { + pub use Copy; +} + +#[lang = "freeze"] +auto trait Freeze {} + +#[macro_export] +#[rustc_builtin_macro] +macro_rules! Copy { + () => {}; +} +#[macro_export] +#[rustc_builtin_macro] +macro_rules! derive { + () => {}; +} + +#[lang = "start"] +fn start(_main: fn() -> T, _argc: isize, _argv: *const *const u8, _sigpipe: u8) -> isize { + 0 +} + +fn main() {}