diff --git a/coresimd/x86_64/cmpxchg16b.rs b/coresimd/x86_64/cmpxchg16b.rs new file mode 100644 index 0000000000..17334c0abf --- /dev/null +++ b/coresimd/x86_64/cmpxchg16b.rs @@ -0,0 +1,74 @@ +use sync::atomic::Ordering; + +#[cfg(test)] +use stdsimd_test::assert_instr; + +/// Compare and exchange 16 bytes (128 bits) of data atomically. +/// +/// This intrinsic corresponds to the `cmpxchg16b` instruction on x86_64 +/// processors. It performs an atomic compare-and-swap, updating the `dst` +/// memory location to `new` if the current value in memory equals `old`. +/// +/// # Return value +/// +/// This function returns the previous value at the memory location. If it is +/// equal to `old` then the memory was updated to `new`. +/// +/// # Memory Orderings +/// +/// This atomic operation has the same memory-ordering semantics as +/// `AtomicUsize::compare_exchange` does, only operating on 16 bytes of memory +/// instead of a pointer-sized value. +/// +/// For more information on memory orderings, see the `compare_exchange` +/// documentation for other `Atomic*` types in the standard library. +/// +/// # Safety +/// +/// This function is unsafe because it takes a raw pointer and will attempt to +/// read and possibly write the memory at the pointer. The pointer must also be +/// aligned on a 16-byte boundary. +/// +/// This function also requires the `cmpxchg16b` CPU feature to be available at +/// runtime to work correctly. If the CPU running the binary does not actually +/// support `cmpxchg16b` and the program enters an execution path that +/// eventually would reach this function, the behavior is undefined. +/// +/// The `success` ordering must also be at least as strong as `failure`, or +/// this function call is undefined. See the `Atomic*` documentation's +/// `compare_exchange` function for more information. Any ordering pair for +/// which `compare_exchange` panics is undefined behavior here. Currently this +/// function aborts the process with an undefined instruction in that case. 
+#[inline] +#[cfg_attr(test, assert_instr(cmpxchg16b, success = Ordering::SeqCst, failure = Ordering::SeqCst))] +#[target_feature(enable = "cmpxchg16b")] +pub unsafe fn cmpxchg16b( + dst: *mut u128, + old: u128, + new: u128, + success: Ordering, + failure: Ordering, +) -> u128 { + use intrinsics; + use sync::atomic::Ordering::*; + + debug_assert!(dst as usize % 16 == 0); + + let (val, _ok) = match (success, failure) { + (Acquire, Acquire) => intrinsics::atomic_cxchg_acq(dst, old, new), + (Release, Relaxed) => intrinsics::atomic_cxchg_rel(dst, old, new), + (AcqRel, Acquire) => intrinsics::atomic_cxchg_acqrel(dst, old, new), + (Relaxed, Relaxed) => intrinsics::atomic_cxchg_relaxed(dst, old, new), + (SeqCst, SeqCst) => intrinsics::atomic_cxchg(dst, old, new), + (Acquire, Relaxed) => intrinsics::atomic_cxchg_acq_failrelaxed(dst, old, new), + (AcqRel, Relaxed) => intrinsics::atomic_cxchg_acqrel_failrelaxed(dst, old, new), + (SeqCst, Relaxed) => intrinsics::atomic_cxchg_failrelaxed(dst, old, new), + (SeqCst, Acquire) => intrinsics::atomic_cxchg_failacq(dst, old, new), + + // The arms above are copied from libcore, and so is this fallback + // arm, except that libcore panics on an unsupported ordering pair + // while we use the more lightweight `ud2` trap instead. 
+ _ => ::coresimd::x86::ud2(), + }; + val +} diff --git a/coresimd/x86_64/mod.rs b/coresimd/x86_64/mod.rs index c2399a86f2..46859e92c9 100644 --- a/coresimd/x86_64/mod.rs +++ b/coresimd/x86_64/mod.rs @@ -38,3 +38,6 @@ pub use self::bswap::*; mod rdrand; pub use self::rdrand::*; + +mod cmpxchg16b; +pub use self::cmpxchg16b::*; diff --git a/crates/coresimd/src/lib.rs b/crates/coresimd/src/lib.rs index 4cd07343ce..ded34b7778 100644 --- a/crates/coresimd/src/lib.rs +++ b/crates/coresimd/src/lib.rs @@ -33,6 +33,7 @@ sse4a_target_feature, arm_target_feature, aarch64_target_feature, + cmpxchg16b_target_feature, avx512_target_feature, mips_target_feature, powerpc_target_feature, @@ -67,6 +68,8 @@ test(attr(allow(dead_code, deprecated, unused_variables, unused_mut))) )] +#[macro_use] +#[allow(unused_imports)] extern crate core as _core; #[cfg(test)] #[macro_use] @@ -122,6 +125,8 @@ use _core::result; #[allow(unused_imports)] use _core::slice; #[allow(unused_imports)] +use _core::sync; +#[allow(unused_imports)] use _core::u128; #[allow(unused_imports)] use _core::u8; diff --git a/crates/stdsimd-test/src/disassembly.rs b/crates/stdsimd-test/src/disassembly.rs index 9ae5642370..3882207080 100644 --- a/crates/stdsimd-test/src/disassembly.rs +++ b/crates/stdsimd-test/src/disassembly.rs @@ -101,7 +101,9 @@ fn parse_objdump(output: &str) -> HashMap> { .skip_while(|s| { s.len() == expected_len && usize::from_str_radix(s, 16).is_ok() - }).map(|s| s.to_string()) + }) + .skip_while(|s| *s == "lock") // skip x86-specific prefix + .map(|s| s.to_string()) .collect::>(); instructions.push(Instruction { parts }); } @@ -198,6 +200,7 @@ fn parse_dumpbin(output: &str) -> HashMap> { .skip_while(|s| { s.len() == 2 && usize::from_str_radix(s, 16).is_ok() }).map(|s| s.to_string()) + .skip_while(|s| *s == "lock") // skip x86-specific prefix .collect::>(); instructions.push(Instruction { parts }); } diff --git a/crates/stdsimd-verify/src/lib.rs b/crates/stdsimd-verify/src/lib.rs index 
efaa910a06..751614bbf8 100644 --- a/crates/stdsimd-verify/src/lib.rs +++ b/crates/stdsimd-verify/src/lib.rs @@ -124,7 +124,9 @@ fn to_type(t: &syn::Type) -> proc_macro2::TokenStream { "u16" => quote! { &U16 }, "u32" => quote! { &U32 }, "u64" => quote! { &U64 }, + "u128" => quote! { &U128 }, "u8" => quote! { &U8 }, + "Ordering" => quote! { &ORDERING }, "CpuidResult" => quote! { &CPUID }, // arm ... diff --git a/crates/stdsimd-verify/tests/x86-intel.rs b/crates/stdsimd-verify/tests/x86-intel.rs index 7164614155..546dc1a33c 100644 --- a/crates/stdsimd-verify/tests/x86-intel.rs +++ b/crates/stdsimd-verify/tests/x86-intel.rs @@ -37,7 +37,9 @@ static I8: Type = Type::PrimSigned(8); static U16: Type = Type::PrimUnsigned(16); static U32: Type = Type::PrimUnsigned(32); static U64: Type = Type::PrimUnsigned(64); +static U128: Type = Type::PrimUnsigned(128); static U8: Type = Type::PrimUnsigned(8); +static ORDERING: Type = Type::Ordering; static M64: Type = Type::M64; static M128: Type = Type::M128; @@ -75,6 +77,7 @@ enum Type { Tuple, CpuidResult, Never, + Ordering, } stdsimd_verify::x86_functions!(static FUNCTIONS); @@ -145,6 +148,8 @@ fn verify_all_signatures() { "__cpuid_count" | "__cpuid" | "__get_cpuid_max" | + // Not listed with intel, but manually verified + "cmpxchg16b" | // The UD2 intrinsic is not defined by Intel, but it was agreed on // in the RFC Issue 2512: // https://github.com/rust-lang/rfcs/issues/2512 diff --git a/stdsimd/arch/detect/arch/x86.rs b/stdsimd/arch/detect/arch/x86.rs index 5faaff316a..7a202e7135 100644 --- a/stdsimd/arch/detect/arch/x86.rs +++ b/stdsimd/arch/detect/arch/x86.rs @@ -226,6 +226,10 @@ macro_rules! 
is_x86_feature_detected { cfg!(target_feature = "xsavec") || $crate::arch::detect::check_for( $crate::arch::detect::Feature::xsavec) }; + ("cmpxchg16b") => { + cfg!(target_feature = "cmpxchg16b") || $crate::arch::detect::check_for( + $crate::arch::detect::Feature::cmpxchg16b) + }; ($t:tt) => { compile_error!(concat!("unknown target feature: ", $t)) }; @@ -316,4 +320,6 @@ pub enum Feature { xsaves, /// XSAVEC (Save Processor Extended States Compacted) xsavec, + /// CMPXCHG16B, a 16-byte compare-and-swap instruction + cmpxchg16b, } diff --git a/stdsimd/arch/detect/os/x86.rs b/stdsimd/arch/detect/os/x86.rs index bb49a2a2fc..c0fbf8b73b 100644 --- a/stdsimd/arch/detect/os/x86.rs +++ b/stdsimd/arch/detect/os/x86.rs @@ -116,6 +116,7 @@ fn detect_features() -> cache::Initializer { enable(proc_info_ecx, 0, Feature::sse3); enable(proc_info_ecx, 9, Feature::ssse3); + enable(proc_info_ecx, 13, Feature::cmpxchg16b); enable(proc_info_ecx, 19, Feature::sse4_1); enable(proc_info_ecx, 20, Feature::sse4_2); enable(proc_info_ecx, 23, Feature::popcnt); @@ -288,6 +289,7 @@ mod tests { println!("xsaveopt: {:?}", is_x86_feature_detected!("xsaveopt")); println!("xsaves: {:?}", is_x86_feature_detected!("xsaves")); println!("xsavec: {:?}", is_x86_feature_detected!("xsavec")); + println!("cmpxchg16b: {:?}", is_x86_feature_detected!("cmpxchg16b")); } #[test] @@ -344,5 +346,9 @@ mod tests { is_x86_feature_detected!("xsaves"), information.xsaves_xrstors_and_ia32_xss() ); + assert_eq!( + is_x86_feature_detected!("cmpxchg16b"), + information.cmpxchg16b(), + ); } }