From e67c117e0a2f7a957787697794f84e45a41cee18 Mon Sep 17 00:00:00 2001 From: Iain Buclaw Date: Sun, 13 Oct 2019 22:21:35 +0200 Subject: [PATCH] core/atomic.d: Add GDC atomic support code Following on from #2596, rewrite to work with the new atomic API --- src/core/atomic.d | 7 + src/core/internal/atomic.d | 1330 +++++++++++++++++++++++------------- 2 files changed, 857 insertions(+), 480 deletions(-) diff --git a/src/core/atomic.d b/src/core/atomic.d index fffe690cb4..b00d5545b4 100644 --- a/src/core/atomic.d +++ b/src/core/atomic.d @@ -605,6 +605,13 @@ else version (D_InlineAsm_X86_64) enum has64BitCAS = true; enum has128BitCAS = true; } +else version (GNU) +{ + import gcc.config; + enum has64BitCAS = GNU_Have_64Bit_Atomics; + enum has64BitXCHG = GNU_Have_64Bit_Atomics; + enum has128BitCAS = GNU_Have_LibAtomic; +} else { enum has64BitXCHG = false; diff --git a/src/core/internal/atomic.d b/src/core/internal/atomic.d index 5f11f761b9..76bcb4782d 100644 --- a/src/core/internal/atomic.d +++ b/src/core/internal/atomic.d @@ -12,664 +12,1034 @@ module core.internal.atomic; import core.atomic : MemoryOrder; -private +version (DigitalMars) { - enum : int + private { - AX, BX, CX, DX, DI, SI, R8, R9 - } + enum : int + { + AX, BX, CX, DX, DI, SI, R8, R9 + } - immutable string[4][8] registerNames = [ - [ "AL", "AX", "EAX", "RAX" ], - [ "BL", "BX", "EBX", "RBX" ], - [ "CL", "CX", "ECX", "RCX" ], - [ "DL", "DX", "EDX", "RDX" ], - [ "DIL", "DI", "EDI", "RDI" ], - [ "SIL", "SI", "ESI", "RSI" ], - [ "R8B", "R8W", "R8D", "R8" ], - [ "R9B", "R9W", "R9D", "R9" ], - ]; - - template RegIndex(T) - { - static if (T.sizeof == 1) - enum RegIndex = 0; - else static if (T.sizeof == 2) - enum RegIndex = 1; - else static if (T.sizeof == 4) - enum RegIndex = 2; - else static if (T.sizeof == 8) - enum RegIndex = 3; - else - static assert(false, "Invalid type"); - } + immutable string[4][8] registerNames = [ + [ "AL", "AX", "EAX", "RAX" ], + [ "BL", "BX", "EBX", "RBX" ], + [ "CL", "CX", "ECX", "RCX" ], + [ "DL", "DX", "EDX", "RDX" ], + [ "DIL", "DI", "EDI", "RDI" ], + [ "SIL", "SI", "ESI", "RSI" ], + [ "R8B", "R8W", "R8D", "R8" ], + [ "R9B", "R9W", "R9D", "R9" ], + ]; - enum SizedReg(int reg, T = size_t) = registerNames[reg][RegIndex!T]; -} + template RegIndex(T) + { + static if (T.sizeof == 1) + enum RegIndex = 0; + else static if (T.sizeof == 2) + enum RegIndex = 1; + else static if (T.sizeof == 4) + enum RegIndex = 2; + else static if (T.sizeof == 8) + enum RegIndex = 3; + else + static assert(false, "Invalid type"); + } -inout(T) atomicLoad(MemoryOrder order = MemoryOrder.seq, T)(inout(T)* src) pure nothrow @nogc @trusted - if (CanCAS!T) -{ - static assert(order != MemoryOrder.rel, "invalid MemoryOrder for atomicLoad()"); + enum SizedReg(int reg, T = size_t) = registerNames[reg][RegIndex!T]; + } - static if (T.sizeof == size_t.sizeof * 2) + inout(T) atomicLoad(MemoryOrder order = MemoryOrder.seq, T)(inout(T)* src) pure nothrow @nogc @trusted + if (CanCAS!T) { - version (D_InlineAsm_X86) + static assert(order != MemoryOrder.rel, "invalid MemoryOrder for atomicLoad()"); + + static if (T.sizeof == size_t.sizeof * 2) { - asm pure nothrow @nogc @trusted + version (D_InlineAsm_X86) { - push EDI; - push EBX; - mov EBX, 0; - mov ECX, 0; - mov EAX, 0; - mov EDX, 0; - mov EDI, src; - lock; cmpxchg8b [EDI]; - pop EBX; - pop EDI; + asm pure nothrow @nogc @trusted + { + push EDI; + push EBX; + mov EBX, 0; + mov ECX, 0; + mov EAX, 0; + mov EDX, 0; + mov EDI, src; + lock; cmpxchg8b [EDI]; + pop EBX; + pop EDI; + } } - } - else version (D_InlineAsm_X86_64) - { - version (Windows) + else version (D_InlineAsm_X86_64) { - static if (RegisterReturn!T) + version (Windows) { - enum SrcPtr = SizedReg!CX; - enum RetPtr = null; + static if (RegisterReturn!T) + { + enum SrcPtr = SizedReg!CX; + enum RetPtr = null; + } + else + { + enum SrcPtr = SizedReg!DX; + enum RetPtr = SizedReg!CX; + } + + mixin (simpleFormat(q{ + asm pure nothrow @nogc @trusted + { + naked; + push RBX; + mov R8, %0; + ?1 mov R9, %1; + mov RBX, 0; + mov RCX, 0; + mov RAX, 0; + mov RDX, 0; + lock; cmpxchg16b [R8]; + ?1 mov [R9], RAX; + ?1 mov 8[R9], RDX; + pop RBX; + ret; + } + }, SrcPtr, RetPtr)); } else { - enum SrcPtr = SizedReg!DX; - enum RetPtr = SizedReg!CX; - } - - mixin (simpleFormat(q{ asm pure nothrow @nogc @trusted { naked; push RBX; - mov R8, %0; -?1 mov R9, %1; mov RBX, 0; mov RCX, 0; mov RAX, 0; mov RDX, 0; - lock; cmpxchg16b [R8]; -?1 mov [R9], RAX; -?1 mov 8[R9], RDX; + lock; cmpxchg16b [RDI]; pop RBX; ret; } - }, SrcPtr, RetPtr)); + } } - else + } + else static if (needsLoadBarrier!order) + { + version (D_InlineAsm_X86) + { + enum SrcReg = SizedReg!CX; + enum ZeroReg = SizedReg!(DX, T); + enum ResReg = SizedReg!(AX, T); + + mixin (simpleFormat(q{ + asm pure nothrow @nogc @trusted + { + mov %1, 0; + mov %2, 0; + mov %0, src; + lock; cmpxchg [%0], %1; + } + }, SrcReg, ZeroReg, ResReg)); + } + else version (D_InlineAsm_X86_64) + { + version (Windows) + enum SrcReg = SizedReg!CX; + else + enum SrcReg = SizedReg!DI; + enum ZeroReg = SizedReg!(DX, T); + enum ResReg = SizedReg!(AX, T); + + mixin (simpleFormat(q{ + asm pure nothrow @nogc @trusted + { + naked; + mov %1, 0; + mov %2, 0; + lock; cmpxchg [%0], %1; + ret; + } + }, SrcReg, ZeroReg, ResReg)); + } + } + else + return *src; + } + + void atomicStore(MemoryOrder order = MemoryOrder.seq, T)(T* dest, T value) pure nothrow @nogc @trusted + if (CanCAS!T) + { + static assert(order != MemoryOrder.acq, "Invalid MemoryOrder for atomicStore()"); + + static if (T.sizeof == size_t.sizeof * 2) + { + version (D_InlineAsm_X86) { asm pure nothrow @nogc @trusted { - naked; - push RBX; - mov RBX, 0; - mov RCX, 0; - mov RAX, 0; - mov RDX, 0; - lock; cmpxchg16b [RDI]; - pop RBX; - ret; + push EDI; + push EBX; + lea EDI, value; + mov EBX, [EDI]; + mov ECX, 4[EDI]; + mov EDI, dest; + mov EAX, [EDI]; + mov EDX, 4[EDI]; + L1: lock; cmpxchg8b [EDI]; + jne L1; + pop EBX; + pop EDI; + } + } + else version (D_InlineAsm_X86_64) + { + version (Windows) + { + asm pure nothrow @nogc @trusted + { + naked; + push RBX; + mov R8, RDX; + mov RAX, [RDX]; + mov RDX, 8[RDX]; + mov RBX, [RCX]; + mov RCX, 8[RCX]; + L1: lock; cmpxchg16b [R8]; + jne L1; + pop RBX; + ret; + } + } + else + { + asm pure nothrow @nogc @trusted + { + naked; + push RBX; + mov RBX, RDI; + mov RCX, RSI; + mov RDI, RDX; + mov RAX, [RDX]; + mov RDX, 8[RDX]; + L1: lock; cmpxchg16b [RDI]; + jne L1; + pop RBX; + ret; + } } } } + else static if (needsStoreBarrier!order) + atomicExchange!(order, false)(dest, value); + else + *dest = value; } - else static if (needsLoadBarrier!order) + + T atomicFetchAdd(MemoryOrder order = MemoryOrder.seq, bool result = true, T)(T* dest, T value) pure nothrow @nogc @trusted + if (is(T : ulong)) { version (D_InlineAsm_X86) { - enum SrcReg = SizedReg!CX; - enum ZeroReg = SizedReg!(DX, T); - enum ResReg = SizedReg!(AX, T); + static assert(T.sizeof <= 4, "64bit atomicFetchAdd not supported on 32bit target." ); + + enum DestReg = SizedReg!DX; + enum ValReg = SizedReg!(AX, T); mixin (simpleFormat(q{ asm pure nothrow @nogc @trusted { - mov %1, 0; - mov %2, 0; - mov %0, src; - lock; cmpxchg [%0], %1; + mov %1, value; + mov %0, dest; + lock; xadd[%0], %1; } - }, SrcReg, ZeroReg, ResReg)); + }, DestReg, ValReg)); } else version (D_InlineAsm_X86_64) { version (Windows) - enum SrcReg = SizedReg!CX; + { + enum DestReg = SizedReg!DX; + enum ValReg = SizedReg!(CX, T); + } else - enum SrcReg = SizedReg!DI; - enum ZeroReg = SizedReg!(DX, T); - enum ResReg = SizedReg!(AX, T); + { + enum DestReg = SizedReg!SI; + enum ValReg = SizedReg!(DI, T); + } + enum ResReg = result ? SizedReg!(AX, T) : null; mixin (simpleFormat(q{ asm pure nothrow @nogc @trusted { naked; - mov %1, 0; - mov %2, 0; - lock; cmpxchg [%0], %1; + lock; xadd[%0], %1; + ?2 mov %2, %1; ret; } - }, SrcReg, ZeroReg, ResReg)); + }, DestReg, ValReg, ResReg)); } + else + static assert (false, "Unsupported architecture."); } - else - return *src; -} -void atomicStore(MemoryOrder order = MemoryOrder.seq, T)(T* dest, T value) pure nothrow @nogc @trusted - if (CanCAS!T) -{ - static assert(order != MemoryOrder.acq, "Invalid MemoryOrder for atomicStore()"); + T atomicFetchSub(MemoryOrder order = MemoryOrder.seq, bool result = true, T)(T* dest, T value) pure nothrow @nogc @trusted + if (is(T : ulong)) + { + return atomicFetchAdd(dest, cast(T)-cast(IntOrLong!T)value); + } - static if (T.sizeof == size_t.sizeof * 2) + T atomicExchange(MemoryOrder order = MemoryOrder.seq, bool result = true, T)(T* dest, T value) pure nothrow @nogc @trusted + if (is(T : ulong) || is(T == class) || is(T == interface) || is(T U : U*)) { version (D_InlineAsm_X86) { - asm pure nothrow @nogc @trusted - { - push EDI; - push EBX; - lea EDI, value; - mov EBX, [EDI]; - mov ECX, 4[EDI]; - mov EDI, dest; - mov EAX, [EDI]; - mov EDX, 4[EDI]; - L1: lock; cmpxchg8b [EDI]; - jne L1; - pop EBX; - pop EDI; - } + static assert(T.sizeof <= 4, "64bit atomicExchange not supported on 32bit target." ); + + enum DestReg = SizedReg!CX; + enum ValReg = SizedReg!(AX, T); + + mixin (simpleFormat(q{ + asm pure nothrow @nogc @trusted + { + mov %1, value; + mov %0, dest; + xchg [%0], %1; + } + }, DestReg, ValReg)); } else version (D_InlineAsm_X86_64) { version (Windows) { - asm pure nothrow @nogc @trusted - { - naked; - push RBX; - mov R8, RDX; - mov RAX, [RDX]; - mov RDX, 8[RDX]; - mov RBX, [RCX]; - mov RCX, 8[RCX]; - L1: lock; cmpxchg16b [R8]; - jne L1; - pop RBX; - ret; - } + enum DestReg = SizedReg!DX; + enum ValReg = SizedReg!(CX, T); } else { + enum DestReg = SizedReg!SI; + enum ValReg = SizedReg!(DI, T); + } + enum ResReg = result ? SizedReg!(AX, T) : null; + + mixin (simpleFormat(q{ asm pure nothrow @nogc @trusted { naked; - push RBX; - mov RBX, RDI; - mov RCX, RSI; - mov RDI, RDX; - mov RAX, [RDX]; - mov RDX, 8[RDX]; - L1: lock; cmpxchg16b [RDI]; - jne L1; - pop RBX; + xchg [%0], %1; + ?2 mov %2, %1; ret; } - } + }, DestReg, ValReg, ResReg)); } + else + static assert (false, "Unsupported architecture."); } - else static if (needsStoreBarrier!order) - atomicExchange!(order, false)(dest, value); - else - *dest = value; -} -T atomicFetchAdd(MemoryOrder order = MemoryOrder.seq, bool result = true, T)(T* dest, T value) pure nothrow @nogc @trusted - if (is(T : ulong)) -{ - version (D_InlineAsm_X86) - { - static assert(T.sizeof <= 4, "64bit atomicFetchAdd not supported on 32bit target." ); + alias atomicCompareExchangeWeak = atomicCompareExchangeStrong; - enum DestReg = SizedReg!DX; - enum ValReg = SizedReg!(AX, T); - - mixin (simpleFormat(q{ - asm pure nothrow @nogc @trusted - { - mov %1, value; - mov %0, dest; - lock; xadd[%0], %1; - } - }, DestReg, ValReg)); - } - else version (D_InlineAsm_X86_64) + bool atomicCompareExchangeStrong(MemoryOrder succ = MemoryOrder.seq, MemoryOrder fail = MemoryOrder.seq, T)(T* dest, T* compare, T value) pure nothrow @nogc @trusted + if (CanCAS!T) { - version (Windows) - { - enum DestReg = SizedReg!DX; - enum ValReg = SizedReg!(CX, T); - } - else + version (D_InlineAsm_X86) { - enum DestReg = SizedReg!SI; - enum ValReg = SizedReg!(DI, T); - } - enum ResReg = result ? SizedReg!(AX, T) : null; - - mixin (simpleFormat(q{ - asm pure nothrow @nogc @trusted + static if (T.sizeof <= 4) { - naked; - lock; xadd[%0], %1; -?2 mov %2, %1; - ret; - } - }, DestReg, ValReg, ResReg)); - } - else - static assert (false, "Unsupported architecture."); -} - -T atomicFetchSub(MemoryOrder order = MemoryOrder.seq, bool result = true, T)(T* dest, T value) pure nothrow @nogc @trusted - if (is(T : ulong)) -{ - return atomicFetchAdd(dest, cast(T)-cast(IntOrLong!T)value); -} - -T atomicExchange(MemoryOrder order = MemoryOrder.seq, bool result = true, T)(T* dest, T value) pure nothrow @nogc @trusted - if (is(T : ulong) || is(T == class) || is(T == interface) || is(T U : U*)) -{ - version (D_InlineAsm_X86) - { - static assert(T.sizeof <= 4, "64bit atomicExchange not supported on 32bit target." ); - - enum DestReg = SizedReg!CX; - enum ValReg = SizedReg!(AX, T); + enum DestAddr = SizedReg!CX; + enum CmpAddr = SizedReg!DI; + enum Val = SizedReg!(DX, T); + enum Cmp = SizedReg!(AX, T); - mixin (simpleFormat(q{ - asm pure nothrow @nogc @trusted + mixin (simpleFormat(q{ + asm pure nothrow @nogc @trusted + { + push %1; + mov %2, value; + mov %1, compare; + mov %3, [%1]; + mov %0, dest; + lock; cmpxchg [%0], %2; + mov [%1], %3; + setz AL; + pop %1; + } + }, DestAddr, CmpAddr, Val, Cmp)); + } + else static if (T.sizeof == 8) { - mov %1, value; - mov %0, dest; - xchg [%0], %1; + asm pure nothrow @nogc @trusted + { + push EDI; + push EBX; + lea EDI, value; + mov EBX, [EDI]; + mov ECX, 4[EDI]; + mov EDI, compare; + mov EAX, [EDI]; + mov EDX, 4[EDI]; + mov EDI, dest; + lock; cmpxchg8b [EDI]; + mov EDI, compare; + mov [EDI], EAX; + mov 4[EDI], EDX; + setz AL; + pop EBX; + pop EDI; + } } - }, DestReg, ValReg)); - } - else version (D_InlineAsm_X86_64) - { - version (Windows) - { - enum DestReg = SizedReg!DX; - enum ValReg = SizedReg!(CX, T); + else + static assert(T.sizeof <= 8, "128bit atomicCompareExchangeStrong not supported on 32bit target." ); } - else + else version (D_InlineAsm_X86_64) { - enum DestReg = SizedReg!SI; - enum ValReg = SizedReg!(DI, T); - } - enum ResReg = result ? SizedReg!(AX, T) : null; + static if (T.sizeof <= 8) + { + version (Windows) + { + enum DestAddr = SizedReg!R8; + enum CmpAddr = SizedReg!DX; + enum Val = SizedReg!(CX, T); + } + else + { + enum DestAddr = SizedReg!DX; + enum CmpAddr = SizedReg!SI; + enum Val = SizedReg!(DI, T); + } + enum Res = SizedReg!(AX, T); - mixin (simpleFormat(q{ - asm pure nothrow @nogc @trusted + mixin (simpleFormat(q{ + asm pure nothrow @nogc @trusted + { + naked; + mov %3, [%1]; + lock; cmpxchg [%0], %2; + jne compare_fail; + mov AL, 1; + ret; + compare_fail: + mov [%1], %3; + xor AL, AL; + ret; + } + }, DestAddr, CmpAddr, Val, Res)); + } + else { - naked; - xchg [%0], %1; -?2 mov %2, %1; - ret; + version (Windows) + { + asm pure nothrow @nogc @trusted + { + naked; + push RBX; + mov R9, RDX; + mov RAX, [RDX]; + mov RDX, 8[RDX]; + mov RBX, [RCX]; + mov RCX, 8[RCX]; + lock; cmpxchg16b [R8]; + pop RBX; + jne compare_fail; + mov AL, 1; + ret; + compare_fail: + mov [R9], RAX; + mov 8[R9], RDX; + xor AL, AL; + ret; + } + } + else + { + asm pure nothrow @nogc @trusted + { + naked; + push RBX; + mov R8, RCX; + mov R9, RDX; + mov RAX, [RDX]; + mov RDX, 8[RDX]; + mov RBX, RDI; + mov RCX, RSI; + lock; cmpxchg16b [R8]; + pop RBX; + jne compare_fail; + mov AL, 1; + ret; + compare_fail: + mov [R9], RAX; + mov 8[R9], RDX; + xor AL, AL; + ret; + } + } } - }, DestReg, ValReg, ResReg)); + } + else + static assert (false, "Unsupported architecture."); } - else - static assert (false, "Unsupported architecture."); -} -alias atomicCompareExchangeWeak = atomicCompareExchangeStrong; - -bool atomicCompareExchangeStrong(MemoryOrder succ = MemoryOrder.seq, MemoryOrder fail = MemoryOrder.seq, T)(T* dest, T* compare, T value) pure nothrow @nogc @trusted - if (CanCAS!T) -{ - version (D_InlineAsm_X86) + bool atomicCompareExchangeStrongNoResult(MemoryOrder succ = MemoryOrder.seq, MemoryOrder fail = MemoryOrder.seq, T)(T* dest, const T compare, T value) pure nothrow @nogc @trusted + if (CanCAS!T) { - static if (T.sizeof <= 4) + version (D_InlineAsm_X86) { - enum DestAddr = SizedReg!CX; - enum CmpAddr = SizedReg!DI; - enum Val = SizedReg!(DX, T); - enum Cmp = SizedReg!(AX, T); + static if (T.sizeof <= 4) + { + enum DestAddr = SizedReg!CX; + enum Cmp = SizedReg!(AX, T); + enum Val = SizedReg!(DX, T); - mixin (simpleFormat(q{ + mixin (simpleFormat(q{ + asm pure nothrow @nogc @trusted + { + mov %2, value; + mov %1, compare; + mov %0, dest; + lock; cmpxchg [%0], %2; + setz AL; + } + }, DestAddr, Cmp, Val)); + } + else static if (T.sizeof == 8) + { asm pure nothrow @nogc @trusted { - push %1; - mov %2, value; - mov %1, compare; - mov %3, [%1]; - mov %0, dest; - lock; cmpxchg [%0], %2; - mov [%1], %3; + push EDI; + push EBX; + lea EDI, value; + mov EBX, [EDI]; + mov ECX, 4[EDI]; + lea EDI, compare; + mov EAX, [EDI]; + mov EDX, 4[EDI]; + mov EDI, dest; + lock; cmpxchg8b [EDI]; setz AL; - pop %1; + pop EBX; + pop EDI; } - }, DestAddr, CmpAddr, Val, Cmp)); - } - else static if (T.sizeof == 8) - { - asm pure nothrow @nogc @trusted - { - push EDI; - push EBX; - lea EDI, value; - mov EBX, [EDI]; - mov ECX, 4[EDI]; - mov EDI, compare; - mov EAX, [EDI]; - mov EDX, 4[EDI]; - mov EDI, dest; - lock; cmpxchg8b [EDI]; - mov EDI, compare; - mov [EDI], EAX; - mov 4[EDI], EDX; - setz AL; - pop EBX; - pop EDI; } + else + static assert(T.sizeof <= 8, "128bit atomicCompareExchangeStrong not supported on 32bit target." ); } - else - static assert(T.sizeof <= 8, "128bit atomicCompareExchangeStrong not supported on 32bit target." ); - } - else version (D_InlineAsm_X86_64) - { - static if (T.sizeof <= 8) + else version (D_InlineAsm_X86_64) { - version (Windows) + static if (T.sizeof <= 8) { - enum DestAddr = SizedReg!R8; - enum CmpAddr = SizedReg!DX; - enum Val = SizedReg!(CX, T); + version (Windows) + { + enum DestAddr = SizedReg!R8; + enum Cmp = SizedReg!(DX, T); + enum Val = SizedReg!(CX, T); + } + else + { + enum DestAddr = SizedReg!DX; + enum Cmp = SizedReg!(SI, T); + enum Val = SizedReg!(DI, T); + } + enum AXReg = SizedReg!(AX, T); + + mixin (simpleFormat(q{ + asm pure nothrow @nogc @trusted + { + naked; + mov %3, %1; + lock; cmpxchg [%0], %2; + setz AL; + ret; + } + }, DestAddr, Cmp, Val, AXReg)); } else { - enum DestAddr = SizedReg!DX; - enum CmpAddr = SizedReg!SI; - enum Val = SizedReg!(DI, T); - } - enum Res = SizedReg!(AX, T); - - mixin (simpleFormat(q{ - asm pure nothrow @nogc @trusted + version (Windows) { - naked; - mov %3, [%1]; - lock; cmpxchg [%0], %2; - jne compare_fail; - mov AL, 1; - ret; - compare_fail: - mov [%1], %3; - xor AL, AL; - ret; + asm pure nothrow @nogc @trusted + { + naked; + push RBX; + mov RAX, [RDX]; + mov RDX, 8[RDX]; + mov RBX, [RCX]; + mov RCX, 8[RCX]; + lock; cmpxchg16b [R8]; + setz AL; + pop RBX; + ret; + } } - }, DestAddr, CmpAddr, Val, Res)); + else + { + asm pure nothrow @nogc @trusted + { + naked; + push RBX; + mov RAX, RDX; + mov RDX, RCX; + mov RBX, RDI; + mov RCX, RSI; + lock; cmpxchg16b [R8]; + setz AL; + pop RBX; + ret; + } + } + } } else + static assert (false, "Unsupported architecture."); + } + + void atomicFence(MemoryOrder order = MemoryOrder.seq)() pure nothrow @nogc @trusted + { + // TODO: `mfence` should only be required for seq_cst operations, but this depends on + // the compiler's backend knowledge to not reorder code inappropriately, + // so we'll apply it conservatively. + static if (order != MemoryOrder.raw) { - version (Windows) + version (D_InlineAsm_X86) { + import core.cpuid; + + // TODO: review this implementation; it seems way overly complicated asm pure nothrow @nogc @trusted { naked; - push RBX; - mov R9, RDX; - mov RAX, [RDX]; - mov RDX, 8[RDX]; - mov RBX, [RCX]; - mov RCX, 8[RCX]; - lock; cmpxchg16b [R8]; - pop RBX; - jne compare_fail; - mov AL, 1; - ret; - compare_fail: - mov [R9], RAX; - mov 8[R9], RDX; - xor AL, AL; + + call sse2; + test AL, AL; + jne Lcpuid; + + // Fast path: We have SSE2, so just use mfence. + mfence; + jmp Lend; + + Lcpuid: + + // Slow path: We use cpuid to serialize. This is + // significantly slower than mfence, but is the + // only serialization facility we have available + // on older non-SSE2 chips. + push EBX; + + mov EAX, 0; + cpuid; + + pop EBX; + + Lend: + ret; } } - else + else version (D_InlineAsm_X86_64) { asm pure nothrow @nogc @trusted { naked; - push RBX; - mov R8, RCX; - mov R9, RDX; - mov RAX, [RDX]; - mov RDX, 8[RDX]; - mov RBX, RDI; - mov RCX, RSI; - lock; cmpxchg16b [R8]; - pop RBX; - jne compare_fail; - mov AL, 1; - ret; - compare_fail: - mov [R9], RAX; - mov 8[R9], RDX; - xor AL, AL; + mfence; ret; } } + else + static assert (false, "Unsupported architecture."); } } - else - static assert (false, "Unsupported architecture."); -} -bool atomicCompareExchangeStrongNoResult(MemoryOrder succ = MemoryOrder.seq, MemoryOrder fail = MemoryOrder.seq, T)(T* dest, const T compare, T value) pure nothrow @nogc @trusted - if (CanCAS!T) -{ - version (D_InlineAsm_X86) + void pause() pure nothrow @nogc @trusted { - static if (T.sizeof <= 4) + version (D_InlineAsm_X86) { - enum DestAddr = SizedReg!CX; - enum Cmp = SizedReg!(AX, T); - enum Val = SizedReg!(DX, T); - - mixin (simpleFormat(q{ - asm pure nothrow @nogc @trusted - { - mov %2, value; - mov %1, compare; - mov %0, dest; - lock; cmpxchg [%0], %2; - setz AL; - } - }, DestAddr, Cmp, Val)); + asm pure nothrow @nogc @trusted + { + naked; + rep; nop; + ret; + } } - else static if (T.sizeof == 8) + else version (D_InlineAsm_X86_64) { asm pure nothrow @nogc @trusted { - push EDI; - push EBX; - lea EDI, value; - mov EBX, [EDI]; - mov ECX, 4[EDI]; - lea EDI, compare; - mov EAX, [EDI]; - mov EDX, 4[EDI]; - mov EDI, dest; - lock; cmpxchg8b [EDI]; - setz AL; - pop EBX; - pop EDI; + naked; + // pause; // TODO: DMD should add this opcode to its inline asm + rep; nop; + ret; } } else - static assert(T.sizeof <= 8, "128bit atomicCompareExchangeStrong not supported on 32bit target." ); + { + // ARM should `yield` + // other architectures? otherwise some sort of nop... + } } - else version (D_InlineAsm_X86_64) +} +else version (GNU) +{ + import gcc.builtins; + import gcc.config; + + inout(T) atomicLoad(MemoryOrder order = MemoryOrder.seq, T)(inout(T)* src) pure nothrow @nogc @trusted + if (CanCAS!T) { - static if (T.sizeof <= 8) + static assert(order != MemoryOrder.rel, "invalid MemoryOrder for atomicLoad()"); + + static if (GNU_Have_Atomics || GNU_Have_LibAtomic) { - version (Windows) + static if (T.sizeof == ubyte.sizeof) { - enum DestAddr = SizedReg!R8; - enum Cmp = SizedReg!(DX, T); - enum Val = SizedReg!(CX, T); + ubyte value = __atomic_load_1(cast(shared)src, order); + return *cast(typeof(return)*)&value; } - else + else static if (T.sizeof == ushort.sizeof) { - enum DestAddr = SizedReg!DX; - enum Cmp = SizedReg!(SI, T); - enum Val = SizedReg!(DI, T); + ushort value = __atomic_load_2(cast(shared)src, order); + return *cast(typeof(return)*)&value; } - enum AXReg = SizedReg!(AX, T); + else static if (T.sizeof == uint.sizeof) + { + uint value = __atomic_load_4(cast(shared)src, order); + return *cast(typeof(return)*)&value; + } + else static if (T.sizeof == ulong.sizeof && GNU_Have_64Bit_Atomics) + { + ulong value = __atomic_load_8(cast(shared)src, order); + return *cast(typeof(return)*)&value; + } + else static if (GNU_Have_LibAtomic) + { + T value; + __atomic_load(T.sizeof, cast(shared)src, &value, order); + return *cast(typeof(return)*)&value; + } + else + static assert(0, "Invalid template type specified."); + } + else + { + getAtomicMutex.lock(); + scope(exit) getAtomicMutex.unlock(); + return *cast(typeof(return)*)&src; + } + } - mixin (simpleFormat(q{ - asm pure nothrow @nogc @trusted - { - naked; - mov %3, %1; - lock; cmpxchg [%0], %2; - setz AL; - ret; - } - }, DestAddr, Cmp, Val, AXReg)); + void atomicStore(MemoryOrder order = MemoryOrder.seq, T)(T* dest, T value) pure nothrow @nogc @trusted + if (CanCAS!T) + { + static assert(order != MemoryOrder.acq, "Invalid MemoryOrder for atomicStore()"); + + static if (GNU_Have_Atomics || GNU_Have_LibAtomic) + { + static if (T.sizeof == ubyte.sizeof) + __atomic_store_1(cast(shared)dest, *cast(ubyte*)&value, order); + else static if (T.sizeof == ushort.sizeof) + __atomic_store_2(cast(shared)dest, *cast(ushort*)&value, order); + else static if (T.sizeof == uint.sizeof) + __atomic_store_4(cast(shared)dest, *cast(uint*)&value, order); + else static if (T.sizeof == ulong.sizeof && GNU_Have_64Bit_Atomics) + __atomic_store_8(cast(shared)dest, *cast(ulong*)&value, order); + else static if (GNU_Have_LibAtomic) + __atomic_store(T.sizeof, cast(shared)dest, cast(void*)&value, order); + else + static assert(0, "Invalid template type specified."); } else { - version (Windows) - { - asm pure nothrow @nogc @trusted - { - naked; - push RBX; - mov RAX, [RDX]; - mov RDX, 8[RDX]; - mov RBX, [RCX]; - mov RCX, 8[RCX]; - lock; cmpxchg16b [R8]; - setz AL; - pop RBX; - ret; - } - } + getAtomicMutex.lock(); + *dest = value; + getAtomicMutex.unlock(); + } + } + + T atomicFetchAdd(MemoryOrder order = MemoryOrder.seq, bool result = true, T)(T* dest, T value) pure nothrow @nogc @trusted + if (is(T : ulong)) + { + static if (GNU_Have_Atomics || GNU_Have_LibAtomic) + { + static if (T.sizeof == ubyte.sizeof) + return __atomic_fetch_add_1(cast(shared)dest, value, order); + else static if (T.sizeof == ushort.sizeof) + return __atomic_fetch_add_2(cast(shared)dest, value, order); + else static if (T.sizeof == uint.sizeof) + return __atomic_fetch_add_4(cast(shared)dest, value, order); + else static if (T.sizeof == ulong.sizeof && GNU_Have_64Bit_Atomics) + return __atomic_fetch_add_8(cast(shared)dest, value, order); + else static if (GNU_Have_LibAtomic) + return __atomic_fetch_add(T.sizeof, cast(shared)dest, cast(void*)&value, order); else - { - asm pure nothrow @nogc @trusted - { - naked; - push RBX; - mov RAX, RDX; - mov RDX, RCX; - mov RBX, RDI; - mov RCX, RSI; - lock; cmpxchg16b [R8]; - setz AL; - pop RBX; - ret; - } - } + static assert(0, "Invalid template type specified."); + } + else + { + getAtomicMutex.lock(); + scope(exit) getAtomicMutex.unlock(); + T tmp = *dest; + *dest += value; + return tmp; } } - else - static assert (false, "Unsupported architecture."); -} -void atomicFence(MemoryOrder order = MemoryOrder.seq)() pure nothrow @nogc @trusted -{ - // TODO: `mfence` should only be required for seq_cst operations, but this depends on - // the compiler's backend knowledge to not reorder code inappropriately, - // so we'll apply it conservatively. - static if (order != MemoryOrder.raw) + T atomicFetchSub(MemoryOrder order = MemoryOrder.seq, bool result = true, T)(T* dest, T value) pure nothrow @nogc @trusted + if (is(T : ulong)) { - version (D_InlineAsm_X86) + static if (GNU_Have_Atomics || GNU_Have_LibAtomic) { - import core.cpuid; + static if (T.sizeof == ubyte.sizeof) + return __atomic_fetch_sub_1(cast(shared)dest, value, order); + else static if (T.sizeof == ushort.sizeof) + return __atomic_fetch_sub_2(cast(shared)dest, value, order); + else static if (T.sizeof == uint.sizeof) + return __atomic_fetch_sub_4(cast(shared)dest, value, order); + else static if (T.sizeof == ulong.sizeof && GNU_Have_64Bit_Atomics) + return __atomic_fetch_sub_8(cast(shared)dest, value, order); + else static if (GNU_Have_LibAtomic) + return __atomic_fetch_sub(T.sizeof, cast(shared)dest, cast(void*)&value, order); + else + static assert(0, "Invalid template type specified."); + } + else + { + getAtomicMutex.lock(); + scope(exit) getAtomicMutex.unlock(); + T tmp = *dest; + *dest -= value; + return tmp; + } + } - // TODO: review this implementation; it seems way overly complicated - asm pure nothrow @nogc @trusted + T atomicExchange(MemoryOrder order = MemoryOrder.seq, bool result = true, T)(T* dest, T value) pure nothrow @nogc @trusted + if (is(T : ulong) || is(T == class) || is(T == interface) || is(T U : U*)) + { + static if (GNU_Have_Atomics || GNU_Have_LibAtomic) + { + static if (T.sizeof == byte.sizeof) { - naked; + ubyte res = __atomic_exchange_1(cast(shared)dest, *cast(ubyte*)&value, order); + return *cast(typeof(return)*)&res; + } + else static if (T.sizeof == short.sizeof) + { + ushort res = __atomic_exchange_2(cast(shared)dest, *cast(ushort*)&value, order); + return *cast(typeof(return)*)&res; + } + else static if (T.sizeof == int.sizeof) + { + uint res = __atomic_exchange_4(cast(shared)dest, *cast(uint*)&value, order); + return *cast(typeof(return)*)&res; + } + else static if (T.sizeof == long.sizeof && GNU_Have_64Bit_Atomics) + { + ulong res = __atomic_exchange_8(cast(shared)dest, *cast(ulong*)&value, order); + return *cast(typeof(return)*)&res; + } + else static if (GNU_Have_LibAtomic) + { + T res = void; + __atomic_exchange(T.sizeof, cast(shared)dest, cast(void*)&value, &res, order); + return res; + } + else + static assert(0, "Invalid template type specified."); + } + else + { + getAtomicMutex.lock(); + scope(exit) getAtomicMutex.unlock(); - call sse2; - test AL, AL; - jne Lcpuid; + T res = *dest; + *dest = value; + return res; + } + } - // Fast path: We have SSE2, so just use mfence. - mfence; - jmp Lend; + bool atomicCompareExchangeWeak(MemoryOrder succ = MemoryOrder.seq, MemoryOrder fail = MemoryOrder.seq, T)(T* dest, T* compare, T value) pure nothrow @nogc @trusted + if (CanCAS!T) + { + return atomicCompareExchangeImpl!(succ, fail, true)(dest, compare, value); + } - Lcpuid: + bool atomicCompareExchangeStrong(MemoryOrder succ = MemoryOrder.seq, MemoryOrder fail = MemoryOrder.seq, T)(T* dest, T* compare, T value) pure nothrow @nogc @trusted + if (CanCAS!T) + { + return atomicCompareExchangeImpl!(succ, fail, false)(dest, compare, value); + } - // Slow path: We use cpuid to serialize. This is - // significantly slower than mfence, but is the - // only serialization facility we have available - // on older non-SSE2 chips. - push EBX; + bool atomicCompareExchangeStrongNoResult(MemoryOrder succ = MemoryOrder.seq, MemoryOrder fail = MemoryOrder.seq, T)(T* dest, const T compare, T value) pure nothrow @nogc @trusted + if (CanCAS!T) + { + return atomicCompareExchangeImpl!(succ, fail, false)(dest, cast(T*)&compare, value); + } - mov EAX, 0; - cpuid; + private bool atomicCompareExchangeImpl(MemoryOrder succ = MemoryOrder.seq, MemoryOrder fail = MemoryOrder.seq, bool weak, T)(T* dest, T* compare, T value) pure nothrow @nogc @trusted + if (CanCAS!T) + { + bool res = void; - pop EBX; + static if (GNU_Have_Atomics || GNU_Have_LibAtomic) + { + static if (T.sizeof == byte.sizeof) + res = __atomic_compare_exchange_1(cast(shared)dest, compare, *cast(ubyte*)&value, + weak, succ, fail); + else static if (T.sizeof == short.sizeof) + res = __atomic_compare_exchange_2(cast(shared)dest, compare, *cast(ushort*)&value, + weak, succ, fail); + else static if (T.sizeof == int.sizeof) + res = __atomic_compare_exchange_4(cast(shared)dest, compare, *cast(uint*)&value, + weak, succ, fail); + else static if (T.sizeof == long.sizeof && GNU_Have_64Bit_Atomics) + res = __atomic_compare_exchange_8(cast(shared)dest, compare, *cast(ulong*)&value, + weak, succ, fail); + else static if (GNU_Have_LibAtomic) + res = __atomic_compare_exchange(T.sizeof, cast(shared)dest, compare, cast(void*)&value, + succ, fail); + else + static assert(0, "Invalid template type specified."); + } + else + { + static if (T.sizeof == byte.sizeof) + alias U = byte; + else static if (T.sizeof == short.sizeof) + alias U = short; + else static if (T.sizeof == int.sizeof) + alias U = int; + else static if (T.sizeof == long.sizeof) + alias U = long; + else + static assert(0, "Invalid template type specified."); - Lend: + getAtomicMutex.lock(); + scope(exit) getAtomicMutex.unlock(); - ret; + if (*cast(U*)dest == *cast(U*)&compare) + { + *dest = value; + res = true; } - } - else version (D_InlineAsm_X86_64) - { - asm pure nothrow @nogc @trusted + else { - naked; - mfence; - ret; + *compare = *dest; + res = false; } } - else - static assert (false, "Unsupported architecture."); + + return res; } -} -void pause() pure nothrow @nogc @trusted -{ - version (D_InlineAsm_X86) + void atomicFence(MemoryOrder order = MemoryOrder.seq)() pure nothrow @nogc @trusted { - asm pure nothrow @nogc @trusted + static if (GNU_Have_Atomics || GNU_Have_LibAtomic) + __atomic_thread_fence(order); + else { - naked; - rep; nop; - ret; + getAtomicMutex.lock(); + getAtomicMutex.unlock(); } } - else version (D_InlineAsm_X86_64) + + void pause() pure nothrow @nogc @trusted { - asm pure nothrow @nogc @trusted + version (X86) + { + __builtin_ia32_pause(); + } + else version (X86_64) + { + __builtin_ia32_pause(); + } + else { - naked; -// pause; // TODO: DMD should add this opcode to its inline asm - rep; nop; - ret; + // Other architectures? Some sort of nop or barrier. } } - else + + static if (!GNU_Have_Atomics && !GNU_Have_LibAtomic) { - // ARM should `yield` - // other architectures? otherwise some sort of nop... + // Use system mutex for atomics, faking the purity of the functions so + // that they can be used in pure/nothrow/@safe code. + extern (C) private pure @trusted @nogc nothrow + { + static if (GNU_Thread_Model == ThreadModel.Posix) + { + import core.sys.posix.pthread; + alias atomicMutexHandle = pthread_mutex_t; + + pragma(mangle, "pthread_mutex_init") int fakePureMutexInit(pthread_mutex_t*, pthread_mutexattr_t*); + pragma(mangle, "pthread_mutex_lock") int fakePureMutexLock(pthread_mutex_t*); + pragma(mangle, "pthread_mutex_unlock") int fakePureMutexUnlock(pthread_mutex_t*); + } + else static if (GNU_Thread_Model == ThreadModel.Win32) + { + import core.sys.windows.winbase; + alias atomicMutexHandle = CRITICAL_SECTION; + + pragma(mangle, "InitializeCriticalSection") int fakePureMutexInit(CRITICAL_SECTION*); + pragma(mangle, "EnterCriticalSection") void fakePureMutexLock(CRITICAL_SECTION*); + pragma(mangle, "LeaveCriticalSection") int fakePureMutexUnlock(CRITICAL_SECTION*); + } + else + { + alias atomicMutexHandle = int; + } + } + + // Implements lock/unlock operations. + private struct AtomicMutex + { + int lock() pure @trusted @nogc nothrow + { + static if (GNU_Thread_Model == ThreadModel.Posix) + { + if (!_inited) + { + fakePureMutexInit(&_handle, null); + _inited = true; + } + return fakePureMutexLock(&_handle); + } + else + { + static if (GNU_Thread_Model == ThreadModel.Win32) + { + if (!_inited) + { + fakePureMutexInit(&_handle); + _inited = true; + } + fakePureMutexLock(&_handle); + } + return 0; + } + } + + int unlock() pure @trusted @nogc nothrow + { + static if (GNU_Thread_Model == ThreadModel.Posix) + return fakePureMutexUnlock(&_handle); + else + { + static if (GNU_Thread_Model == ThreadModel.Win32) + fakePureMutexUnlock(&_handle); + return 0; + } + } + + private: + atomicMutexHandle _handle; + bool _inited; + } + + // Internal static mutex reference. + private AtomicMutex* _getAtomicMutex() @trusted @nogc nothrow + { + __gshared static AtomicMutex mutex; + return &mutex; + } + + // Pure alias for _getAtomicMutex. + pragma(mangle, _getAtomicMutex.mangleof) + private AtomicMutex* getAtomicMutex() pure @trusted @nogc nothrow @property; } }