From d388a08e8fec0a9e569e8f6e75dd25ac95e4a4ac Mon Sep 17 00:00:00 2001 From: Manu Evans Date: Sun, 18 Aug 2019 16:53:14 -0700 Subject: [PATCH 1/5] Move some traits to druntime. --- src/core/internal/traits.d | 158 +++++++++++++++++++++++++++++++------ 1 file changed, 132 insertions(+), 26 deletions(-) diff --git a/src/core/internal/traits.d b/src/core/internal/traits.d index bccf1ad356..30519dc276 100644 --- a/src/core/internal/traits.d +++ b/src/core/internal/traits.d @@ -8,23 +8,22 @@ */ module core.internal.traits; -/// taken from std.typetuple.TypeTuple -template TypeTuple(TList...) -{ - alias TypeTuple = TList; -} -alias AliasSeq = TypeTuple; -template FieldTypeTuple(T) +// TODO: deprecate these old names...? +alias TypeTuple = AliasSeq; +alias FieldTypeTuple = Fields; + + +alias AliasSeq(TList...) = TList; + +template Fields(T) { static if (is(T == struct) || is(T == union)) - alias FieldTypeTuple = typeof(T.tupleof[0 .. $ - __traits(isNested, T)]); + alias Fields = typeof(T.tupleof[0 .. 
$ - __traits(isNested, T)]); else static if (is(T == class)) - alias FieldTypeTuple = typeof(T.tupleof); + alias Fields = typeof(T.tupleof); else - { - alias FieldTypeTuple = TypeTuple!T; - } + alias Fields = AliasSeq!T; } T trustedCast(T, U)(auto ref U u) @trusted pure nothrow @@ -66,6 +65,20 @@ template Unqual(T) } } +// [For internal use] +package template ModifyTypePreservingTQ(alias Modifier, T) +{ + static if (is(T U == immutable U)) alias ModifyTypePreservingTQ = immutable Modifier!U; + else static if (is(T U == shared inout const U)) alias ModifyTypePreservingTQ = shared inout const Modifier!U; + else static if (is(T U == shared inout U)) alias ModifyTypePreservingTQ = shared inout Modifier!U; + else static if (is(T U == shared const U)) alias ModifyTypePreservingTQ = shared const Modifier!U; + else static if (is(T U == shared U)) alias ModifyTypePreservingTQ = shared Modifier!U; + else static if (is(T U == inout const U)) alias ModifyTypePreservingTQ = inout const Modifier!U; + else static if (is(T U == inout U)) alias ModifyTypePreservingTQ = inout Modifier!U; + else static if (is(T U == const U)) alias ModifyTypePreservingTQ = const Modifier!U; + else alias ModifyTypePreservingTQ = Modifier!T; +} + // Substitute all `inout` qualifiers that appears in T to `const` template substInout(T) { @@ -187,12 +200,12 @@ template allSatisfy(alias F, T...) } // taken from std.meta.anySatisfy -template anySatisfy(alias F, T...) +template anySatisfy(alias F, Ts...) { - static foreach (Ti; T) + static foreach (T; Ts) { static if (!is(typeof(anySatisfy) == bool) && // not yet defined - F!(Ti)) + F!T) { enum anySatisfy = true; } @@ -220,17 +233,6 @@ template maxAlignment(U...) } } -// std.traits.Fields -template Fields(T) -{ - static if (is(T == struct) || is(T == union)) - alias Fields = typeof(T.tupleof[0 .. 
$ - __traits(isNested, T)]); - else static if (is(T == class)) - alias Fields = typeof(T.tupleof); - else - alias Fields = TypeTuple!T; -} - /// See $(REF hasElaborateMove, std,traits) template hasElaborateMove(S) { @@ -303,6 +305,110 @@ template hasElaborateAssign(S) } } +template hasIndirections(T) +{ + static if (is(T == struct) || is(T == union)) + enum hasIndirections = anySatisfy!(.hasIndirections, Fields!T); + else static if (__traits(isStaticArray, T) && is(T : E[N], E, size_t N)) + enum hasIndirections = is(E == void) ? true : hasIndirections!E; + else static if (isFunctionPointer!T) + enum hasIndirections = false; + else + enum hasIndirections = isPointer!T || isDelegate!T || isDynamicArray!T || + __traits(isAssociativeArray, T) || is (T == class) || is(T == interface); +} + +template hasUnsharedIndirections(T) +{ + static if (is(T == struct) || is(T == union)) + enum hasUnsharedIndirections = anySatisfy!(.hasUnsharedIndirections, Fields!T); + else static if (is(T : E[N], E, size_t N)) + enum hasUnsharedIndirections = is(E == void) ? false : hasUnsharedIndirections!E; + else static if (isFunctionPointer!T) + enum hasUnsharedIndirections = false; + else static if (isPointer!T) + enum hasUnsharedIndirections = !is(T : shared(U)*, U); + else static if (isDynamicArray!T) + enum hasUnsharedIndirections = !is(T : shared(V)[], V); + else static if (is(T == class) || is(T == interface)) + enum hasUnsharedIndirections = !is(T : shared(W), W); + else + enum hasUnsharedIndirections = isDelegate!T || __traits(isAssociativeArray, T); // TODO: how to handle these? 
+} + +enum bool isAggregateType(T) = is(T == struct) || is(T == union) || + is(T == class) || is(T == interface); + +enum bool isPointer(T) = is(T == U*, U) && !isAggregateType!T; + +enum bool isDynamicArray(T) = is(DynamicArrayTypeOf!T) && !isAggregateType!T; + +template OriginalType(T) +{ + template Impl(T) + { + static if (is(T U == enum)) alias Impl = OriginalType!U; + else alias Impl = T; + } + + alias OriginalType = ModifyTypePreservingTQ!(Impl, T); +} + +template DynamicArrayTypeOf(T) +{ + static if (is(AliasThisTypeOf!T AT) && !is(AT[] == AT)) + alias X = DynamicArrayTypeOf!AT; + else + alias X = OriginalType!T; + + static if (is(Unqual!X : E[], E) && !is(typeof({ enum n = X.length; }))) + alias DynamicArrayTypeOf = X; + else + static assert(0, T.stringof ~ " is not a dynamic array"); +} + +private template AliasThisTypeOf(T) + if (isAggregateType!T) +{ + alias members = __traits(getAliasThis, T); + + static if (members.length == 1) + alias AliasThisTypeOf = typeof(__traits(getMember, T.init, members[0])); + else + static assert(0, T.stringof~" does not have alias this type"); +} + +template isFunctionPointer(T...) + if (T.length == 1) +{ + static if (is(T[0] U) || is(typeof(T[0]) U)) + { + static if (is(U F : F*) && is(F == function)) + enum bool isFunctionPointer = true; + else + enum bool isFunctionPointer = false; + } + else + enum bool isFunctionPointer = false; +} + +template isDelegate(T...) + if (T.length == 1) +{ + static if (is(typeof(& T[0]) U : U*) && is(typeof(& T[0]) U == delegate)) + { + // T is a (nested) function symbol. + enum bool isDelegate = true; + } + else static if (is(T[0] W) || is(typeof(T[0]) W)) + { + // T is an expression or a type. Take the type of it and examine. + enum bool isDelegate = is(W == delegate); + } + else + enum bool isDelegate = false; +} + // std.meta.Filter template Filter(alias pred, TList...) 
{ From f8ec8e3526557c719c60d178318483a653434ca9 Mon Sep 17 00:00:00 2001 From: Manu Evans Date: Sat, 17 Aug 2019 19:32:51 -0700 Subject: [PATCH 2/5] Extract atomic platform specific implementation into an implementation file. --- CODEOWNERS | 4 +- mak/COPY | 1 + mak/SRCS | 1 + mak/WINDOWS | 3 + src/core/atomic.d | 2034 +++++++++--------------------------- src/core/internal/atomic.d | 533 ++++++++++ 6 files changed, 1060 insertions(+), 1516 deletions(-) create mode 100644 src/core/internal/atomic.d diff --git a/CODEOWNERS b/CODEOWNERS index 50ee7e8455..bcdb157e21 100644 --- a/CODEOWNERS +++ b/CODEOWNERS @@ -15,7 +15,7 @@ src/checkedint.d @redstar @andralex @JackStouffer -src/core/atomic.d @WalterBright @ibuclaw +src/core/atomic.d @WalterBright @ibuclaw @TurkeyMan src/core/attribute.d @jacob-carlborg src/core/bitop.d @schveiguy @tsbockman @Geod24 src/core/cpuid.d @WalterBright @ibuclaw @JackStouffer @@ -26,7 +26,7 @@ src/core/math.d @ibuclaw @redstar src/core/runtime.d @MartinNowak @Abscissa src/core/simd.d @WalterBright @MartinNowak src/core/stdc/* @schveiguy @ibuclaw -src/core/stdcpp/* @WalterBright @Darredevil +src/core/stdcpp/* @WalterBright @Darredevil @TurkeyMan src/core/sync/* @MartinNowak @Geod24 @WalterBright @ZombineDev src/core/sys/bionic/* @joakim-noah src/core/sys/darwin/* @jacob-carlborg @klickverbot @etcimon @MartinNowak diff --git a/mak/COPY b/mak/COPY index 1a8342116b..2962e567e4 100644 --- a/mak/COPY +++ b/mak/COPY @@ -23,6 +23,7 @@ COPY=\ \ $(IMPDIR)\core\internal\abort.d \ $(IMPDIR)\core\internal\arrayop.d \ + $(IMPDIR)\core\internal\atomic.d \ $(IMPDIR)\core\internal\attributes.d \ $(IMPDIR)\core\internal\convert.d \ $(IMPDIR)\core\internal\dassert.d \ diff --git a/mak/SRCS b/mak/SRCS index cc0e925154..e744d0c992 100644 --- a/mak/SRCS +++ b/mak/SRCS @@ -23,6 +23,7 @@ SRCS=\ \ src\core\internal\abort.d \ src\core\internal\arrayop.d \ + src\core\internal\atomic.d \ src\core\internal\convert.d \ src\core\internal\dassert.d \ src\core\internal\hash.d 
\ diff --git a/mak/WINDOWS b/mak/WINDOWS index 4cd551976a..cccb5cc480 100644 --- a/mak/WINDOWS +++ b/mak/WINDOWS @@ -123,6 +123,9 @@ $(IMPDIR)\core\internal\abort.d : src\core\internal\abort.d $(IMPDIR)\core\internal\arrayop.d : src\core\internal\arrayop.d copy $** $@ +$(IMPDIR)\core\internal\atomic.d : src\core\internal\atomic.d + copy $** $@ + $(IMPDIR)\core\internal\attributes.d : src\core\internal\attributes.d copy $** $@ diff --git a/src/core/atomic.d b/src/core/atomic.d index 6b54587199..04b045d2ab 100644 --- a/src/core/atomic.d +++ b/src/core/atomic.d @@ -10,6 +10,7 @@ module core.atomic; +import core.internal.atomic; import core.internal.attributes : betterC; version (D_InlineAsm_X86) @@ -35,123 +36,6 @@ else enum has128BitCAS = false; } -private -{ - /* Construct a type with a shared tail, and if possible with an unshared - head. */ - template TailShared(U) if (!is(U == shared)) - { - alias TailShared = .TailShared!(shared U); - } - template TailShared(S) if (is(S == shared)) - { - // Get the unshared variant of S. - static if (is(S U == shared U)) {} - else static assert(false, "Should never be triggered. The `static " ~ - "if` declares `U` as the unshared version of the shared type " ~ - "`S`. `S` is explicitly declared as shared, so getting `U` " ~ - "should always work."); - - static if (is(S : U)) - alias TailShared = U; - else static if (is(S == struct)) - { - enum implName = () { - /* Start with "_impl". If S has a field with that name, append - underscores until the clash is resolved. 
*/ - string name = "_impl"; - string[] fieldNames; - static foreach (alias field; S.tupleof) - { - fieldNames ~= __traits(identifier, field); - } - static bool canFind(string[] haystack, string needle) - { - foreach (candidate; haystack) - { - if (candidate == needle) return true; - } - return false; - } - while (canFind(fieldNames, name)) name ~= "_"; - return name; - } (); - struct TailShared - { - static foreach (i, alias field; S.tupleof) - { - /* On @trusted: This is casting the field from shared(Foo) - to TailShared!Foo. The cast is safe because the field has - been loaded and is not shared anymore. */ - mixin(" - @trusted @property - ref " ~ __traits(identifier, field) ~ "() - { - alias R = TailShared!(typeof(field)); - return * cast(R*) &" ~ implName ~ ".tupleof[i]; - } - "); - } - mixin(" - S " ~ implName ~ "; - alias " ~ implName ~ " this; - "); - } - } - else - alias TailShared = S; - } - @safe unittest - { - // No tail (no indirections) -> fully unshared. - - static assert(is(TailShared!int == int)); - static assert(is(TailShared!(shared int) == int)); - - static struct NoIndir { int i; } - static assert(is(TailShared!NoIndir == NoIndir)); - static assert(is(TailShared!(shared NoIndir) == NoIndir)); - - // Tail can be independently shared or is already -> tail-shared. 
- - static assert(is(TailShared!(int*) == shared(int)*)); - static assert(is(TailShared!(shared int*) == shared(int)*)); - static assert(is(TailShared!(shared(int)*) == shared(int)*)); - - static assert(is(TailShared!(int[]) == shared(int)[])); - static assert(is(TailShared!(shared int[]) == shared(int)[])); - static assert(is(TailShared!(shared(int)[]) == shared(int)[])); - - static struct S1 { shared int* p; } - static assert(is(TailShared!S1 == S1)); - static assert(is(TailShared!(shared S1) == S1)); - - static struct S2 { shared(int)* p; } - static assert(is(TailShared!S2 == S2)); - static assert(is(TailShared!(shared S2) == S2)); - - // Tail follows shared-ness of head -> fully shared. - - static class C { int i; } - static assert(is(TailShared!C == shared C)); - static assert(is(TailShared!(shared C) == shared C)); - - /* However, structs get a wrapper that has getters which cast to - TailShared. */ - - static struct S3 { int* p; int _impl; int _impl_; int _impl__; } - static assert(!is(TailShared!S3 : S3)); - static assert(is(TailShared!S3 : shared S3)); - static assert(is(TailShared!(shared S3) == TailShared!S3)); - - static struct S4 { shared(int)** p; } - static assert(!is(TailShared!S4 : S4)); - static assert(is(TailShared!S4 : shared S4)); - static assert(is(TailShared!(shared S4) == TailShared!S4)); - } -} - - version (AsmX86) { // NOTE: Strictly speaking, the x86 supports atomic operations on @@ -173,1279 +57,435 @@ version (AsmX86) } } - -version (CoreDdoc) +/** + * Specifies the memory ordering semantics of an atomic operation. + * + * See_Also: + * $(HTTP en.cppreference.com/w/cpp/atomic/memory_order) + */ +enum MemoryOrder { /** - * Performs the binary operation 'op' on val using 'mod' as the modifier. - * - * Params: - * val = The target variable. - * mod = The modifier to apply. - * - * Returns: - * The result of the operation. 
- */ - TailShared!T atomicOp(string op, T, V1)( ref shared T val, V1 mod ) pure nothrow @nogc @safe - if ( __traits( compiles, mixin( "*cast(T*)&val" ~ op ~ "mod" ) ) ) - { - return TailShared!T.init; - } - - /** - * Atomically adds `mod` to the value referenced by `val` and returns the value `val` held previously. - * This operation is both lock-free and atomic. - * - * Params: - * val = Reference to the value to modify. - * mod = The value to add. - * - * Returns: - * The value held previously by `val`. - */ - TailShared!(T) atomicFetchAdd(T)( ref shared T val, size_t mod ) pure nothrow @nogc @safe; - - /** - * Atomically subtracts `mod` from the value referenced by `val` and returns the value `val` held previously. - * This operation is both lock-free and atomic. - * - * Params: - * val = Reference to the value to modify. - * mod = The value to subtract. - * - * Returns: - * The value held previously by `val`. - */ - TailShared!(T) atomicFetchSub(T)( ref shared T val, size_t mod ) pure nothrow @nogc @safe; - - /** - * Exchange `exchangeWith` with the memory referenced by `here`. - * This operation is both lock-free and atomic. - * - * Params: - * here = The address of the destination variable. - * exchangeWith = The value to exchange. - * - * Returns: - * The value held previously by `here`. 
- */ - shared(T) atomicExchange(MemoryOrder ms = MemoryOrder.seq,T,V)( shared(T)* here, V exchangeWith ) pure nothrow @nogc @safe - if ( !is(T == class) && !is(T U : U*) && __traits( compiles, { *here = exchangeWith; } ) ); - - /// Ditto - shared(T) atomicExchange(MemoryOrder ms = MemoryOrder.seq,T,V)( shared(T)* here, shared(V) exchangeWith ) pure nothrow @nogc @safe - if ( is(T == class) && __traits( compiles, { *here = exchangeWith; } ) ); - - /// Ditto - shared(T) atomicExchange(MemoryOrder ms = MemoryOrder.seq,T,V)( shared(T)* here, shared(V)* exchangeWith ) pure nothrow @nogc @safe - if ( is(T U : U*) && __traits( compiles, { *here = exchangeWith; } ) ); - - /** - * Stores 'writeThis' to the memory referenced by 'here' if the value - * referenced by 'here' is equal to 'ifThis'. This operation is both - * lock-free and atomic. - * - * Params: - * here = The address of the destination variable. - * writeThis = The value to store. - * ifThis = The comparison value. - * - * Returns: - * true if the store occurred, false if not. + * Not sequenced. + * Corresponds to $(LINK2 https://llvm.org/docs/Atomics.html#monotonic, LLVM AtomicOrdering.Monotonic) + * and C++11/C11 `memory_order_relaxed`. */ - bool cas(T,V1,V2)( shared(T)* here, const V1 ifThis, V2 writeThis ) pure nothrow @nogc @safe - if ( !is(T == class) && !is(T U : U*) && __traits( compiles, { *here = writeThis; } ) ); - - /// Ditto - bool cas(T,V1,V2)( shared(T)* here, const shared(V1) ifThis, shared(V2) writeThis ) pure nothrow @nogc @safe - if ( is(T == class) && __traits( compiles, { *here = writeThis; } ) ); - - /// Ditto - bool cas(T,V1,V2)( shared(T)* here, const shared(V1)* ifThis, shared(V2)* writeThis ) pure nothrow @nogc @safe - if ( is(T U : U*) && __traits( compiles, { *here = writeThis; } ) ); - - /** - * Stores 'writeThis' to the memory referenced by 'here' if the value - * referenced by 'here' is equal to the value referenced by 'ifThis'. 
- * The prior value referenced by 'here' is written to `ifThis` and - * returned to the user. This operation is both lock-free and atomic. - * - * Params: - * here = The address of the destination variable. - * writeThis = The value to store. - * ifThis = The address of the value to compare, and receives the prior value of `here` as output. - * - * Returns: - * true if the store occurred, false if not. - */ - bool cas(T,V1,V2)( shared(T)* here, V1* ifThis, V2 writeThis ) pure nothrow @nogc @safe - if ( !is(T == class) && !is(T U : U*) && __traits( compiles, { *here = writeThis; } ) ); - - /// Ditto - bool cas(T,V1,V2)( shared(T)* here, shared(V1)* ifThis, shared(V2) writeThis ) pure nothrow @nogc @safe - if ( is(T == class) && __traits( compiles, { *here = writeThis; } ) ); - - /// Ditto - bool cas(T,V1,V2)( shared(T)* here, shared(V1)** ifThis, shared(V2)* writeThis ) pure nothrow @nogc @safe - if ( is(T U : U*) && __traits( compiles, { *here = writeThis; } ) ); - + raw, /** - * Loads 'val' from memory and returns it. The memory barrier specified - * by 'ms' is applied to the operation, which is fully sequenced by - * default. Valid memory orders are MemoryOrder.raw, MemoryOrder.acq, - * and MemoryOrder.seq. - * - * Params: - * val = The target variable. - * - * Returns: - * The value of 'val'. + * Hoist-load + hoist-store barrier. + * Corresponds to $(LINK2 https://llvm.org/docs/Atomics.html#acquire, LLVM AtomicOrdering.Acquire) + * and C++11/C11 `memory_order_acquire`. */ - TailShared!T atomicLoad(MemoryOrder ms = MemoryOrder.seq,T)( ref const shared T val ) pure nothrow @nogc @safe - { - return TailShared!T.init; - } - - + acq, /** - * Writes 'newval' into 'val'. The memory barrier specified by 'ms' is - * applied to the operation, which is fully sequenced by default. - * Valid memory orders are MemoryOrder.raw, MemoryOrder.rel, and - * MemoryOrder.seq. - * - * Params: - * val = The target variable. - * newval = The value to store. 
+ * Sink-load + sink-store barrier. + * Corresponds to $(LINK2 https://llvm.org/docs/Atomics.html#release, LLVM AtomicOrdering.Release) + * and C++11/C11 `memory_order_release`. */ - void atomicStore(MemoryOrder ms = MemoryOrder.seq,T,V1)( ref shared T val, V1 newval ) pure nothrow @nogc @safe - if ( __traits( compiles, { val = newval; } ) ) - { - - } - - + rel, /** - * Specifies the memory ordering semantics of an atomic operation. - * - * See_Also: - * $(HTTP en.cppreference.com/w/cpp/atomic/memory_order) + * Acquire + release barrier. + * Corresponds to $(LINK2 https://llvm.org/docs/Atomics.html#acquirerelease, LLVM AtomicOrdering.AcquireRelease) + * and C++11/C11 `memory_order_acq_rel`. */ - enum MemoryOrder - { - /++ - Not sequenced. - Corresponds to $(LINK2 https://llvm.org/docs/Atomics.html#monotonic, LLVM AtomicOrdering.Monotonic) - and C++11/C11 `memory_order_relaxed`. - +/ - raw, - /++ - Hoist-load + hoist-store barrier. - Corresponds to $(LINK2 https://llvm.org/docs/Atomics.html#acquire, LLVM AtomicOrdering.Acquire) - and C++11/C11 `memory_order_acquire`. - +/ - acq, - /++ - Sink-load + sink-store barrier. - Corresponds to $(LINK2 https://llvm.org/docs/Atomics.html#release, LLVM AtomicOrdering.Release) - and C++11/C11 `memory_order_release`. - +/ - rel, - /++ - Fully sequenced (acquire + release). Corresponds to - $(LINK2 https://llvm.org/docs/Atomics.html#sequentiallyconsistent, LLVM AtomicOrdering.SequentiallyConsistent) - and C++11/C11 `memory_order_seq_cst`. - +/ - seq, - } - + acq_rel, /** - * Inserts a full load/store memory fence (on platforms that need it). This ensures - * that all loads and stores before a call to this function are executed before any - * loads and stores after the call. + * Fully sequenced (acquire + release). Corresponds to + * $(LINK2 https://llvm.org/docs/Atomics.html#sequentiallyconsistent, LLVM AtomicOrdering.SequentiallyConsistent) + * and C++11/C11 `memory_order_seq_cst`. 
*/ - void atomicFence() nothrow @nogc; + seq, } -else version (AsmX86_32) + +/** + * Atomically adds `mod` to the value referenced by `val` and returns the value `val` held previously. + * This operation is both lock-free and atomic. + * + * Params: + * val = Reference to the value to modify. + * mod = The value to add. + * + * Returns: + * The value held previously by `val`. + */ +TailShared!(T) atomicFetchAdd(T)( ref shared T val, size_t mod ) pure nothrow @nogc @trusted + if ( __traits(isIntegral, T) ) +in ( atomicValueIsProperlyAligned(val) ) { - // Uses specialized asm for fast fetch and add operations - TailShared!(T) atomicFetchAdd(T)( ref shared T val, size_t mod ) pure nothrow @nogc @safe - if ( T.sizeof <= 4 ) - { - asm pure nothrow @nogc @trusted - { - mov EAX, mod; - mov EDX, val; - } - static if (T.sizeof == 1) asm pure nothrow @nogc @trusted { lock; xadd[EDX], AL; } - else static if (T.sizeof == 2) asm pure nothrow @nogc @trusted { lock; xadd[EDX], AX; } - else static if (T.sizeof == 4) asm pure nothrow @nogc @trusted { lock; xadd[EDX], EAX; } - } + return core.internal.atomic.atomicFetchAdd( &val, cast(T)mod ); +} - TailShared!(T) atomicFetchSub(T)( ref shared T val, size_t mod ) pure nothrow @nogc @safe - if ( T.sizeof <= 4) - { - return atomicFetchAdd(val, -mod); - } +/** + * Atomically subtracts `mod` from the value referenced by `val` and returns the value `val` held previously. + * This operation is both lock-free and atomic. + * + * Params: + * val = Reference to the value to modify. + * mod = The value to subtract. + * + * Returns: + * The value held previously by `val`. 
+ */ +TailShared!(T) atomicFetchSub(T)( ref shared T val, size_t mod ) pure nothrow @nogc @trusted + if ( __traits(isIntegral, T) ) +in ( atomicValueIsProperlyAligned(val) ) +{ + return core.internal.atomic.atomicFetchSub( &val, cast(T)mod ); +} - TailShared!T atomicOp(string op, T, V1)( ref shared T val, V1 mod ) pure nothrow @nogc - if ( __traits( compiles, mixin( "*cast(T*)&val" ~ op ~ "mod" ) ) ) - in - { - assert(atomicValueIsProperlyAligned(val)); - } - do +/** + * Exchange `exchangeWith` with the memory referenced by `here`. + * This operation is both lock-free and atomic. + * + * Params: + * here = The address of the destination variable. + * exchangeWith = The value to exchange. + * + * Returns: + * The value held previously by `here`. + */ +shared(T) atomicExchange(MemoryOrder ms = MemoryOrder.seq,T,V)( shared(T)* here, V exchangeWith ) pure nothrow @nogc @trusted + if ( !is(T == class) && !is(T U : U*) && __traits( compiles, { *here = exchangeWith; } ) ) +in ( atomicPtrIsProperlyAligned( here ), "Argument `here` is not properly aligned" ) +{ + static if ( __traits(isFloating, V) ) { - // binary operators - // - // + - * / % ^^ & - // | ^ << >> >>> ~ in - // == != < <= > >= - static if (op == "+" || op == "-" || op == "*" || op == "/" || - op == "%" || op == "^^" || op == "&" || op == "|" || - op == "^" || op == "<<" || op == ">>" || op == ">>>" || - op == "~" || // skip "in" - op == "==" || op == "!=" || op == "<" || op == "<=" || - op == ">" || op == ">=") - { - TailShared!T get = atomicLoad!(MemoryOrder.raw)( val ); - mixin( "return get " ~ op ~ " mod;" ); - } - else - // assignment operators - // - // += -= *= /= %= ^^= &= - // |= ^= <<= >>= >>>= ~= - static if ( op == "+=" && __traits(isIntegral, T) && T.sizeof <= 4 && V1.sizeof <= 4) - { - return cast(T)(atomicFetchAdd!(T)(val, mod) + mod); - } - else static if ( op == "-=" && __traits(isIntegral, T) && T.sizeof <= 4 && V1.sizeof <= 4) - { - return cast(T)(atomicFetchSub!(T)(val, mod) - mod); - } - 
else static if ( op == "+=" || op == "-=" || op == "*=" || op == "/=" || - op == "%=" || op == "^^=" || op == "&=" || op == "|=" || - op == "^=" || op == "<<=" || op == ">>=" || op == ">>>=" ) // skip "~=" - { - TailShared!T get, set; - - do - { - get = set = atomicLoad!(MemoryOrder.raw)( val ); - mixin( "set " ~ op ~ " mod;" ); - } while ( !casByRef( val, get, set ) ); - return set; - } + static if ( V.sizeof == 4 ) + alias I = uint; + else static if ( V.sizeof == 8 ) + alias I = ulong; else - { - static assert( false, "Operation not supported." ); - } - } - - shared(T) atomicExchange(MemoryOrder ms = MemoryOrder.seq,T,V)( shared(T)* here, V exchangeWith ) pure nothrow @nogc @safe - if ( !is(T == class) && !is(T U : U*) && __traits( compiles, { *here = exchangeWith; } ) ) - { - return atomicExchangeImpl(here, exchangeWith); - } - - shared(T) atomicExchange(MemoryOrder ms = MemoryOrder.seq,T,V)( shared(T)* here, shared(V) exchangeWith ) pure nothrow @nogc @safe - if ( is(T == class) && __traits( compiles, { *here = exchangeWith; } ) ) - { - return atomicExchangeImpl(here, exchangeWith); - } - - shared(T) atomicExchange(MemoryOrder ms = MemoryOrder.seq,T,V)( shared(T)* here, shared(V)* exchangeWith ) pure nothrow @nogc @safe - if ( is(T U : U*) && __traits( compiles, { *here = exchangeWith; } ) ) - { - return atomicExchangeImpl(here, exchangeWith); - } - - private shared(T) atomicExchangeImpl(T,V)( shared(T)* here, V exchangeWith ) pure nothrow @nogc @safe - in ( atomicPtrIsProperlyAligned( here ), "Argument `here` is not properly aligned" ) - { - static if ( T.sizeof == byte.sizeof ) - { - asm pure nothrow @nogc @trusted - { - mov AL, exchangeWith; - mov ECX, here; - xchg [ECX], AL; - } - } - else static if ( T.sizeof == short.sizeof ) - { - asm pure nothrow @nogc @trusted - { - mov AX, exchangeWith; - mov ECX, here; - xchg [ECX], AX; - } - } - else static if ( T.sizeof == int.sizeof ) - { - asm pure nothrow @nogc @trusted - { - mov EAX, exchangeWith; - mov ECX, 
here; - xchg [ECX], EAX; - } - static if ( __traits(isFloating, T) ) - { - asm pure nothrow @nogc @trusted - { - mov exchangeWith, EAX; - } - return exchangeWith; - } - } - else - { - static assert( false, "Invalid template type specified." ); - } - } - - bool casByRef(T,V1,V2)( ref T value, V1 ifThis, V2 writeThis ) pure nothrow @nogc @trusted - { - return cas(&value, ifThis, writeThis); - } - - bool cas(T,V1,V2)( shared(T)* here, const V1 ifThis, V2 writeThis ) pure nothrow @nogc @safe - if ( !is(T == class) && !is(T U : U*) && __traits( compiles, { *here = writeThis; } ) ) - { - return casImplNoResult(here, ifThis, writeThis); - } - - bool cas(T,V1,V2)( shared(T)* here, const shared(V1) ifThis, shared(V2) writeThis ) pure nothrow @nogc @safe - if ( is(T == class) && __traits( compiles, { *here = writeThis; } ) ) - { - return casImplNoResult(here, ifThis, writeThis); - } - - bool cas(T,V1,V2)( shared(T)* here, const shared(V1)* ifThis, shared(V2)* writeThis ) pure nothrow @nogc @safe - if ( is(T U : U*) && __traits( compiles, { *here = writeThis; } ) ) - { - return casImplNoResult(here, ifThis, writeThis); - } - - private bool casImplNoResult(T,V1,V2)( shared(T)* here, V1 ifThis, V2 writeThis ) pure nothrow @nogc @safe - in - { - assert( atomicPtrIsProperlyAligned( here ) ); - } - do - { - static if ( T.sizeof == byte.sizeof ) - { - ////////////////////////////////////////////////////////////////// - // 1 Byte CAS - ////////////////////////////////////////////////////////////////// - - asm pure nothrow @nogc @trusted - { - mov DL, writeThis; - mov AL, ifThis; - mov ECX, here; - lock; // lock always needed to make this op atomic - cmpxchg [ECX], DL; - setz AL; - } - } - else static if ( T.sizeof == short.sizeof ) - { - ////////////////////////////////////////////////////////////////// - // 2 Byte CAS - ////////////////////////////////////////////////////////////////// - - asm pure nothrow @nogc @trusted - { - mov DX, writeThis; - mov AX, ifThis; - mov ECX, here; - 
lock; // lock always needed to make this op atomic - cmpxchg [ECX], DX; - setz AL; - } - } - else static if ( T.sizeof == int.sizeof ) - { - ////////////////////////////////////////////////////////////////// - // 4 Byte CAS - ////////////////////////////////////////////////////////////////// - - asm pure nothrow @nogc @trusted - { - mov EDX, writeThis; - mov EAX, ifThis; - mov ECX, here; - lock; // lock always needed to make this op atomic - cmpxchg [ECX], EDX; - setz AL; - } - } - else static if ( T.sizeof == long.sizeof && has64BitCAS ) - { - - ////////////////////////////////////////////////////////////////// - // 8 Byte CAS on a 32-Bit Processor - ////////////////////////////////////////////////////////////////// - - asm pure nothrow @nogc @trusted - { - push EDI; - push EBX; - lea EDI, writeThis; - mov EBX, [EDI]; - mov ECX, 4[EDI]; - lea EDI, ifThis; - mov EAX, [EDI]; - mov EDX, 4[EDI]; - mov EDI, here; - lock; // lock always needed to make this op atomic - cmpxchg8b [EDI]; - setz AL; - pop EBX; - pop EDI; - } - } - else - { - static assert( false, "Invalid template type specified." 
); - } - } - - bool cas(T,V1,V2)( shared(T)* here, V1* ifThis, V2 writeThis ) pure nothrow @nogc @safe - if ( !is(T == class) && !is(T U : U*) && __traits( compiles, { *here = writeThis; } ) ) - { - return casImplWithResult(here, *ifThis, writeThis); - } - - bool cas(T,V1,V2)( shared(T)* here, shared(V1)* ifThis, shared(V2) writeThis ) pure nothrow @nogc @safe - if ( is(T == class) && __traits( compiles, { *here = writeThis; } ) ) - { - return casImplWithResult(here, *ifThis, writeThis); - } - - bool cas(T,V1,V2)( shared(T)* here, shared(V1*)* ifThis, shared(V2)* writeThis ) pure nothrow @nogc @safe - if ( is(T U : U*) && __traits( compiles, { *here = writeThis; } ) ) - { - return casImplWithResult(here, *ifThis, writeThis); - } - - private bool casImplWithResult(T,V1,V2)( shared(T)* here, ref V1 ifThis, V2 writeThis ) pure nothrow @nogc @safe - in - { - assert( atomicPtrIsProperlyAligned( here ) ); - } - do - { - static if ( T.sizeof == byte.sizeof ) - { - ////////////////////////////////////////////////////////////////// - // 1 Byte CAS - ////////////////////////////////////////////////////////////////// - - asm pure nothrow @nogc @trusted - { - push EDI; - mov DL, writeThis; - mov EDI, ifThis; - mov AL, [EDI]; - mov ECX, here; - lock; // lock always needed to make this op atomic - cmpxchg [ECX], DL; - mov [EDI], AL; - setz AL; - pop EDI; - } - } - else static if ( T.sizeof == short.sizeof ) - { - ////////////////////////////////////////////////////////////////// - // 2 Byte CAS - ////////////////////////////////////////////////////////////////// - - asm pure nothrow @nogc @trusted - { - push EDI; - mov DX, writeThis; - mov EDI, ifThis; - mov AX, [EDI]; - mov ECX, here; - lock; // lock always needed to make this op atomic - cmpxchg [ECX], DX; - mov [EDI], AX; - setz AL; - pop EDI; - } - } - else static if ( T.sizeof == int.sizeof ) - { - ////////////////////////////////////////////////////////////////// - // 4 Byte CAS - 
////////////////////////////////////////////////////////////////// - - asm pure nothrow @nogc @trusted - { - push EDI; - mov EDX, writeThis; - mov EDI, ifThis; - mov EAX, [EDI]; - mov ECX, here; - lock; // lock always needed to make this op atomic - cmpxchg [ECX], EDX; - mov [EDI], EAX; - setz AL; - pop EDI; - } - } - else static if ( T.sizeof == long.sizeof && has64BitCAS ) - { - - ////////////////////////////////////////////////////////////////// - // 8 Byte CAS on a 32-Bit Processor - ////////////////////////////////////////////////////////////////// - - asm pure nothrow @nogc @trusted - { - push EDI; - push EBX; - lea EDI, writeThis; - mov EBX, [EDI]; - mov ECX, 4[EDI]; - mov EDI, ifThis; - mov EAX, [EDI]; - mov EDX, 4[EDI]; - mov EDI, here; - lock; // lock always needed to make this op atomic - cmpxchg8b [EDI]; - mov EDI, ifThis; - mov [EDI], EAX; - mov 4[EDI], EDX; - setz AL; - pop EBX; - pop EDI; - } - } - else - { - static assert( false, "Invalid template type specified." ); - } - } - - - enum MemoryOrder - { - raw, - acq, - rel, - seq, - } - - - private - { - // NOTE: x86 loads implicitly have acquire semantics so a memory - // barrier is only necessary on releases. - template needsLoadBarrier( MemoryOrder ms ) - { - enum bool needsLoadBarrier = ms == MemoryOrder.seq; - } - - - // NOTE: x86 stores implicitly have release semantics so a memory - // barrier is only necessary on acquires. 
- template needsStoreBarrier( MemoryOrder ms ) - { - enum bool needsStoreBarrier = ms == MemoryOrder.seq; - } - } - - - TailShared!T atomicLoad(MemoryOrder ms = MemoryOrder.seq, T)( ref const shared T val ) pure nothrow @nogc @safe - if (!__traits(isFloating, T)) - { - static assert( ms != MemoryOrder.rel, "invalid MemoryOrder for atomicLoad()" ); - static assert( __traits(isPOD, T), "argument to atomicLoad() must be POD" ); - - static if ( T.sizeof == byte.sizeof ) - { - ////////////////////////////////////////////////////////////////// - // 1 Byte Load - ////////////////////////////////////////////////////////////////// - - static if ( needsLoadBarrier!(ms) ) - { - asm pure nothrow @nogc @trusted - { - mov DL, 0; - mov AL, 0; - mov ECX, val; - lock; // lock always needed to make this op atomic - cmpxchg [ECX], DL; - } - } - else - { - asm pure nothrow @nogc @trusted - { - mov EAX, val; - mov AL, [EAX]; - } - } - } - else static if ( T.sizeof == short.sizeof ) - { - ////////////////////////////////////////////////////////////////// - // 2 Byte Load - ////////////////////////////////////////////////////////////////// - - static if ( needsLoadBarrier!(ms) ) - { - asm pure nothrow @nogc @trusted - { - mov DX, 0; - mov AX, 0; - mov ECX, val; - lock; // lock always needed to make this op atomic - cmpxchg [ECX], DX; - } - } - else - { - asm pure nothrow @nogc @trusted - { - mov EAX, val; - mov AX, [EAX]; - } - } - } - else static if ( T.sizeof == int.sizeof ) - { - ////////////////////////////////////////////////////////////////// - // 4 Byte Load - ////////////////////////////////////////////////////////////////// - - static if ( needsLoadBarrier!(ms) ) - { - asm pure nothrow @nogc @trusted - { - mov EDX, 0; - mov EAX, 0; - mov ECX, val; - lock; // lock always needed to make this op atomic - cmpxchg [ECX], EDX; - } - } - else - { - asm pure nothrow @nogc @trusted - { - mov EAX, val; - mov EAX, [EAX]; - } - } - } - else static if ( T.sizeof == long.sizeof && has64BitCAS 
) - { - ////////////////////////////////////////////////////////////////// - // 8 Byte Load on a 32-Bit Processor - ////////////////////////////////////////////////////////////////// - - asm pure nothrow @nogc @trusted - { - push EDI; - push EBX; - mov EBX, 0; - mov ECX, 0; - mov EAX, 0; - mov EDX, 0; - mov EDI, val; - lock; // lock always needed to make this op atomic - cmpxchg8b [EDI]; - pop EBX; - pop EDI; - } - } - else - { - static assert( false, "Invalid template type specified." ); - } - } - - void atomicStore(MemoryOrder ms = MemoryOrder.seq, T, V1)( ref shared T val, V1 newval ) pure nothrow @nogc @safe - if ( __traits( compiles, { val = newval; } ) ) - { - static assert( ms != MemoryOrder.acq, "invalid MemoryOrder for atomicStore()" ); - static assert( __traits(isPOD, T), "argument to atomicStore() must be POD" ); - - static if ( T.sizeof == byte.sizeof ) - { - ////////////////////////////////////////////////////////////////// - // 1 Byte Store - ////////////////////////////////////////////////////////////////// - - static if ( needsStoreBarrier!(ms) ) - { - asm pure nothrow @nogc @trusted - { - mov EAX, val; - mov DL, newval; - lock; - xchg [EAX], DL; - } - } - else - { - asm pure nothrow @nogc @trusted - { - mov EAX, val; - mov DL, newval; - mov [EAX], DL; - } - } - } - else static if ( T.sizeof == short.sizeof ) - { - ////////////////////////////////////////////////////////////////// - // 2 Byte Store - ////////////////////////////////////////////////////////////////// - - static if ( needsStoreBarrier!(ms) ) - { - asm pure nothrow @nogc @trusted - { - mov EAX, val; - mov DX, newval; - lock; - xchg [EAX], DX; - } - } - else - { - asm pure nothrow @nogc @trusted - { - mov EAX, val; - mov DX, newval; - mov [EAX], DX; - } - } - } - else static if ( T.sizeof == int.sizeof ) - { - ////////////////////////////////////////////////////////////////// - // 4 Byte Store - ////////////////////////////////////////////////////////////////// - - static if ( 
needsStoreBarrier!(ms) ) - { - asm pure nothrow @nogc @trusted - { - mov EAX, val; - mov EDX, newval; - lock; - xchg [EAX], EDX; - } - } - else - { - asm pure nothrow @nogc @trusted - { - mov EAX, val; - mov EDX, newval; - mov [EAX], EDX; - } - } - } - else static if ( T.sizeof == long.sizeof && has64BitCAS ) - { - ////////////////////////////////////////////////////////////////// - // 8 Byte Store on a 32-Bit Processor - ////////////////////////////////////////////////////////////////// - - asm pure nothrow @nogc @trusted - { - push EDI; - push EBX; - lea EDI, newval; - mov EBX, [EDI]; - mov ECX, 4[EDI]; - mov EDI, val; - mov EAX, [EDI]; - mov EDX, 4[EDI]; - L1: lock; // lock always needed to make this op atomic - cmpxchg8b [EDI]; - jne L1; - pop EBX; - pop EDI; - } - } - else - { - static assert( false, "Invalid template type specified." ); - } + static assert( false, "Float type " ~ V.stringof ~ " not supported."); + I r = core.internal.atomic.atomicExchange(cast(shared(I)*)here, *cast(I*)&exchangeWith); + return *cast(shared(T)*)&r; } + else + return core.internal.atomic.atomicExchange(here, exchangeWith); +} +/// Ditto +shared(T) atomicExchange(MemoryOrder ms = MemoryOrder.seq,T,V)( shared(T)* here, shared(V) exchangeWith ) pure nothrow @nogc @safe + if ( is(T == class) && __traits( compiles, { *here = exchangeWith; } ) ) +in ( atomicPtrIsProperlyAligned( here ), "Argument `here` is not properly aligned" ) +{ + return core.internal.atomic.atomicExchange(here, exchangeWith); +} - void atomicFence() nothrow @nogc @safe - { - import core.cpuid; +/// Ditto +shared(T) atomicExchange(MemoryOrder ms = MemoryOrder.seq,T,V)( shared(T)* here, shared(V)* exchangeWith ) pure nothrow @nogc @safe + if ( is(T U : U*) && __traits( compiles, { *here = exchangeWith; } ) ) +in ( atomicPtrIsProperlyAligned( here ), "Argument `here` is not properly aligned" ) +{ + return core.internal.atomic.atomicExchange(here, exchangeWith); +} - asm pure nothrow @nogc @trusted - { - naked; +/** 
+ * Stores 'writeThis' to the memory referenced by 'here' if the value + * referenced by 'here' is equal to 'ifThis'. This operation is both + * lock-free and atomic. + * + * Params: + * here = The address of the destination variable. + * writeThis = The value to store. + * ifThis = The comparison value. + * + * Returns: + * true if the store occurred, false if not. + */ +bool cas(T,V1,V2)( shared(T)* here, const V1 ifThis, V2 writeThis ) pure nothrow @nogc @trusted + if ( !is(T == class) && !is(T U : U*) && __traits( compiles, { *here = writeThis; } ) ) +in ( atomicPtrIsProperlyAligned( here ), "Argument `here` is not properly aligned" ) +{ + static if ( __traits(isFloating, T) ) + { + static assert ( __traits(isFloating, V1) && __traits(isFloating, V2), "Mismatching argument types." ); + static if ( T.sizeof == 4 ) + alias IntTy = uint; + else static if ( T.sizeof == 8 ) + alias IntTy = ulong; + return atomicCompareExchangeStrongNoResult( cast(IntTy*)here, *cast(IntTy*)&ifThis, *cast(IntTy*)&writeThis ); + } + else + return atomicCompareExchangeStrongNoResult!( MemoryOrder.seq, MemoryOrder.seq, T )( cast(T*)here, cast()ifThis, cast()writeThis ); +} - call sse2; - test AL, AL; - jne Lcpuid; +/// Ditto +bool cas(T,V1,V2)( shared(T)* here, const shared(V1) ifThis, shared(V2) writeThis ) pure nothrow @nogc @safe + if ( is(T == class) && __traits( compiles, { *here = writeThis; } ) ) +in ( atomicPtrIsProperlyAligned( here ), "Argument `here` is not properly aligned" ) +{ + return atomicCompareExchangeStrongNoResult( here, ifThis, writeThis ); +} - // Fast path: We have SSE2, so just use mfence. 
- mfence; - jmp Lend; +/// Ditto +bool cas(T,V1,V2)( shared(T)* here, const shared(V1)* ifThis, shared(V2)* writeThis ) pure nothrow @nogc @safe + if ( is(T U : U*) && __traits( compiles, { *here = writeThis; } ) ) +in ( atomicPtrIsProperlyAligned( here ), "Argument `here` is not properly aligned" ) +{ + return atomicCompareExchangeStrongNoResult( here, ifThis, writeThis ); +} - Lcpuid: +/** + * Stores 'writeThis' to the memory referenced by 'here' if the value + * referenced by 'here' is equal to the value referenced by 'ifThis'. + * The prior value referenced by 'here' is written to `ifThis` and + * returned to the user. This operation is both lock-free and atomic. + * + * Params: + * here = The address of the destination variable. + * writeThis = The value to store. + * ifThis = The address of the value to compare, and receives the prior value of `here` as output. + * + * Returns: + * true if the store occurred, false if not. + */ +bool cas(T,V)( shared(T)* here, shared(T)* ifThis, V writeThis ) pure nothrow @nogc @trusted + if ( !is(T == class) && !is(T U : U*) && __traits( compiles, { *here = writeThis; *ifThis = *here; } ) ) +in ( atomicPtrIsProperlyAligned( here ), "Argument `here` is not properly aligned" ) +{ + static if ( __traits(isFloating, T) ) + { + static assert ( __traits(isFloating, V), "Mismatching argument types." ); + static if ( T.sizeof == 4 ) + alias IntTy = uint; + else static if ( T.sizeof == 8 ) + alias IntTy = ulong; + return atomicCompareExchangeStrong( cast(IntTy*)here, cast(IntTy*)ifThis, *cast(IntTy*)&writeThis ); + } + else + return atomicCompareExchangeStrong!( MemoryOrder.seq, MemoryOrder.seq, T )( cast(T*)here, cast(T*)ifThis, cast()writeThis ); +} - // Slow path: We use cpuid to serialize. This is - // significantly slower than mfence, but is the - // only serialization facility we have available - // on older non-SSE2 chips. 
- push EBX; +/// Ditto +bool cas(T,V)( shared(T)* here, shared(T)* ifThis, shared(V) writeThis ) pure nothrow @nogc @trusted + if ( is(T == class) && __traits( compiles, { *here = writeThis; *ifThis = *here; } ) ) +in ( atomicPtrIsProperlyAligned( here ), "Argument `here` is not properly aligned" ) +{ + return atomicCompareExchangeStrong( cast(T*)here, cast(T*)ifThis, cast()writeThis ); +} - mov EAX, 0; - cpuid; +/// Ditto +bool cas(T,V)( shared(T)* here, shared(T)* ifThis, shared(V)* writeThis ) pure nothrow @nogc @trusted + if ( is(T U : U*) && __traits( compiles, { *here = writeThis; *ifThis = *here; } ) ) +in ( atomicPtrIsProperlyAligned( here ), "Argument `here` is not properly aligned" ) +{ + return atomicCompareExchangeStrong!( MemoryOrder.seq, MemoryOrder.seq, T )( cast(T*)here, cast(T*)ifThis, writeThis ); +} - pop EBX; +/** + * Inserts a full load/store memory fence (on platforms that need it). This ensures + * that all loads and stores before a call to this function are executed before any + * loads and stores after the call. + */ +void atomicFence() nothrow @nogc @safe +{ + core.internal.atomic.atomicFence(); +} - Lend: - ret; - } - } -} -else version (AsmX86_64) +/** + * Performs the binary operation 'op' on val using 'mod' as the modifier. + * + * Params: + * val = The target variable. + * mod = The modifier to apply. + * + * Returns: + * The result of the operation. 
+ */ +TailShared!T atomicOp(string op, T, V1)( ref shared T val, V1 mod ) pure nothrow @nogc @safe + if ( __traits( compiles, mixin( "*cast(T*)&val" ~ op ~ "mod" ) ) ) +in ( atomicValueIsProperlyAligned( val ) ) { - // Uses specialized asm for fast fetch and add operations - TailShared!(T) atomicFetchAdd(T)( ref shared T val, size_t mod ) pure nothrow @nogc @trusted - if ( __traits(isIntegral, T) ) - in ( atomicValueIsProperlyAligned(val) ) - { - return atomicFetchAddImpl( val, mod ); - } - TailShared!(T) atomicFetchAddImpl(T)( ref shared T val, size_t mod ) pure nothrow @nogc @trusted - { - asm pure nothrow @nogc @trusted { naked; } - version (Windows) - { - asm pure nothrow @nogc @trusted { mov RAX, RCX; } - static if (T.sizeof == 1) asm pure nothrow @nogc @trusted { lock; xadd[RDX], AL; } - else static if (T.sizeof == 2) asm pure nothrow @nogc @trusted { lock; xadd[RDX], AX; } - else static if (T.sizeof == 4) asm pure nothrow @nogc @trusted { lock; xadd[RDX], EAX; } - else static if (T.sizeof == 8) asm pure nothrow @nogc @trusted { lock; xadd[RDX], RAX; } - } - else - { - asm pure nothrow @nogc @trusted { mov RAX, RDI; } - static if (T.sizeof == 1) asm pure nothrow @nogc @trusted { lock; xadd[RSI], AL; } - else static if (T.sizeof == 2) asm pure nothrow @nogc @trusted { lock; xadd[RSI], AX; } - else static if (T.sizeof == 4) asm pure nothrow @nogc @trusted { lock; xadd[RSI], EAX; } - else static if (T.sizeof == 8) asm pure nothrow @nogc @trusted { lock; xadd[RSI], RAX; } - } - asm pure nothrow @nogc @trusted { ret; } + // binary operators + // + // + - * / % ^^ & + // | ^ << >> >>> ~ in + // == != < <= > >= + static if ( op == "+" || op == "-" || op == "*" || op == "/" || + op == "%" || op == "^^" || op == "&" || op == "|" || + op == "^" || op == "<<" || op == ">>" || op == ">>>" || + op == "~" || // skip "in" + op == "==" || op == "!=" || op == "<" || op == "<=" || + op == ">" || op == ">=" ) + { + TailShared!T get = atomicLoad!(MemoryOrder.raw)( val ); + 
mixin( "return get " ~ op ~ " mod;" ); } - - TailShared!(T) atomicFetchSub(T)( ref shared T val, size_t mod ) pure nothrow @nogc @safe - if ( __traits(isIntegral, T) ) - in ( atomicValueIsProperlyAligned(val) ) + else + // assignment operators + // + // += -= *= /= %= ^^= &= + // |= ^= <<= >>= >>>= ~= + static if ( op == "+=" && __traits(isIntegral, T) && __traits(isIntegral, V1) && T.sizeof <= size_t.sizeof && V1.sizeof <= size_t.sizeof) { - return atomicFetchAddImpl(val, -mod); + return cast(T)( atomicFetchAdd!(T)( val, mod ) + mod ); } - - TailShared!T atomicOp(string op, T, V1)( ref shared T val, V1 mod ) pure nothrow @nogc - if ( __traits( compiles, mixin( "*cast(T*)&val" ~ op ~ "mod" ) ) ) - in + else static if ( op == "-=" && __traits(isIntegral, T) && __traits(isIntegral, V1) && T.sizeof <= size_t.sizeof && V1.sizeof <= size_t.sizeof) { - assert( atomicValueIsProperlyAligned(val)); + return cast(T)( atomicFetchSub!(T)( val, mod ) - mod ); } - do + else static if ( op == "+=" || op == "-=" || op == "*=" || op == "/=" || + op == "%=" || op == "^^=" || op == "&=" || op == "|=" || + op == "^=" || op == "<<=" || op == ">>=" || op == ">>>=" ) // skip "~=" { - // binary operators - // - // + - * / % ^^ & - // | ^ << >> >>> ~ in - // == != < <= > >= - static if ( op == "+" || op == "-" || op == "*" || op == "/" || - op == "%" || op == "^^" || op == "&" || op == "|" || - op == "^" || op == "<<" || op == ">>" || op == ">>>" || - op == "~" || // skip "in" - op == "==" || op == "!=" || op == "<" || op == "<=" || - op == ">" || op == ">=" ) - { - TailShared!T get = atomicLoad!(MemoryOrder.raw)( val ); - mixin( "return get " ~ op ~ " mod;" ); - } - else - // assignment operators - // - // += -= *= /= %= ^^= &= - // |= ^= <<= >>= >>>= ~= - static if ( op == "+=" && __traits(isIntegral, T) && __traits(isIntegral, V1)) - { - return cast(T)(atomicFetchAdd!(T)(val, mod) + mod); - } - else static if ( op == "-=" && __traits(isIntegral, T) && __traits(isIntegral, V1)) - { - 
return cast(T)(atomicFetchSub!(T)(val, mod) - mod); - } - else static if ( op == "+=" || op == "-=" || op == "*=" || op == "/=" || - op == "%=" || op == "^^=" || op == "&=" || op == "|=" || - op == "^=" || op == "<<=" || op == ">>=" || op == ">>>=" ) // skip "~=" - { - TailShared!T get, set; - - do - { - get = set = atomicLoad!(MemoryOrder.raw)( val ); - mixin( "set " ~ op ~ " mod;" ); - } while ( !casByRef( val, get, set ) ); - return set; - } - else - { - static assert( false, "Operation not supported." ); - } - } + TailShared!T get, set; - shared(T) atomicExchange(MemoryOrder ms = MemoryOrder.seq,T,V)( shared(T)* here, V exchangeWith ) pure nothrow @nogc @trusted - if ( !is(T == class) && !is(T U : U*) && __traits( compiles, { *here = exchangeWith; } ) ) - in ( atomicPtrIsProperlyAligned( here ), "Argument `here` is not properly aligned" ) - { - static if ( __traits(isFloating, V) ) + do { - static if ( V.sizeof == 4 ) - alias I = uint; - else static if ( V.sizeof == 8 ) - alias I = ulong; - else - static assert( false, "Float type " ~ V.stringof ~ " not supported."); - I r = atomicExchangeImpl(cast(shared(I)*)here, *cast(I*)&exchangeWith); - return *cast(shared(T)*)&r; - } - else - return atomicExchangeImpl(here, exchangeWith); + get = set = atomicLoad!(MemoryOrder.raw)( val ); + mixin( "set " ~ op ~ " mod;" ); + } while ( !casByRef( val, get, set ) ); + return set; } - - shared(T) atomicExchange(MemoryOrder ms = MemoryOrder.seq,T,V)( shared(T)* here, shared(V) exchangeWith ) pure nothrow @nogc @safe - if ( is(T == class) && __traits( compiles, { *here = exchangeWith; } ) ) - in ( atomicPtrIsProperlyAligned( here ), "Argument `here` is not properly aligned" ) + else { - return atomicExchangeImpl(here, exchangeWith); + static assert( false, "Operation not supported." 
); } +} - shared(T) atomicExchange(MemoryOrder ms = MemoryOrder.seq,T,V)( shared(T)* here, shared(V)* exchangeWith ) pure nothrow @nogc @safe - if ( is(T U : U*) && __traits( compiles, { *here = exchangeWith; } ) ) - in ( atomicPtrIsProperlyAligned( here ), "Argument `here` is not properly aligned" ) - { - return atomicExchangeImpl(here, exchangeWith); - } - private shared(T) atomicExchangeImpl(T,V)( shared(T)* here, V exchangeWith ) pure nothrow @nogc @safe +version (CoreDdoc) +{ + /** + * Loads 'val' from memory and returns it. The memory barrier specified + * by 'ms' is applied to the operation, which is fully sequenced by + * default. Valid memory orders are MemoryOrder.raw, MemoryOrder.acq, + * and MemoryOrder.seq. + * + * Params: + * val = The target variable. + * + * Returns: + * The value of 'val'. + */ + TailShared!T atomicLoad(MemoryOrder ms = MemoryOrder.seq,T)( ref const shared T val ) pure nothrow @nogc @safe { - // Windows: here = RDX, exchangeWith = RCX - // Posix: here = RSI, exchangeWith = RDI - static if ( T.sizeof == byte.sizeof ) - { - version (Windows) - { - asm pure nothrow @nogc @trusted - { - naked; - xchg [RDX], CL; - mov AL, CL; - ret; - } - } - else - { - asm pure nothrow @nogc @trusted - { - naked; - xchg [RSI], DIL; - mov AL, DIL; - ret; - } - } - } - else static if ( T.sizeof == short.sizeof ) - { - version (Windows) - { - asm pure nothrow @nogc @trusted - { - naked; - xchg [RDX], CX; - mov AX, CX; - ret; - } - } - else - { - asm pure nothrow @nogc @trusted - { - naked; - xchg [RSI], DI; - mov AX, DI; - ret; - } - } - } - else static if ( T.sizeof == int.sizeof ) - { - version (Windows) - { - asm pure nothrow @nogc @trusted - { - naked; - xchg [RDX], ECX; - mov EAX, ECX; - ret; - } - } - else - { - asm pure nothrow @nogc @trusted - { - naked; - xchg [RSI], EDI; - mov EAX, EDI; - ret; - } - } - } - else static if ( T.sizeof == long.sizeof ) - { - version (Windows) - { - asm pure nothrow @nogc @trusted - { - naked; - xchg [RDX], RCX; - 
mov RAX, RCX; - ret; - } - } - else - { - asm pure nothrow @nogc @trusted - { - naked; - xchg [RSI], RDI; - mov RAX, RDI; - ret; - } - } - } - else - { - static assert( false, "Invalid template type specified." ); - } + return TailShared!T.init; } - bool casByRef(T,V1,V2)( ref T value, V1 ifThis, V2 writeThis ) pure nothrow @nogc @trusted - { - return cas(&value, ifThis, writeThis); - } - bool cas(T,V1,V2)( shared(T)* here, const V1 ifThis, V2 writeThis ) pure nothrow @nogc @trusted - if ( !is(T == class) && !is(T U : U*) && __traits( compiles, { *here = writeThis; } ) ) - in ( atomicPtrIsProperlyAligned( here ), "Argument `here` is not properly aligned" ) + /** + * Writes 'newval' into 'val'. The memory barrier specified by 'ms' is + * applied to the operation, which is fully sequenced by default. + * Valid memory orders are MemoryOrder.raw, MemoryOrder.rel, and + * MemoryOrder.seq. + * + * Params: + * val = The target variable. + * newval = The value to store. + */ + void atomicStore(MemoryOrder ms = MemoryOrder.seq,T,V1)( ref shared T val, V1 newval ) pure nothrow @nogc @safe + if ( __traits( compiles, { val = newval; } ) ) { - static assert (V1.sizeof == V2.sizeof, "Mismatching argument sizes"); - static if ( V2.sizeof == 4 && __traits(isFloating, V2) ) - { - uint cmp = *cast(uint*)&ifThis; - uint arg = *cast(uint*)&writeThis; - } - else static if ( V2.sizeof == 8 && __traits(isFloating, V2) ) - { - ulong cmp = *cast(ulong*)&ifThis; - ulong arg = *cast(ulong*)&writeThis; - } - else - { - alias cmp = ifThis; - alias arg = writeThis; - } - return casImplNoResult(here, cmp, arg); - } - bool cas(T,V1,V2)( shared(T)* here, const shared(V1) ifThis, shared(V2) writeThis ) pure nothrow @nogc @safe - if ( is(T == class) && __traits( compiles, { *here = writeThis; } ) ) - in ( atomicPtrIsProperlyAligned( here ), "Argument `here` is not properly aligned" ) - { - return casImplNoResult(here, ifThis, writeThis); } - - bool cas(T,V1,V2)( shared(T)* here, const shared(V1)* 
ifThis, shared(V2)* writeThis ) pure nothrow @nogc @safe - if ( is(T U : U*) && __traits( compiles, { *here = writeThis; } ) ) - in ( atomicPtrIsProperlyAligned( here ), "Argument `here` is not properly aligned" ) +} +else version (AsmX86_32) +{ + TailShared!T atomicLoad(MemoryOrder ms = MemoryOrder.seq, T)( ref const shared T val ) pure nothrow @nogc @safe + if (!__traits(isFloating, T)) { - return casImplNoResult(here, ifThis, writeThis); - } + static assert( ms != MemoryOrder.rel, "invalid MemoryOrder for atomicLoad()" ); + static assert( __traits(isPOD, T), "argument to atomicLoad() must be POD" ); - private bool casImplNoResult(T,V1,V2)( shared(T)* here, V1 ifThis, V2 writeThis ) pure nothrow @nogc @safe - { - // Windows: here = *R8, ifThis = RDX, writeThis = RCX - // Posix: here = *RDX, ifThis = RSI, writeThis = RDI static if ( T.sizeof == byte.sizeof ) { ////////////////////////////////////////////////////////////////// - // 1 Byte CAS + // 1 Byte Load ////////////////////////////////////////////////////////////////// - version (Windows) + + static if ( needsLoadBarrier!(ms) ) { asm pure nothrow @nogc @trusted { - naked; - mov AL, DL; - lock; cmpxchg [R8], CL; - setz AL; - ret; + mov DL, 0; + mov AL, 0; + mov ECX, val; + lock; // lock always needed to make this op atomic + cmpxchg [ECX], DL; } } else { asm pure nothrow @nogc @trusted { - naked; - mov AL, SIL; - lock; cmpxchg [RDX], DIL; - setz AL; - ret; + mov EAX, val; + mov AL, [EAX]; } } } else static if ( T.sizeof == short.sizeof ) { ////////////////////////////////////////////////////////////////// - // 2 Byte CAS + // 2 Byte Load ////////////////////////////////////////////////////////////////// - version (Windows) + + static if ( needsLoadBarrier!(ms) ) { asm pure nothrow @nogc @trusted { - naked; - mov AX, DX; - lock; cmpxchg [R8], CX; - setz AL; - ret; + mov DX, 0; + mov AX, 0; + mov ECX, val; + lock; // lock always needed to make this op atomic + cmpxchg [ECX], DX; } } else { asm pure nothrow @nogc 
@trusted { - naked; - mov AX, SI; - lock; cmpxchg [RDX], DI; - setz AL; - ret; + mov EAX, val; + mov AX, [EAX]; } } } else static if ( T.sizeof == int.sizeof ) { ////////////////////////////////////////////////////////////////// - // 4 Byte CAS - ////////////////////////////////////////////////////////////////// - version (Windows) - { - asm pure nothrow @nogc @trusted - { - naked; - mov EAX, EDX; - lock; cmpxchg [R8], ECX; - setz AL; - ret; - } - } - else - { - asm pure nothrow @nogc @trusted - { - naked; - mov EAX, ESI; - lock; cmpxchg [RDX], EDI; - setz AL; - ret; - } - } - } - else static if ( T.sizeof == long.sizeof ) - { - ////////////////////////////////////////////////////////////////// - // 8 Byte CAS on a 64-Bit Processor + // 4 Byte Load ////////////////////////////////////////////////////////////////// - version (Windows) + + static if ( needsLoadBarrier!(ms) ) { asm pure nothrow @nogc @trusted { - naked; - mov RAX, RDX; - lock; cmpxchg [R8], RCX; - setz AL; - ret; + mov EDX, 0; + mov EAX, 0; + mov ECX, val; + lock; // lock always needed to make this op atomic + cmpxchg [ECX], EDX; } } else { asm pure nothrow @nogc @trusted { - naked; - mov RAX, RSI; - lock; cmpxchg [RDX], RDI; - setz AL; - ret; + mov EAX, val; + mov EAX, [EAX]; } } } - else static if ( T.sizeof == long.sizeof*2 && has128BitCAS) + else static if ( T.sizeof == long.sizeof && has64BitCAS ) { ////////////////////////////////////////////////////////////////// - // 16 Byte CAS on a 64-Bit Processor + // 8 Byte Load on a 32-Bit Processor ////////////////////////////////////////////////////////////////// - - // Windows: here = *R8, ifThis = *RDX, writeThis = *RCX - // Posix: here = *R8, ifThis = RCX:RDX, writeThis = RSI:RDI - version (Windows) - { - asm pure nothrow @nogc @trusted - { - naked; - push RBX; - mov RAX, [RDX]; - mov RDX, 8[RDX]; - mov RBX, [RCX]; - mov RCX, 8[RCX]; - lock; cmpxchg16b [R8]; - setz AL; - pop RBX; - ret; - } - } - else - { - asm pure nothrow @nogc @trusted - { - 
naked; - push RBX; - mov RAX, RDX; - mov RDX, RCX; - mov RBX, RDI; - mov RCX, RSI; - lock; cmpxchg16b [R8]; - setz AL; - pop RBX; - ret; - } + + asm pure nothrow @nogc @trusted + { + push EDI; + push EBX; + mov EBX, 0; + mov ECX, 0; + mov EAX, 0; + mov EDX, 0; + mov EDI, val; + lock; // lock always needed to make this op atomic + cmpxchg8b [EDI]; + pop EBX; + pop EDI; } } else @@ -1454,243 +494,111 @@ else version (AsmX86_64) } } - bool cas(T,V1,V2)( shared(T)* here, V1* ifThis, V2 writeThis ) pure nothrow @nogc @trusted - if ( !is(T == class) && !is(T U : U*) && __traits( compiles, { *here = writeThis; } ) ) - in ( atomicPtrIsProperlyAligned( here ), "Argument `here` is not properly aligned" ) - { - static if ( V2.sizeof == 4 && __traits(isFloating, V2) ) - uint arg = *cast(uint*)&writeThis; - else static if ( V2.sizeof == 8 && __traits(isFloating, V2) ) - ulong arg = *cast(ulong*)&writeThis; - else - alias arg = writeThis; - return casImplWithResult(here, *ifThis, arg); - } - - bool cas(T,V1,V2)( shared(T)* here, shared(V1)* ifThis, shared(V2) writeThis ) pure nothrow @nogc @safe - if ( is(T == class) && __traits( compiles, { *here = writeThis; } ) ) - in ( atomicPtrIsProperlyAligned( here ), "Argument `here` is not properly aligned" ) - { - return casImplWithResult(here, *ifThis, writeThis); - } - - bool cas(T,V1,V2)( shared(T)* here, shared(V1*)* ifThis, shared(V2)* writeThis ) pure nothrow @nogc @safe - if ( is(T U : U*) && __traits( compiles, { *here = writeThis; } ) ) - in ( atomicPtrIsProperlyAligned( here ), "Argument `here` is not properly aligned" ) + void atomicStore(MemoryOrder ms = MemoryOrder.seq, T, V1)( ref shared T val, V1 newval ) pure nothrow @nogc @safe + if ( __traits( compiles, { val = newval; } ) ) { - return casImplWithResult(here, *ifThis, writeThis); - } + static assert( ms != MemoryOrder.acq, "invalid MemoryOrder for atomicStore()" ); + static assert( __traits(isPOD, T), "argument to atomicStore() must be POD" ); - private bool 
casImplWithResult(T,V1,V2)( shared(T)* here, ref V1 ifThis, V2 writeThis ) pure nothrow @nogc @safe - { - // Windows: here = *R8, ifThis = *RDX, writeThis = RCX - // Posix: here = *RDX, ifThis = *RSI, writeThis = RDI static if ( T.sizeof == byte.sizeof ) { ////////////////////////////////////////////////////////////////// - // 1 Byte CAS + // 1 Byte Store ////////////////////////////////////////////////////////////////// - version (Windows) + + static if ( needsStoreBarrier!(ms) ) { asm pure nothrow @nogc @trusted { - naked; - mov AL, [RDX]; - lock; cmpxchg [R8], CL; - jne compare_fail; - mov AL, 1; - ret; - compare_fail: - mov [RDX], AL; - xor AL, AL; - ret; + mov EAX, val; + mov DL, newval; + lock; + xchg [EAX], DL; } } else { asm pure nothrow @nogc @trusted { - naked; - mov AL, [RSI]; - lock; cmpxchg [RDX], DIL; - jne compare_fail; - mov AL, 1; - ret; - compare_fail: - mov [RSI], AL; - xor AL, AL; - ret; + mov EAX, val; + mov DL, newval; + mov [EAX], DL; } } } else static if ( T.sizeof == short.sizeof ) { ////////////////////////////////////////////////////////////////// - // 2 Byte CAS + // 2 Byte Store ////////////////////////////////////////////////////////////////// - version (Windows) + + static if ( needsStoreBarrier!(ms) ) { asm pure nothrow @nogc @trusted { - naked; - mov AX, [RDX]; - lock; cmpxchg [R8], CX; - jne compare_fail; - mov AL, 1; - ret; - compare_fail: - mov [RDX], AX; - xor AL, AL; - ret; + mov EAX, val; + mov DX, newval; + lock; + xchg [EAX], DX; } } else { asm pure nothrow @nogc @trusted { - naked; - mov AX, [RSI]; - lock; cmpxchg [RDX], DI; - jne compare_fail; - mov AL, 1; - ret; - compare_fail: - mov [RSI], AX; - xor AL, AL; - ret; + mov EAX, val; + mov DX, newval; + mov [EAX], DX; } } } else static if ( T.sizeof == int.sizeof ) { ////////////////////////////////////////////////////////////////// - // 4 Byte CAS - ////////////////////////////////////////////////////////////////// - version (Windows) - { - asm pure nothrow @nogc @trusted - 
{ - naked; - mov EAX, [RDX]; - lock; cmpxchg [R8], ECX; - jne compare_fail; - mov AL, 1; - ret; - compare_fail: - mov [RDX], EAX; - xor AL, AL; - ret; - } - } - else - { - asm pure nothrow @nogc @trusted - { - naked; - mov EAX, [RSI]; - lock; cmpxchg [RDX], EDI; - jne compare_fail; - mov AL, 1; - ret; - compare_fail: - mov [RSI], EAX; - xor AL, AL; - ret; - } - } - } - else static if ( T.sizeof == long.sizeof ) - { - ////////////////////////////////////////////////////////////////// - // 8 Byte CAS on a 64-Bit Processor + // 4 Byte Store ////////////////////////////////////////////////////////////////// - version (Windows) + + static if ( needsStoreBarrier!(ms) ) { asm pure nothrow @nogc @trusted { - naked; - mov RAX, [RDX]; - lock; cmpxchg [R8], RCX; - jne compare_fail; - mov AL, 1; - ret; - compare_fail: - mov [RDX], RAX; - xor AL, AL; - ret; + mov EAX, val; + mov EDX, newval; + lock; + xchg [EAX], EDX; } } else { asm pure nothrow @nogc @trusted { - naked; - mov RAX, [RSI]; - lock; cmpxchg [RDX], RDI; - jne compare_fail; - mov AL, 1; - ret; - compare_fail: - mov [RSI], RAX; - xor AL, AL; - ret; + mov EAX, val; + mov EDX, newval; + mov [EAX], EDX; } } } - else static if ( T.sizeof == long.sizeof*2 && has128BitCAS) + else static if ( T.sizeof == long.sizeof && has64BitCAS ) { ////////////////////////////////////////////////////////////////// - // 16 Byte CAS on a 64-Bit Processor + // 8 Byte Store on a 32-Bit Processor ////////////////////////////////////////////////////////////////// - // Windows: here = *R8, ifThis = *RDX, writeThis = *RCX - // Posix: here = *RCX, ifThis = *RDX, writeThis = RSI:RDI - version (Windows) - { - asm pure nothrow @nogc @trusted - { - naked; - push RBX; - mov R9, RDX; - mov RAX, [RDX]; - mov RDX, 8[RDX]; - mov RBX, [RCX]; - mov RCX, 8[RCX]; - lock; cmpxchg16b [R8]; - pop RBX; - jne compare_fail; - mov AL, 1; - ret; - compare_fail: - mov [R9], RAX; - mov 8[R9], RDX; - xor AL, AL; - ret; - } - } - else + asm pure nothrow @nogc @trusted { 
- asm pure nothrow @nogc @trusted - { - naked; - push RBX; - mov R8, RCX; - mov R9, RDX; - mov RAX, [RDX]; - mov RDX, 8[RDX]; - mov RBX, RDI; - mov RCX, RSI; - lock; cmpxchg16b [R8]; - pop RBX; - jne compare_fail; - mov AL, 1; - ret; - compare_fail: - mov [R9], RAX; - mov 8[R9], RDX; - xor AL, AL; - ret; - } + push EDI; + push EBX; + lea EDI, newval; + mov EBX, [EDI]; + mov ECX, 4[EDI]; + mov EDI, val; + mov EAX, [EDI]; + mov EDX, 4[EDI]; + L1: lock; // lock always needed to make this op atomic + cmpxchg8b [EDI]; + jne L1; + pop EBX; + pop EDI; } } else @@ -1698,36 +606,9 @@ else version (AsmX86_64) static assert( false, "Invalid template type specified." ); } } - - - enum MemoryOrder - { - raw, - acq, - rel, - seq, - } - - - private - { - // NOTE: x86 loads implicitly have acquire semantics so a memory - // barrier is only necessary on releases. - template needsLoadBarrier( MemoryOrder ms ) - { - enum bool needsLoadBarrier = ms == MemoryOrder.seq; - } - - - // NOTE: x86 stores implicitly have release semantics so a memory - // barrier is only necessary on acquires. - template needsStoreBarrier( MemoryOrder ms ) - { - enum bool needsStoreBarrier = ms == MemoryOrder.seq; - } - } - - +} +else version (AsmX86_64) +{ TailShared!T atomicLoad(MemoryOrder ms = MemoryOrder.seq, T)( ref const shared T val ) pure nothrow @nogc @safe if (!__traits(isFloating, T)) { @@ -2062,19 +943,6 @@ else version (AsmX86_64) static assert( false, "Invalid template type specified." ); } } - - - void atomicFence() nothrow @nogc @safe - { - // SSE2 is always present in 64-bit x86 chips. - asm nothrow @nogc @trusted - { - naked; - - mfence; - ret; - } - } } // This is an ABI adapter that works on all architectures. It type puns @@ -2082,7 +950,7 @@ else version (AsmX86_64) // them back. This is necessary so that they get returned in floating // point instead of integer registers. 
TailShared!T atomicLoad(MemoryOrder ms = MemoryOrder.seq, T)( ref const shared T val ) pure nothrow @nogc @trusted -if (__traits(isFloating, T)) + if (__traits(isFloating, T)) { static if (T.sizeof == int.sizeof) { @@ -2104,6 +972,144 @@ if (__traits(isFloating, T)) } } +private +{ + // NOTE: x86 loads implicitly have acquire semantics so a memory + // barrier is only necessary on releases. + template needsLoadBarrier( MemoryOrder ms ) + { + enum bool needsLoadBarrier = ms == MemoryOrder.seq; + } + + + // NOTE: x86 stores implicitly have release semantics so a memory + // barrier is only necessary on acquires. + template needsStoreBarrier( MemoryOrder ms ) + { + enum bool needsStoreBarrier = ms == MemoryOrder.seq; + } + + // TODO: it'd be nice if we had @trusted scopes; we could remove this... + bool casByRef(T,V1,V2)( ref T value, V1 ifThis, V2 writeThis ) pure nothrow @nogc @trusted + { + return cas( &value, ifThis, writeThis ); + } + + /* Construct a type with a shared tail, and if possible with an unshared + head. */ + template TailShared(U) if (!is(U == shared)) + { + alias TailShared = .TailShared!(shared U); + } + template TailShared(S) if (is(S == shared)) + { + // Get the unshared variant of S. + static if (is(S U == shared U)) {} + else static assert(false, "Should never be triggered. The `static " ~ + "if` declares `U` as the unshared version of the shared type " ~ + "`S`. `S` is explicitly declared as shared, so getting `U` " ~ + "should always work."); + + static if (is(S : U)) + alias TailShared = U; + else static if (is(S == struct)) + { + enum implName = () { + /* Start with "_impl". If S has a field with that name, append + underscores until the clash is resolved. 
*/ + string name = "_impl"; + string[] fieldNames; + static foreach (alias field; S.tupleof) + { + fieldNames ~= __traits(identifier, field); + } + static bool canFind(string[] haystack, string needle) + { + foreach (candidate; haystack) + { + if (candidate == needle) return true; + } + return false; + } + while (canFind(fieldNames, name)) name ~= "_"; + return name; + } (); + struct TailShared + { + static foreach (i, alias field; S.tupleof) + { + /* On @trusted: This is casting the field from shared(Foo) + to TailShared!Foo. The cast is safe because the field has + been loaded and is not shared anymore. */ + mixin(" + @trusted @property + ref " ~ __traits(identifier, field) ~ "() + { + alias R = TailShared!(typeof(field)); + return * cast(R*) &" ~ implName ~ ".tupleof[i]; + } + "); + } + mixin(" + S " ~ implName ~ "; + alias " ~ implName ~ " this; + "); + } + } + else + alias TailShared = S; + } + @safe unittest + { + // No tail (no indirections) -> fully unshared. + + static assert(is(TailShared!int == int)); + static assert(is(TailShared!(shared int) == int)); + + static struct NoIndir { int i; } + static assert(is(TailShared!NoIndir == NoIndir)); + static assert(is(TailShared!(shared NoIndir) == NoIndir)); + + // Tail can be independently shared or is already -> tail-shared. 
+ + static assert(is(TailShared!(int*) == shared(int)*)); + static assert(is(TailShared!(shared int*) == shared(int)*)); + static assert(is(TailShared!(shared(int)*) == shared(int)*)); + + static assert(is(TailShared!(int[]) == shared(int)[])); + static assert(is(TailShared!(shared int[]) == shared(int)[])); + static assert(is(TailShared!(shared(int)[]) == shared(int)[])); + + static struct S1 { shared int* p; } + static assert(is(TailShared!S1 == S1)); + static assert(is(TailShared!(shared S1) == S1)); + + static struct S2 { shared(int)* p; } + static assert(is(TailShared!S2 == S2)); + static assert(is(TailShared!(shared S2) == S2)); + + // Tail follows shared-ness of head -> fully shared. + + static class C { int i; } + static assert(is(TailShared!C == shared C)); + static assert(is(TailShared!(shared C) == shared C)); + + /* However, structs get a wrapper that has getters which cast to + TailShared. */ + + static struct S3 { int* p; int _impl; int _impl_; int _impl__; } + static assert(!is(TailShared!S3 : S3)); + static assert(is(TailShared!S3 : shared S3)); + static assert(is(TailShared!(shared S3) == TailShared!S3)); + + static struct S4 { shared(int)** p; } + static assert(!is(TailShared!S4 : S4)); + static assert(is(TailShared!S4 : shared S4)); + static assert(is(TailShared!(shared S4) == TailShared!S4)); + } +} + + //////////////////////////////////////////////////////////////////////////////// // Unit Tests //////////////////////////////////////////////////////////////////////////////// @@ -2148,7 +1154,7 @@ version (unittest) atom = cast(shared(T))null; - T arg = base; + shared(T) arg = base; assert( cas( &atom, &arg, val ), T.stringof ); assert( arg is base, T.stringof ); assert( atom is val, T.stringof ); diff --git a/src/core/internal/atomic.d b/src/core/internal/atomic.d new file mode 100644 index 0000000000..86ab4bcd50 --- /dev/null +++ b/src/core/internal/atomic.d @@ -0,0 +1,533 @@ +/** +* The core.internal.atomic module contains the low-level
atomic features available in hardware. +* This module may be a routing layer for compiler intrinsics. +* +* Copyright: Copyright Manu Evans 2019. +* License: $(LINK2 http://www.boost.org/LICENSE_1_0.txt, Boost License 1.0) +* Authors: Sean Kelly, Alex Rønne Petersen, Manu Evans +* Source: $(DRUNTIMESRC core/internal/_atomic.d) +*/ + +module core.internal.atomic; + +import core.atomic : MemoryOrder; + +private +{ + enum : int + { + AX, BX, CX, DX, DI, SI, R8, R9 + } + + immutable string[4][8] registerNames = [ + [ "AL", "AX", "EAX", "RAX" ], + [ "BL", "BX", "EBX", "RBX" ], + [ "CL", "CX", "ECX", "RCX" ], + [ "DL", "DX", "EDX", "RDX" ], + [ "DIL", "DI", "EDI", "RDI" ], + [ "SIL", "SI", "ESI", "RSI" ], + [ "R8B", "R8W", "R8D", "R8" ], + [ "R9B", "R9W", "R9D", "R9" ], + ]; + + template RegIndex(T) + { + static if (T.sizeof == 1) + enum RegIndex = 0; + else static if (T.sizeof == 2) + enum RegIndex = 1; + else static if (T.sizeof == 4) + enum RegIndex = 2; + else static if (T.sizeof == 8) + enum RegIndex = 3; + else + static assert(false, "Invalid type"); + } + + enum SizedReg(int reg, T = size_t) = registerNames[reg][RegIndex!T]; +} + +T atomicLoad(MemoryOrder order = MemoryOrder.seq, T)(T* src) pure nothrow @nogc @safe +{ + +} + +void atomicStore(MemoryOrder order = MemoryOrder.seq, T)(T* src, T value) pure nothrow @nogc @safe +{ + +} + +T atomicFetchAdd(MemoryOrder order = MemoryOrder.seq, bool result = true, T)(T* dest, T value) pure nothrow @nogc @safe + if (is(T : ulong)) +{ + version (D_InlineAsm_X86) + { + static assert(T.sizeof <= 4, "64bit atomicFetchAdd not supported on 32bit target." 
); + + enum DestReg = SizedReg!DX; + enum ValReg = SizedReg!(AX, T); + + mixin (simpleFormat(q{ + asm pure nothrow @nogc @trusted + { + mov %1, value; + mov %0, dest; + lock; xadd[%0], %1; + } + }, DestReg, ValReg)); + } + else version (D_InlineAsm_X86_64) + { + version (Windows) + { + enum DestReg = SizedReg!DX; + enum ValReg = SizedReg!(CX, T); + } + else + { + enum DestReg = SizedReg!SI; + enum ValReg = SizedReg!(DI, T); + } + enum ResReg = result ? SizedReg!(AX, T) : null; + + mixin (simpleFormat(q{ + asm pure nothrow @nogc @trusted + { + naked; + lock; xadd[%0], %1; +?2 mov %2, %1; + ret; + } + }, DestReg, ValReg, ResReg)); + } + else + static assert (false, "Unsupported architecture."); +} + +T atomicFetchSub(MemoryOrder order = MemoryOrder.seq, bool result = true, T)(T* dest, T value) pure nothrow @nogc @safe + if (is(T : ulong)) +{ + return atomicFetchAdd(dest, cast(T)-cast(IntOrLong!T)value); +} + +T atomicExchange(MemoryOrder order = MemoryOrder.seq, bool result = true, T)(T* dest, T value) pure nothrow @nogc @safe + if (is(T : ulong) || is(T == class) || is(T U : U*)) +{ + version (D_InlineAsm_X86) + { + static assert(T.sizeof <= 4, "64bit atomicExchange not supported on 32bit target." ); + + enum DestReg = SizedReg!CX; + enum ValReg = SizedReg!(AX, T); + + mixin (simpleFormat(q{ + asm pure nothrow @nogc @trusted + { + mov %1, value; + mov %0, dest; + xchg [%0], %1; + } + }, DestReg, ValReg)); + } + else version (D_InlineAsm_X86_64) + { + version (Windows) + { + enum DestReg = SizedReg!DX; + enum ValReg = SizedReg!(CX, T); + } + else + { + enum DestReg = SizedReg!SI; + enum ValReg = SizedReg!(DI, T); + } + enum ResReg = result ? 
SizedReg!(AX, T) : null; + + mixin (simpleFormat(q{ + asm pure nothrow @nogc @trusted + { + naked; + xchg [%0], %1; +?2 mov %2, %1; + ret; + } + }, DestReg, ValReg, ResReg)); + } + else + static assert (false, "Unsupported architecture."); +} + +alias atomicCompareExchangeWeak = atomicCompareExchangeStrong; + +bool atomicCompareExchangeStrong(MemoryOrder succ = MemoryOrder.seq, MemoryOrder fail = MemoryOrder.seq, T)(T* dest, T* compare, T value) pure nothrow @nogc @safe + if (CanCAS!T) +{ + version (D_InlineAsm_X86) + { + static if (T.sizeof <= 4) + { + enum DestAddr = SizedReg!CX; + enum CmpAddr = SizedReg!DI; + enum Val = SizedReg!(DX, T); + enum Cmp = SizedReg!(AX, T); + + mixin (simpleFormat(q{ + asm pure nothrow @nogc @trusted + { + push %1; + mov %2, value; + mov %1, compare; + mov %3, [%1]; + mov %0, dest; + lock; cmpxchg [%0], %2; + mov [%1], %3; + setz AL; + pop %1; + } + }, DestAddr, CmpAddr, Val, Cmp)); + } + else static if (T.sizeof == 8) + { + asm pure nothrow @nogc @trusted + { + push EDI; + push EBX; + lea EDI, value; + mov EBX, [EDI]; + mov ECX, 4[EDI]; + mov EDI, compare; + mov EAX, [EDI]; + mov EDX, 4[EDI]; + mov EDI, dest; + lock; cmpxchg8b [EDI]; + mov EDI, compare; + mov [EDI], EAX; + mov 4[EDI], EDX; + setz AL; + pop EBX; + pop EDI; + } + } + else + static assert(T.sizeof <= 8, "128bit atomicCompareExchangeStrong not supported on 32bit target." 
); + } + else version (D_InlineAsm_X86_64) + { + static if (T.sizeof <= 8) + { + version (Windows) + { + enum DestAddr = SizedReg!R8; + enum CmpAddr = SizedReg!DX; + enum Val = SizedReg!(CX, T); + } + else + { + enum DestAddr = SizedReg!DX; + enum CmpAddr = SizedReg!SI; + enum Val = SizedReg!(DI, T); + } + enum Res = SizedReg!(AX, T); + + mixin (simpleFormat(q{ + asm pure nothrow @nogc @trusted + { + naked; + mov %3, [%1]; + lock; cmpxchg [%0], %2; + jne compare_fail; + mov AL, 1; + ret; + compare_fail: + mov [%1], %3; + xor AL, AL; + ret; + } + }, DestAddr, CmpAddr, Val, Res)); + } + else + { + version (Windows) + { + asm pure nothrow @nogc @trusted + { + naked; + push RBX; + mov R9, RDX; + mov RAX, [RDX]; + mov RDX, 8[RDX]; + mov RBX, [RCX]; + mov RCX, 8[RCX]; + lock; cmpxchg16b [R8]; + pop RBX; + jne compare_fail; + mov AL, 1; + ret; + compare_fail: + mov [R9], RAX; + mov 8[R9], RDX; + xor AL, AL; + ret; + } + } + else + { + asm pure nothrow @nogc @trusted + { + naked; + push RBX; + mov R8, RCX; + mov R9, RDX; + mov RAX, [RDX]; + mov RDX, 8[RDX]; + mov RBX, RDI; + mov RCX, RSI; + lock; cmpxchg16b [R8]; + pop RBX; + jne compare_fail; + mov AL, 1; + ret; + compare_fail: + mov [R9], RAX; + mov 8[R9], RDX; + xor AL, AL; + ret; + } + } + } + } + else + static assert (false, "Unsupported architecture."); +} + +bool atomicCompareExchangeStrongNoResult(MemoryOrder succ = MemoryOrder.seq, MemoryOrder fail = MemoryOrder.seq, T)(T* dest, T compare, T value) pure nothrow @nogc @safe + if (CanCAS!T) +{ + version (D_InlineAsm_X86) + { + static if (T.sizeof <= 4) + { + enum DestAddr = SizedReg!CX; + enum Cmp = SizedReg!(AX, T); + enum Val = SizedReg!(DX, T); + + mixin (simpleFormat(q{ + asm pure nothrow @nogc @trusted + { + mov %2, value; + mov %1, compare; + mov %0, dest; + lock; cmpxchg [%0], %2; + setz AL; + } + }, DestAddr, Cmp, Val)); + } + else static if (T.sizeof == 8) + { + asm pure nothrow @nogc @trusted + { + push EDI; + push EBX; + lea EDI, value; + mov EBX, [EDI]; 
+ mov ECX, 4[EDI]; + lea EDI, compare; + mov EAX, [EDI]; + mov EDX, 4[EDI]; + mov EDI, dest; + lock; cmpxchg8b [EDI]; + setz AL; + pop EBX; + pop EDI; + } + } + else + static assert(T.sizeof <= 8, "128bit atomicCompareExchangeStrong not supported on 32bit target." ); + } + else version (D_InlineAsm_X86_64) + { + static if (T.sizeof <= 8) + { + version (Windows) + { + enum DestAddr = SizedReg!R8; + enum Cmp = SizedReg!(DX, T); + enum Val = SizedReg!(CX, T); + } + else + { + enum DestAddr = SizedReg!DX; + enum Cmp = SizedReg!(SI, T); + enum Val = SizedReg!(DI, T); + } + enum AXReg = SizedReg!(AX, T); + + mixin (simpleFormat(q{ + asm pure nothrow @nogc @trusted + { + naked; + mov %3, %1; + lock; cmpxchg [%0], %2; + setz AL; + ret; + } + }, DestAddr, Cmp, Val, AXReg)); + } + else + { + version (Windows) + { + asm pure nothrow @nogc @trusted + { + naked; + push RBX; + mov RAX, [RDX]; + mov RDX, 8[RDX]; + mov RBX, [RCX]; + mov RCX, 8[RCX]; + lock; cmpxchg16b [R8]; + setz AL; + pop RBX; + ret; + } + } + else + { + asm pure nothrow @nogc @trusted + { + naked; + push RBX; + mov RAX, RDX; + mov RDX, RCX; + mov RBX, RDI; + mov RCX, RSI; + lock; cmpxchg16b [R8]; + setz AL; + pop RBX; + ret; + } + } + } + } + else + static assert (false, "Unsupported architecture."); +} + +void atomicFence(MemoryOrder order = MemoryOrder.seq)() nothrow @nogc @safe +{ + // Emits a hardware fence for every order except MemoryOrder.raw, which compiles to a no-op. + // TODO: `mfence` should only be needed for seq_cst operations, but weaker orders depend on the + // compiler's backend not reordering code inappropriately, so it is applied conservatively. + static if (order != MemoryOrder.raw) + { + version (D_InlineAsm_X86) + { + import core.cpuid; + + // TODO: review this implementation; it seems way overly complicated + asm pure nothrow @nogc @trusted + { + naked; + // The result of core.cpuid.sse2 is tested in AL; zero (no SSE2) must take the cpuid path. + call sse2; + test AL, AL; + je Lcpuid; + + // Fast path: We have SSE2, so just use mfence. + mfence; + jmp Lend; + + Lcpuid: + + // Slow path: We use cpuid to serialize.
This is + // significantly slower than mfence, but is the + // only serialization facility we have available + // on older non-SSE2 chips. + push EBX; + + mov EAX, 0; + cpuid; + + pop EBX; + + Lend: + + ret; + } + } + else version (D_InlineAsm_X86_64) + { + asm nothrow @nogc @trusted + { + naked; + mfence; + ret; + } + } + else + static assert (false, "Unsupported architecture."); + } +} + + +private: + +enum CanCAS(T) = is(T : ulong) || + is(T == class) || + is(T : U*, U) || + (is(T == struct) && T.sizeof <= 16 && (T.sizeof & (T.sizeof - 1)) == 0); + +template IntOrLong(T) +{ + static if (T.sizeof > 4) + alias IntOrLong = long; + else + alias IntOrLong = int; +} + +// this is a helper to build asm blocks +string simpleFormat(string format, string[] args...) +{ + string result; + outer: while (format.length) + { + foreach (i; 0 .. format.length) + { + if (format[i] == '%' || format[i] == '?') + { + bool isQ = format[i] == '?'; + result ~= format[0 .. i++]; + assert (i < format.length, "Invalid format string"); + if (format[i] == '%' || format[i] == '?') + { + assert(!isQ, "Invalid format string"); + result ~= format[i++]; + } + else + { + int index = 0; + assert (format[i] >= '0' && format[i] <= '9', "Invalid format string"); + while (i < format.length && format[i] >= '0' && format[i] <= '9') + index = index * 10 + (ubyte(format[i++]) - ubyte('0')); + if (!isQ) + result ~= args[index]; + else if (!args[index]) + { + size_t j = i; + for (; j < format.length;) + { + if (format[j++] == '\n') + break; + } + i = j; + } + } + format = format[i ..
$]; + continue outer; + } + } + result ~= format; + break; + } + return result; +} From 2763339c1256954d78f52c6e142ae4f708718a99 Mon Sep 17 00:00:00 2001 From: Manu Evans Date: Sat, 17 Aug 2019 22:26:24 -0700 Subject: [PATCH 3/5] Move atomicStore --- src/core/atomic.d | 356 +++++-------------------------------- src/core/internal/atomic.d | 89 +++++++++- 2 files changed, 130 insertions(+), 315 deletions(-) diff --git a/src/core/atomic.d b/src/core/atomic.d index 04b045d2ab..8c608520e8 100644 --- a/src/core/atomic.d +++ b/src/core/atomic.d @@ -97,6 +97,29 @@ enum MemoryOrder seq, } +/** + * Writes 'newval' into 'val'. The memory barrier specified by 'ms' is + * applied to the operation, which is fully sequenced by default. + * Valid memory orders are MemoryOrder.raw, MemoryOrder.rel, and + * MemoryOrder.seq. + * + * Params: + * val = The target variable. + * newval = The value to store. + */ +void atomicStore(MemoryOrder ms = MemoryOrder.seq, T, V)( ref shared T val, V newval ) pure nothrow @nogc @trusted + if ( __traits( compiles, { val = newval; } ) ) +{ + static if ( __traits(isFloating, T) ) + { + static assert ( __traits(isFloating, V) && V.sizeof == T.sizeof, "Mismatching argument types." ); + alias IntTy = IntForFloat!T; + core.internal.atomic.atomicStore(cast(IntTy*)&val, *cast(IntTy*)&newval); + } + else + core.internal.atomic.atomicStore(cast(T*)&val, newval); +} + /** * Atomically adds `mod` to the value referenced by `val` and returns the value `val` held previously. * This operation is both lock-free and atomic. 
@@ -148,15 +171,11 @@ shared(T) atomicExchange(MemoryOrder ms = MemoryOrder.seq,T,V)( shared(T)* here, if ( !is(T == class) && !is(T U : U*) && __traits( compiles, { *here = exchangeWith; } ) ) in ( atomicPtrIsProperlyAligned( here ), "Argument `here` is not properly aligned" ) { - static if ( __traits(isFloating, V) ) + static if ( __traits(isFloating, T) ) { - static if ( V.sizeof == 4 ) - alias I = uint; - else static if ( V.sizeof == 8 ) - alias I = ulong; - else - static assert( false, "Float type " ~ V.stringof ~ " not supported."); - I r = core.internal.atomic.atomicExchange(cast(shared(I)*)here, *cast(I*)&exchangeWith); + static assert ( __traits(isFloating, V) && V.sizeof == T.sizeof, "Mismatching argument types." ); + alias IntTy = IntForFloat!T; + IntTy r = core.internal.atomic.atomicExchange(cast(IntTy*)here, *cast(IntTy*)&exchangeWith); return *cast(shared(T)*)&r; } else @@ -198,11 +217,9 @@ in ( atomicPtrIsProperlyAligned( here ), "Argument `here` is not properly aligne { static if ( __traits(isFloating, T) ) { - static assert ( __traits(isFloating, V1) && __traits(isFloating, V2), "Mismatching argument types." ); - static if ( T.sizeof == 4 ) - alias IntTy = uint; - else static if ( T.sizeof == 8 ) - alias IntTy = ulong; + static assert ( __traits(isFloating, V1) && V1.sizeof == T.sizeof, "Mismatching argument types." ); + static assert ( __traits(isFloating, V2) && V2.sizeof == T.sizeof, "Mismatching argument types." ); + alias IntTy = IntForFloat!T; return atomicCompareExchangeStrongNoResult( cast(IntTy*)here, *cast(IntTy*)&ifThis, *cast(IntTy*)&writeThis ); } else @@ -245,11 +262,8 @@ in ( atomicPtrIsProperlyAligned( here ), "Argument `here` is not properly aligne { static if ( __traits(isFloating, T) ) { - static assert ( __traits(isFloating, V), "Mismatching argument types." 
); - static if ( T.sizeof == 4 ) - alias IntTy = uint; - else static if ( T.sizeof == 8 ) - alias IntTy = ulong; + static assert ( __traits(isFloating, V) && V.sizeof == T.sizeof, "Mismatching argument types." ); + alias IntTy = IntForFloat!T; return atomicCompareExchangeStrong( cast(IntTy*)here, cast(IntTy*)ifThis, *cast(IntTy*)&writeThis ); } else @@ -363,23 +377,6 @@ version (CoreDdoc) { return TailShared!T.init; } - - - /** - * Writes 'newval' into 'val'. The memory barrier specified by 'ms' is - * applied to the operation, which is fully sequenced by default. - * Valid memory orders are MemoryOrder.raw, MemoryOrder.rel, and - * MemoryOrder.seq. - * - * Params: - * val = The target variable. - * newval = The value to store. - */ - void atomicStore(MemoryOrder ms = MemoryOrder.seq,T,V1)( ref shared T val, V1 newval ) pure nothrow @nogc @safe - if ( __traits( compiles, { val = newval; } ) ) - { - - } } else version (AsmX86_32) { @@ -493,119 +490,6 @@ else version (AsmX86_32) static assert( false, "Invalid template type specified." 
); } } - - void atomicStore(MemoryOrder ms = MemoryOrder.seq, T, V1)( ref shared T val, V1 newval ) pure nothrow @nogc @safe - if ( __traits( compiles, { val = newval; } ) ) - { - static assert( ms != MemoryOrder.acq, "invalid MemoryOrder for atomicStore()" ); - static assert( __traits(isPOD, T), "argument to atomicStore() must be POD" ); - - static if ( T.sizeof == byte.sizeof ) - { - ////////////////////////////////////////////////////////////////// - // 1 Byte Store - ////////////////////////////////////////////////////////////////// - - static if ( needsStoreBarrier!(ms) ) - { - asm pure nothrow @nogc @trusted - { - mov EAX, val; - mov DL, newval; - lock; - xchg [EAX], DL; - } - } - else - { - asm pure nothrow @nogc @trusted - { - mov EAX, val; - mov DL, newval; - mov [EAX], DL; - } - } - } - else static if ( T.sizeof == short.sizeof ) - { - ////////////////////////////////////////////////////////////////// - // 2 Byte Store - ////////////////////////////////////////////////////////////////// - - static if ( needsStoreBarrier!(ms) ) - { - asm pure nothrow @nogc @trusted - { - mov EAX, val; - mov DX, newval; - lock; - xchg [EAX], DX; - } - } - else - { - asm pure nothrow @nogc @trusted - { - mov EAX, val; - mov DX, newval; - mov [EAX], DX; - } - } - } - else static if ( T.sizeof == int.sizeof ) - { - ////////////////////////////////////////////////////////////////// - // 4 Byte Store - ////////////////////////////////////////////////////////////////// - - static if ( needsStoreBarrier!(ms) ) - { - asm pure nothrow @nogc @trusted - { - mov EAX, val; - mov EDX, newval; - lock; - xchg [EAX], EDX; - } - } - else - { - asm pure nothrow @nogc @trusted - { - mov EAX, val; - mov EDX, newval; - mov [EAX], EDX; - } - } - } - else static if ( T.sizeof == long.sizeof && has64BitCAS ) - { - ////////////////////////////////////////////////////////////////// - // 8 Byte Store on a 32-Bit Processor - ////////////////////////////////////////////////////////////////// - - asm 
pure nothrow @nogc @trusted - { - push EDI; - push EBX; - lea EDI, newval; - mov EBX, [EDI]; - mov ECX, 4[EDI]; - mov EDI, val; - mov EAX, [EDI]; - mov EDX, 4[EDI]; - L1: lock; // lock always needed to make this op atomic - cmpxchg8b [EDI]; - jne L1; - pop EBX; - pop EDI; - } - } - else - { - static assert( false, "Invalid template type specified." ); - } - } } else version (AsmX86_64) { @@ -780,169 +664,6 @@ else version (AsmX86_64) static assert( false, "Invalid template type specified." ); } } - - - void atomicStore(MemoryOrder ms = MemoryOrder.seq, T, V1)( ref shared T val, V1 newval ) pure nothrow @nogc @safe - if ( __traits( compiles, { val = newval; } ) ) - { - static assert( ms != MemoryOrder.acq, "invalid MemoryOrder for atomicStore()" ); - static assert( __traits(isPOD, T), "argument to atomicStore() must be POD" ); - - static if ( T.sizeof == byte.sizeof ) - { - ////////////////////////////////////////////////////////////////// - // 1 Byte Store - ////////////////////////////////////////////////////////////////// - - static if ( needsStoreBarrier!(ms) ) - { - asm pure nothrow @nogc @trusted - { - mov RAX, val; - mov DL, newval; - lock; - xchg [RAX], DL; - } - } - else - { - asm pure nothrow @nogc @trusted - { - mov RAX, val; - mov DL, newval; - mov [RAX], DL; - } - } - } - else static if ( T.sizeof == short.sizeof ) - { - ////////////////////////////////////////////////////////////////// - // 2 Byte Store - ////////////////////////////////////////////////////////////////// - - static if ( needsStoreBarrier!(ms) ) - { - asm pure nothrow @nogc @trusted - { - mov RAX, val; - mov DX, newval; - lock; - xchg [RAX], DX; - } - } - else - { - asm pure nothrow @nogc @trusted - { - mov RAX, val; - mov DX, newval; - mov [RAX], DX; - } - } - } - else static if ( T.sizeof == int.sizeof ) - { - ////////////////////////////////////////////////////////////////// - // 4 Byte Store - ////////////////////////////////////////////////////////////////// - - static if ( 
needsStoreBarrier!(ms) ) - { - asm pure nothrow @nogc @trusted - { - mov RAX, val; - mov EDX, newval; - lock; - xchg [RAX], EDX; - } - } - else - { - asm pure nothrow @nogc @trusted - { - mov RAX, val; - mov EDX, newval; - mov [RAX], EDX; - } - } - } - else static if ( T.sizeof == long.sizeof && has64BitCAS ) - { - ////////////////////////////////////////////////////////////////// - // 8 Byte Store on a 64-Bit Processor - ////////////////////////////////////////////////////////////////// - - static if ( needsStoreBarrier!(ms) ) - { - asm pure nothrow @nogc @trusted - { - mov RAX, val; - mov RDX, newval; - lock; - xchg [RAX], RDX; - } - } - else - { - asm pure nothrow @nogc @trusted - { - mov RAX, val; - mov RDX, newval; - mov [RAX], RDX; - } - } - } - else static if ( T.sizeof == long.sizeof*2 && has128BitCAS ) - { - ////////////////////////////////////////////////////////////////// - // 16 Byte Store on a 64-Bit Processor - ////////////////////////////////////////////////////////////////// - version (Win64){ - asm pure nothrow @nogc @trusted - { - push RDI; - push RBX; - mov R9, val; - mov R10, newval; - - mov RDI, R10; - mov RBX, [RDI]; - mov RCX, 8[RDI]; - - mov RDI, R9; - mov RAX, [RDI]; - mov RDX, 8[RDI]; - - L1: lock; // lock always needed to make this op atomic - cmpxchg16b [RDI]; - jne L1; - pop RBX; - pop RDI; - } - }else{ - asm pure nothrow @nogc @trusted - { - push RDI; - push RBX; - lea RDI, newval; - mov RBX, [RDI]; - mov RCX, 8[RDI]; - mov RDI, val; - mov RAX, [RDI]; - mov RDX, 8[RDI]; - L1: lock; // lock always needed to make this op atomic - cmpxchg16b [RDI]; - jne L1; - pop RBX; - pop RDI; - } - } - } - else - { - static assert( false, "Invalid template type specified." ); - } - } } // This is an ABI adapter that works on all architectures. 
It type puns @@ -974,6 +695,17 @@ TailShared!T atomicLoad(MemoryOrder ms = MemoryOrder.seq, T)( ref const shared T private { + template IntForFloat(F) + { + static assert ( __traits(isFloating, F), "Not a floating point type: " ~ F.stringof ); + static if ( F.sizeof == 4 ) + alias IntForFloat = uint; + else static if ( F.sizeof == 8 ) + alias IntForFloat = ulong; + else + static assert ( false, "Invalid floating point type: " ~ F.stringof ~ ", only support `float` and `double`." ); + } + // NOTE: x86 loads implicitly have acquire semantics so a memory // barrier is only necessary on releases. template needsLoadBarrier( MemoryOrder ms ) @@ -1224,7 +956,7 @@ version (unittest) { () @trusted { - struct Big { long a, b; } + align(16) struct Big { long a, b; } shared(Big) atom; shared(Big) base; diff --git a/src/core/internal/atomic.d b/src/core/internal/atomic.d index 86ab4bcd50..caba54168f 100644 --- a/src/core/internal/atomic.d +++ b/src/core/internal/atomic.d @@ -52,9 +52,74 @@ T atomicLoad(MemoryOrder order = MemoryOrder.seq, T)(T* src) pure nothrow @nogc } -void atomicStore(MemoryOrder order = MemoryOrder.seq, T)(T* src, T value) pure nothrow @nogc @safe +void atomicStore(MemoryOrder order = MemoryOrder.seq, T)(T* dest, T value) pure nothrow @nogc @safe + if (CanCAS!T) { + static assert(order != MemoryOrder.acq, "Invalid MemoryOrder for atomicStore()"); + static assert(__traits(isPOD, T), "Argument to atomicStore() must be POD"); + static if (T.sizeof == size_t.sizeof * 2) + { + version (D_InlineAsm_X86) + { + asm pure nothrow @nogc @trusted + { + push EDI; + push EBX; + lea EDI, value; + mov EBX, [EDI]; + mov ECX, 4[EDI]; + mov EDI, dest; + mov EAX, [EDI]; + mov EDX, 4[EDI]; + L1: lock; cmpxchg8b [EDI]; + jne L1; + pop EBX; + pop EDI; + } + } + else version(D_InlineAsm_X86_64) + { + version (Windows) + { + asm pure nothrow @nogc @trusted + { + naked; + push RBX; + mov R8, RDX; + mov RAX, [RDX]; + mov RDX, 8[RDX]; + mov RBX, [RCX]; + mov RCX, 8[RCX]; + L1: lock; 
cmpxchg16b [R8]; + jne L1; + pop RBX; + ret; + } + } + else + { + asm pure nothrow @nogc @trusted + { + naked; + push RBX; + mov RBX, RDI; + mov RCX, RSI; + mov RDI, RDX; + mov RAX, [RDX]; + mov RDX, 8[RDX]; + L1: lock; cmpxchg16b [RDI]; + jne L1; + pop RBX; + ret; + } + } + } + } + else static if (needsStoreBarrier!order) + atomicExchange!(order, false)(dest, value); + else + *dest = value; } T atomicFetchAdd(MemoryOrder order = MemoryOrder.seq, bool result = true, T)(T* dest, T value) pure nothrow @nogc @safe @@ -300,7 +365,7 @@ bool atomicCompareExchangeStrong(MemoryOrder succ = MemoryOrder.seq, MemoryOrder static assert (false, "Unsupported architecture."); } -bool atomicCompareExchangeStrongNoResult(MemoryOrder succ = MemoryOrder.seq, MemoryOrder fail = MemoryOrder.seq, T)(T* dest, T compare, T value) pure nothrow @nogc @safe +bool atomicCompareExchangeStrongNoResult(MemoryOrder succ = MemoryOrder.seq, MemoryOrder fail = MemoryOrder.seq, T)(T* dest, const T compare, T value) pure nothrow @nogc @safe if (CanCAS!T) { version (D_InlineAsm_X86) @@ -475,7 +540,10 @@ private: enum CanCAS(T) = is(T : ulong) || is(T == class) || is(T : U*, U) || - (is(T == struct) && T.sizeof <= 16 && (T.sizeof & (T.sizeof - 1)) == 0); + (is(T == struct) && __traits(isPOD, T) && + T.sizeof <= size_t.sizeof*2 && // no more than 2 words + (T.sizeof & (T.sizeof - 1)) == 0 // is power of 2 + ); template IntOrLong(T) { @@ -485,6 +553,21 @@ template IntOrLong(T) alias IntOrLong = int; } +// NOTE: x86 loads implicitly have acquire semantics so a memory +// barrier is only necessary on releases. +template needsLoadBarrier( MemoryOrder ms ) +{ + enum bool needsLoadBarrier = ms == MemoryOrder.seq; +} + + +// NOTE: x86 stores implicitly have release semantics so a memory +// barrier is only necessary on acquires. 
+template needsStoreBarrier( MemoryOrder ms ) +{ + enum bool needsStoreBarrier = ms == MemoryOrder.seq; +} + // this is a helper to build asm blocks string simpleFormat(string format, string[] args...) { From 4120ca76b364c0c238af039f2dab2b0af67b58fb Mon Sep 17 00:00:00 2001 From: Manu Evans Date: Sat, 17 Aug 2019 23:08:12 -0700 Subject: [PATCH 4/5] Move atomicLoad --- src/core/atomic.d | 401 ++++--------------------------------- src/core/internal/atomic.d | 124 +++++++++++- 2 files changed, 160 insertions(+), 365 deletions(-) diff --git a/src/core/atomic.d b/src/core/atomic.d index 8c608520e8..a68f677ba7 100644 --- a/src/core/atomic.d +++ b/src/core/atomic.d @@ -97,6 +97,33 @@ enum MemoryOrder seq, } +/** + * Loads 'val' from memory and returns it. The memory barrier specified + * by 'ms' is applied to the operation, which is fully sequenced by + * default. Valid memory orders are MemoryOrder.raw, MemoryOrder.acq, + * and MemoryOrder.seq. + * + * Params: + * val = The target variable. + * + * Returns: + * The value of 'val'. + */ +TailShared!T atomicLoad(MemoryOrder ms = MemoryOrder.seq, T)( ref const shared T val ) pure nothrow @nogc @trusted +{ + static if ( __traits(isFloating, T) ) + { + alias IntTy = IntForFloat!T; + IntTy r = core.internal.atomic.atomicLoad!ms(cast(IntTy*)&val); + return *cast(T*)&r; + } + else + { + T r = core.internal.atomic.atomicLoad!ms(cast(T*)&val); + return *cast(TailShared!T*)&r; + } +} + /** * Writes 'newval' into 'val'. The memory barrier specified by 'ms' is * applied to the operation, which is fully sequenced by default. @@ -114,10 +141,10 @@ void atomicStore(MemoryOrder ms = MemoryOrder.seq, T, V)( ref shared T val, V ne { static assert ( __traits(isFloating, V) && V.sizeof == T.sizeof, "Mismatching argument types." 
); alias IntTy = IntForFloat!T; - core.internal.atomic.atomicStore(cast(IntTy*)&val, *cast(IntTy*)&newval); + core.internal.atomic.atomicStore!ms(cast(IntTy*)&val, *cast(IntTy*)&newval); } else - core.internal.atomic.atomicStore(cast(T*)&val, newval); + core.internal.atomic.atomicStore!ms(cast(T*)&val, newval); } /** @@ -131,11 +158,11 @@ void atomicStore(MemoryOrder ms = MemoryOrder.seq, T, V)( ref shared T val, V ne * Returns: * The value held previously by `val`. */ -TailShared!(T) atomicFetchAdd(T)( ref shared T val, size_t mod ) pure nothrow @nogc @trusted +TailShared!(T) atomicFetchAdd(MemoryOrder ms = MemoryOrder.seq, T)( ref shared T val, size_t mod ) pure nothrow @nogc @trusted if ( __traits(isIntegral, T) ) in ( atomicValueIsProperlyAligned(val) ) { - return core.internal.atomic.atomicFetchAdd( &val, cast(T)mod ); + return core.internal.atomic.atomicFetchAdd!ms( &val, cast(T)mod ); } /** @@ -149,11 +176,11 @@ in ( atomicValueIsProperlyAligned(val) ) * Returns: * The value held previously by `val`. */ -TailShared!(T) atomicFetchSub(T)( ref shared T val, size_t mod ) pure nothrow @nogc @trusted +TailShared!(T) atomicFetchSub(MemoryOrder ms = MemoryOrder.seq, T)( ref shared T val, size_t mod ) pure nothrow @nogc @trusted if ( __traits(isIntegral, T) ) in ( atomicValueIsProperlyAligned(val) ) { - return core.internal.atomic.atomicFetchSub( &val, cast(T)mod ); + return core.internal.atomic.atomicFetchSub!ms( &val, cast(T)mod ); } /** @@ -175,11 +202,11 @@ in ( atomicPtrIsProperlyAligned( here ), "Argument `here` is not properly aligne { static assert ( __traits(isFloating, V) && V.sizeof == T.sizeof, "Mismatching argument types." 
); alias IntTy = IntForFloat!T; - IntTy r = core.internal.atomic.atomicExchange(cast(IntTy*)here, *cast(IntTy*)&exchangeWith); + IntTy r = core.internal.atomic.atomicExchange!ms(cast(IntTy*)here, *cast(IntTy*)&exchangeWith); return *cast(shared(T)*)&r; } else - return core.internal.atomic.atomicExchange(here, exchangeWith); + return core.internal.atomic.atomicExchange!ms(here, exchangeWith); } /// Ditto @@ -187,7 +214,7 @@ shared(T) atomicExchange(MemoryOrder ms = MemoryOrder.seq,T,V)( shared(T)* here, if ( is(T == class) && __traits( compiles, { *here = exchangeWith; } ) ) in ( atomicPtrIsProperlyAligned( here ), "Argument `here` is not properly aligned" ) { - return core.internal.atomic.atomicExchange(here, exchangeWith); + return core.internal.atomic.atomicExchange!ms(here, exchangeWith); } /// Ditto @@ -195,7 +222,7 @@ shared(T) atomicExchange(MemoryOrder ms = MemoryOrder.seq,T,V)( shared(T)* here, if ( is(T U : U*) && __traits( compiles, { *here = exchangeWith; } ) ) in ( atomicPtrIsProperlyAligned( here ), "Argument `here` is not properly aligned" ) { - return core.internal.atomic.atomicExchange(here, exchangeWith); + return core.internal.atomic.atomicExchange!ms(here, exchangeWith); } /** @@ -333,11 +360,11 @@ in ( atomicValueIsProperlyAligned( val ) ) // |= ^= <<= >>= >>>= ~= static if ( op == "+=" && __traits(isIntegral, T) && __traits(isIntegral, V1) && T.sizeof <= size_t.sizeof && V1.sizeof <= size_t.sizeof) { - return cast(T)( atomicFetchAdd!(T)( val, mod ) + mod ); + return cast(T)( atomicFetchAdd!(MemoryOrder.seq, T)( val, mod ) + mod ); } else static if ( op == "-=" && __traits(isIntegral, T) && __traits(isIntegral, V1) && T.sizeof <= size_t.sizeof && V1.sizeof <= size_t.sizeof) { - return cast(T)( atomicFetchSub!(T)( val, mod ) - mod ); + return cast(T)( atomicFetchSub!(MemoryOrder.seq, T)( val, mod ) - mod ); } else static if ( op == "+=" || op == "-=" || op == "*=" || op == "/=" || op == "%=" || op == "^^=" || op == "&=" || op == "|=" || @@ 
-358,341 +385,6 @@ in ( atomicValueIsProperlyAligned( val ) ) } } - -version (CoreDdoc) -{ - /** - * Loads 'val' from memory and returns it. The memory barrier specified - * by 'ms' is applied to the operation, which is fully sequenced by - * default. Valid memory orders are MemoryOrder.raw, MemoryOrder.acq, - * and MemoryOrder.seq. - * - * Params: - * val = The target variable. - * - * Returns: - * The value of 'val'. - */ - TailShared!T atomicLoad(MemoryOrder ms = MemoryOrder.seq,T)( ref const shared T val ) pure nothrow @nogc @safe - { - return TailShared!T.init; - } -} -else version (AsmX86_32) -{ - TailShared!T atomicLoad(MemoryOrder ms = MemoryOrder.seq, T)( ref const shared T val ) pure nothrow @nogc @safe - if (!__traits(isFloating, T)) - { - static assert( ms != MemoryOrder.rel, "invalid MemoryOrder for atomicLoad()" ); - static assert( __traits(isPOD, T), "argument to atomicLoad() must be POD" ); - - static if ( T.sizeof == byte.sizeof ) - { - ////////////////////////////////////////////////////////////////// - // 1 Byte Load - ////////////////////////////////////////////////////////////////// - - static if ( needsLoadBarrier!(ms) ) - { - asm pure nothrow @nogc @trusted - { - mov DL, 0; - mov AL, 0; - mov ECX, val; - lock; // lock always needed to make this op atomic - cmpxchg [ECX], DL; - } - } - else - { - asm pure nothrow @nogc @trusted - { - mov EAX, val; - mov AL, [EAX]; - } - } - } - else static if ( T.sizeof == short.sizeof ) - { - ////////////////////////////////////////////////////////////////// - // 2 Byte Load - ////////////////////////////////////////////////////////////////// - - static if ( needsLoadBarrier!(ms) ) - { - asm pure nothrow @nogc @trusted - { - mov DX, 0; - mov AX, 0; - mov ECX, val; - lock; // lock always needed to make this op atomic - cmpxchg [ECX], DX; - } - } - else - { - asm pure nothrow @nogc @trusted - { - mov EAX, val; - mov AX, [EAX]; - } - } - } - else static if ( T.sizeof == int.sizeof ) - { - 
////////////////////////////////////////////////////////////////// - // 4 Byte Load - ////////////////////////////////////////////////////////////////// - - static if ( needsLoadBarrier!(ms) ) - { - asm pure nothrow @nogc @trusted - { - mov EDX, 0; - mov EAX, 0; - mov ECX, val; - lock; // lock always needed to make this op atomic - cmpxchg [ECX], EDX; - } - } - else - { - asm pure nothrow @nogc @trusted - { - mov EAX, val; - mov EAX, [EAX]; - } - } - } - else static if ( T.sizeof == long.sizeof && has64BitCAS ) - { - ////////////////////////////////////////////////////////////////// - // 8 Byte Load on a 32-Bit Processor - ////////////////////////////////////////////////////////////////// - - asm pure nothrow @nogc @trusted - { - push EDI; - push EBX; - mov EBX, 0; - mov ECX, 0; - mov EAX, 0; - mov EDX, 0; - mov EDI, val; - lock; // lock always needed to make this op atomic - cmpxchg8b [EDI]; - pop EBX; - pop EDI; - } - } - else - { - static assert( false, "Invalid template type specified." 
); - } - } -} -else version (AsmX86_64) -{ - TailShared!T atomicLoad(MemoryOrder ms = MemoryOrder.seq, T)( ref const shared T val ) pure nothrow @nogc @safe - if (!__traits(isFloating, T)) - { - static assert( ms != MemoryOrder.rel, "invalid MemoryOrder for atomicLoad()" ); - static assert( __traits(isPOD, T), "argument to atomicLoad() must be POD" ); - - static if ( T.sizeof == byte.sizeof ) - { - ////////////////////////////////////////////////////////////////// - // 1 Byte Load - ////////////////////////////////////////////////////////////////// - - static if ( needsLoadBarrier!(ms) ) - { - asm pure nothrow @nogc @trusted - { - mov DL, 0; - mov AL, 0; - mov RCX, val; - lock; // lock always needed to make this op atomic - cmpxchg [RCX], DL; - } - } - else - { - asm pure nothrow @nogc @trusted - { - mov RAX, val; - mov AL, [RAX]; - } - } - } - else static if ( T.sizeof == short.sizeof ) - { - ////////////////////////////////////////////////////////////////// - // 2 Byte Load - ////////////////////////////////////////////////////////////////// - - static if ( needsLoadBarrier!(ms) ) - { - asm pure nothrow @nogc @trusted - { - mov DX, 0; - mov AX, 0; - mov RCX, val; - lock; // lock always needed to make this op atomic - cmpxchg [RCX], DX; - } - } - else - { - asm pure nothrow @nogc @trusted - { - mov RAX, val; - mov AX, [RAX]; - } - } - } - else static if ( T.sizeof == int.sizeof ) - { - ////////////////////////////////////////////////////////////////// - // 4 Byte Load - ////////////////////////////////////////////////////////////////// - - static if ( needsLoadBarrier!(ms) ) - { - asm pure nothrow @nogc @trusted - { - mov EDX, 0; - mov EAX, 0; - mov RCX, val; - lock; // lock always needed to make this op atomic - cmpxchg [RCX], EDX; - } - } - else - { - asm pure nothrow @nogc @trusted - { - mov RAX, val; - mov EAX, [RAX]; - } - } - } - else static if ( T.sizeof == long.sizeof ) - { - ////////////////////////////////////////////////////////////////// - // 8 Byte 
Load - ////////////////////////////////////////////////////////////////// - - static if ( needsLoadBarrier!(ms) ) - { - asm pure nothrow @nogc @trusted - { - mov RDX, 0; - mov RAX, 0; - mov RCX, val; - lock; // lock always needed to make this op atomic - cmpxchg [RCX], RDX; - } - } - else - { - asm pure nothrow @nogc @trusted - { - mov RAX, val; - mov RAX, [RAX]; - } - } - } - else static if ( T.sizeof == long.sizeof*2 && has128BitCAS ) - { - ////////////////////////////////////////////////////////////////// - // 16 Byte Load on a 64-Bit Processor - ////////////////////////////////////////////////////////////////// - version (Win64){ - size_t[2] retVal; - asm pure nothrow @nogc @trusted - { - push RDI; - push RBX; - mov RDI, val; - mov RBX, 0; - mov RCX, 0; - mov RAX, 0; - mov RDX, 0; - lock; // lock always needed to make this op atomic - cmpxchg16b [RDI]; - lea RDI, retVal; - mov [RDI], RAX; - mov 8[RDI], RDX; - pop RBX; - pop RDI; - } - - static if (is(T:U[], U)) - { - pragma(inline, true) - static typeof(return) toTrusted(size_t[2] retVal) @trusted - { - return *(cast(typeof(return)*) retVal.ptr); - } - - return toTrusted(retVal); - } - else - { - return cast(typeof(return)) retVal; - } - }else{ - asm pure nothrow @nogc @trusted - { - push RDI; - push RBX; - mov RBX, 0; - mov RCX, 0; - mov RAX, 0; - mov RDX, 0; - mov RDI, val; - lock; // lock always needed to make this op atomic - cmpxchg16b [RDI]; - pop RBX; - pop RDI; - } - } - } - else - { - static assert( false, "Invalid template type specified." ); - } - } -} - -// This is an ABI adapter that works on all architectures. It type puns -// floats and doubles to ints and longs, atomically loads them, then puns -// them back. This is necessary so that they get returned in floating -// point instead of integer registers. 
-TailShared!T atomicLoad(MemoryOrder ms = MemoryOrder.seq, T)( ref const shared T val ) pure nothrow @nogc @trusted - if (__traits(isFloating, T)) -{ - static if (T.sizeof == int.sizeof) - { - static assert(is(T : float)); - auto ptr = cast(const shared int*) &val; - auto asInt = atomicLoad!(ms)(*ptr); - return *(cast(typeof(return)*) &asInt); - } - else static if (T.sizeof == long.sizeof) - { - static assert(is(T : double)); - auto ptr = cast(const shared long*) &val; - auto asLong = atomicLoad!(ms)(*ptr); - return *(cast(typeof(return)*) &asLong); - } - else - { - static assert(0, "Cannot atomically load 80-bit reals."); - } -} - private { template IntForFloat(F) @@ -706,21 +398,6 @@ private static assert ( false, "Invalid floating point type: " ~ F.stringof ~ ", only support `float` and `double`." ); } - // NOTE: x86 loads implicitly have acquire semantics so a memory - // barrier is only necessary on releases. - template needsLoadBarrier( MemoryOrder ms ) - { - enum bool needsLoadBarrier = ms == MemoryOrder.seq; - } - - - // NOTE: x86 stores implicitly have release semantics so a memory - // barrier is only necessary on acquires. - template needsStoreBarrier( MemoryOrder ms ) - { - enum bool needsStoreBarrier = ms == MemoryOrder.seq; - } - // TODO: it'd be nice if we had @trusted scopes; we could remove this... 
bool casByRef(T,V1,V2)( ref T value, V1 ifThis, V2 writeThis ) pure nothrow @nogc @trusted { diff --git a/src/core/internal/atomic.d b/src/core/internal/atomic.d index caba54168f..3f47a20c3c 100644 --- a/src/core/internal/atomic.d +++ b/src/core/internal/atomic.d @@ -47,16 +47,127 @@ private enum SizedReg(int reg, T = size_t) = registerNames[reg][RegIndex!T]; } -T atomicLoad(MemoryOrder order = MemoryOrder.seq, T)(T* src) pure nothrow @nogc @safe +T atomicLoad(MemoryOrder order = MemoryOrder.seq, T)(T* src) pure nothrow @nogc @trusted + if (CanCAS!T) { + static assert(order != MemoryOrder.rel, "invalid MemoryOrder for atomicLoad()"); + + static if (T.sizeof == size_t.sizeof * 2) + { + version (D_InlineAsm_X86) + { + asm pure nothrow @nogc @trusted + { + push EDI; + push EBX; + mov EBX, 0; + mov ECX, 0; + mov EAX, 0; + mov EDX, 0; + mov EDI, src; + lock; cmpxchg8b [EDI]; + pop EBX; + pop EDI; + } + } + else version (D_InlineAsm_X86_64) + { + version (Windows) + { + static if (RegisterReturn!T) + { + enum SrcPtr = SizedReg!CX; + enum RetPtr = null; + } + else + { + enum SrcPtr = SizedReg!DX; + enum RetPtr = SizedReg!CX; + } + mixin (simpleFormat(q{ + asm pure nothrow @nogc @trusted + { + naked; + push RBX; + mov R8, %0; +?1 mov R9, %1; + mov RBX, 0; + mov RCX, 0; + mov RAX, 0; + mov RDX, 0; + lock; cmpxchg16b [R8]; +?1 mov [R9], RAX; +?1 mov 8[R9], RDX; + pop RBX; + ret; + } + }, SrcPtr, RetPtr)); + } + else + { + asm pure nothrow @nogc @trusted + { + naked; + push RBX; + mov RBX, 0; + mov RCX, 0; + mov RAX, 0; + mov RDX, 0; + lock; cmpxchg16b [RDI]; + pop RBX; + ret; + } + } + } + } + else static if (needsLoadBarrier!order) + { + version (D_InlineAsm_X86) + { + enum SrcReg = SizedReg!CX; + enum ZeroReg = SizedReg!(DX, T); + enum ResReg = SizedReg!(AX, T); + + mixin (simpleFormat(q{ + asm pure nothrow @nogc @trusted + { + mov %1, 0; + mov %2, 0; + mov %0, src; + lock; cmpxchg [%0], %1; + } + }, SrcReg, ZeroReg, ResReg)); + } + else version (D_InlineAsm_X86_64) + { + 
version (Windows) + enum SrcReg = SizedReg!CX; + else + enum SrcReg = SizedReg!DI; + enum ZeroReg = SizedReg!(DX, T); + enum ResReg = SizedReg!(AX, T); + + mixin (simpleFormat(q{ + asm pure nothrow @nogc @trusted + { + naked; + mov %1, 0; + mov %2, 0; + lock; cmpxchg [%0], %1; + ret; + } + }, SrcReg, ZeroReg, ResReg)); + } + } + else + return *src; } void atomicStore(MemoryOrder order = MemoryOrder.seq, T)(T* dest, T value) pure nothrow @nogc @safe if (CanCAS!T) { static assert(order != MemoryOrder.acq, "Invalid MemoryOrder for atomicStore()"); - static assert(__traits(isPOD, T), "Argument to atomicStore() must be POD"); static if (T.sizeof == size_t.sizeof * 2) { @@ -78,7 +189,7 @@ void atomicStore(MemoryOrder order = MemoryOrder.seq, T)(T* dest, T value) pure pop EDI; } } - else version(D_InlineAsm_X86_64) + else version (D_InlineAsm_X86_64) { version (Windows) { @@ -537,9 +648,16 @@ void atomicFence(MemoryOrder order = MemoryOrder.seq)() nothrow @nogc @safe private: +version (Windows) +{ + enum RegisterReturn(T) = is(T : U[], U) || is(T : R delegate(A), R, A...); +} + enum CanCAS(T) = is(T : ulong) || is(T == class) || is(T : U*, U) || + is(T : U[], U) || + is(T : R delegate(A), R, A...) || (is(T == struct) && __traits(isPOD, T) && T.sizeof <= size_t.sizeof*2 && // no more than 2 words (T.sizeof & (T.sizeof - 1)) == 0 // is power of 2 From 9183a7dbd487c92296d51fad376888ef40c3dde8 Mon Sep 17 00:00:00 2001 From: Manu Evans Date: Sun, 18 Aug 2019 00:56:19 -0700 Subject: [PATCH 5/5] Better handling of various permutations of shared-ness. 
--- src/core/atomic.d | 188 +++++++++++++++++++++++++------------ src/core/internal/atomic.d | 2 +- 2 files changed, 131 insertions(+), 59 deletions(-) diff --git a/src/core/atomic.d b/src/core/atomic.d index a68f677ba7..fcbf899868 100644 --- a/src/core/atomic.d +++ b/src/core/atomic.d @@ -12,50 +12,7 @@ module core.atomic; import core.internal.atomic; import core.internal.attributes : betterC; - -version (D_InlineAsm_X86) -{ - version = AsmX86; - version = AsmX86_32; - enum has64BitXCHG = false; - enum has64BitCAS = true; - enum has128BitCAS = false; -} -else version (D_InlineAsm_X86_64) -{ - version = AsmX86; - version = AsmX86_64; - enum has64BitXCHG = true; - enum has64BitCAS = true; - enum has128BitCAS = true; -} -else -{ - enum has64BitXCHG = false; - enum has64BitCAS = false; - enum has128BitCAS = false; -} - -version (AsmX86) -{ - // NOTE: Strictly speaking, the x86 supports atomic operations on - // unaligned values. However, this is far slower than the - // common case, so such behavior should be prohibited. - private bool atomicValueIsProperlyAligned(T)( ref T val ) pure nothrow @nogc @trusted - { - return atomicPtrIsProperlyAligned(&val); - } - - private bool atomicPtrIsProperlyAligned(T)( T* ptr ) pure nothrow @nogc @safe - { - // NOTE: 32 bit x86 systems support 8 byte CAS, which only requires - // 4 byte alignment, so use size_t as the align type here. - static if ( T.sizeof > size_t.sizeof ) - return cast(size_t)ptr % size_t.sizeof == 0; - else - return cast(size_t)ptr % T.sizeof == 0; - } -} +import core.internal.traits : hasUnsharedIndirections; /** * Specifies the memory ordering semantics of an atomic operation. @@ -109,7 +66,8 @@ enum MemoryOrder * Returns: * The value of 'val'. 
*/ -TailShared!T atomicLoad(MemoryOrder ms = MemoryOrder.seq, T)( ref const shared T val ) pure nothrow @nogc @trusted +T atomicLoad(MemoryOrder ms = MemoryOrder.seq, T)( ref T val ) pure nothrow @nogc @trusted + if ( !is( T == shared U, U ) && !is( T == shared inout U, U ) && !is( T == shared const U, U ) ) { static if ( __traits(isFloating, T) ) { @@ -118,10 +76,30 @@ TailShared!T atomicLoad(MemoryOrder ms = MemoryOrder.seq, T)( ref const shared T return *cast(T*)&r; } else - { - T r = core.internal.atomic.atomicLoad!ms(cast(T*)&val); - return *cast(TailShared!T*)&r; - } + return core.internal.atomic.atomicLoad!ms(&val); +} + +/// Ditto +T atomicLoad(MemoryOrder ms = MemoryOrder.seq, T)( ref shared T val ) pure nothrow @nogc @trusted + if ( !hasUnsharedIndirections!T ) +{ + import core.internal.traits : hasUnsharedIndirections; + static assert(!hasUnsharedIndirections!T, "Copying `shared " ~ T.stringof ~ "` would violate shared."); + + return atomicLoad!ms(*cast(T*)&val); +} + +/// Ditto +TailShared!T atomicLoad(MemoryOrder ms = MemoryOrder.seq, T)( ref shared T val ) pure nothrow @nogc @trusted + if ( hasUnsharedIndirections!T ) +{ + // HACK: DEPRECATE THIS FUNCTION, IT IS INVALID TO DO ATOMIC LOAD OF SHARED CLASS + // this is here because code exists in the wild that does this... + + import core.lifetime : move; + + T r = core.internal.atomic.atomicLoad!ms(cast(T*)&val); + return move(*cast(TailShared!T*)&r); } /** @@ -134,8 +112,8 @@ TailShared!T atomicLoad(MemoryOrder ms = MemoryOrder.seq, T)( ref const shared T * val = The target variable. * newval = The value to store. 
*/ -void atomicStore(MemoryOrder ms = MemoryOrder.seq, T, V)( ref shared T val, V newval ) pure nothrow @nogc @trusted - if ( __traits( compiles, { val = newval; } ) ) +void atomicStore(MemoryOrder ms = MemoryOrder.seq, T, V)( ref T val, V newval ) pure nothrow @nogc @trusted + if ( __traits( compiles, { val = newval; } ) && !is(T == shared S, S) && !is(V == shared U, U) ) { static if ( __traits(isFloating, T) ) { @@ -144,7 +122,31 @@ void atomicStore(MemoryOrder ms = MemoryOrder.seq, T, V)( ref shared T val, V ne core.internal.atomic.atomicStore!ms(cast(IntTy*)&val, *cast(IntTy*)&newval); } else - core.internal.atomic.atomicStore!ms(cast(T*)&val, newval); + core.internal.atomic.atomicStore!ms(&val, newval); +} + +/// Ditto +void atomicStore(MemoryOrder ms = MemoryOrder.seq, T, V)( ref shared T val, V newval ) pure nothrow @nogc @trusted + if ( __traits( compiles, { val = newval; } ) && !is( T == class ) ) +{ + static if ( is ( V == shared U, U ) ) + alias Thunk = U; + else + { + import core.internal.traits : hasUnsharedIndirections; + static assert(!hasUnsharedIndirections!V, "Copying unshared argument `newval` to shared `val` would violate shared."); + alias Thunk = V; + } + atomicStore!ms(*cast(T*)&val, *cast(Thunk*)&newval); +} + +/// Ditto +void atomicStore(MemoryOrder ms = MemoryOrder.seq, T, V)( ref shared T val, shared V newval ) pure nothrow @nogc @trusted + if ( is( T == class ) ) +{ + static assert ( is ( V : T ), "Can't assign `newval` of type `shared " ~ V.stringof ~ "` to `shared " ~ T.stringof ~ "`."); + + core.internal.atomic.atomicStore!ms(cast(T*)&val, cast(V)newval); } /** @@ -158,7 +160,7 @@ void atomicStore(MemoryOrder ms = MemoryOrder.seq, T, V)( ref shared T val, V ne * Returns: * The value held previously by `val`. 
*/ -TailShared!(T) atomicFetchAdd(MemoryOrder ms = MemoryOrder.seq, T)( ref shared T val, size_t mod ) pure nothrow @nogc @trusted +TailShared!T atomicFetchAdd(MemoryOrder ms = MemoryOrder.seq, T)( ref shared T val, size_t mod ) pure nothrow @nogc @trusted if ( __traits(isIntegral, T) ) in ( atomicValueIsProperlyAligned(val) ) { @@ -176,7 +178,7 @@ in ( atomicValueIsProperlyAligned(val) ) * Returns: * The value held previously by `val`. */ -TailShared!(T) atomicFetchSub(MemoryOrder ms = MemoryOrder.seq, T)( ref shared T val, size_t mod ) pure nothrow @nogc @trusted +TailShared!T atomicFetchSub(MemoryOrder ms = MemoryOrder.seq, T)( ref shared T val, size_t mod ) pure nothrow @nogc @trusted if ( __traits(isIntegral, T) ) in ( atomicValueIsProperlyAligned(val) ) { @@ -323,7 +325,6 @@ void atomicFence() nothrow @nogc @safe core.internal.atomic.atomicFence(); } - /** * Performs the binary operation 'op' on val using 'mod' as the modifier. * @@ -385,11 +386,54 @@ in ( atomicValueIsProperlyAligned( val ) ) } } + +version (X86) +{ + version = IsX86; + enum has64BitXCHG = false; + enum has64BitCAS = true; + enum has128BitCAS = false; +} +else version (X86_64) +{ + version = IsX86; + enum has64BitXCHG = true; + enum has64BitCAS = true; + enum has128BitCAS = true; +} +else +{ + enum has64BitXCHG = false; + enum has64BitCAS = false; + enum has128BitCAS = false; +} + private { + version (IsX86) + { + // NOTE: Strictly speaking, the x86 supports atomic operations on + // unaligned values. However, this is far slower than the + // common case, so such behavior should be prohibited. + bool atomicValueIsProperlyAligned(T)( ref T val ) pure nothrow @nogc @trusted + { + return atomicPtrIsProperlyAligned(&val); + } + + bool atomicPtrIsProperlyAligned(T)( T* ptr ) pure nothrow @nogc @safe + { + // NOTE: 32 bit x86 systems support 8 byte CAS, which only requires + // 4 byte alignment, so use size_t as the align type here. 
+ static if ( T.sizeof > size_t.sizeof ) + return cast(size_t)ptr % size_t.sizeof == 0; + else + return cast(size_t)ptr % T.sizeof == 0; + } + } + template IntForFloat(F) + if (__traits(isFloating, F)) { - static assert ( __traits(isFloating, F), "Not a floating point type: " ~ F.stringof ); static if ( F.sizeof == 4 ) alias IntForFloat = uint; else static if ( F.sizeof == 8 ) @@ -398,6 +442,34 @@ private static assert ( false, "Invalid floating point type: " ~ F.stringof ~ ", only support `float` and `double`." ); } + template IntForStruct(S) /* aliases an unsigned integer type (or ulong[2]) whose size equals S.sizeof; any other size is rejected through ValidateStruct */ + if (is(S == struct)) + { + static if ( S.sizeof == 1 ) + alias IntForStruct = ubyte; + else static if ( S.sizeof == 2 ) + alias IntForStruct = ushort; + else static if ( S.sizeof == 4 ) + alias IntForStruct = uint; + else static if ( S.sizeof == 8 ) + alias IntForStruct = ulong; + else static if ( S.sizeof == 16 ) + alias IntForStruct = ulong[2]; // TODO: what's the best type here? slice/delegates pass in registers... + else + static assert (ValidateStruct!S); + } + + template ValidateStruct(S) /* compile-time check: S is no larger than two machine words, has a power-of-two size, and has no elaborate assignment; evaluates to true when all hold */ + if (is(S == struct)) + { + import core.internal.traits : hasElaborateAssign; + + static assert (S.sizeof <= size_t.sizeof*2 && (S.sizeof & (S.sizeof - 1)) == 0, S.stringof ~ " has invalid size for atomic operations."); + static assert (!hasElaborateAssign!S, S.stringof ~ " may not have an elaborate assignment when used with atomic operations."); + + enum ValidateStruct = true; + } + // TODO: it'd be nice if we had @trusted scopes; we could remove this... 
bool casByRef(T,V1,V2)( ref T value, V1 ifThis, V2 writeThis ) pure nothrow @nogc @trusted { @@ -795,7 +867,7 @@ version (unittest) assert(atomicOp!"+="(i8, 8) == 13); assert(atomicOp!"+="(i16, 8) == 14); assert(atomicOp!"+="(i32, 8) == 15); - version (AsmX86_64) + version (D_LP64) { shared ulong u64 = 4; shared long i64 = 8; @@ -819,7 +891,7 @@ version (unittest) assert(atomicOp!"-="(i8, 1) == 4); assert(atomicOp!"-="(i16, 1) == 5); assert(atomicOp!"-="(i32, 1) == 6); - version (AsmX86_64) + version (D_LP64) { shared ulong u64 = 4; shared long i64 = 8; diff --git a/src/core/internal/atomic.d b/src/core/internal/atomic.d index 3f47a20c3c..b8e10c8271 100644 --- a/src/core/internal/atomic.d +++ b/src/core/internal/atomic.d @@ -47,7 +47,7 @@ private enum SizedReg(int reg, T = size_t) = registerNames[reg][RegIndex!T]; } -T atomicLoad(MemoryOrder order = MemoryOrder.seq, T)(T* src) pure nothrow @nogc @trusted +inout(T) atomicLoad(MemoryOrder order = MemoryOrder.seq, T)(inout(T)* src) pure nothrow @nogc @trusted if (CanCAS!T) { static assert(order != MemoryOrder.rel, "invalid MemoryOrder for atomicLoad()");