Skip to content
This repository was archived by the owner on Oct 12, 2022. It is now read-only.
/ druntime Public archive
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 10 additions & 0 deletions changelog/vectorized_array_ops.dd
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
Vectorized array operations are now templated

Array operations have been converted from dedicated assembly routines for $(B some) array operations to a generic template implementation for $(B all) array operations. This provides huge performance increases (2-4x higher throughput) for array operations that were not previously vectorized.
Furthermore, the implementation makes better use of vectorization even for short arrays to heavily reduce latency for some operations (up to 4x).

For GDC/LDC the implementation relies on auto-vectorization; for DMD the implementation performs the vectorization itself. Support for vector operations with DMD is determined statically (`-mcpu=native`, `-mcpu=avx2`) to avoid binary bloat and the small test overhead. DMD enables SSE2 for 64-bit targets by default.

Also see $(DRUNTIMEPR 1891)

$(RED Note:) The implementation no longer weakens floating point divisions (e.g. `ary[] / scalar`) to multiplication (`ary[] * (1.0 / scalar)`) as that may reduce precision. To preserve the higher performance of float multiplication when loss of precision is acceptable, use either `-ffast-math` with GDC/LDC or manually rewrite your code to multiply by `(1.0 / scalar)` for DMD.
193 changes: 159 additions & 34 deletions src/core/internal/arrayop.d
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
module core.internal.arrayop;
import core.internal.traits : Filter, Unqual;
import core.internal.traits : Filter, staticMap, TypeTuple, Unqual;

version (GNU) version = GNU_OR_LDC;
version (LDC) version = GNU_OR_LDC;
Expand All @@ -9,8 +9,8 @@ version (LDC) version = GNU_OR_LDC;
* types and operations are passed as template arguments in Reverse Polish
* Notation (RPN).

* Operands can be slices or scalar types. The unqualified element types of all
* slices must be `T`, scalar types must be implicitly convertible to `T`.
* Operands can be slices or scalar types. The element types of all
* slices and all scalar types must be implicitly convertible to `T`.
*
* Operations are encoded as strings, e.g. `"+"`, `"%"`, `"*="`. Unary
* operations are prefixed with "u", e.g. `"u-"`, `"u~"`. Only the last
Expand All @@ -27,7 +27,8 @@ version (LDC) version = GNU_OR_LDC;
*/
T[] arrayOp(T : T[], Args...)(T[] res, Filter!(isType, Args) args) @trusted @nogc pure nothrow
{
enum check = opsSupported!(true, T, Filter!(not!isType, Args)); // must support all scalar ops
alias scalarizedExp = staticMap!(toElementType, Args);
alias check = typeCheck!(true, T, scalarizedExp); // must support all scalar ops

size_t pos;
static if (vectorizeable!(T[], Args))
Expand Down Expand Up @@ -117,35 +118,86 @@ version (DigitalMars)

// mixin gen

// Check whether operations `ops` are supported for type `T`. Fails with a human-friendly static assert message, if `fail` is true.
template opsSupported(bool fail, T, ops...) if (ops.length > 1)
{
enum opsSupported = opsSupported!(fail, T, ops[0 .. $ / 2])
&& opsSupported!(fail, T, ops[$ / 2 .. $]);
}

template opsSupported(bool fail, T, string op)
/**
Check whether operations on operand types are supported. This
template recursively reduces the expression tree and determines
intermediate types.
Type checking is done here rather than in the compiler to provide more
detailed error messages.

Params:
fail = whether to fail (static assert) with a human-friendly error message
T = type of result
Args = operand types and operations in RPN
Returns:
The resulting type of the expression
See_Also:
$(LREF arrayOp)
*/
template typeCheck(bool fail, T, Args...)
{
static if (isUnaryOp(op))
enum idx = staticIndexOf!(not!isType, Args);
static if (isUnaryOp(Args[idx]))
{
enum opsSupported = is(typeof((T a) => mixin(op[1 .. $] ~ " a")));
static assert(!fail || opsSupported,
"Unary op `" ~ op[1 .. $] ~ "` not supported for element type " ~ T.stringof ~ ".");
alias UT = Args[idx - 1];
enum op = Args[idx][1 .. $];
static if (is(typeof((UT a) => mixin(op ~ " a")) RT == return))
alias typeCheck = typeCheck!(fail, T, Args[0 .. idx - 1], RT, Args[idx + 1 .. $]);
else static if (fail)
static assert(0, "Unary `" ~ op ~ "` not supported for type `" ~ UT.stringof ~ "`.");
}
else
else static if (isBinaryOp(Args[idx]))
{
enum opsSupported = is(typeof((T a, T b) => mixin("a " ~ op ~ " b")));
static assert(!fail || opsSupported,
"Binary op `" ~ op ~ "` not supported for element type " ~ T.stringof ~ ".");
alias LHT = Args[idx - 2];
alias RHT = Args[idx - 1];
enum op = Args[idx];
static if (is(typeof((LHT a, RHT b) => mixin("a " ~ op ~ " b")) RT == return))
alias typeCheck = typeCheck!(fail, T, Args[0 .. idx - 2], RT, Args[idx + 1 .. $]);
else static if (fail)
static assert(0,
"Binary `" ~ op ~ "` not supported for types `"
~ LHT.stringof ~ "` and `" ~ RHT.stringof ~ "`.");
}
else static if (Args[idx] == "=" || isBinaryAssignOp(Args[idx]))
{
alias RHT = Args[idx - 1];
enum op = Args[idx];
static if (is(T == __vector(ET[N]), ET, size_t N))
{
// no `cast(T)` before assignment for vectors
static if (is(typeof((T res, RHT b) => mixin("res " ~ op ~ " b")) RT == return)
&& // workaround https://issues.dlang.org/show_bug.cgi?id=17758
(op != "=" || is(Unqual!T == Unqual!RHT)))
alias typeCheck = typeCheck!(fail, T, Args[0 .. idx - 1], RT, Args[idx + 1 .. $]);
else static if (fail)
static assert(0,
"Binary op `" ~ op ~ "` not supported for types `"
~ T.stringof ~ "` and `" ~ RHT.stringof ~ "`.");
}
else
{
static if (is(typeof((RHT b) => mixin("cast(T) b"))))
{
static if (is(typeof((T res, T b) => mixin("res " ~ op ~ " b")) RT == return))
alias typeCheck = typeCheck!(fail, T, Args[0 .. idx - 1], RT, Args[idx + 1 .. $]);
else static if (fail)
static assert(0,
"Binary op `" ~ op ~ "` not supported for types `"
~ T.stringof ~ "` and `" ~ T.stringof ~ "`.");
}
else static if (fail)
static assert(0,
"`cast(" ~ T.stringof ~ ")` not supported for type `" ~ RHT.stringof ~ "`.");
}
}
else
static assert(0);
}
/// ditto
template typeCheck(bool fail, T, ResultType)
{
alias typeCheck = ResultType;
}

// check whether slices have the unqualified element type `E` and scalars are implicitly convertible to `E`
// i.e. filter out things like float[] = float[] / size_t[]
enum compatibleVecTypes(E, T : T[]) = is(Unqual!T == Unqual!E); // array elem types must be same (maybe add cvtpi2ps)
enum compatibleVecTypes(E, T) = is(T : E); // scalar must be convertible to target elem type
enum compatibleVecTypes(E, Types...) = compatibleVecTypes!(E, Types[0 .. $ / 2])
&& compatibleVecTypes!(E, Types[$ / 2 .. $]);

version (GNU_OR_LDC)
{
Expand All @@ -158,16 +210,23 @@ else
template vectorizeable(E : E[], Args...)
{
static if (is(vec!E))
enum vectorizeable = opsSupported!(false, vec!E, Filter!(not!isType, Args))
&& compatibleVecTypes!(E, Filter!(isType, Args));
{
// type check with vector types
enum vectorizeable = is(typeCheck!(false, vec!E, staticMap!(toVecType, Args)));
}
else
enum vectorizeable = false;
}

version (X86_64) unittest
{
pragma(msg, vectorizeable!(double[], const(double)[], double[], "+", "="));
static assert(vectorizeable!(double[], const(double)[], double[], "+", "="));
static assert(!vectorizeable!(double[], const(ulong)[], double[], "+", "="));
// Vector type are (atm.) not implicitly convertible and would require
// lots of SIMD intrinsics. Therefor leave mixed type array ops to
// GDC/LDC's auto-vectorizers.
static assert(!vectorizeable!(double[], const(uint)[], uint, "+", "="));
}
}

Expand Down Expand Up @@ -224,7 +283,7 @@ string scalarExp(Args...)()
}
else static if (isBinaryOp(arg))
{
stack[$ - 2] = "(cast(T)(" ~ stack[$ - 2] ~ " " ~ arg ~ " " ~ stack[$ - 1] ~ "))";
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The existing arrayop behavior only casts the whole RHS before assignment, but not intermediate expressions. This is fixed here.

stack[$ - 2] = "(" ~ stack[$ - 2] ~ " " ~ arg ~ " " ~ stack[$ - 1] ~ ")";
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Any way this and the following red lines can be turned to green?

Copy link
Member Author

@MartinNowak MartinNowak Aug 18, 2017

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No, it's CTFE only functions as most things in this module.

stack.length -= 1;
}
else
Expand All @@ -238,16 +297,17 @@ string scalarExp(Args...)()
// `args` to contain operand values.
string initScalarVecs(Args...)()
{
size_t scalarsIdx;
size_t scalarsIdx, argsIdx;
string res;
foreach (aidx, arg; Args)
foreach (arg; Args)
{
static if (is(arg == T[], T))
{
++argsIdx;
}
else static if (is(arg))
res ~= "immutable vec scalar" ~ scalarsIdx++.toString ~ " = args["
~ aidx.toString ~ "];\n";
~ argsIdx++.toString ~ "];\n";
}
return res;
}
Expand All @@ -259,7 +319,7 @@ string vectorExp(Args...)()
{
size_t scalarsIdx, argsIdx;
string[] stack;
foreach (i, arg; Args)
foreach (arg; Args)
{
static if (is(arg == T[], T))
stack ~= "load(&args[" ~ argsIdx++.toString ~ "][pos])";
Expand Down Expand Up @@ -302,6 +362,33 @@ template not(alias tmlp)
{
enum not(Args...) = !tmlp!Args;
}
/**
Find element in `haystack` for which `pred` is true.

Params:
    pred = the template predicate
    haystack = elements to search
Returns:
    The first index for which `pred!(haystack[index])` is true, or -1 if
    there is no such element (this includes an empty `haystack`).
*/
template staticIndexOf(alias pred, haystack...)
{
    static if (haystack.length == 0)
    {
        // Nothing (left) to search. Without this terminal case the recursion
        // below would end up indexing `haystack[0]` of an empty sequence and
        // fail to compile instead of yielding the documented -1.
        enum staticIndexOf = -1;
    }
    else static if (pred!(haystack[0]))
        enum staticIndexOf = 0;
    else
    {
        // search the tail and shift a found index by the skipped head element
        enum next = staticIndexOf!(pred, haystack[1 .. $]);
        enum staticIndexOf = next == -1 ? -1 : next + 1;
    }
}
/// maps a slice type `E[]` to `E`; any other type and any non-type
/// argument (e.g. an operation string) is passed through unchanged
template toElementType(E : E[])
{
    alias toElementType = E;
}
/// ditto
template toElementType(S)
{
    alias toElementType = S;
}
/// ditto
template toElementType(alias op)
{
    alias toElementType = op;
}
/// maps a slice type `E[]` to `vec!E` and any other type `S` to `vec!S`;
/// non-type arguments (e.g. an operation string) are passed through unchanged
template toVecType(E : E[])
{
    alias toVecType = vec!E;
}
/// ditto
template toVecType(S)
{
    alias toVecType = vec!S;
}
/// ditto
template toVecType(alias op)
{
    alias toVecType = op;
}

string toString(size_t num)
{
Expand Down Expand Up @@ -449,3 +536,41 @@ unittest
static assert(is(typeof(&arrayOp!(S2[], S2[], S2[], S2, "*", "+", "="))));
static assert(is(typeof(&arrayOp!(S2[], S2[], S2, "*", "+="))));
}

// mixed element types: computes `res[] *= a[] & 12` with a `uint` slice and
// a `uint` scalar as operands and a `float` result slice
unittest
{
    float[32] res = 2.0f;
    uint[32] a = 0xF;
    arrayOp!(float[], const(uint)[], uint, "&", "*=")(res[], a[], 12);
    // (0xF & 12) == 12 and 2.0f * 12 == 24.0f
    foreach (const v; res)
        assert(v == 24.0f);
}

// mixed types with user-defined operands: computes `res[] /= s[] + s[]`
// where the struct's `opBinary` yields a `float`
unittest
{
    static struct S
    {
        // every binary operation on `S` evaluates to 2.0f
        float opBinary(string op)(in S) @nogc const pure nothrow
        {
            return 2.0f;
        }
    }

    S[32] s;
    float[32] res = 24.0f;
    arrayOp!(float[], const(S)[], const(S)[], "+", "/=")(res[], s[], s[]);
    // 24.0f / 2.0f == 12.0f
    foreach (const v; res)
        assert(v == 12.0f);
}

// a scalar operand that follows an operation in the RPN argument list:
// computes `res[] = a[] * b[] + c`
unittest
{
    float[32] res;
    float[32] a = 2;
    float[32] b = 3;
    float c = 4;
    arrayOp!(float[], const(float)[], const(float)[], "*", float, "+", "=")(res[], a[], b[], c);
    foreach (const v; res)
        assert(v == 2 * 3 + 4);
}
20 changes: 20 additions & 0 deletions src/core/internal/traits.d
Original file line number Diff line number Diff line change
Expand Up @@ -210,3 +210,23 @@ template Filter(alias pred, TList...)
Filter!(pred, TList[$/2 .. $ ]));
}
}

// std.meta.staticMap
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Looking at the documentation for std.meta.staticMap, all it says is:

Evaluates to $(D AliasSeq!(F!(T[0]), F!(T[1]), ..., F!(T[$ - 1]))).

but that is not what this template body does.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's exactly what this template does, in fact I've copied the implementation.
We still need to rename TypeTuple to AliasSeq in this module though.

/**
Applies the template `F` to every element of `T`.

Params:
    F = template to instantiate with each element
    T = elements to map
Returns:
    `TypeTuple!(F!(T[0]), F!(T[1]), ..., F!(T[$ - 1]))`, which is an empty
    `TypeTuple` when `T` is empty.
*/
template staticMap(alias F, T...)
{
    static if (T.length > 1)
    {
        // map both halves separately and concatenate the results
        alias staticMap = TypeTuple!(staticMap!(F, T[0 .. $ / 2]), staticMap!(F, T[$ / 2 .. $]));
    }
    else static if (T.length == 1)
        alias staticMap = TypeTuple!(F!(T[0]));
    else
        alias staticMap = TypeTuple!();
}