diff --git a/src/core/simd.d b/src/core/simd.d
index 1fb9fe22cb..cd2777db06 100644
--- a/src/core/simd.d
+++ b/src/core/simd.d
@@ -384,14 +384,58 @@ version ( D_SIMD )
      * Returns:
      *      result of opcode
      */
-    pure @safe void16 __simd(XMM opcode, void16 op1, void16 op2);
+    pure @safe V1 simd(XMM opcode, V1, V2)(V1 op1, V2 op2)
+        if (is(V1 == __vector) && is(V2 == __vector))
+    {
+        pragma(inline, true);
+        return cast(V1)__simd(opcode, op1, op2); // intrinsic is declared to return void16; cast back to V1
+    }
+
+    pure @safe void16 __simd(XMM opcode, void16 op1, void16 op2); // intrinsic
+
+    ///
+    unittest
+    {
+        float4 a;
+        a = simd!(XMM.PXOR)(a, a);
+    }
 
     /**
      * Unary SIMD instructions.
      */
-    pure @safe void16 __simd(XMM opcode, void16 op1);
-    pure @safe void16 __simd(XMM opcode, double d); ///
-    pure @safe void16 __simd(XMM opcode, float f); ///
+    pure @safe V1 simd(XMM opcode, V1)(V1 op1)
+        if (is(V1 == __vector))
+    {
+        pragma(inline, true);
+        return cast(V1)__simd(opcode, op1);
+    }
+
+    /// Unary form taking a scalar double; V1 does not appear in the parameter list, so it must be passed explicitly.
+    pure @safe V1 simd(XMM opcode, V1)(double d)
+        if (is(V1 == __vector))
+    {
+        pragma(inline, true);
+        return cast(V1)__simd(opcode, d);
+    }
+
+    /// Unary form taking a scalar float; V1 does not appear in the parameter list, so it must be passed explicitly.
+    pure @safe V1 simd(XMM opcode, V1)(float f)
+        if (is(V1 == __vector))
+    {
+        pragma(inline, true);
+        return cast(V1)__simd(opcode, f);
+    }
+
+    pure @safe void16 __simd(XMM opcode, void16 op1); // intrinsic
+    pure @safe void16 __simd(XMM opcode, double d); // intrinsic
+    pure @safe void16 __simd(XMM opcode, float f); // intrinsic
+
+    ///
+    unittest
+    {
+        float4 a;
+        a = simd!(XMM.LODSS)(a);
+    }
 
     /****
      * For instructions:
@@ -408,7 +452,21 @@ version ( D_SIMD )
      * Returns:
      *      result of opcode
      */
-    pure @safe void16 __simd(XMM opcode, void16 op1, void16 op2, ubyte imm8);
+    pure @safe V1 simd(XMM opcode, ubyte imm8, V1, V2)(V1 op1, V2 op2)
+        if (is(V1 == __vector) && is(V2 == __vector))
+    {
+        pragma(inline, true);
+        return cast(V1)__simd(opcode, op1, op2, imm8); // imm8 is a template value argument forwarded to the intrinsic
+    }
+
+    pure @safe void16 __simd(XMM opcode, void16 op1, void16 op2, ubyte imm8); // intrinsic
+
+    ///
+    unittest
+    {
+        float4 a;
+        a = simd!(XMM.CMPPD, 0x7A)(a, a);
+    }
 
     /***
      * For instructions with the imm8 version:
@@ -421,7 +479,21 @@ version ( D_SIMD )
      * Returns:
      *      result of opcode
      */
-    pure @safe void16 __simd_ib(XMM opcode, void16 op1, ubyte imm8);
+    pure @safe V1 simd(XMM opcode, ubyte imm8, V1)(V1 op1)
+        if (is(V1 == __vector))
+    {
+        pragma(inline, true);
+        return cast(V1)__simd_ib(opcode, op1, imm8); // forwards to the __simd_ib intrinsic, not __simd
+    }
+
+    pure @safe void16 __simd_ib(XMM opcode, void16 op1, ubyte imm8); // intrinsic
+
+    ///
+    unittest
+    {
+        float4 a;
+        a = simd!(XMM.PSRLQ, 0x7A)(a);
+    }
 
     /*****
      * For "store" operations of the form:
@@ -430,9 +502,44 @@ version ( D_SIMD )
      *    op2
      * These cannot be marked as pure, as semantic() doesn't check them.
      */
-    @safe void16 __simd_sto(XMM opcode, void16 op1, void16 op2);
-    @safe void16 __simd_sto(XMM opcode, double op1, void16 op2); ///
-    @safe void16 __simd_sto(XMM opcode, float op1, void16 op2); ///
+    @safe V1 simd_sto(XMM opcode, V1, V2)(V1 op1, V2 op2)
+        if (is(V1 == __vector) && is(V2 == __vector))
+    {
+        pragma(inline, true);
+        return cast(V1)__simd_sto(opcode, op1, op2); // intrinsic is declared to return void16; cast back to V1
+    }
+
+    /// Store form whose first operand is a scalar double; the result has the type of op2.
+    @safe V1 simd_stod(XMM opcode, V1, V2 = V1)(double op1, V1 op2) // V2 is unused; defaulted because it cannot be deduced from the arguments
+        if (is(V1 == __vector))
+    {
+        pragma(inline, true);
+        return cast(V1)__simd_sto(opcode, op1, op2);
+    }
+
+    /// Store form whose first operand is a scalar float; the result has the type of op2.
+    @safe V1 simd_stof(XMM opcode, V1)(float op1, V1 op2)
+        if (is(V1 == __vector))
+    {
+        pragma(inline, true);
+        return cast(V1)__simd_sto(opcode, op1, op2);
+    }
+
+    @safe void16 __simd_sto(XMM opcode, void16 op1, void16 op2); // intrinsic
+    @safe void16 __simd_sto(XMM opcode, double op1, void16 op2); // intrinsic
+    @safe void16 __simd_sto(XMM opcode, float op1, void16 op2); // intrinsic
+
+    ///
+    unittest
+    {
+        void16 a;
+        float f = 1;
+        double d = 1;
+
+        cast(void)simd_sto!(XMM.STOUPS)(a, a);
+        //simd_sto!(XMM.STOUPS)(f, a);
+        //simd_sto!(XMM.STOUPS)(d, a);
+    }
 
     /* The following use overloading to ensure correct typing.
      * Compile with inlining on for best performance.
@@ -440,12 +547,12 @@ version ( D_SIMD )
 
     pure @safe short8 pcmpeq()(short8 v1, short8 v2)
     {
-        return __simd(XMM.PCMPEQW, v1, v2);
+        return cast(short8)__simd(XMM.PCMPEQW, v1, v2); // __simd is declared to return void16; cast to the short8 return type
     }
 
     pure @safe ushort8 pcmpeq()(ushort8 v1, ushort8 v2)
     {
-        return __simd(XMM.PCMPEQW, v1, v2);
+        return cast(ushort8)__simd(XMM.PCMPEQW, v1, v2); // __simd is declared to return void16; cast to the ushort8 return type
     }
 
     /*********************