diff --git a/src/coreclr/jit/rationalize.cpp b/src/coreclr/jit/rationalize.cpp index e32a1b13ef6b52..d0de421541dcbc 100644 --- a/src/coreclr/jit/rationalize.cpp +++ b/src/coreclr/jit/rationalize.cpp @@ -638,7 +638,8 @@ void Rationalizer::RewriteHWIntrinsicBlendv(GenTree** use, Compiler::GenTreeStac return; } - GenTree* op2 = node->Op(2); + GenTree* op2 = node->Op(2); + GenTree*& op3 = node->Op(3); // We're in the post-order visit and are traversing in execution order, so // everything between op2 and node will have already been rewritten to LIR @@ -648,7 +649,47 @@ void Rationalizer::RewriteHWIntrinsicBlendv(GenTree** use, Compiler::GenTreeStac // variant SideEffectSet scratchSideEffects; - if (scratchSideEffects.IsLirInvariantInRange(m_compiler, op2, node)) + // If the mask was originally a vector, we don't want to create a mask solely for + // the purpose of embedding it. vpmov*2m is relatively costly compared to blendvp*. + if (op3->OperIsConvertVectorToMask()) + { + // The non-mask blend instructions only come in byte (pblendvb) or floating + // (blendvp[sd]) forms. We can use the byte variant as long as we have a + // per-element mask, or we can simply use the equivalent-sized floating type. + GenTree* maskVector = op3->AsHWIntrinsic()->Op(1); + + if (!maskVector->IsVectorPerElementMask(simdBaseType, simdSize)) + { + switch (simdBaseType) + { + case TYP_SHORT: + case TYP_USHORT: + { + return; + } + + case TYP_INT: + case TYP_UINT: + { + simdBaseType = TYP_FLOAT; + break; + } + + case TYP_LONG: + case TYP_ULONG: + { + simdBaseType = TYP_DOUBLE; + break; + } + + default: + { + break; + } + } + } + } + else if (scratchSideEffects.IsLirInvariantInRange(m_compiler, op2, node)) { unsigned tgtMaskSize = simdSize / genTypeSize(simdBaseType); var_types tgtSimdBaseType = TYP_UNDEF; @@ -667,8 +708,6 @@ void Rationalizer::RewriteHWIntrinsicBlendv(GenTree** use, Compiler::GenTreeStac } } - GenTree*& op3 = node->Op(3); - if (!ShouldRewriteToNonMaskHWIntrinsic(op3)) { return; @@ -694,6 +733,7 @@ void Rationalizer::RewriteHWIntrinsicBlendv(GenTree** use, Compiler::GenTreeStac intrinsic = NI_X86Base_BlendVariable; } + node->SetSimdBaseType(simdBaseType); node->ChangeHWIntrinsicId(intrinsic); } diff --git a/src/tests/JIT/Regression/JitBlue/Runtime_127260/Runtime_127260.cs b/src/tests/JIT/Regression/JitBlue/Runtime_127260/Runtime_127260.cs new file mode 100644 index 00000000000000..4edb8b913c7c4a --- /dev/null +++ b/src/tests/JIT/Regression/JitBlue/Runtime_127260/Runtime_127260.cs @@ -0,0 +1,148 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +using System.Runtime.CompilerServices; +using System.Runtime.Intrinsics; +using System.Runtime.Intrinsics.X86; +using Xunit; + +public class Runtime_127260 +{ + [ConditionalFact(typeof(Sse41), nameof(Sse41.IsSupported))] + public static void TestBlendVariable() + { + Assert.Equal(Vector128.Zero, + BlendVariableSse41Single(Vector128.Create(-1.0f), Vector128.Zero, Vector128.Create(-0.0f))); + + Assert.Equal(Vector128.Zero, + BlendVariableSse41Double(Vector128.Create(-1.0), Vector128.Zero, Vector128.Create(-0.0))); + + Assert.Equal(Vector128.Zero, + BlendVariableSse41Int8(Vector128.Create(-1), Vector128.Zero, Vector128.Create(sbyte.MinValue))); + + Assert.Equal(Vector128.Create(0x00FF), + BlendVariableSse41Int16(Vector128.Create(-1), Vector128.Zero, Vector128.Create(short.MinValue))); + + Assert.Equal(Vector128.Create(0x00FFFFFF), + BlendVariableSse41Int32(Vector128.Create(-1), Vector128.Zero, Vector128.Create(int.MinValue))); + + Assert.Equal(Vector128.Create(0x00FFFFFF_FFFFFFFF), + BlendVariableSse41Int64(Vector128.Create(-1), Vector128.Zero, Vector128.Create(long.MinValue))); + } + + [ConditionalFact(typeof(Avx512BW.VL), nameof(Avx512BW.VL.IsSupported))] + public static void TestBlendVariableMask() + { + Assert.Equal(Vector128.Zero, + BlendVariableAvx512Single(Vector128.Create(-1.0f), Vector128.Zero, Vector128.Create(-0.0f))); + + Assert.Equal(Vector128.Zero, + BlendVariableAvx512Double(Vector128.Create(-1.0), Vector128.Zero, Vector128.Create(-0.0))); + + Assert.Equal(Vector128.Zero, + BlendVariableAvx512Int8(Vector128.Create(-1), Vector128.Zero, Vector128.Create(sbyte.MinValue))); + + Assert.Equal(Vector128.Zero, + BlendVariableAvx512Int16(Vector128.Create(-1), Vector128.Zero, Vector128.Create(short.MinValue))); + + Assert.Equal(Vector128.Zero, + BlendVariableAvx512Int32(Vector128.Create(-1), Vector128.Zero, Vector128.Create(int.MinValue))); + + Assert.Equal(Vector128.Zero, + BlendVariableAvx512Int64(Vector128.Create(-1), Vector128.Zero, Vector128.Create(long.MinValue))); + } + + [ConditionalFact(typeof(Avx512BW.VL), nameof(Avx512BW.VL.IsSupported))] + public static void TestContainableMask() + { + Assert.Equal(Vector128.Zero, + AddToNegativeSingle(Vector128.Create(-1.0f), Vector128.One)); + + Assert.Equal(Vector128.Zero, + AddToNegativeDouble(Vector128.Create(-1.0), Vector128.One)); + + Assert.Equal(Vector128.Zero, + AddToNegativeInt8(Vector128.Create(-1), Vector128.One)); + + Assert.Equal(Vector128.Zero, + AddToNegativeInt16(Vector128.Create(-1), Vector128.One)); + + Assert.Equal(Vector128.Zero, + AddToNegativeInt32(Vector128.Create(-1), Vector128.One)); + + Assert.Equal(Vector128.Zero, + AddToNegativeInt64(Vector128.Create(-1), Vector128.One)); + } + + [MethodImpl(MethodImplOptions.NoInlining)] + static Vector128 BlendVariableSse41Single(Vector128 left, Vector128 right, Vector128 mask) + => Sse41.BlendVariable(left, right, mask); + + [MethodImpl(MethodImplOptions.NoInlining)] + static Vector128 BlendVariableSse41Double(Vector128 left, Vector128 right, Vector128 mask) + => Sse41.BlendVariable(left, right, mask); + + [MethodImpl(MethodImplOptions.NoInlining)] + static Vector128 BlendVariableSse41Int8(Vector128 left, Vector128 right, Vector128 mask) + => Sse41.BlendVariable(left, right, mask); + + [MethodImpl(MethodImplOptions.NoInlining)] + static Vector128 BlendVariableSse41Int16(Vector128 left, Vector128 right, Vector128 mask) + => Sse41.BlendVariable(left, right, mask); + + [MethodImpl(MethodImplOptions.NoInlining)] + static Vector128 BlendVariableSse41Int32(Vector128 left, Vector128 right, Vector128 mask) + => Sse41.BlendVariable(left, right, mask); + + [MethodImpl(MethodImplOptions.NoInlining)] + static Vector128 BlendVariableSse41Int64(Vector128 left, Vector128 right, Vector128 mask) + => Sse41.BlendVariable(left, right, mask); + + [MethodImpl(MethodImplOptions.NoInlining)] + static Vector128 BlendVariableAvx512Single(Vector128 left, Vector128 right, Vector128 mask) + => Avx512F.VL.BlendVariable(left, right, mask); + + [MethodImpl(MethodImplOptions.NoInlining)] + static Vector128 BlendVariableAvx512Double(Vector128 left, Vector128 right, Vector128 mask) + => Avx512F.VL.BlendVariable(left, right, mask); + + [MethodImpl(MethodImplOptions.NoInlining)] + static Vector128 BlendVariableAvx512Int8(Vector128 left, Vector128 right, Vector128 mask) + => Avx512BW.VL.BlendVariable(left, right, mask); + + [MethodImpl(MethodImplOptions.NoInlining)] + static Vector128 BlendVariableAvx512Int16(Vector128 left, Vector128 right, Vector128 mask) + => Avx512BW.VL.BlendVariable(left, right, mask); + + [MethodImpl(MethodImplOptions.NoInlining)] + static Vector128 BlendVariableAvx512Int32(Vector128 left, Vector128 right, Vector128 mask) + => Avx512F.VL.BlendVariable(left, right, mask); + + [MethodImpl(MethodImplOptions.NoInlining)] + static Vector128 BlendVariableAvx512Int64(Vector128 left, Vector128 right, Vector128 mask) + => Avx512F.VL.BlendVariable(left, right, mask); + + [MethodImpl(MethodImplOptions.NoInlining)] + static Vector128 AddToNegativeSingle(Vector128 left, Vector128 right) + => Sse41.BlendVariable(left, left + right, left); + + [MethodImpl(MethodImplOptions.NoInlining)] + static Vector128 AddToNegativeDouble(Vector128 left, Vector128 right) + => Sse41.BlendVariable(left, left + right, left); + + [MethodImpl(MethodImplOptions.NoInlining)] + static Vector128 AddToNegativeInt8(Vector128 left, Vector128 right) + => Sse41.BlendVariable(left, left + right, left); + + [MethodImpl(MethodImplOptions.NoInlining)] + static Vector128 AddToNegativeInt16(Vector128 left, Vector128 right) + => Avx512BW.VL.BlendVariable(left, left + right, left); + + [MethodImpl(MethodImplOptions.NoInlining)] + static Vector128 AddToNegativeInt32(Vector128 left, Vector128 right) + => Avx512F.VL.BlendVariable(left, left + right, left); + + [MethodImpl(MethodImplOptions.NoInlining)] + static Vector128 AddToNegativeInt64(Vector128 left, Vector128 right) + => Avx512F.VL.BlendVariable(left, left + right, left); +} \ No newline at end of file diff --git a/src/tests/JIT/Regression/Regression_ro_2.csproj b/src/tests/JIT/Regression/Regression_ro_2.csproj index 5f898c2b921537..04778781d23124 100644 --- a/src/tests/JIT/Regression/Regression_ro_2.csproj +++ b/src/tests/JIT/Regression/Regression_ro_2.csproj @@ -95,6 +95,7 @@ +