diff --git a/src/coreclr/jit/assertionprop.cpp b/src/coreclr/jit/assertionprop.cpp index e042d5f09bd8f5..5e6fac95153c21 100644 --- a/src/coreclr/jit/assertionprop.cpp +++ b/src/coreclr/jit/assertionprop.cpp @@ -1145,22 +1145,66 @@ AssertionIndex Compiler::optCreateAssertion(GenTree* op1, GenTree* op2, bool equ #if defined(FEATURE_HW_INTRINSICS) case GT_CNS_VEC: { - // For now, only support SIMD constants up to 16 bytes (SIMD8/12/16). - if (!op1->TypeIs(TYP_SIMD8, TYP_SIMD12, TYP_SIMD16) || (op1->TypeGet() != op2->TypeGet())) + assert(varTypeIsSIMD(op1)); + var_types simdType = op1->TypeGet(); + + if (op2->TypeGet() != simdType) { return NO_ASSERTION_INDEX; } ValueNum op1VN = optConservativeNormalVN(op1); ValueNum op2VN = optConservativeNormalVN(op2); + if (!optLocalAssertionProp && (op1VN == ValueNumStore::NoVN || op2VN == ValueNumStore::NoVN)) { // GlobalAP requires valid VNs. return NO_ASSERTION_INDEX; } + GenTreeVecCon* vecCon = op2->AsVecCon(); + +#if defined(TARGET_XARCH) + // TYP_SIMD32/64 constants are too large to track without a heap allocation. + // + // However, there are many common constants that are effectively broadcasting + // the lowest v128 across the entire vector. By checking for and allowing this + // case through, we can provide pay-for-play support for core scenarios + // without allocating. + + if (simdType == TYP_SIMD64) + { + if (memcmp(&vecCon->gtSimdVal.v128[0], &vecCon->gtSimdVal.v128[1], sizeof(simd16_t)) != 0) + { + return NO_ASSERTION_INDEX; + } + else if (memcmp(&vecCon->gtSimdVal.v256[0], &vecCon->gtSimdVal.v256[1], sizeof(simd32_t)) != 0) + { + return NO_ASSERTION_INDEX; + } + simdType = TYP_SIMD16; + } + else if (simdType == TYP_SIMD32) + { + if (memcmp(&vecCon->gtSimdVal.v128[0], &vecCon->gtSimdVal.v128[1], sizeof(simd16_t)) != 0) + { + return NO_ASSERTION_INDEX; + } + simdType = TYP_SIMD16; + } +#elif defined(TARGET_ARM64) + if (simdType == TYP_SIMD) + { + // TODO-SVE: Handle SVE constants + return NO_ASSERTION_INDEX; + } +#endif + + // Assert we've fixed up the value to fit one of the supported storage sizes + assert((simdType == TYP_SIMD8) || (simdType == TYP_SIMD12) || (simdType == TYP_SIMD16)); + simd16_t simdVal = {}; - memcpy(&simdVal, &op2->AsVecCon()->gtSimdVal, genTypeSize(op2->TypeGet())); + memcpy(&simdVal, &vecCon->gtSimdVal, genTypeSize(simdType)); AssertionDsc dsc = AssertionDsc::CreateConstLclVarAssertion(this, lclNum, op1VN, simdVal, op2VN, equals); @@ -1871,19 +1915,28 @@ AssertionInfo Compiler::optAssertionGenJtrue(GenTree* tree) { #if defined(TARGET_XARCH) case NI_Vector128_op_Equality: + case NI_Vector256_op_Equality: + case NI_Vector512_op_Equality: #elif defined(TARGET_ARM64) case NI_Vector64_op_Equality: case NI_Vector128_op_Equality: #endif + { break; + } + #if defined(TARGET_XARCH) case NI_Vector128_op_Inequality: + case NI_Vector256_op_Inequality: + case NI_Vector512_op_Inequality: #elif defined(TARGET_ARM64) case NI_Vector64_op_Inequality: case NI_Vector128_op_Inequality: #endif + { equals = !equals; break; + } default: return NO_ASSERTION_INDEX; @@ -1902,7 +1955,7 @@ AssertionInfo Compiler::optAssertionGenJtrue(GenTree* tree) return NO_ASSERTION_INDEX; } - assert(op1->TypeIs(TYP_SIMD8, TYP_SIMD12, TYP_SIMD16)); + assert(varTypeIsSIMD(op1)); assert(op1->TypeIs(op2->TypeGet())); } else @@ -3262,16 +3315,50 @@ GenTree* Compiler::optConstantAssertionProp(const AssertionDsc& curAssertion, #if defined(FEATURE_HW_INTRINSICS) case O2K_CONST_VEC: { + assert(varTypeIsSIMD(tree)); + var_types simdType = tree->TypeGet(); + // The assertion was created from a LCL_VAR == CNS_VEC where types matched. - // For now, only support SIMD constants up to 16 bytes (SIMD8/12/16). - if (!tree->TypeIs(TYP_SIMD8, TYP_SIMD12, TYP_SIMD16) || !tree->TypeIs(lvaGetDesc(lclNum)->TypeGet())) + if (lvaGetDesc(lclNum)->TypeGet() != simdType) { return nullptr; } // We can't bash a LCL_VAR into a GenTreeVecCon (different node size), so allocate a fresh node. - GenTreeVecCon* vecCon = gtNewVconNode(tree->TypeGet()); - memcpy(&vecCon->gtSimdVal, &curAssertion.GetOp2().GetSimdConstant(), genTypeSize(tree->TypeGet())); + + GenTreeVecCon* vecCon = gtNewVconNode(simdType); + const simd16_t& simdVal = curAssertion.GetOp2().GetSimdConstant(); + +#if defined(TARGET_XARCH) + // TYP_SIMD32/64 constants are too large to track without a heap allocation. + // + // However, we support them anyways by only allowing through the cases which + // are effectively broadcasting the lowest v128 across the entire vector. + + if (simdType == TYP_SIMD64) + { + memcpy(&vecCon->gtSimdVal.v128[1], &simdVal, sizeof(simd16_t)); + memcpy(&vecCon->gtSimdVal.v128[2], &simdVal, sizeof(simd16_t)); + memcpy(&vecCon->gtSimdVal.v128[3], &simdVal, sizeof(simd16_t)); + simdType = TYP_SIMD16; + } + else if (simdType == TYP_SIMD32) + { + memcpy(&vecCon->gtSimdVal.v128[1], &simdVal, sizeof(simd16_t)); + simdType = TYP_SIMD16; + } +#elif defined(TARGET_ARM64) + if (simdType == TYP_SIMD) + { + // TODO-SVE: Handle SVE constants + unreached(); + } +#endif + + // Assert we've fixed up the value to account for one of the supported storage sizes + assert((simdType == TYP_SIMD8) || (simdType == TYP_SIMD12) || (simdType == TYP_SIMD16)); + + memcpy(&vecCon->gtSimdVal, &simdVal, genTypeSize(simdType)); newTree = vecCon; break; } diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h index 1227434b3a2408..738454389c0fad 100644 --- a/src/coreclr/jit/compiler.h +++ b/src/coreclr/jit/compiler.h @@ -8044,13 +8044,19 @@ class Compiler unsigned m_lclNum; double m_dconVal; IntegralRange m_range; - simd16_t m_simdVal; // for O2K_CONST_VEC (TYP_SIMD8/12/16 only). TODO-CQ: support wider SIMD via heap - // allocation. struct { ssize_t m_iconVal; FieldSeq* m_fieldSeq; } m_icon; + + // O2K_CONST_VEC: This only allows storing TYP_SIMD8/12/16 but + // we still support common cases for TYP_SIMD32/64 by presuming + // that the value is a broadcast. We could fully support other + // sizes in the future by adding m_encodedVconFlags and tracking + // whether a different heap allocated value was used or other + // special cases like Indices or Sequences + simd16_t m_simdVal; }; public: