From 5a17ad5d4fc0a9922124bf6e459f2d54d3a87804 Mon Sep 17 00:00:00 2001 From: Tanner Gooding Date: Wed, 17 Jan 2018 07:34:00 -0800 Subject: [PATCH 1/4] Fixing some bad merge conflicts in the `emitIns_R_A_I`, `emitIns_R_C_I`, and `emitIns_R_S_I` methods --- src/jit/emitxarch.cpp | 29 +++++++++++++---------------- 1 file changed, 13 insertions(+), 16 deletions(-) diff --git a/src/jit/emitxarch.cpp b/src/jit/emitxarch.cpp index 3f554f4b6e96..f55226fde6e7 100644 --- a/src/jit/emitxarch.cpp +++ b/src/jit/emitxarch.cpp @@ -3929,9 +3929,7 @@ void emitter::emitIns_R_A(instruction ins, emitAttr attr, regNumber reg1, GenTre void emitter::emitIns_R_A_I(instruction ins, emitAttr attr, regNumber reg1, GenTreeIndir* indir, int ival) { noway_assert(emitVerifyEncodable(ins, EA_SIZE(attr), reg1)); - assert(IsSSEOrAVXInstruction(ins)); - assert(IsThreeOperandAVXInstruction(ins)); ssize_t offs = indir->Offset(); instrDesc* id = emitNewInstrAmdCns(attr, offs, ival); @@ -3939,7 +3937,7 @@ void emitter::emitIns_R_A_I(instruction ins, emitAttr attr, regNumber reg1, GenT id->idIns(ins); id->idReg1(reg1); - emitHandleMemOp(indir, id, IF_RWR_ARD_CNS, ins); + emitHandleMemOp(indir, id, IF_RRW_ARD_CNS, ins); // Plus one for the 1-byte immediate (ival) UNATIVE_OFFSET sz = emitInsSizeAM(id, insCodeRM(ins)) + emitGetVexPrefixAdjustedSize(ins, attr, insCodeRM(ins)) + 1; @@ -3952,22 +3950,20 @@ void emitter::emitIns_R_A_I(instruction ins, emitAttr attr, regNumber reg1, GenT void emitter::emitIns_R_C_I( instruction ins, emitAttr attr, regNumber reg1, CORINFO_FIELD_HANDLE fldHnd, int offs, int ival) { - noway_assert(emitVerifyEncodable(ins, EA_SIZE(attr), reg1)); - - assert(IsSSEOrAVXInstruction(ins)); - assert(IsThreeOperandAVXInstruction(ins)); - // Static always need relocs if (!jitStaticFldIsGlobAddr(fldHnd)) { attr = EA_SET_FLG(attr, EA_DSP_RELOC_FLG); } + noway_assert(emitVerifyEncodable(ins, EA_SIZE(attr), reg1)); + assert(IsSSEOrAVXInstruction(ins)); + instrDesc* id = emitNewInstrCnsDsp(attr, ival, offs); UNATIVE_OFFSET sz = emitInsSizeCV(id, insCodeRM(ins)) + emitGetVexPrefixAdjustedSize(ins, attr, insCodeRM(ins)) + 1; id->idIns(ins); - id->idInsFmt(IF_RWR_RRD_MRD_CNS); + id->idInsFmt(IF_RRW_MRD_CNS); id->idReg1(reg1); id->idAddr()->iiaFieldHnd = fldHnd; @@ -3979,16 +3975,14 @@ void emitter::emitIns_R_C_I( void emitter::emitIns_R_S_I(instruction ins, emitAttr attr, regNumber reg1, int varx, int offs, int ival) { noway_assert(emitVerifyEncodable(ins, EA_SIZE(attr), reg1)); - assert(IsSSEOrAVXInstruction(ins)); - assert(IsThreeOperandAVXInstruction(ins)); instrDesc* id = emitNewInstrCns(attr, ival); UNATIVE_OFFSET sz = emitInsSizeSV(insCodeRM(ins), varx, offs) + emitGetVexPrefixAdjustedSize(ins, attr, insCodeRM(ins)) + 1; id->idIns(ins); - id->idInsFmt(IF_RWR_RRD_SRD_CNS); + id->idInsFmt(IF_RRW_SRD_CNS); id->idReg1(reg1); id->idAddr()->iiaLclVar.initLclVarAddr(varx, offs); @@ -5190,7 +5184,7 @@ void emitter::emitIns_SIMD_R_R_A( { emitIns_R_R(INS_movaps, emitTypeSize(simdtype), reg, reg1); } - emitIns_R_A(ins, emitTypeSize(simdtype), reg, indir, IF_RWR_ARD); + emitIns_R_A(ins, emitTypeSize(simdtype), reg, indir, IF_RRW_ARD); } } @@ -7341,6 +7335,7 @@ void emitter::emitDispIns( emitDispAddrMode(id); break; + case IF_RRW_ARD_CNS: case IF_RWR_ARD_CNS: { printf("%s, %s", emitRegName(id->idReg1(), attr), sstr); @@ -7531,6 +7526,7 @@ void emitter::emitDispIns( break; + case IF_RRW_SRD_CNS: case IF_RWR_SRD_CNS: { printf("%s, %s", emitRegName(id->idReg1(), attr), sstr); @@ -7707,6 +7703,7 @@ void emitter::emitDispIns( emitDispClsVar(id->idAddr()->iiaFieldHnd, offs, ID_INFO_DSP_RELOC); break; + case IF_RRW_MRD_CNS: case IF_RWR_MRD_CNS: { printf("%s, %s", emitRegName(id->idReg1(), attr), sstr); @@ -11750,6 +11747,7 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp) break; case IF_RRW_ARD_CNS: + case IF_RWR_ARD_CNS: emitGetInsAmdCns(id, &cnsVal); code = insCodeRM(ins); @@ -11779,7 +11777,6 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp) sz = emitSizeOfInsDsc(id); break; - case IF_RWR_ARD_CNS: case IF_RWR_RRD_ARD_CNS: { emitGetInsAmdCns(id, &cnsVal); @@ -11876,6 +11873,7 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp) break; case IF_RRW_SRD_CNS: + case IF_RWR_SRD_CNS: emitGetInsCns(id, &cnsVal); code = insCodeRM(ins); @@ -11941,7 +11939,6 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp) } break; - case IF_RWR_SRD_CNS: case IF_RWR_RRD_SRD_CNS: { emitGetInsCns(id, &cnsVal); @@ -12021,6 +12018,7 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp) break; case IF_RRW_MRD_CNS: + case IF_RWR_MRD_CNS: emitGetInsDcmCns(id, &cnsVal); code = insCodeRM(ins); @@ -12085,7 +12083,6 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp) sz = emitSizeOfInsDsc(id); break; - case IF_RWR_MRD_CNS: case IF_RWR_RRD_MRD_CNS: { emitGetInsCns(id, &cnsVal); From ff39b0f050378d935304db36cb0060730457acab Mon Sep 17 00:00:00 2001 From: Tanner Gooding Date: Wed, 17 Jan 2018 07:51:43 -0800 Subject: [PATCH 2/4] Fixing the `LoadAlignedVector128` HWIntrinsic test to ensure that we always read from an aligned address. --- .../X86/Sse/LoadAlignedVector128.cs | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/tests/src/JIT/HardwareIntrinsics/X86/Sse/LoadAlignedVector128.cs b/tests/src/JIT/HardwareIntrinsics/X86/Sse/LoadAlignedVector128.cs index fdaf40ac1923..eaf69099410c 100644 --- a/tests/src/JIT/HardwareIntrinsics/X86/Sse/LoadAlignedVector128.cs +++ b/tests/src/JIT/HardwareIntrinsics/X86/Sse/LoadAlignedVector128.cs @@ -22,7 +22,8 @@ static unsafe int Main(string[] args) if (Sse.IsSupported) { - float* inArray = stackalloc float[4]; + byte* inBuffer = stackalloc byte[32]; + float* inArray = Align(inBuffer, 16); float* outArray = stackalloc float[4]; var vf = Sse.LoadAlignedVector128(inArray); @@ -47,5 +48,15 @@ static unsafe int Main(string[] args) return testResult; } + + static unsafe float* Align(byte* buffer, byte expectedAlignment) + { + // Compute how bad the misalignment is, which is at most (expectedAlignment - 1). + // Then subtract that from the expectedAlignment and add it to the original address + // to compute the aligned address. + + var misalignment = expectedAlignment - ((ulong)(buffer) % expectedAlignment); + return (float*)(buffer + misalignment); + } } } From d9aa56fcc3901f2fe2c4fb9a6817cb3dcf260651 Mon Sep 17 00:00:00 2001 From: Tanner Gooding Date: Wed, 17 Jan 2018 16:23:34 -0800 Subject: [PATCH 3/4] Disabling the Math.Round, Math.Floor, and Math.Ceiling intrinsics on non-AVX machines --- src/jit/importer.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/jit/importer.cpp b/src/jit/importer.cpp index 5c7144d51d50..b81406e0515c 100644 --- a/src/jit/importer.cpp +++ b/src/jit/importer.cpp @@ -19127,7 +19127,8 @@ bool Compiler::IsTargetIntrinsic(CorInfoIntrinsics intrinsicId) case CORINFO_INTRINSIC_Round: case CORINFO_INTRINSIC_Ceiling: case CORINFO_INTRINSIC_Floor: - return compSupports(InstructionSet_SSE41); + // TODO-XArch-CQ: Update to work on non-AVX machines: https://github.com/dotnet/coreclr/issues/15908 + return compSupports(InstructionSet_SSE41) && canUseVexEncoding(); default: return false; From f9a985db166ec393dc4683ecaaf083d370ae8c0a Mon Sep 17 00:00:00 2001 From: Tanner Gooding Date: Wed, 17 Jan 2018 23:53:10 -0800 Subject: [PATCH 4/4] Updating TYP_SIMD locals to no longer undergo struct promotion for HWIntrinsic nodes. --- src/jit/gentree.cpp | 45 ++++++++++++++++++++++++++++++++++----------- 1 file changed, 34 insertions(+), 11 deletions(-) diff --git a/src/jit/gentree.cpp b/src/jit/gentree.cpp index 266870fbe092..9fd4acfb2e24 100644 --- a/src/jit/gentree.cpp +++ b/src/jit/gentree.cpp @@ -7319,7 +7319,11 @@ void Compiler::gtBlockOpInit(GenTreePtr result, GenTreePtr dst, GenTreePtr srcOr { src = src->AsIndir()->Addr()->gtGetOp1(); } +#ifdef FEATURE_HW_INTRINSICS + if ((src->OperGet() == GT_SIMD) || (src->OperGet() == GT_HWIntrinsic)) +#else if (src->OperGet() == GT_SIMD) +#endif // FEATURE_HW_INTRINSICS { if (dst->OperIsBlk() && (dst->AsIndir()->Addr()->OperGet() == GT_ADDR)) { @@ -17850,10 +17854,7 @@ GenTreeSIMD* Compiler::gtNewSIMDNode( { assert(op1 != nullptr); SetOpLclRelatedToSIMDIntrinsic(op1); - if (op2 != nullptr) - { - SetOpLclRelatedToSIMDIntrinsic(op2); - } + SetOpLclRelatedToSIMDIntrinsic(op2); return new (this, GT_SIMD) GenTreeSIMD(type, op1, op2, simdIntrinsicID, baseType, size); } @@ -17867,14 +17868,17 @@ GenTreeSIMD* Compiler::gtNewSIMDNode( // void Compiler::SetOpLclRelatedToSIMDIntrinsic(GenTreePtr op) { - if (op->OperIsLocal()) - { - setLclRelatedToSIMDIntrinsic(op); - } - else if ((op->OperGet() == GT_OBJ) && (op->gtOp.gtOp1->OperGet() == GT_ADDR) && - op->gtOp.gtOp1->gtOp.gtOp1->OperIsLocal()) + if (op != nullptr) { - setLclRelatedToSIMDIntrinsic(op->gtOp.gtOp1->gtOp.gtOp1); + if (op->OperIsLocal()) + { + setLclRelatedToSIMDIntrinsic(op); + } + else if ((op->OperGet() == GT_OBJ) && (op->gtOp.gtOp1->OperGet() == GT_ADDR) && + op->gtOp.gtOp1->gtOp.gtOp1->OperIsLocal()) + { + setLclRelatedToSIMDIntrinsic(op->gtOp.gtOp1->gtOp.gtOp1); + } } } @@ -17912,12 +17916,17 @@ GenTreeHWIntrinsic* Compiler::gtNewSimdHWIntrinsicNode(var_types type, GenTreeHWIntrinsic* Compiler::gtNewSimdHWIntrinsicNode( var_types type, GenTree* op1, NamedIntrinsic hwIntrinsicID, var_types baseType, unsigned size) { + SetOpLclRelatedToSIMDIntrinsic(op1); + return new (this, GT_HWIntrinsic) GenTreeHWIntrinsic(type, op1, hwIntrinsicID, baseType, size); } GenTreeHWIntrinsic* Compiler::gtNewSimdHWIntrinsicNode( var_types type, GenTree* op1, GenTree* op2, NamedIntrinsic hwIntrinsicID, var_types baseType, unsigned size) { + SetOpLclRelatedToSIMDIntrinsic(op1); + SetOpLclRelatedToSIMDIntrinsic(op2); + return new (this, GT_HWIntrinsic) GenTreeHWIntrinsic(type, op1, op2, hwIntrinsicID, baseType, size); } @@ -17929,6 +17938,10 @@ GenTreeHWIntrinsic* Compiler::gtNewSimdHWIntrinsicNode(var_types type, var_types baseType, unsigned size) { + SetOpLclRelatedToSIMDIntrinsic(op1); + SetOpLclRelatedToSIMDIntrinsic(op2); + SetOpLclRelatedToSIMDIntrinsic(op3); + return new (this, GT_HWIntrinsic) GenTreeHWIntrinsic(type, gtNewArgList(op1, op2, op3), hwIntrinsicID, baseType, size); } @@ -17942,12 +17955,19 @@ GenTreeHWIntrinsic* Compiler::gtNewSimdHWIntrinsicNode(var_types type, var_types baseType, unsigned size) { + SetOpLclRelatedToSIMDIntrinsic(op1); + SetOpLclRelatedToSIMDIntrinsic(op2); + SetOpLclRelatedToSIMDIntrinsic(op3); + SetOpLclRelatedToSIMDIntrinsic(op4); + return new (this, GT_HWIntrinsic) GenTreeHWIntrinsic(type, gtNewArgList(op1, op2, op3, op4), hwIntrinsicID, baseType, size); } GenTreeHWIntrinsic* Compiler::gtNewScalarHWIntrinsicNode(var_types type, GenTree* op1, NamedIntrinsic hwIntrinsicID) { + SetOpLclRelatedToSIMDIntrinsic(op1); + return new (this, GT_HWIntrinsic) GenTreeHWIntrinsic(type, op1, hwIntrinsicID, TYP_UNKNOWN, 0); } @@ -17956,6 +17976,9 @@ GenTreeHWIntrinsic* Compiler::gtNewScalarHWIntrinsicNode(var_types type, GenTree* op2, NamedIntrinsic hwIntrinsicID) { + SetOpLclRelatedToSIMDIntrinsic(op1); + SetOpLclRelatedToSIMDIntrinsic(op2); + return new (this, GT_HWIntrinsic) GenTreeHWIntrinsic(type, op1, op2, hwIntrinsicID, TYP_UNKNOWN, 0); }