From 8225ec354bdcbf6026c084dc58ae05e3de9183af Mon Sep 17 00:00:00 2001 From: Tanner Gooding Date: Tue, 9 Jan 2018 09:51:40 -0800 Subject: [PATCH 1/3] Adding basic containment support to the x86 HWIntrinsics --- src/jit/codegenlinear.h | 1 + src/jit/emitfmtsxarch.h | 5 + src/jit/emitxarch.cpp | 227 +++++++++++++++++++++++++--- src/jit/emitxarch.h | 28 ++++ src/jit/hwintrinsiccodegenxarch.cpp | 174 ++++++++++++++------- src/jit/lowerxarch.cpp | 31 +++- 6 files changed, 388 insertions(+), 78 deletions(-) diff --git a/src/jit/codegenlinear.h b/src/jit/codegenlinear.h index 5804fa844521..91ee30ca75dd 100644 --- a/src/jit/codegenlinear.h +++ b/src/jit/codegenlinear.h @@ -116,6 +116,7 @@ void genPutArgStkSIMD12(GenTree* treeNode); #if FEATURE_HW_INTRINSICS && defined(_TARGET_XARCH_) void genHWIntrinsic(GenTreeHWIntrinsic* node); +void genHWIntrinsic_R_R_RM(GenTreeHWIntrinsic* node, instruction ins); void genSSEIntrinsic(GenTreeHWIntrinsic* node); void genSSE2Intrinsic(GenTreeHWIntrinsic* node); void genSSE3Intrinsic(GenTreeHWIntrinsic* node); diff --git a/src/jit/emitfmtsxarch.h b/src/jit/emitfmtsxarch.h index 6d15fcf22f44..f00e64fb2030 100644 --- a/src/jit/emitfmtsxarch.h +++ b/src/jit/emitfmtsxarch.h @@ -123,6 +123,7 @@ IF_DEF(RRD_MRD, IS_GM_RD|IS_R1_RD, DSP) // read reg , read [ IF_DEF(RWR_MRD, IS_GM_RD|IS_R1_WR, DSP) // write reg , read [mem] IF_DEF(RRW_MRD, IS_GM_RD|IS_R1_RW, DSP) // r/w reg , read [mem] +IF_DEF(RWR_RRD_MRD, IS_GM_RD|IS_R1_WR|IS_R2_RD, DSP) // write reg , read reg2 , read [mem] IF_DEF(RWR_MRD_OFF, IS_GM_RD|IS_R1_WR, DSP) // write reg , offset mem IF_DEF(MRD_RRD, IS_GM_RD|IS_R1_RD, DSP) // read [mem], read reg @@ -147,6 +148,8 @@ IF_DEF(RRD_SRD, IS_SF_RD|IS_R1_RD, NONE) // read reg , read [ IF_DEF(RWR_SRD, IS_SF_RD|IS_R1_WR, NONE) // write reg , read [stk] IF_DEF(RRW_SRD, IS_SF_RD|IS_R1_RW, NONE) // r/w reg , read [stk] +IF_DEF(RWR_RRD_SRD, IS_SF_RD|IS_R1_WR|IS_R2_RD, NONE) // write reg , read reg2, read [stk] + IF_DEF(SRD_RRD, IS_SF_RD|IS_R1_RD, NONE) // read [stk], read reg IF_DEF(SWR_RRD, IS_SF_WR|IS_R1_RD, NONE) // write [stk], read reg IF_DEF(SRW_RRD, IS_SF_RW|IS_R1_RD, NONE) // r/w [stk], read reg @@ -170,6 +173,8 @@ IF_DEF(RRD_ARD, IS_AM_RD|IS_R1_RD, AMD ) // read reg , read [ IF_DEF(RWR_ARD, IS_AM_RD|IS_R1_WR, AMD ) // write reg , read [adr] IF_DEF(RRW_ARD, IS_AM_RD|IS_R1_RW, AMD ) // r/w reg , read [adr] +IF_DEF(RWR_RRD_ARD, IS_AM_RD|IS_R1_WR|IS_R2_RD, AMD ) // write reg , read reg2, read [adr] + IF_DEF(ARD_RRD, IS_AM_RD|IS_R1_RD, AMD ) // read [adr], read reg IF_DEF(AWR_RRD, IS_AM_WR|IS_R1_RD, AMD ) // write [adr], read reg IF_DEF(ARW_RRD, IS_AM_RW|IS_R1_RD, AMD ) // r/w [adr], read reg diff --git a/src/jit/emitxarch.cpp b/src/jit/emitxarch.cpp index 80da26d4bf54..0bef8610c927 100644 --- a/src/jit/emitxarch.cpp +++ b/src/jit/emitxarch.cpp @@ -1998,8 +1998,8 @@ UNATIVE_OFFSET emitter::emitInsSizeAM(instrDesc* id, code_t code) // BT supports 16 bit operands and this code doesn't handle the necessary 66 prefix. assert(ins != INS_bt); - assert((attrSize == EA_4BYTE) || (attrSize == EA_PTRSIZE) // Only for x64 - || (attrSize == EA_16BYTE) // only for x64 + assert((attrSize == EA_4BYTE) || (attrSize == EA_PTRSIZE) // Only for x64 + || (attrSize == EA_16BYTE) || (attrSize == EA_32BYTE) // only for x64 || (ins == INS_movzx) || (ins == INS_movsx)); size = 3; } @@ -2588,6 +2588,8 @@ emitter::insFormat emitter::emitMapFmtAtoM(insFormat fmt) return IF_RWR_MRD; case IF_RRW_ARD: return IF_RRW_MRD; + case IF_RWR_RRD_ARD: + return IF_RWR_RRD_MRD; case IF_ARD_RRD: return IF_MRD_RRD; @@ -3889,6 +3891,82 @@ void emitter::emitIns_R_R_I(instruction ins, emitAttr attr, regNumber reg1, regN emitCurIGsize += sz; } +void emitter::emitIns_R_A( + instruction ins, emitAttr attr, regNumber reg1, regNumber baseReg, regNumber indxReg, size_t scale, ssize_t offs) +{ + instrDesc* id = emitNewInstrAmd(attr, offs); + id->idIns(ins); + id->idInsFmt(IF_RRW_ARD); + id->idReg1(reg1); + id->idAddr()->iiaAddrMode.amBaseReg = baseReg; + id->idAddr()->iiaAddrMode.amIndxReg = indxReg; + id->idAddr()->iiaAddrMode.amScale = emitEncodeScale(scale); + + assert(emitGetInsAmdAny(id) == offs); + + UNATIVE_OFFSET sz = emitInsSizeAM(id, insCodeRM(ins)); + id->idCodeSize(sz); + + dispIns(id); + emitCurIGsize += sz; +} + +void emitter::emitIns_R_R_A(instruction ins, + emitAttr attr, + regNumber reg1, + regNumber reg2, + regNumber baseReg, + regNumber indxReg, + size_t scale, + ssize_t offs) +{ + assert(IsSSEOrAVXInstruction(ins)); + assert(IsThreeOperandAVXInstruction(ins)); + + instrDesc* id = emitNewInstrAmd(attr, offs); + UNATIVE_OFFSET sz = emitInsSizeAM(id, insCodeRM(ins)) + emitGetVexPrefixAdjustedSize(ins, attr, insCodeRM(ins)); + + id->idIns(ins); + id->idInsFmt(IF_RWR_RRD_ARD); + id->idReg1(reg1); + id->idReg2(reg2); + id->idAddr()->iiaAddrMode.amBaseReg = baseReg; + id->idAddr()->iiaAddrMode.amIndxReg = indxReg; + id->idAddr()->iiaAddrMode.amScale = emitEncodeScale(scale); + + assert(emitGetInsAmdAny(id) == offs); + + id->idCodeSize(sz); + dispIns(id); + emitCurIGsize += sz; +} + +void emitter::emitIns_R_R_C( + instruction ins, emitAttr attr, regNumber reg1, regNumber reg2, CORINFO_FIELD_HANDLE fldHnd, int offs) +{ + assert(IsSSEOrAVXInstruction(ins)); + assert(IsThreeOperandAVXInstruction(ins)); + + // Static always need relocs + if (!jitStaticFldIsGlobAddr(fldHnd)) + { + attr = EA_SET_FLG(attr, EA_DSP_RELOC_FLG); + } + + instrDesc* id = emitNewInstrDsp(attr, offs); + UNATIVE_OFFSET sz = emitInsSizeCV(id, insCodeRM(ins)) + emitGetVexPrefixAdjustedSize(ins, attr, insCodeRM(ins)); + + id->idIns(ins); + id->idInsFmt(IF_RWR_RRD_MRD); + id->idReg1(reg1); + id->idReg2(reg2); + id->idAddr()->iiaFieldHnd = fldHnd; + + id->idCodeSize(sz); + dispIns(id); + emitCurIGsize += sz; +} + /***************************************************************************** * * Add an instruction with three register operands. @@ -3915,6 +3993,30 @@ void emitter::emitIns_R_R_R(instruction ins, emitAttr attr, regNumber targetReg, emitCurIGsize += sz; } +void emitter::emitIns_R_R_S(instruction ins, emitAttr attr, regNumber reg1, regNumber reg2, int varx, int offs) +{ + assert(IsSSEOrAVXInstruction(ins)); + assert(IsThreeOperandAVXInstruction(ins)); + + instrDesc* id = emitNewInstr(attr); + UNATIVE_OFFSET sz = + emitInsSizeSV(insCodeRM(ins), varx, offs) + emitGetVexPrefixAdjustedSize(ins, attr, insCodeRM(ins)); + + id->idIns(ins); + id->idInsFmt(IF_RWR_RRD_SRD); + id->idReg1(reg1); + id->idReg2(reg2); + id->idAddr()->iiaLclVar.initLclVarAddr(varx, offs); + +#ifdef DEBUG + id->idDebugOnlyInfo()->idVarRefOffs = emitVarRefOffs; +#endif + + id->idCodeSize(sz); + dispIns(id); + emitCurIGsize += sz; +} + /********************************************************************************** * emitIns_R_R_R_I: Add an instruction with three register operands and an immediate. * @@ -4888,9 +4990,49 @@ void emitter::emitIns_AX_R(instruction ins, emitAttr attr, regNumber ireg, regNu } #if FEATURE_HW_INTRINSICS +void emitter::emitIns_SIMD_R_R_A(instruction ins, + regNumber reg, + regNumber reg1, + regNumber baseReg, + regNumber indxReg, + size_t scale, + ssize_t offs, + var_types simdtype) +{ + if (UseVEXEncoding()) + { + emitIns_R_R_A(ins, emitTypeSize(simdtype), reg, reg1, baseReg, indxReg, scale, offs); + } + else + { + if (reg1 != reg) + { + emitIns_R_R(INS_movaps, emitTypeSize(simdtype), reg, reg1); + } + emitIns_R_A(ins, emitTypeSize(simdtype), reg, baseReg, indxReg, scale, offs); + } +} + +void emitter::emitIns_SIMD_R_R_C( + instruction ins, regNumber reg, regNumber reg1, CORINFO_FIELD_HANDLE fldHnd, int offs, var_types simdtype) +{ + if (UseVEXEncoding()) + { + emitIns_R_R_C(ins, emitTypeSize(simdtype), reg, reg1, fldHnd, offs); + } + else + { + if (reg1 != reg) + { + emitIns_R_R(INS_movaps, emitTypeSize(simdtype), reg, reg1); + } + emitIns_R_C(ins, emitTypeSize(simdtype), reg, fldHnd, offs); + } +} + void emitter::emitIns_SIMD_R_R_R(instruction ins, regNumber reg, regNumber reg1, regNumber reg2, var_types simdtype) { - if (UseVEXEncoding() && reg1 != reg) + if (UseVEXEncoding()) { emitIns_R_R_R(ins, emitTypeSize(simdtype), reg, reg1, reg2); } @@ -4903,6 +5045,22 @@ void emitter::emitIns_SIMD_R_R_R(instruction ins, regNumber reg, regNumber reg1, emitIns_R_R(ins, emitTypeSize(simdtype), reg, reg2); } } + +void emitter::emitIns_SIMD_R_R_S(instruction ins, regNumber reg, regNumber reg1, int varx, int offs, var_types simdtype) +{ + if (UseVEXEncoding()) + { + emitIns_R_R_S(ins, emitTypeSize(simdtype), reg, reg1, varx, offs); + } + else + { + if (reg1 != reg) + { + emitIns_R_R(INS_movaps, emitTypeSize(simdtype), reg, reg1); + } + emitIns_R_S(ins, emitTypeSize(simdtype), reg, varx, offs); + } +} #endif /***************************************************************************** @@ -6918,6 +7076,11 @@ void emitter::emitDispIns( emitDispAddrMode(id); break; + case IF_RWR_RRD_ARD: + printf("%s, %s, %s", emitRegName(id->idReg1(), attr), emitRegName(id->idReg2(), attr), sstr); + emitDispAddrMode(id); + break; + case IF_ARD_RRD: case IF_AWR_RRD: case IF_ARW_RRD: @@ -7061,6 +7224,12 @@ void emitter::emitDispIns( break; + case IF_RWR_RRD_SRD: + printf("%s, %s, %s", emitRegName(id->idReg1(), attr), emitRegName(id->idReg2(), attr), sstr); + emitDispFrameRef(id->idAddr()->iiaLclVar.lvaVarNum(), id->idAddr()->iiaLclVar.lvaOffset(), + id->idDebugOnlyInfo()->idVarRefOffs, asmfm); + break; + case IF_RRD_RRD: case IF_RWR_RRD: case IF_RRW_RRD: @@ -7189,6 +7358,12 @@ void emitter::emitDispIns( emitDispClsVar(id->idAddr()->iiaFieldHnd, offs, ID_INFO_DSP_RELOC); break; + case IF_RWR_RRD_MRD: + printf("%s, %s, %s", emitRegName(id->idReg1(), attr), emitRegName(id->idReg2(), attr), sstr); + offs = emitGetInsDsp(id); + emitDispClsVar(id->idAddr()->iiaFieldHnd, offs, ID_INFO_DSP_RELOC); + break; + case IF_RWR_MRD_OFF: printf("%s, %s", emitRegName(id->idReg1(), attr), "offset"); @@ -7635,12 +7810,17 @@ BYTE* emitter::emitOutputAM(BYTE* dst, instrDesc* id, code_t code, CnsVal* addc) // Therefore, add VEX prefix is one is not already present. code = AddVexPrefixIfNeededAndNotPresent(ins, code, size); - // For this format, moves do not support a third operand, so we only need to handle the binary ops. if (IsDstDstSrcAVXInstruction(ins)) { - // Encode source operand reg in 'vvvv' bits in 1's complement form - // The order of operands are reversed, therefore use reg2 as the source. - code = insEncodeReg3456(ins, id->idReg1(), size, code); + regNumber src1 = id->idReg2(); + + if (id->idInsFmt() != IF_RWR_RRD_ARD) + { + src1 = id->idReg1(); + } + + // encode source operand reg in 'vvvv' bits in 1's compliement form + code = insEncodeReg3456(ins, src1, size, code); } // Emit the REX prefix if required @@ -10988,6 +11168,7 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp) case IF_RRD_ARD: case IF_RWR_ARD: case IF_RRW_ARD: + case IF_RWR_RRD_ARD: code = insCodeRM(ins); code = AddVexPrefixIfNeeded(ins, code, size); regcode = (insEncodeReg345(ins, id->idReg1(), size, &code) << 8); @@ -11082,6 +11263,7 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp) case IF_RRD_SRD: case IF_RWR_SRD: case IF_RRW_SRD: + case IF_RWR_RRD_SRD: code = insCodeRM(ins); // 4-byte AVX instructions are special cased inside emitOutputSV @@ -11094,16 +11276,17 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp) { code = AddVexPrefixIfNeeded(ins, code, size); - // In case of AVX instructions that take 3 operands, encode reg1 as first source. - // Note that reg1 is both a source and a destination. - // - // TODO-XArch-CQ: Eventually we need to support 3 operand instruction formats. For - // now we use the single source as source1 and source2. - // For this format, moves do not support a third operand, so we only need to handle the binary ops. if (IsDstDstSrcAVXInstruction(ins)) { + regNumber src1 = id->idReg2(); + + if (id->idInsFmt() != IF_RWR_RRD_SRD) + { + src1 = id->idReg1(); + } + // encode source operand reg in 'vvvv' bits in 1's compliement form - code = insEncodeReg3456(ins, id->idReg1(), size, code); + code = insEncodeReg3456(ins, src1, size, code); } regcode = (insEncodeReg345(ins, id->idReg1(), size, &code) << 8); @@ -11165,6 +11348,7 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp) case IF_RRD_MRD: case IF_RWR_MRD: case IF_RRW_MRD: + case IF_RWR_RRD_MRD: code = insCodeRM(ins); // Special case 4-byte AVX instructions if (Is4ByteAVXInstruction(ins)) @@ -11175,16 +11359,17 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp) { code = AddVexPrefixIfNeeded(ins, code, size); - // In case of AVX instructions that take 3 operands, encode reg1 as first source. - // Note that reg1 is both a source and a destination. - // - // TODO-XArch-CQ: Eventually we need to support 3 operand instruction formats. For - // now we use the single source as source1 and source2. - // For this format, moves do not support a third operand, so we only need to handle the binary ops. if (IsDstDstSrcAVXInstruction(ins)) { + regNumber src1 = id->idReg2(); + + if (id->idInsFmt() != IF_RWR_RRD_MRD) + { + src1 = id->idReg1(); + } + // encode source operand reg in 'vvvv' bits in 1's compliement form - code = insEncodeReg3456(ins, id->idReg1(), size, code); + code = insEncodeReg3456(ins, src1, size, code); } regcode = (insEncodeReg345(ins, id->idReg1(), size, &code) << 8); diff --git a/src/jit/emitxarch.h b/src/jit/emitxarch.h index 746c26b954b4..0eac47709116 100644 --- a/src/jit/emitxarch.h +++ b/src/jit/emitxarch.h @@ -365,6 +365,23 @@ void emitIns_R_R(instruction ins, emitAttr attr, regNumber reg1, regNumber reg2) void emitIns_R_R_I(instruction ins, emitAttr attr, regNumber reg1, regNumber reg2, int ival); +void emitIns_R_A( + instruction ins, emitAttr attr, regNumber reg1, regNumber baseReg, regNumber indxReg, size_t scale, ssize_t offs); + +void emitIns_R_R_A(instruction ins, + emitAttr attr, + regNumber reg1, + regNumber reg2, + regNumber baseReg, + regNumber indxReg, + size_t scale, + ssize_t offs); + +void emitIns_R_R_C( + instruction ins, emitAttr attr, regNumber reg1, regNumber reg2, CORINFO_FIELD_HANDLE fldHnd, int offs); + +void emitIns_R_R_S(instruction ins, emitAttr attr, regNumber reg1, regNumber reg2, int varx, int offs); + void emitIns_R_R_R(instruction ins, emitAttr attr, regNumber reg1, regNumber reg2, regNumber reg3); void emitIns_R_R_R_I(instruction ins, emitAttr attr, regNumber reg1, regNumber reg2, regNumber reg3, int ival); @@ -424,7 +441,18 @@ void emitIns_R_AX(instruction ins, emitAttr attr, regNumber ireg, regNumber reg, void emitIns_AX_R(instruction ins, emitAttr attr, regNumber ireg, regNumber reg, unsigned mul, int disp); #if FEATURE_HW_INTRINSICS +void emitIns_SIMD_R_R_A(instruction ins, + regNumber reg, + regNumber reg1, + regNumber baseReg, + regNumber indxReg, + size_t scale, + ssize_t offs, + var_types simdtype); +void emitIns_SIMD_R_R_C( + instruction ins, regNumber reg, regNumber reg1, CORINFO_FIELD_HANDLE fldHnd, int offs, var_types simdtype); void emitIns_SIMD_R_R_R(instruction ins, regNumber reg, regNumber reg1, regNumber reg2, var_types simdtype); +void emitIns_SIMD_R_R_S(instruction ins, regNumber reg, regNumber reg1, int varx, int offs, var_types simdtype); #endif #if FEATURE_STACK_FP_X87 diff --git a/src/jit/hwintrinsiccodegenxarch.cpp b/src/jit/hwintrinsiccodegenxarch.cpp index 66dbfc9c403a..88c260b939af 100644 --- a/src/jit/hwintrinsiccodegenxarch.cpp +++ b/src/jit/hwintrinsiccodegenxarch.cpp @@ -81,57 +81,141 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node) } } -void CodeGen::genSSEIntrinsic(GenTreeHWIntrinsic* node) +void CodeGen::genHWIntrinsic_R_R_RM(GenTreeHWIntrinsic* node, instruction ins) { - NamedIntrinsic intrinsicID = node->gtHWIntrinsicId; - GenTree* op1 = node->gtGetOp1(); - GenTree* op2 = node->gtGetOp2(); - regNumber targetReg = node->gtRegNum; - var_types targetType = node->TypeGet(); - var_types baseType = node->gtSIMDBaseType; + var_types targetType = node->TypeGet(); + regNumber targetReg = node->gtRegNum; + GenTree* op1 = node->gtGetOp1(); + GenTree* op2 = node->gtGetOp2(); + emitter* emit = getEmitter(); + + // TODO-XArch-CQ: Commutative operations can have op1 be contained + // TODO-XArch-CQ: Non-VEX encoded instructions can have both ops contained regNumber op1Reg = op1->gtRegNum; - regNumber op2Reg = REG_NA; - emitter* emit = getEmitter(); + + assert(targetReg != REG_NA); + assert(op1Reg != REG_NA); genConsumeOperands(node); + if (op2->isContained() || op2->isUsedFromSpillTemp()) + { + unsigned varNum = BAD_VAR_NUM; + unsigned offset = (unsigned)-1; + + if (op2->isIndir()) + { + assert(op2->isContained()); + + GenTreeIndir* mem = op2->AsIndir(); + GenTree* memBase = mem->Base(); + + switch (memBase->OperGet()) + { + case GT_LCL_VAR_ADDR: + { + varNum = memBase->AsLclVarCommon()->GetLclNum(); + offset = 0; + break; + } + + case GT_CLS_VAR_ADDR: + { + emit->emitIns_SIMD_R_R_C(ins, targetReg, op1Reg, memBase->gtClsVar.gtClsVarHnd, 0, targetType); + return; + } + + default: + { + regNumber indxReg = mem->HasIndex() ? mem->Index()->gtRegNum : REG_NA; + emit->emitIns_SIMD_R_R_A(ins, targetReg, op1Reg, memBase->gtRegNum, indxReg, mem->Scale(), + mem->Offset(), targetType); + return; + } + } + } + else + { + switch (op2->OperGet()) + { + case GT_LCL_FLD: + { + GenTreeLclFld* lclField = op2->AsLclFld(); + + varNum = lclField->GetLclNum(); + offset = lclField->gtLclFld.gtLclOffs; + break; + } + + case GT_LCL_VAR: + { + assert(op2->IsRegOptional() || !compiler->lvaTable[op2->gtLclVar.gtLclNum].lvIsRegCandidate()); + + varNum = op2->AsLclVar()->GetLclNum(); + offset = 0; + break; + } + + default: + { + assert(op2->isUsedFromSpillTemp()); + assert(op2->IsRegOptional()); + + TempDsc* tmpDsc = getSpillTempDsc(op2); + + varNum = tmpDsc->tdTempNum(); + offset = 0; + + compiler->tmpRlsTemp(tmpDsc); + break; + } + } + } + + emit->emitIns_SIMD_R_R_S(ins, targetReg, op1Reg, varNum, offset, targetType); + } + else + { + emit->emitIns_SIMD_R_R_R(ins, targetReg, op1Reg, op2->gtRegNum, targetType); + } + + genProduceReg(node); +} + +void CodeGen::genSSEIntrinsic(GenTreeHWIntrinsic* node) +{ + NamedIntrinsic intrinsicID = node->gtHWIntrinsicId; + instruction ins = INS_invalid; + switch (intrinsicID) { case NI_SSE_Add: - assert(baseType == TYP_FLOAT); - op2Reg = op2->gtRegNum; - emit->emitIns_SIMD_R_R_R(INS_addps, targetReg, op1Reg, op2Reg, TYP_SIMD16); + { + assert(node->TypeGet() == TYP_SIMD16); + assert(node->gtSIMDBaseType == TYP_FLOAT); + genHWIntrinsic_R_R_RM(node, INS_addps); break; + } + default: unreached(); break; } - genProduceReg(node); } void CodeGen::genSSE2Intrinsic(GenTreeHWIntrinsic* node) { NamedIntrinsic intrinsicID = node->gtHWIntrinsicId; - GenTree* op1 = node->gtGetOp1(); - GenTree* op2 = node->gtGetOp2(); - regNumber targetReg = node->gtRegNum; - var_types targetType = node->TypeGet(); var_types baseType = node->gtSIMDBaseType; - - regNumber op1Reg = op1->gtRegNum; - regNumber op2Reg = REG_NA; - emitter* emit = getEmitter(); - - genConsumeOperands(node); + instruction ins = INS_invalid; switch (intrinsicID) { case NI_SSE2_Add: { - op2Reg = op2->gtRegNum; + assert(node->TypeGet() == TYP_SIMD16); - instruction ins; switch (baseType) { case TYP_DOUBLE: @@ -158,14 +242,14 @@ void CodeGen::genSSE2Intrinsic(GenTreeHWIntrinsic* node) break; } - emit->emitIns_SIMD_R_R_R(ins, targetReg, op1Reg, op2Reg, TYP_SIMD16); + genHWIntrinsic_R_R_RM(node, ins); break; } + default: unreached(); break; } - genProduceReg(node); } void CodeGen::genSSE3Intrinsic(GenTreeHWIntrinsic* node) @@ -228,25 +312,15 @@ void CodeGen::genSSE42Intrinsic(GenTreeHWIntrinsic* node) void CodeGen::genAVXIntrinsic(GenTreeHWIntrinsic* node) { NamedIntrinsic intrinsicID = node->gtHWIntrinsicId; - GenTree* op1 = node->gtGetOp1(); - GenTree* op2 = node->gtGetOp2(); - regNumber targetReg = node->gtRegNum; - var_types targetType = node->TypeGet(); var_types baseType = node->gtSIMDBaseType; + instruction ins = INS_invalid; - regNumber op1Reg = op1->gtRegNum; - regNumber op2Reg = REG_NA; - - genConsumeOperands(node); - - emitter* emit = getEmitter(); switch (intrinsicID) { case NI_AVX_Add: { - op2Reg = op2->gtRegNum; + assert(node->TypeGet() == TYP_SIMD32); - instruction ins; switch (baseType) { case TYP_DOUBLE: @@ -260,38 +334,28 @@ void CodeGen::genAVXIntrinsic(GenTreeHWIntrinsic* node) break; } - emit->emitIns_R_R_R(ins, emitTypeSize(TYP_SIMD32), targetReg, op1Reg, op2Reg); + genHWIntrinsic_R_R_RM(node, ins); break; } + default: unreached(); break; } - genProduceReg(node); } void CodeGen::genAVX2Intrinsic(GenTreeHWIntrinsic* node) { NamedIntrinsic intrinsicID = node->gtHWIntrinsicId; - GenTree* op1 = node->gtGetOp1(); - GenTree* op2 = node->gtGetOp2(); - regNumber targetReg = node->gtRegNum; - var_types targetType = node->TypeGet(); var_types baseType = node->gtSIMDBaseType; + instruction ins = INS_invalid; - regNumber op1Reg = op1->gtRegNum; - regNumber op2Reg = REG_NA; - - genConsumeOperands(node); - - emitter* emit = getEmitter(); switch (intrinsicID) { case NI_AVX2_Add: { - op2Reg = op2->gtRegNum; + assert(node->TypeGet() == TYP_SIMD32); - instruction ins; switch (baseType) { case TYP_INT: @@ -315,14 +379,14 @@ void CodeGen::genAVX2Intrinsic(GenTreeHWIntrinsic* node) break; } - emit->emitIns_R_R_R(ins, emitTypeSize(TYP_SIMD32), targetReg, op1Reg, op2Reg); + genHWIntrinsic_R_R_RM(node, ins); break; } + default: unreached(); break; } - genProduceReg(node); } void CodeGen::genAESIntrinsic(GenTreeHWIntrinsic* node) diff --git a/src/jit/lowerxarch.cpp b/src/jit/lowerxarch.cpp index 38cfed63e2c0..a2990baf63a3 100644 --- a/src/jit/lowerxarch.cpp +++ b/src/jit/lowerxarch.cpp @@ -2301,11 +2301,38 @@ void Lowering::ContainCheckSIMD(GenTreeSIMD* simdNode) void Lowering::ContainCheckHWIntrinsic(GenTreeHWIntrinsic* node) { NamedIntrinsic intrinsicID = node->gtHWIntrinsicId; - GenTree* op1 = node->gtOp.gtOp1; - GenTree* op2 = node->gtOp.gtOp2; + GenTree* op1 = node->gtGetOp1(); + GenTree* op2 = node->gtGetOp2(); switch (node->gtHWIntrinsicId) { + case NI_SSE_Add: + case NI_SSE2_Add: + if (!comp->getEmitter()->UseVEXEncoding()) + { + // TODO-XArch-CQ: Non-VEX encoded instructions can have both ops contained + // TODO-XArch-CQ: Non-VEX encoded instructions require memory ops to be aligned + break; + } + __fallthrough; + + case NI_AVX_Add: + case NI_AVX2_Add: + { + assert(comp->getEmitter()->UseVEXEncoding()); + + if (IsContainableMemoryOp(op2)) + { + MakeSrcContained(node, op2); + } + else + { + // TODO-XArch-CQ: Commutative operations can have op1 be contained + op2->SetRegOptional(); + } + break; + } + default: assert((intrinsicID > NI_HW_INTRINSIC_START) && (intrinsicID < NI_HW_INTRINSIC_END)); break; From 6804e9940ac0dda3c935379188af6a95d84574df Mon Sep 17 00:00:00 2001 From: Tanner Gooding Date: Thu, 11 Jan 2018 14:03:39 -0800 Subject: [PATCH 2/3] Adding asserts that values are as expected for certain containment checks --- src/jit/hwintrinsiccodegenxarch.cpp | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/src/jit/hwintrinsiccodegenxarch.cpp b/src/jit/hwintrinsiccodegenxarch.cpp index 88c260b939af..aa65222679f8 100644 --- a/src/jit/hwintrinsiccodegenxarch.cpp +++ b/src/jit/hwintrinsiccodegenxarch.cpp @@ -101,6 +101,7 @@ void CodeGen::genHWIntrinsic_R_R_RM(GenTreeHWIntrinsic* node, instruction ins) if (op2->isContained() || op2->isUsedFromSpillTemp()) { + TempDsc* tmpDsc = nullptr; unsigned varNum = BAD_VAR_NUM; unsigned offset = (unsigned)-1; @@ -117,6 +118,13 @@ void CodeGen::genHWIntrinsic_R_R_RM(GenTreeHWIntrinsic* node, instruction ins) { varNum = memBase->AsLclVarCommon()->GetLclNum(); offset = 0; + + // Ensure that all the GenTreeIndir values are set to their defaults. + assert(memBase->gtRegNum == REG_NA); + assert(!mem->HasIndex()); + assert(mem->Scale() == 1); + assert(mem->Offset() == 0); + break; } @@ -162,8 +170,7 @@ void CodeGen::genHWIntrinsic_R_R_RM(GenTreeHWIntrinsic* node, instruction ins) assert(op2->isUsedFromSpillTemp()); assert(op2->IsRegOptional()); - TempDsc* tmpDsc = getSpillTempDsc(op2); - + tmpDsc = getSpillTempDsc(op2); varNum = tmpDsc->tdTempNum(); offset = 0; @@ -173,6 +180,12 @@ void CodeGen::genHWIntrinsic_R_R_RM(GenTreeHWIntrinsic* node, instruction ins) } } + // Ensure we got a good varNum and offset. + // We also need to check for `tmpDsc != nullptr` since spill temp numbers + // are negative and start with -1, which also happens to be BAD_VAR_NUM. + assert((varNum != BAD_VAR_NUM) || (tmpDsc != nullptr)); + assert(offset != (unsigned)-1); + emit->emitIns_SIMD_R_R_S(ins, targetReg, op1Reg, varNum, offset, targetType); } else From 592aa820859c894b179354d1652d359e1bb3145b Mon Sep 17 00:00:00 2001 From: Tanner Gooding Date: Thu, 11 Jan 2018 17:53:32 -0800 Subject: [PATCH 3/3] Fixing the hwintrin codgen containment checks --- src/jit/emitxarch.cpp | 49 ++++++++++------------------- src/jit/emitxarch.h | 23 +++----------- src/jit/hwintrinsiccodegenxarch.cpp | 38 ++++++++++------------ 3 files changed, 37 insertions(+), 73 deletions(-) diff --git a/src/jit/emitxarch.cpp b/src/jit/emitxarch.cpp index 0bef8610c927..2f7f8e615d31 100644 --- a/src/jit/emitxarch.cpp +++ b/src/jit/emitxarch.cpp @@ -3891,18 +3891,15 @@ void emitter::emitIns_R_R_I(instruction ins, emitAttr attr, regNumber reg1, regN emitCurIGsize += sz; } -void emitter::emitIns_R_A( - instruction ins, emitAttr attr, regNumber reg1, regNumber baseReg, regNumber indxReg, size_t scale, ssize_t offs) +void emitter::emitIns_R_A(instruction ins, emitAttr attr, regNumber reg1, GenTreeIndir* indir, insFormat fmt) { - instrDesc* id = emitNewInstrAmd(attr, offs); + ssize_t offs = indir->Offset(); + instrDesc* id = emitNewInstrAmd(attr, offs); + id->idIns(ins); - id->idInsFmt(IF_RRW_ARD); id->idReg1(reg1); - id->idAddr()->iiaAddrMode.amBaseReg = baseReg; - id->idAddr()->iiaAddrMode.amIndxReg = indxReg; - id->idAddr()->iiaAddrMode.amScale = emitEncodeScale(scale); - assert(emitGetInsAmdAny(id) == offs); + emitHandleMemOp(indir, id, fmt, ins); UNATIVE_OFFSET sz = emitInsSizeAM(id, insCodeRM(ins)); id->idCodeSize(sz); @@ -3911,32 +3908,24 @@ void emitter::emitIns_R_A( emitCurIGsize += sz; } -void emitter::emitIns_R_R_A(instruction ins, - emitAttr attr, - regNumber reg1, - regNumber reg2, - regNumber baseReg, - regNumber indxReg, - size_t scale, - ssize_t offs) +void emitter::emitIns_R_R_A( + instruction ins, emitAttr attr, regNumber reg1, regNumber reg2, GenTreeIndir* indir, insFormat fmt) { assert(IsSSEOrAVXInstruction(ins)); assert(IsThreeOperandAVXInstruction(ins)); - instrDesc* id = emitNewInstrAmd(attr, offs); - UNATIVE_OFFSET sz = emitInsSizeAM(id, insCodeRM(ins)) + emitGetVexPrefixAdjustedSize(ins, attr, insCodeRM(ins)); + ssize_t offs = indir->Offset(); + instrDesc* id = emitNewInstrAmd(attr, offs); id->idIns(ins); - id->idInsFmt(IF_RWR_RRD_ARD); id->idReg1(reg1); id->idReg2(reg2); - id->idAddr()->iiaAddrMode.amBaseReg = baseReg; - id->idAddr()->iiaAddrMode.amIndxReg = indxReg; - id->idAddr()->iiaAddrMode.amScale = emitEncodeScale(scale); - assert(emitGetInsAmdAny(id) == offs); + emitHandleMemOp(indir, id, fmt, ins); + UNATIVE_OFFSET sz = emitInsSizeAM(id, insCodeRM(ins)) + emitGetVexPrefixAdjustedSize(ins, attr, insCodeRM(ins)); id->idCodeSize(sz); + dispIns(id); emitCurIGsize += sz; } @@ -4990,18 +4979,12 @@ void emitter::emitIns_AX_R(instruction ins, emitAttr attr, regNumber ireg, regNu } #if FEATURE_HW_INTRINSICS -void emitter::emitIns_SIMD_R_R_A(instruction ins, - regNumber reg, - regNumber reg1, - regNumber baseReg, - regNumber indxReg, - size_t scale, - ssize_t offs, - var_types simdtype) +void emitter::emitIns_SIMD_R_R_A( + instruction ins, regNumber reg, regNumber reg1, GenTreeIndir* indir, var_types simdtype) { if (UseVEXEncoding()) { - emitIns_R_R_A(ins, emitTypeSize(simdtype), reg, reg1, baseReg, indxReg, scale, offs); + emitIns_R_R_A(ins, emitTypeSize(simdtype), reg, reg1, indir, IF_RWR_RRD_ARD); } else { @@ -5009,7 +4992,7 @@ void emitter::emitIns_SIMD_R_R_A(instruction ins, { emitIns_R_R(INS_movaps, emitTypeSize(simdtype), reg, reg1); } - emitIns_R_A(ins, emitTypeSize(simdtype), reg, baseReg, indxReg, scale, offs); + emitIns_R_A(ins, emitTypeSize(simdtype), reg, indir, IF_RRW_ARD); } } diff --git a/src/jit/emitxarch.h b/src/jit/emitxarch.h index 0eac47709116..c30183385711 100644 --- a/src/jit/emitxarch.h +++ b/src/jit/emitxarch.h @@ -365,17 +365,9 @@ void emitIns_R_R(instruction ins, emitAttr attr, regNumber reg1, regNumber reg2) void emitIns_R_R_I(instruction ins, emitAttr attr, regNumber reg1, regNumber reg2, int ival); -void emitIns_R_A( - instruction ins, emitAttr attr, regNumber reg1, regNumber baseReg, regNumber indxReg, size_t scale, ssize_t offs); - -void emitIns_R_R_A(instruction ins, - emitAttr attr, - regNumber reg1, - regNumber reg2, - regNumber baseReg, - regNumber indxReg, - size_t scale, - ssize_t offs); +void emitIns_R_A(instruction ins, emitAttr attr, regNumber reg1, GenTreeIndir* indir, insFormat fmt); + +void emitIns_R_R_A(instruction ins, emitAttr attr, regNumber reg1, regNumber reg2, GenTreeIndir* indir, insFormat fmt); void emitIns_R_R_C( instruction ins, emitAttr attr, regNumber reg1, regNumber reg2, CORINFO_FIELD_HANDLE fldHnd, int offs); @@ -441,14 +433,7 @@ void emitIns_R_AX(instruction ins, emitAttr attr, regNumber ireg, regNumber reg, void emitIns_AX_R(instruction ins, emitAttr attr, regNumber ireg, regNumber reg, unsigned mul, int disp); #if FEATURE_HW_INTRINSICS -void emitIns_SIMD_R_R_A(instruction ins, - regNumber reg, - regNumber reg1, - regNumber baseReg, - regNumber indxReg, - size_t scale, - ssize_t offs, - var_types simdtype); +void emitIns_SIMD_R_R_A(instruction ins, regNumber reg, regNumber reg1, GenTreeIndir* indir, var_types simdtype); void emitIns_SIMD_R_R_C( instruction ins, regNumber reg, regNumber reg1, CORINFO_FIELD_HANDLE fldHnd, int offs, var_types simdtype); void emitIns_SIMD_R_R_R(instruction ins, regNumber reg, regNumber reg1, regNumber reg2, var_types simdtype); diff --git a/src/jit/hwintrinsiccodegenxarch.cpp b/src/jit/hwintrinsiccodegenxarch.cpp index aa65222679f8..23ebad42878d 100644 --- a/src/jit/hwintrinsiccodegenxarch.cpp +++ b/src/jit/hwintrinsiccodegenxarch.cpp @@ -105,12 +105,20 @@ void CodeGen::genHWIntrinsic_R_R_RM(GenTreeHWIntrinsic* node, instruction ins) unsigned varNum = BAD_VAR_NUM; unsigned offset = (unsigned)-1; - if (op2->isIndir()) + if (op2->isUsedFromSpillTemp()) { - assert(op2->isContained()); + assert(op2->IsRegOptional()); - GenTreeIndir* mem = op2->AsIndir(); - GenTree* memBase = mem->Base(); + tmpDsc = getSpillTempDsc(op2); + varNum = tmpDsc->tdTempNum(); + offset = 0; + + compiler->tmpRlsTemp(tmpDsc); + } + else if (op2->isIndir()) + { + GenTreeIndir* memIndir = op2->AsIndir(); + GenTree* memBase = memIndir->gtOp1; switch (memBase->OperGet()) { @@ -121,9 +129,9 @@ void CodeGen::genHWIntrinsic_R_R_RM(GenTreeHWIntrinsic* node, instruction ins) // Ensure that all the GenTreeIndir values are set to their defaults. assert(memBase->gtRegNum == REG_NA); - assert(!mem->HasIndex()); - assert(mem->Scale() == 1); - assert(mem->Offset() == 0); + assert(!memIndir->HasIndex()); + assert(memIndir->Scale() == 1); + assert(memIndir->Offset() == 0); break; } @@ -136,9 +144,7 @@ void CodeGen::genHWIntrinsic_R_R_RM(GenTreeHWIntrinsic* node, instruction ins) default: { - regNumber indxReg = mem->HasIndex() ? mem->Index()->gtRegNum : REG_NA; - emit->emitIns_SIMD_R_R_A(ins, targetReg, op1Reg, memBase->gtRegNum, indxReg, mem->Scale(), - mem->Offset(), targetType); + emit->emitIns_SIMD_R_R_A(ins, targetReg, op1Reg, memIndir, targetType); return; } } @@ -159,24 +165,14 @@ void CodeGen::genHWIntrinsic_R_R_RM(GenTreeHWIntrinsic* node, instruction ins) case GT_LCL_VAR: { assert(op2->IsRegOptional() || !compiler->lvaTable[op2->gtLclVar.gtLclNum].lvIsRegCandidate()); - varNum = op2->AsLclVar()->GetLclNum(); offset = 0; break; } default: - { - assert(op2->isUsedFromSpillTemp()); - assert(op2->IsRegOptional()); - - tmpDsc = getSpillTempDsc(op2); - varNum = tmpDsc->tdTempNum(); - offset = 0; - - compiler->tmpRlsTemp(tmpDsc); + unreached(); break; - } } }