diff --git a/src/coreclr/jit/codegenxarch.cpp b/src/coreclr/jit/codegenxarch.cpp index 892d3f2def304f..4727d258de5235 100644 --- a/src/coreclr/jit/codegenxarch.cpp +++ b/src/coreclr/jit/codegenxarch.cpp @@ -8937,8 +8937,6 @@ void CodeGen::genAmd64EmitterUnitTestsApx() // INS_bt only has reg-to-reg form. theEmitter->emitIns_R_R(INS_bt, EA_2BYTE, REG_EAX, REG_EDX); - theEmitter->emitIns_R(INS_idiv, EA_8BYTE, REG_EDX); - theEmitter->emitIns_R_R(INS_xchg, EA_8BYTE, REG_EAX, REG_EDX); theEmitter->emitIns_R(INS_div, EA_8BYTE, REG_EDX); @@ -9069,8 +9067,6 @@ void CodeGen::genAmd64EmitterUnitTestsApx() theEmitter->emitIns_R(INS_imulEAX, EA_8BYTE, REG_R12, INS_OPTS_EVEX_nf); theEmitter->emitIns_R(INS_mulEAX, EA_8BYTE, REG_R12, INS_OPTS_EVEX_nf); - theEmitter->emitIns_R(INS_div, EA_8BYTE, REG_R12, INS_OPTS_EVEX_nf); - theEmitter->emitIns_R(INS_idiv, EA_8BYTE, REG_R12, INS_OPTS_EVEX_nf); theEmitter->emitIns_R_R(INS_tzcnt_apx, EA_8BYTE, REG_R12, REG_R11, INS_OPTS_EVEX_nf); theEmitter->emitIns_R_R(INS_lzcnt_apx, EA_8BYTE, REG_R12, REG_R11, INS_OPTS_EVEX_nf); @@ -9146,6 +9142,12 @@ void CodeGen::genAmd64EmitterUnitTestsApx() theEmitter->emitIns_Mov(INS_movd32, EA_4BYTE, REG_R16, REG_XMM16, false); theEmitter->emitIns_R(INS_seto_apx, EA_1BYTE, REG_R11, INS_OPTS_EVEX_zu); + theEmitter->emitIns_I_AR(INS_imul_09, EA_4BYTE, 0x8149a, REG_EAX, 0x14); + theEmitter->emitIns_I_AR(INS_imul_19, EA_4BYTE, 0x8149a, REG_EAX, 0x14); + theEmitter->emitIns_I_AR(INS_imul_19, EA_4BYTE, 0x8149a, REG_R16, 0x14); + theEmitter->emitIns_S_I(INS_imul_19, EA_4BYTE, 0, 20, 30); + theEmitter->emitIns_S_I(INS_imul_09, EA_4BYTE, 0, 20, 30); + theEmitter->emitIns_R_AR(INS_crc32_apx, EA_4BYTE, REG_R17, REG_EAX, 0x14); } void CodeGen::genAmd64EmitterUnitTestsAvx10v2() @@ -10049,7 +10051,7 @@ void CodeGen::genPushCalleeSavedRegisters() #endif // DEBUG #ifdef TARGET_AMD64 - if (m_compiler->canUseApxEvexEncoding() && JitConfig.EnableApxPPX()) + if (m_compiler->canUseApxEvexEncoding() && JitConfig.EnableApxPP2()) { 
genPushCalleeSavedRegistersFromMaskAPX(rsPushRegs); return; @@ -10064,7 +10066,13 @@ void CodeGen::genPushCalleeSavedRegisters() if ((regBit & rsPushRegs) != 0) { +#ifdef TARGET_AMD64 + insOpts instOptions = + (m_compiler->canUseApxEvexEncoding() && JitConfig.EnableApxPPHint()) ? INS_OPTS_APX_ppx : INS_OPTS_NONE; + GetEmitter()->emitIns_R(INS_push, EA_PTRSIZE, reg, instOptions); +#else inst_RV(INS_push, reg, TYP_REF); +#endif m_compiler->unwindPush(reg); rsPushRegs &= ~regBit; } @@ -10175,7 +10183,7 @@ void CodeGen::genPopCalleeSavedRegisters(bool jmpEpilog) return; } - if (m_compiler->canUseApxEvexEncoding() && JitConfig.EnableApxPPX()) + if (m_compiler->canUseApxEvexEncoding() && JitConfig.EnableApxPP2()) { regMaskTP rsPopRegs = regSet.rsGetModifiedIntCalleeSavedRegsMask(); const unsigned popCount = genPopCalleeSavedRegistersFromMaskAPX(rsPopRegs); @@ -10199,10 +10207,12 @@ void CodeGen::genPopCalleeSavedRegisters(bool jmpEpilog) unsigned CodeGen::genPopCalleeSavedRegistersFromMask(regMaskTP rsPopRegs) { unsigned popCount = 0; + insOpts instOptions = + (m_compiler->canUseApxEvexEncoding() && JitConfig.EnableApxPPHint()) ? 
INS_OPTS_APX_ppx : INS_OPTS_NONE; if ((rsPopRegs & RBM_EBX) != 0) { popCount++; - inst_RV(INS_pop, REG_EBX, TYP_I_IMPL); + GetEmitter()->emitIns_R(INS_pop, EA_PTRSIZE, REG_EBX, instOptions); } if ((rsPopRegs & RBM_FPBASE) != 0) { @@ -10210,7 +10220,7 @@ unsigned CodeGen::genPopCalleeSavedRegistersFromMask(regMaskTP rsPopRegs) assert(!doubleAlignOrFramePointerUsed()); popCount++; - inst_RV(INS_pop, REG_EBP, TYP_I_IMPL); + GetEmitter()->emitIns_R(INS_pop, EA_PTRSIZE, REG_EBP, instOptions); } #ifndef UNIX_AMD64_ABI @@ -10218,35 +10228,22 @@ unsigned CodeGen::genPopCalleeSavedRegistersFromMask(regMaskTP rsPopRegs) if ((rsPopRegs & RBM_ESI) != 0) { popCount++; - inst_RV(INS_pop, REG_ESI, TYP_I_IMPL); + GetEmitter()->emitIns_R(INS_pop, EA_PTRSIZE, REG_ESI, instOptions); } if ((rsPopRegs & RBM_EDI) != 0) { popCount++; - inst_RV(INS_pop, REG_EDI, TYP_I_IMPL); + GetEmitter()->emitIns_R(INS_pop, EA_PTRSIZE, REG_EDI, instOptions); } #endif // !defined(UNIX_AMD64_ABI) #ifdef TARGET_AMD64 - if ((rsPopRegs & RBM_R12) != 0) - { - popCount++; - inst_RV(INS_pop, REG_R12, TYP_I_IMPL); - } - if ((rsPopRegs & RBM_R13) != 0) - { - popCount++; - inst_RV(INS_pop, REG_R13, TYP_I_IMPL); - } - if ((rsPopRegs & RBM_R14) != 0) - { - popCount++; - inst_RV(INS_pop, REG_R14, TYP_I_IMPL); - } - if ((rsPopRegs & RBM_R15) != 0) + regMaskTP popRegs = rsPopRegs & (RBM_R12 | RBM_R13 | RBM_R14 | RBM_R15); + while (popRegs != RBM_NONE) { + regNumber reg = genFirstRegNumFromMaskAndToggle(popRegs); popCount++; - inst_RV(INS_pop, REG_R15, TYP_I_IMPL); + GetEmitter()->emitIns_R(INS_pop, EA_PTRSIZE, reg, instOptions); } #endif // TARGET_AMD64 diff --git a/src/coreclr/jit/emitxarch.cpp b/src/coreclr/jit/emitxarch.cpp index 43d74e91d53ea7..b280087a6d1f00 100644 --- a/src/coreclr/jit/emitxarch.cpp +++ b/src/coreclr/jit/emitxarch.cpp @@ -2164,11 +2164,6 @@ emitter::code_t emitter::AddEvexPrefix(const instrDesc* id, code_t code, emitAtt code |= EXTENDED_EVEX_PP_BITS; } - if (instrIsExtendedReg3opImul(ins)) - { - 
// EVEX.R3 - code &= 0xFF7FFFFFFFFFFFFFULL; - } #ifdef TARGET_AMD64 if (IsCCMP(ins)) { @@ -2381,9 +2376,17 @@ emitter::code_t emitter::AddRex2Prefix(instruction ins, code_t code) { assert(IsRex2EncodableInstruction(ins)); +#ifdef TARGET_AMD64 + if (ins >= INS_imul_08 && ins <= INS_imul_15) + { + // These instructions have a built-in REX prefix, so it needs to be zeroed out when adding the prefix. + code &= 0xFFFFFFFFULL; + } +#endif + // Note that there are cases that some register field might be filled before adding prefix, // So we don't check if the code has REX2 prefix already or not. code |= DEFAULT_2BYTE_REX2_PREFIX; if (IsLegacyMap1(code)) // 2-byte opcode on Map-1 { @@ -7189,6 +7191,19 @@ void emitter::emitIns_R_I(instruction ins, // ACC form is not promoted into EVEX space, need to emit with MI form. sz += 1; } + + if (ins == INS_test && reg == REG_EAX && TakesRex2Prefix(id)) + { + // test eax/rax will use ACC form, which is not REX2 compatible. + if (size == EA_8BYTE) + { + sz -= 1; + } + else + { + sz -= 2; + } + } #endif // TARGET_AMD64 // Do we need a REX prefix for AMD64? We need one if we are using any extended register (REX.R), or if we have a @@ -14715,11 +14730,13 @@ BYTE* emitter::emitOutputAM(BYTE* dst, instrDesc* id, code_t code, CnsVal* addc) case EA_4BYTE: #ifdef TARGET_AMD64 case EA_8BYTE: + // EVEX.MOVBE is assigned with RM opcode that does not follow the following rule. 
+ if (ins != INS_movbe_apx) #endif - - /* Set the 'w' bit to get the large version */ - - code |= 0x1; + { + /* Set the 'w' bit to get the large version */ + code |= 0x1; + } break; #ifdef TARGET_X86 @@ -14737,9 +14754,11 @@ BYTE* emitter::emitOutputAM(BYTE* dst, instrDesc* id, code_t code, CnsVal* addc) break; } #ifdef TARGET_AMD64 - if (ins == INS_crc32_apx || ins == INS_movbe_apx) + if (ins >= INS_imul_08 && ins <= INS_imul_31) { - code |= (insEncodeReg345(id, id->idReg1(), size, &code) << 8); + // The built-in REX has been zeroed out in AddX86PrefixIfNeededAndNotPresent, need to add the register + // addressing bits in the prefix. + insEncodeReg345(id, inst3opImulReg(ins), size, &code); } #endif // TARGET_AMD64 } @@ -15626,11 +15645,11 @@ BYTE* emitter::emitOutputSV(BYTE* dst, instrDesc* id, code_t code, CnsVal* addc) break; } #ifdef TARGET_AMD64 - if (ins == INS_crc32_apx || ins == INS_movbe_apx) + if (ins >= INS_imul_08 && ins <= INS_imul_31) { - // The promoted CRC32 is in 1-byte opcode, unlike other instructions on this path, the register encoding for - // CRC32 need to be done here. - code |= (insEncodeReg345(id, id->idReg1(), size, &code) << 8); + // The built-in REX has been zero-ed out in AddX86PrefixIfNeededAndNotPresent, need to add the register + // addressing bits in the prefix. 
+ insEncodeReg345(id, inst3opImulReg(ins), size, &code); } #endif // TARGET_AMD64 } @@ -17445,6 +17464,12 @@ BYTE* emitter::emitOutputRI(BYTE* dst, instrDesc* id) code = insCodeMI(ins); code = AddX86PrefixIfNeeded(id, code, size); code = insEncodeMIreg(id, reg, size, code); +#ifdef TARGET_AMD64 + if (ins >= INS_imul_08 && ins <= INS_imul_31) + { + insEncodeReg345(id, inst3opImulReg(ins), size, &code); + } +#endif // TARGET_AMD64 } } diff --git a/src/coreclr/jit/instrsxarch.h b/src/coreclr/jit/instrsxarch.h index fdadb2b2f52afe..53e3e28d3adc7d 100644 --- a/src/coreclr/jit/instrsxarch.h +++ b/src/coreclr/jit/instrsxarch.h @@ -102,24 +102,24 @@ INST3(movsx, "movsx", IUM_WR, BAD_CODE, BAD_CODE, #ifdef TARGET_AMD64 INST3(movsxd, "movsxd", IUM_WR, BAD_CODE, BAD_CODE, 0x000063, ZERO, 4X, INS_TT_NONE, REX_W1 | Encoding_REX2) #endif -INST3(movzx, "movzx", IUM_WR, BAD_CODE, BAD_CODE, 0x0F00B6, ZERO, 4X, INS_TT_NONE, INS_FLAGS_Has_Wbit | Encoding_REX2) - -INST3(cmovo, "cmovo", IUM_WR, BAD_CODE, BAD_CODE, 0x0F0040, 1C, 2X, INS_TT_NONE, Reads_OF | Encoding_REX2 | INS_Flags_Has_NDD) -INST3(cmovno, "cmovno", IUM_WR, BAD_CODE, BAD_CODE, 0x0F0041, 1C, 2X, INS_TT_NONE, Reads_OF | Encoding_REX2 | INS_Flags_Has_NDD) -INST3(cmovb, "cmovb", IUM_WR, BAD_CODE, BAD_CODE, 0x0F0042, 1C, 2X, INS_TT_NONE, Reads_CF | Encoding_REX2 | INS_Flags_Has_NDD) -INST3(cmovae, "cmovae", IUM_WR, BAD_CODE, BAD_CODE, 0x0F0043, 1C, 2X, INS_TT_NONE, Reads_CF | Encoding_REX2 | INS_Flags_Has_NDD) -INST3(cmove, "cmove", IUM_WR, BAD_CODE, BAD_CODE, 0x0F0044, 1C, 2X, INS_TT_NONE, Reads_ZF | Encoding_REX2 | INS_Flags_Has_NDD) -INST3(cmovne, "cmovne", IUM_WR, BAD_CODE, BAD_CODE, 0x0F0045, 1C, 2X, INS_TT_NONE, Reads_ZF | Encoding_REX2 | INS_Flags_Has_NDD) -INST3(cmovbe, "cmovbe", IUM_WR, BAD_CODE, BAD_CODE, 0x0F0046, 1C, 2X, INS_TT_NONE, Reads_ZF | Reads_CF | Encoding_REX2 | INS_Flags_Has_NDD) -INST3(cmova, "cmova", IUM_WR, BAD_CODE, BAD_CODE, 0x0F0047, 1C, 2X, INS_TT_NONE, Reads_ZF | Reads_CF | Encoding_REX2 | 
INS_Flags_Has_NDD) -INST3(cmovs, "cmovs", IUM_WR, BAD_CODE, BAD_CODE, 0x0F0048, 1C, 2X, INS_TT_NONE, Reads_SF | Encoding_REX2 | INS_Flags_Has_NDD) -INST3(cmovns, "cmovns", IUM_WR, BAD_CODE, BAD_CODE, 0x0F0049, 1C, 2X, INS_TT_NONE, Reads_SF | Encoding_REX2 | INS_Flags_Has_NDD) -INST3(cmovp, "cmovp", IUM_WR, BAD_CODE, BAD_CODE, 0x0F004A, 1C, 2X, INS_TT_NONE, Reads_PF | Encoding_REX2 | INS_Flags_Has_NDD) -INST3(cmovnp, "cmovnp", IUM_WR, BAD_CODE, BAD_CODE, 0x0F004B, 1C, 2X, INS_TT_NONE, Reads_PF | Encoding_REX2 | INS_Flags_Has_NDD) -INST3(cmovl, "cmovl", IUM_WR, BAD_CODE, BAD_CODE, 0x0F004C, 1C, 2X, INS_TT_NONE, Reads_OF | Reads_SF | Encoding_REX2 | INS_Flags_Has_NDD) -INST3(cmovge, "cmovge", IUM_WR, BAD_CODE, BAD_CODE, 0x0F004D, 1C, 2X, INS_TT_NONE, Reads_OF | Reads_SF | Encoding_REX2 | INS_Flags_Has_NDD) -INST3(cmovle, "cmovle", IUM_WR, BAD_CODE, BAD_CODE, 0x0F004E, 1C, 2X, INS_TT_NONE, Reads_OF | Reads_SF | Reads_ZF | Encoding_REX2 | INS_Flags_Has_NDD) -INST3(cmovg, "cmovg", IUM_WR, BAD_CODE, BAD_CODE, 0x0F004F, 1C, 2X, INS_TT_NONE, Reads_OF | Reads_SF | Reads_ZF | Encoding_REX2 | INS_Flags_Has_NDD) +INST3(movzx, "movzx", IUM_WR, BAD_CODE, BAD_CODE, 0x0F00B6, ZERO, 4X, INS_TT_NONE, INS_FLAGS_Has_Wbit | Encoding_REX2) + +INST3(cmovo, "cmovo", IUM_WR, BAD_CODE, BAD_CODE, 0x0F0040, 1C, 2X, INS_TT_NONE, Reads_OF | Encoding_REX2) +INST3(cmovno, "cmovno", IUM_WR, BAD_CODE, BAD_CODE, 0x0F0041, 1C, 2X, INS_TT_NONE, Reads_OF | Encoding_REX2) +INST3(cmovb, "cmovb", IUM_WR, BAD_CODE, BAD_CODE, 0x0F0042, 1C, 2X, INS_TT_NONE, Reads_CF | Encoding_REX2) +INST3(cmovae, "cmovae", IUM_WR, BAD_CODE, BAD_CODE, 0x0F0043, 1C, 2X, INS_TT_NONE, Reads_CF | Encoding_REX2) +INST3(cmove, "cmove", IUM_WR, BAD_CODE, BAD_CODE, 0x0F0044, 1C, 2X, INS_TT_NONE, Reads_ZF | Encoding_REX2) +INST3(cmovne, "cmovne", IUM_WR, BAD_CODE, BAD_CODE, 0x0F0045, 1C, 2X, INS_TT_NONE, Reads_ZF | Encoding_REX2) +INST3(cmovbe, "cmovbe", IUM_WR, BAD_CODE, BAD_CODE, 0x0F0046, 1C, 2X, INS_TT_NONE, Reads_ZF | Reads_CF | 
Encoding_REX2) +INST3(cmova, "cmova", IUM_WR, BAD_CODE, BAD_CODE, 0x0F0047, 1C, 2X, INS_TT_NONE, Reads_ZF | Reads_CF | Encoding_REX2) +INST3(cmovs, "cmovs", IUM_WR, BAD_CODE, BAD_CODE, 0x0F0048, 1C, 2X, INS_TT_NONE, Reads_SF | Encoding_REX2) +INST3(cmovns, "cmovns", IUM_WR, BAD_CODE, BAD_CODE, 0x0F0049, 1C, 2X, INS_TT_NONE, Reads_SF | Encoding_REX2) +INST3(cmovp, "cmovp", IUM_WR, BAD_CODE, BAD_CODE, 0x0F004A, 1C, 2X, INS_TT_NONE, Reads_PF | Encoding_REX2) +INST3(cmovnp, "cmovnp", IUM_WR, BAD_CODE, BAD_CODE, 0x0F004B, 1C, 2X, INS_TT_NONE, Reads_PF | Encoding_REX2) +INST3(cmovl, "cmovl", IUM_WR, BAD_CODE, BAD_CODE, 0x0F004C, 1C, 2X, INS_TT_NONE, Reads_OF | Reads_SF | Encoding_REX2) +INST3(cmovge, "cmovge", IUM_WR, BAD_CODE, BAD_CODE, 0x0F004D, 1C, 2X, INS_TT_NONE, Reads_OF | Reads_SF | Encoding_REX2) +INST3(cmovle, "cmovle", IUM_WR, BAD_CODE, BAD_CODE, 0x0F004E, 1C, 2X, INS_TT_NONE, Reads_OF | Reads_SF | Reads_ZF | Encoding_REX2) +INST3(cmovg, "cmovg", IUM_WR, BAD_CODE, BAD_CODE, 0x0F004F, 1C, 2X, INS_TT_NONE, Reads_OF | Reads_SF | Reads_ZF | Encoding_REX2) INST3(xchg, "xchg", IUM_RW, 0x000086, BAD_CODE, 0x000086, ILLEGAL, ILLEGAL, INS_TT_NONE, INS_FLAGS_Has_Wbit | Encoding_REX2) INST3(imul, "imul", IUM_RW, 0x0F00AC, BAD_CODE, 0x0F00AF, 3C, 1C, INS_TT_NONE, Writes_OF | Undefined_SF | Undefined_ZF | Undefined_AF | Undefined_PF | Writes_CF | INS_FLAGS_Has_Sbit | INS_Flags_Has_NDD | INS_Flags_Has_NF | Encoding_REX2) @@ -1279,9 +1279,9 @@ INST1(serialize, "serialize", IUM_RD, 0x0fe801, INST1(cwde, "cwde", IUM_RD, 0x000098, 1C, 4X, INS_TT_NONE, INS_FLAGS_HasPseudoName) INST1(cdq, "cdq", IUM_RD, 0x000099, 1C, 2X, INS_TT_NONE, INS_FLAGS_HasPseudoName) -INST1(idiv, "idiv", IUM_RD, 0x0038F6, ILLEGAL, ILLEGAL, INS_TT_NONE, Undefined_OF | Undefined_SF | Undefined_ZF | Undefined_AF | Undefined_PF | Undefined_CF | INS_FLAGS_Has_Wbit | Encoding_REX2 | INS_Flags_Has_NF) +INST1(idiv, "idiv", IUM_RD, 0x0038F6, ILLEGAL, ILLEGAL, INS_TT_NONE, Undefined_OF | Undefined_SF | Undefined_ZF 
| Undefined_AF | Undefined_PF | Undefined_CF | INS_FLAGS_Has_Wbit) INST1(imulEAX, "imul", IUM_RD, 0x0028F6, 4C, 1C, INS_TT_NONE, Writes_OF | Undefined_SF | Undefined_ZF | Undefined_AF | Undefined_PF | Writes_CF | INS_FLAGS_Has_Wbit | Encoding_REX2 | INS_Flags_Has_NF) -INST1(div, "div", IUM_RD, 0x0030F6, ILLEGAL, ILLEGAL, INS_TT_NONE, Undefined_OF | Undefined_SF | Undefined_ZF | Undefined_AF | Undefined_PF | Undefined_CF | INS_FLAGS_Has_Wbit | Encoding_REX2 | INS_Flags_Has_NF) +INST1(div, "div", IUM_RD, 0x0030F6, ILLEGAL, ILLEGAL, INS_TT_NONE, Undefined_OF | Undefined_SF | Undefined_ZF | Undefined_AF | Undefined_PF | Undefined_CF | INS_FLAGS_Has_Wbit) INST1(mulEAX, "mul", IUM_RD, 0x0020F6, 4C, 1C, INS_TT_NONE, Writes_OF | Undefined_SF | Undefined_ZF | Undefined_AF | Undefined_PF | Writes_CF | INS_FLAGS_Has_Wbit | Encoding_REX2 | INS_Flags_Has_NF) INST1(sahf, "sahf", IUM_RD, 0x00009E, ILLEGAL, ILLEGAL, INS_TT_NONE, Restore_SF_ZF_AF_PF_CF) diff --git a/src/coreclr/jit/jitconfigvalues.h b/src/coreclr/jit/jitconfigvalues.h index 44cbd6d0e5eb71..0ce1e994c2daa9 100644 --- a/src/coreclr/jit/jitconfigvalues.h +++ b/src/coreclr/jit/jitconfigvalues.h @@ -444,7 +444,8 @@ RELEASE_CONFIG_INTEGER(EnableEmbeddedBroadcast, "EnableEmbeddedBroadcast", RELEASE_CONFIG_INTEGER(EnableEmbeddedMasking, "EnableEmbeddedMasking", 1) // Allows embedded masking to be disabled RELEASE_CONFIG_INTEGER(EnableApxNDD, "EnableApxNDD", 0) // Allows APX NDD feature to be disabled RELEASE_CONFIG_INTEGER(EnableApxConditionalChaining, "EnableApxConditionalChaining", 0) // Allows APX conditional compare chaining -RELEASE_CONFIG_INTEGER(EnableApxPPX, "EnableApxPPX", 0) // Allows APX PPX feature to be disabled +RELEASE_CONFIG_INTEGER(EnableApxPPHint, "EnableApxPPHint", 0) // Allows APX PPX Hint feature to be disabled +RELEASE_CONFIG_INTEGER(EnableApxPP2, "EnableApxPP2", 0) // Allows APX PP2 feature to be disabled RELEASE_CONFIG_INTEGER(EnableApxZU, "EnableApxZU", 0) // Allows APX ZU feature to be disabled // 
clang-format on diff --git a/src/coreclr/jit/lowerxarch.cpp b/src/coreclr/jit/lowerxarch.cpp index 61fef20d231ccc..a583c0956608a4 100644 --- a/src/coreclr/jit/lowerxarch.cpp +++ b/src/coreclr/jit/lowerxarch.cpp @@ -297,7 +297,7 @@ GenTree* Lowering::LowerBinaryArithmetic(GenTreeOp* binOp) ContainCheckBinary(binOp); #ifdef TARGET_AMD64 - if (JitConfig.EnableApxConditionalChaining()) + if (m_compiler->canUseApxEvexEncoding() && JitConfig.EnableApxConditionalChaining()) { if (binOp->OperIs(GT_AND, GT_OR)) { diff --git a/src/coreclr/jit/lsraxarch.cpp b/src/coreclr/jit/lsraxarch.cpp index 947e838fcd799a..43e094c789c35d 100644 --- a/src/coreclr/jit/lsraxarch.cpp +++ b/src/coreclr/jit/lsraxarch.cpp @@ -1928,7 +1928,7 @@ int LinearScan::BuildModDiv(GenTree* tree) tgtPrefUse = op1Use; srcCount = 1; } - srcCount += BuildDelayFreeUses(op2, op1, availableIntRegs & ~(SRBM_RAX | SRBM_RDX)); + srcCount += BuildDelayFreeUses(op2, op1, lowGprRegs & ~(SRBM_RAX | SRBM_RDX)); buildInternalRegisterUses(); diff --git a/src/coreclr/jit/optimizebools.cpp b/src/coreclr/jit/optimizebools.cpp index c9ca63a77751ad..54f2b5bc4203be 100644 --- a/src/coreclr/jit/optimizebools.cpp +++ b/src/coreclr/jit/optimizebools.cpp @@ -1659,7 +1659,8 @@ PhaseStatus Compiler::optOptimizeBools() // trigger or not // else if ((compOpportunisticallyDependsOn(InstructionSet_APX) || JitConfig.JitEnableApxIfConv()) && // optBoolsDsc.optOptimizeCompareChainCondBlock()) - else if (JitConfig.EnableApxConditionalChaining() && !optSwitchDetectAndConvert(b1, true, &ccmpVec) && + else if (canUseApxEvexEncoding() && JitConfig.EnableApxConditionalChaining() && + !optSwitchDetectAndConvert(b1, true, &ccmpVec) && optBoolsDsc.optOptimizeCompareChainCondBlock()) { // The optimization will have merged b1 and b2. Retry the loop so that