From ec19c1294d52d45357ec6e2d7cfe8cc4222b39e7 Mon Sep 17 00:00:00 2001 From: Tanner Gooding Date: Fri, 9 Dec 2022 09:57:01 -0800 Subject: [PATCH 01/15] Remove the dead emitOutputRexOrVexPrefixIfNeeded and rename emitOutputSimdPrefixIfNeeded to emitOutputRexOrSimdPrefixIfNeeded --- src/coreclr/jit/emitxarch.cpp | 300 ++++------------------------------ src/coreclr/jit/emitxarch.h | 3 +- 2 files changed, 31 insertions(+), 272 deletions(-) diff --git a/src/coreclr/jit/emitxarch.cpp b/src/coreclr/jit/emitxarch.cpp index 19c0ea4b480871..7f03582ea4a7c3 100644 --- a/src/coreclr/jit/emitxarch.cpp +++ b/src/coreclr/jit/emitxarch.cpp @@ -1280,7 +1280,7 @@ bool isPrefix(BYTE b) } //------------------------------------------------------------------------ -// emitOutputSimdPrefixIfNeeded: Outputs EVEX prefix (in case of AVX512 instructions), +// emitOutputRexOrSimdPrefixIfNeeded: Outputs EVEX prefix (in case of AVX512 instructions), // VEX prefix (in case of AVX instructions) and REX.R/X/W/B otherwise. // // Arguments: @@ -1292,7 +1292,7 @@ bool isPrefix(BYTE b) // Return Value: // Size of prefix. // -unsigned emitter::emitOutputSimdPrefixIfNeeded(instruction ins, BYTE* dst, code_t& code) +unsigned emitter::emitOutputRexOrSimdPrefixIfNeeded(instruction ins, BYTE* dst, code_t& code) { // TODO-XArch-AVX512: Remove redundant code and collapse into single pathway for EVEX and VEX if possible. if (hasEvexPrefix(code)) @@ -1634,246 +1634,6 @@ unsigned emitter::emitOutputSimdPrefixIfNeeded(instruction ins, BYTE* dst, code_ return 0; } -// Outputs VEX prefix (in case of AVX instructions) and REX.R/X/W/B otherwise. -unsigned emitter::emitOutputRexOrVexPrefixIfNeeded(instruction ins, BYTE* dst, code_t& code) -{ - if (hasVexPrefix(code)) - { - // Only AVX instructions should have a VEX prefix - assert(UseVEXEncoding() && IsVexEncodedInstruction(ins)); - code_t vexPrefix = (code >> 32) & 0x00FFFFFF; - code &= 0x00000000FFFFFFFFLL; - - WORD leadingBytes = 0; - BYTE check = (code >> 24) & 0xFF; - if (check != 0) - { - // 3-byte opcode: with the bytes ordered as 0x2211RM33 or - // 4-byte opcode: with the bytes ordered as 0x22114433 - // check for a prefix in the 11 position - BYTE sizePrefix = (code >> 16) & 0xFF; - if ((sizePrefix != 0) && isPrefix(sizePrefix)) - { - // 'pp' bits in byte2 of VEX prefix allows us to encode SIMD size prefixes as two bits - // - // 00 - None (0F - packed float) - // 01 - 66 (66 0F - packed double) - // 10 - F3 (F3 0F - scalar float - // 11 - F2 (F2 0F - scalar double) - switch (sizePrefix) - { - case 0x66: - if (IsBMIInstruction(ins)) - { - switch (ins) - { - case INS_rorx: - case INS_pdep: - case INS_mulx: -// TODO: Unblock when enabled for x86 -#ifdef TARGET_AMD64 - case INS_shrx: -#endif - { - vexPrefix |= 0x03; - break; - } - - case INS_pext: -// TODO: Unblock when enabled for x86 -#ifdef TARGET_AMD64 - case INS_sarx: -#endif - { - vexPrefix |= 0x02; - break; - } -// TODO: Unblock when enabled for x86 -#ifdef TARGET_AMD64 - case INS_shlx: - { - vexPrefix |= 0x01; - break; - } -#endif - default: - { - vexPrefix |= 0x00; - break; - } - } - } - else - { - vexPrefix |= 0x01; - } - break; - case 0xF3: - vexPrefix |= 0x02; - break; - case 0xF2: - vexPrefix |= 0x03; - break; - default: - assert(!"unrecognized SIMD size prefix"); - unreached(); - } - - // Now the byte in the 22 position must be an escape byte 0F - leadingBytes = check; - assert(leadingBytes == 0x0F); - - // Get rid of both sizePrefix and escape byte - code &= 0x0000FFFFLL; - - // Check the byte in the 33 position to see if it is 3A or 38. - // In such a case escape bytes must be 0x0F3A or 0x0F38 - check = code & 0xFF; - if (check == 0x3A || check == 0x38) - { - leadingBytes = (leadingBytes << 8) | check; - code &= 0x0000FF00LL; - } - } - } - else - { - // 2-byte opcode with the bytes ordered as 0x0011RM22 - // the byte in position 11 must be an escape byte. - leadingBytes = (code >> 16) & 0xFF; - assert(leadingBytes == 0x0F || leadingBytes == 0x00); - code &= 0xFFFF; - } - - // If there is an escape byte it must be 0x0F or 0x0F3A or 0x0F38 - // m-mmmmm bits in byte 1 of VEX prefix allows us to encode these - // implied leading bytes. 0x0F is supported by both the 2-byte and - // 3-byte encoding. While 0x0F3A and 0x0F38 are only supported by - // the 3-byte version. - - switch (leadingBytes) - { - case 0x00: - // there is no leading byte - break; - case 0x0F: - vexPrefix |= 0x0100; - break; - case 0x0F38: - vexPrefix |= 0x0200; - break; - case 0x0F3A: - vexPrefix |= 0x0300; - break; - default: - assert(!"encountered unknown leading bytes"); - unreached(); - } - - // At this point - // VEX.2211RM33 got transformed as VEX.0000RM33 - // VEX.0011RM22 got transformed as VEX.0000RM22 - // - // Now output VEX prefix leaving the 4-byte opcode - - // The 2-byte VEX encoding, requires that the X and B-bits are set (these - // bits are inverted from the REX values so set means off), the W-bit is - // not set (this bit is not inverted), and that the m-mmmm bits are 0-0001 - // (the 2-byte VEX encoding only supports the 0x0F leading byte). When these - // conditions are met, we can change byte-0 from 0xC4 to 0xC5 and then - // byte-1 is the logical-or of bit 7 from byte-1 and bits 0-6 from byte 2 - // from the 3-byte VEX encoding. - // - // Given the above, the check can be reduced to a simple mask and comparison. - // * 0xFFFF7F80 is a mask that ignores any bits whose value we don't care about: - // * R can be set or unset (0x7F ignores bit 7) - // * vvvv can be any value (0x80 ignores bits 3-6) - // * L can be set or unset (0x80 ignores bit 2) - // * pp can be any value (0x80 ignores bits 0-1) - // * 0x00C46100 is a value that signifies the requirements listed above were met: - // * We must be a three-byte VEX opcode (0x00C4) - // * X and B must be set (0x61 validates bits 5-6) - // * m-mmmm must be 0-00001 (0x61 validates bits 0-4) - // * W must be unset (0x00 validates bit 7) - if ((vexPrefix & 0xFFFF7F80) == 0x00C46100) - { - // Encoding optimization calculation is not done while estimating the instruction - // size and thus over-predict instruction size by 1 byte. - // If there are IGs that will be aligned, do not optimize encoding so the - // estimated alignment sizes are accurate. - if (emitCurIG->igNum > emitLastAlignedIgNum) - { - emitOutputByte(dst, 0xC5); - emitOutputByte(dst + 1, ((vexPrefix >> 8) & 0x80) | (vexPrefix & 0x7F)); - return 2; - } - } - - emitOutputByte(dst, ((vexPrefix >> 16) & 0xFF)); - emitOutputByte(dst + 1, ((vexPrefix >> 8) & 0xFF)); - emitOutputByte(dst + 2, vexPrefix & 0xFF); - return 3; - } - -#ifdef TARGET_AMD64 - if (code > 0x00FFFFFFFFLL) - { - BYTE prefix = (code >> 32) & 0xFF; - noway_assert(prefix >= 0x40 && prefix <= 0x4F); - code &= 0x00000000FFFFFFFFLL; - - // TODO-AMD64-Cleanup: when we remove the prefixes (just the SSE opcodes right now) - // we can remove this code as well - - // The REX prefix is required to come after all other prefixes. - // Some of our 'opcodes' actually include some prefixes, if that - // is the case, shift them over and place the REX prefix after - // the other prefixes, and emit any prefix that got moved out. - BYTE check = (code >> 24) & 0xFF; - if (check == 0) - { - // 3-byte opcode: with the bytes ordered as 0x00113322 - // check for a prefix in the 11 position - check = (code >> 16) & 0xFF; - if (check != 0 && isPrefix(check)) - { - // Swap the rex prefix and whatever this prefix is - code = (((DWORD)prefix << 16) | (code & 0x0000FFFFLL)); - // and then emit the other prefix - return emitOutputByte(dst, check); - } - } - else - { - // 4-byte opcode with the bytes ordered as 0x22114433 - // first check for a prefix in the 11 position - BYTE check2 = (code >> 16) & 0xFF; - if (isPrefix(check2)) - { - assert(!isPrefix(check)); // We currently don't use this, so it is untested - if (isPrefix(check)) - { - // 3 prefixes were rex = rr, check = c1, check2 = c2 encoded as 0xrrc1c2XXXX - // Change to c2rrc1XXXX, and emit check2 now - code = (((code_t)prefix << 24) | ((code_t)check << 16) | (code & 0x0000FFFFLL)); - } - else - { - // 2 prefixes were rex = rr, check2 = c2 encoded as 0xrrXXc2XXXX, (check is part of the opcode) - // Change to c2XXrrXXXX, and emit check2 now - code = (((code_t)check << 24) | ((code_t)prefix << 16) | (code & 0x0000FFFFLL)); - } - return emitOutputByte(dst, check2); - } - } - - return emitOutputByte(dst, prefix); - } -#endif // TARGET_AMD64 - - return 0; -} - #ifdef TARGET_AMD64 /***************************************************************************** * Is the last instruction emitted a call instruction? @@ -11339,7 +11099,7 @@ BYTE* emitter::emitOutputAM(BYTE* dst, instrDesc* id, code_t code, CnsVal* addc) if (id->idIsCallRegPtr()) { code = insEncodeMRreg(ins, reg, EA_PTRSIZE, code); - dst += emitOutputSimdPrefixIfNeeded(ins, dst, code); + dst += emitOutputRexOrSimdPrefixIfNeeded(ins, dst, code); dst += emitOutputWord(dst, code); goto DONE; } @@ -11365,7 +11125,7 @@ BYTE* emitter::emitOutputAM(BYTE* dst, instrDesc* id, code_t code, CnsVal* addc) } // And emit the REX prefix - dst += emitOutputSimdPrefixIfNeeded(ins, dst, code); + dst += emitOutputRexOrSimdPrefixIfNeeded(ins, dst, code); #endif // TARGET_AMD64 @@ -11527,7 +11287,7 @@ BYTE* emitter::emitOutputAM(BYTE* dst, instrDesc* id, code_t code, CnsVal* addc) } unsigned regcode = insEncodeReg345(ins, reg345, size, &code); - dst += emitOutputSimdPrefixIfNeeded(ins, dst, code); + dst += emitOutputRexOrSimdPrefixIfNeeded(ins, dst, code); if (UseSimdEncoding() && (ins != INS_crc32)) { @@ -11555,7 +11315,7 @@ BYTE* emitter::emitOutputAM(BYTE* dst, instrDesc* id, code_t code, CnsVal* addc) } // Output the REX prefix - dst += emitOutputSimdPrefixIfNeeded(ins, dst, code); + dst += emitOutputRexOrSimdPrefixIfNeeded(ins, dst, code); // Output the highest word of the opcode // We need to check again as in case of AVX instructions leading opcode bytes are stripped off @@ -11572,7 +11332,7 @@ BYTE* emitter::emitOutputAM(BYTE* dst, instrDesc* id, code_t code, CnsVal* addc) assert(ins != INS_bt); // Output the REX prefix - dst += emitOutputSimdPrefixIfNeeded(ins, dst, code); + dst += emitOutputRexOrSimdPrefixIfNeeded(ins, dst, code); // Output the highest byte of the opcode if (code & 0x00FF0000) @@ -11640,7 +11400,7 @@ BYTE* emitter::emitOutputAM(BYTE* dst, instrDesc* id, code_t code, CnsVal* addc) } // Output the REX prefix - dst += emitOutputSimdPrefixIfNeeded(ins, dst, code); + dst += emitOutputRexOrSimdPrefixIfNeeded(ins, dst, code); // Get the displacement value dsp = emitGetInsAmdAny(id); @@ -12341,7 +12101,7 @@ BYTE* emitter::emitOutputSV(BYTE* dst, instrDesc* id, code_t code, CnsVal* addc) } unsigned regcode = insEncodeReg345(ins, reg345, size, &code); - dst += emitOutputSimdPrefixIfNeeded(ins, dst, code); + dst += emitOutputRexOrSimdPrefixIfNeeded(ins, dst, code); if (UseSimdEncoding() && (ins != INS_crc32)) { @@ -12369,7 +12129,7 @@ BYTE* emitter::emitOutputSV(BYTE* dst, instrDesc* id, code_t code, CnsVal* addc) } // Output the REX prefix - dst += emitOutputSimdPrefixIfNeeded(ins, dst, code); + dst += emitOutputRexOrSimdPrefixIfNeeded(ins, dst, code); // Output the highest word of the opcode // We need to check again because in case of AVX instructions the leading @@ -12386,7 +12146,7 @@ BYTE* emitter::emitOutputSV(BYTE* dst, instrDesc* id, code_t code, CnsVal* addc) assert(ins != INS_bt); // Output the REX prefix - dst += emitOutputSimdPrefixIfNeeded(ins, dst, code); + dst += emitOutputRexOrSimdPrefixIfNeeded(ins, dst, code); // Output the highest byte of the opcode. // We need to check again because in case of AVX instructions the leading @@ -12458,7 +12218,7 @@ BYTE* emitter::emitOutputSV(BYTE* dst, instrDesc* id, code_t code, CnsVal* addc) } // Output the REX prefix - dst += emitOutputSimdPrefixIfNeeded(ins, dst, code); + dst += emitOutputRexOrSimdPrefixIfNeeded(ins, dst, code); // Figure out the variable's frame position int varNum = id->idAddr()->iiaLclVar.lvaVarNum(); @@ -12841,7 +12601,7 @@ BYTE* emitter::emitOutputCV(BYTE* dst, instrDesc* id, code_t code, CnsVal* addc) } unsigned regcode = insEncodeReg345(ins, reg345, size, &code); - dst += emitOutputSimdPrefixIfNeeded(ins, dst, code); + dst += emitOutputRexOrSimdPrefixIfNeeded(ins, dst, code); if (UseVEXEncoding() && (ins != INS_crc32)) { @@ -12871,7 +12631,7 @@ BYTE* emitter::emitOutputCV(BYTE* dst, instrDesc* id, code_t code, CnsVal* addc) } // Output the REX prefix - dst += emitOutputSimdPrefixIfNeeded(ins, dst, code); + dst += emitOutputRexOrSimdPrefixIfNeeded(ins, dst, code); // Output the highest word of the opcode. // Check again since AVX instructions encode leading opcode bytes as part of VEX prefix. @@ -12884,7 +12644,7 @@ BYTE* emitter::emitOutputCV(BYTE* dst, instrDesc* id, code_t code, CnsVal* addc) else if (code & 0x00FF0000) { // Output the REX prefix - dst += emitOutputSimdPrefixIfNeeded(ins, dst, code); + dst += emitOutputRexOrSimdPrefixIfNeeded(ins, dst, code); // Check again as VEX prefix would have encoded leading opcode byte if (code & 0x00FF0000) @@ -12941,7 +12701,7 @@ BYTE* emitter::emitOutputCV(BYTE* dst, instrDesc* id, code_t code, CnsVal* addc) } // Output the REX prefix - dst += emitOutputSimdPrefixIfNeeded(ins, dst, code); + dst += emitOutputRexOrSimdPrefixIfNeeded(ins, dst, code); if (code) { @@ -13228,7 +12988,7 @@ BYTE* emitter::emitOutputR(BYTE* dst, instrDesc* id) unsigned regcode = insEncodeReg012(ins, reg, size, &code); // Output the REX prefix - dst += emitOutputSimdPrefixIfNeeded(ins, dst, code); + dst += emitOutputRexOrSimdPrefixIfNeeded(ins, dst, code); dst += emitOutputWord(dst, code | (regcode << 8)); } @@ -13255,7 +13015,7 @@ BYTE* emitter::emitOutputR(BYTE* dst, instrDesc* id) assert(!TakesRexWPrefix(ins, size)); // Output the REX prefix - dst += emitOutputSimdPrefixIfNeeded(ins, dst, code); + dst += emitOutputRexOrSimdPrefixIfNeeded(ins, dst, code); dst += emitOutputByte(dst, code); break; @@ -13280,7 +13040,7 @@ BYTE* emitter::emitOutputR(BYTE* dst, instrDesc* id) unsigned regcode = insEncodeReg012(ins, reg, size, &code); // Output the REX prefix - dst += emitOutputSimdPrefixIfNeeded(ins, dst, code); + dst += emitOutputRexOrSimdPrefixIfNeeded(ins, dst, code); dst += emitOutputWord(dst, code | (regcode << 8)); break; @@ -13309,7 +13069,7 @@ BYTE* emitter::emitOutputR(BYTE* dst, instrDesc* id) code = insEncodeMRreg(ins, reg, EA_1BYTE, insCodeMR(ins)); // Output the REX prefix - dst += emitOutputSimdPrefixIfNeeded(ins, dst, code); + dst += emitOutputRexOrSimdPrefixIfNeeded(ins, dst, code); // We expect this to always be a 'big' opcode assert(code & 0x00FF0000); @@ -13354,7 +13114,7 @@ BYTE* emitter::emitOutputR(BYTE* dst, instrDesc* id) } // Output the REX prefix - dst += emitOutputSimdPrefixIfNeeded(ins, dst, code); + dst += emitOutputRexOrSimdPrefixIfNeeded(ins, dst, code); dst += emitOutputWord(dst, code); break; @@ -13590,7 +13350,7 @@ BYTE* emitter::emitOutputRR(BYTE* dst, instrDesc* id) } // Output the REX prefix - dst += emitOutputSimdPrefixIfNeeded(ins, dst, code); + dst += emitOutputRexOrSimdPrefixIfNeeded(ins, dst, code); if (code & 0xFF000000) { @@ -13844,7 +13604,7 @@ BYTE* emitter::emitOutputRRR(BYTE* dst, instrDesc* id) code = insEncodeReg3456(ins, src1, size, code); // Output the REX/VEX/EVEX prefix - dst += emitOutputSimdPrefixIfNeeded(ins, dst, code); + dst += emitOutputRexOrSimdPrefixIfNeeded(ins, dst, code); // Is this a 'big' opcode? if (code & 0xFF000000) @@ -13948,7 +13708,7 @@ BYTE* emitter::emitOutputRI(BYTE* dst, instrDesc* id) unsigned regcode = (insEncodeReg345(ins, regOpcode, size, &code) | insEncodeReg012(ins, reg, size, &code)) << 8; // Output the REX prefix - dst += emitOutputSimdPrefixIfNeeded(ins, dst, code); + dst += emitOutputRexOrSimdPrefixIfNeeded(ins, dst, code); if (code & 0xFF000000) { @@ -13984,7 +13744,7 @@ BYTE* emitter::emitOutputRI(BYTE* dst, instrDesc* id) code = AddRexWPrefix(ins, code); } - dst += emitOutputSimdPrefixIfNeeded(ins, dst, code); + dst += emitOutputRexOrSimdPrefixIfNeeded(ins, dst, code); dst += emitOutputByte(dst, code); if (size == EA_4BYTE) @@ -14113,7 +13873,7 @@ BYTE* emitter::emitOutputRI(BYTE* dst, instrDesc* id) } // Output the REX prefix - dst += emitOutputSimdPrefixIfNeeded(ins, dst, code); + dst += emitOutputRexOrSimdPrefixIfNeeded(ins, dst, code); // Does the value fit in a sign-extended byte? // Important! Only set the 's' bit when we have a size larger than EA_1BYTE. @@ -14315,7 +14075,7 @@ BYTE* emitter::emitOutputIV(BYTE* dst, instrDesc* id) if (TakesRexWPrefix(ins, size) || (codeEvexMigrationCheck(code) && IsWEvexOpcodeExtension(ins))) { code = AddRexWPrefix(ins, code); - dst += emitOutputSimdPrefixIfNeeded(ins, dst, code); + dst += emitOutputRexOrSimdPrefixIfNeeded(ins, dst, code); } dst += emitOutputByte(dst, code); @@ -14941,7 +14701,7 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp) { code = AddRexWPrefix(ins, code); } - dst += emitOutputSimdPrefixIfNeeded(ins, dst, code); + dst += emitOutputRexOrSimdPrefixIfNeeded(ins, dst, code); #endif // Is this a 'big' opcode? if (code & 0xFF000000) @@ -15234,7 +14994,7 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp) dst += emitOutputByte(dst, 0x66); } - dst += emitOutputSimdPrefixIfNeeded(ins, dst, code); + dst += emitOutputRexOrSimdPrefixIfNeeded(ins, dst, code); dst += emitOutputWord(dst, code); dst += emitOutputByte(dst, emitGetInsSC(id)); sz = emitSizeOfInsDsc(id); @@ -15340,7 +15100,7 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp) regcode = (insEncodeReg345(ins, rReg, size, &code) | insEncodeReg012(ins, mReg, size, &code)); // Output the REX prefix - dst += emitOutputSimdPrefixIfNeeded(ins, dst, code); + dst += emitOutputRexOrSimdPrefixIfNeeded(ins, dst, code); if (code & 0xFF000000) { diff --git a/src/coreclr/jit/emitxarch.h b/src/coreclr/jit/emitxarch.h index 13f79bc3833cca..c5238685a6247e 100644 --- a/src/coreclr/jit/emitxarch.h +++ b/src/coreclr/jit/emitxarch.h @@ -65,8 +65,7 @@ BYTE* emitOutputRRR(BYTE* dst, instrDesc* id); BYTE* emitOutputLJ(insGroup* ig, BYTE* dst, instrDesc* id); -unsigned emitOutputSimdPrefixIfNeeded(instruction ins, BYTE* dst, code_t& code); -unsigned emitOutputRexOrVexPrefixIfNeeded(instruction ins, BYTE* dst, code_t& code); +unsigned emitOutputRexOrSimdPrefixIfNeeded(instruction ins, BYTE* dst, code_t& code); unsigned emitGetRexPrefixSize(instruction ins); unsigned emitGetVexPrefixSize(instruction ins, emitAttr attr); unsigned emitGetEvexPrefixSize(instruction ins); From 4d75d052dc27ed33f7977c2fc72742bb510999d9 Mon Sep 17 00:00:00 2001 From: Tanner Gooding Date: Fri, 9 Dec 2022 09:58:18 -0800 Subject: [PATCH 02/15] Remove the dead emitGetAdjustedSize and rename emitGetAdjustedSizeEvexAware to emitGetAdjustedSize --- src/coreclr/jit/emitxarch.cpp | 106 ++++------------------------------ src/coreclr/jit/emitxarch.h | 1 - 2 files changed, 11 insertions(+), 96 deletions(-) diff --git a/src/coreclr/jit/emitxarch.cpp b/src/coreclr/jit/emitxarch.cpp index 7f03582ea4a7c3..1f74aab45221a9 100644 --- a/src/coreclr/jit/emitxarch.cpp +++ b/src/coreclr/jit/emitxarch.cpp @@ -1678,7 +1678,6 @@ unsigned emitter::emitGetRexPrefixSize(instruction ins) //------------------------------------------------------------------------ // emitGetEvexPrefixSize: Gets Size of Evex prefix in bytes -// TODO-XArch-AVX512: Once evex encoding is supported fully, this will take the place of emitGetAdjustedSize() // // Arguments: // ins -- The instruction @@ -1710,9 +1709,8 @@ unsigned emitter::emitGetVexPrefixSize(instruction ins, emitAttr attr) } //------------------------------------------------------------------------ -// emitGetAdjustedSizeEvexAware: Determines any size adjustment needed for a given instruction based on the current +// emitGetAdjustedSize: Determines any size adjustment needed for a given instruction based on the current // configuration. -// TODO-XArch-AVX512: Once evex encoding is supported fully, this will take the place of emitGetAdjustedSize() // // Arguments: // ins -- The instruction being emitted @@ -1722,7 +1720,7 @@ unsigned emitter::emitGetVexPrefixSize(instruction ins, emitAttr attr) // Returns: // Updated size. // -unsigned emitter::emitGetAdjustedSizeEvexAware(instruction ins, emitAttr attr, code_t code) +unsigned emitter::emitGetAdjustedSize(instruction ins, emitAttr attr, code_t code) { unsigned adjustedSize = 0; @@ -1847,88 +1845,6 @@ unsigned emitter::emitGetAdjustedSizeEvexAware(instruction ins, emitAttr attr, c return adjustedSize; } -//------------------------------------------------------------------------ -// emitGetAdjustedSize: Determines any size adjustment needed for a given instruction based on the current -// configuration. -// -// Arguments: -// ins -- The instruction being emitted -// attr -- The emit attribute -// code -- The current opcode and any known prefixes -unsigned emitter::emitGetAdjustedSize(instruction ins, emitAttr attr, code_t code) -{ - unsigned adjustedSize = 0; - - if (IsVexEncodedInstruction(ins)) - { - // VEX prefix encodes some bytes of the opcode and as a result, overall size of the instruction reduces. - // Therefore, to estimate the size adding VEX prefix size and size of instruction opcode bytes will always - // overstimate. - // Instead this routine will adjust the size of VEX prefix based on the number of bytes of opcode it encodes so - // that - // instruction size estimate will be accurate. - // Basically this will decrease the vexPrefixSize, so that opcodeSize + vexPrefixAdjustedSize will be the right - // size. - // - // rightOpcodeSize + vexPrefixSize - // = (opcodeSize - ExtrabytesSize) + vexPrefixSize - // = opcodeSize + (vexPrefixSize - ExtrabytesSize) - // = opcodeSize + vexPrefixAdjustedSize - - unsigned vexPrefixAdjustedSize = emitGetVexPrefixSize(ins, attr); - assert(vexPrefixAdjustedSize == 3); - - // In this case, opcode will contains escape prefix at least one byte, - // vexPrefixAdjustedSize should be minus one. - vexPrefixAdjustedSize -= 1; - - // Get the fourth byte in Opcode. - // If this byte is non-zero, then we should check whether the opcode contains SIMD prefix or not. - BYTE check = (code >> 24) & 0xFF; - if (check != 0) - { - // 3-byte opcode: with the bytes ordered as 0x2211RM33 or - // 4-byte opcode: with the bytes ordered as 0x22114433 - // Simd prefix is at the first byte. - BYTE sizePrefix = (code >> 16) & 0xFF; - if (sizePrefix != 0 && isPrefix(sizePrefix)) - { - vexPrefixAdjustedSize -= 1; - } - - // If the opcode size is 4 bytes, then the second escape prefix is at fourth byte in opcode. - // But in this case the opcode has not counted R\M part. - // opcodeSize + VexPrefixAdjustedSize - ExtraEscapePrefixSize + ModR\MSize - //=opcodeSize + VexPrefixAdjustedSize -1 + 1 - //=opcodeSize + VexPrefixAdjustedSize - // So although we may have second byte escape prefix, we won't decrease vexPrefixAdjustedSize. - } - - adjustedSize = vexPrefixAdjustedSize; - } - else if (Is4ByteSSEInstruction(ins)) - { - // The 4-Byte SSE instructions require one additional byte to hold the ModRM byte - adjustedSize++; - } - else - { - if (ins == INS_crc32) - { - // Adjust code size for CRC32 that has 4-byte opcode but does not use SSE38 or EES3A encoding. - adjustedSize++; - } - - if ((attr == EA_2BYTE) && (ins != INS_movzx) && (ins != INS_movsx)) - { - // Most 16-bit operand instructions will need a 0x66 prefix. - adjustedSize++; - } - } - - return adjustedSize; -} - // //------------------------------------------------------------------------ // emitGetPrefixSize: Get size of rex or vex prefix emitted in code @@ -2842,7 +2758,7 @@ inline UNATIVE_OFFSET emitter::emitInsSizeRR(instrDesc* id, code_t code) instruction ins = id->idIns(); emitAttr attr = id->idOpSize(); - UNATIVE_OFFSET sz = emitGetAdjustedSizeEvexAware(ins, attr, code); + UNATIVE_OFFSET sz = emitGetAdjustedSize(ins, attr, code); bool includeRexPrefixSize = true; // REX prefix @@ -2910,7 +2826,7 @@ inline UNATIVE_OFFSET emitter::emitInsSizeRR(instruction ins, regNumber reg1, re // This would probably be better expressed as a different format or something? code_t code = insCodeRM(ins); - UNATIVE_OFFSET sz = emitGetAdjustedSizeEvexAware(ins, size, insCodeRM(ins)); + UNATIVE_OFFSET sz = emitGetAdjustedSize(ins, size, insCodeRM(ins)); bool includeRexPrefixSize = true; // REX prefix @@ -3132,7 +3048,7 @@ inline UNATIVE_OFFSET emitter::emitInsSizeSV(instrDesc* id, code_t code, int var assert(id->idIns() != INS_invalid); instruction ins = id->idIns(); emitAttr attrSize = id->idOpSize(); - UNATIVE_OFFSET prefix = emitGetAdjustedSizeEvexAware(ins, attrSize, code); + UNATIVE_OFFSET prefix = emitGetAdjustedSize(ins, attrSize, code); // REX prefix if (TakesRexWPrefix(ins, attrSize) || IsExtendedReg(id->idReg1(), attrSize) || @@ -3150,7 +3066,7 @@ inline UNATIVE_OFFSET emitter::emitInsSizeSV(instrDesc* id, code_t code, int var instruction ins = id->idIns(); emitAttr attrSize = id->idOpSize(); UNATIVE_OFFSET valSize = EA_SIZE_IN_BYTES(attrSize); - UNATIVE_OFFSET prefix = emitGetAdjustedSizeEvexAware(ins, attrSize, code); + UNATIVE_OFFSET prefix = emitGetAdjustedSize(ins, attrSize, code); bool valInByte = ((signed char)val == val) && (ins != INS_mov) && (ins != INS_test); #ifdef TARGET_AMD64 @@ -3283,7 +3199,7 @@ UNATIVE_OFFSET emitter::emitInsSizeAM(instrDesc* id, code_t code) size = 2; } - size += emitGetAdjustedSizeEvexAware(ins, attrSize, code); + size += emitGetAdjustedSize(ins, attrSize, code); if (hasRexPrefix(code)) { @@ -3501,7 +3417,7 @@ inline UNATIVE_OFFSET emitter::emitInsSizeCV(instrDesc* id, code_t code) // can be reached via RIP-relative addressing. UNATIVE_OFFSET size = sizeof(INT32); - size += emitGetAdjustedSizeEvexAware(ins, attrSize, code); + size += emitGetAdjustedSize(ins, attrSize, code); bool includeRexPrefixSize = true; @@ -3743,7 +3659,7 @@ void emitter::emitIns(instruction ins, emitAttr attr) insFormat fmt = IF_NONE; - sz += emitGetAdjustedSizeEvexAware(ins, attr, code); + sz += emitGetAdjustedSize(ins, attr, code); if (TakesRexWPrefix(ins, attr)) { sz += emitGetRexPrefixSize(ins); @@ -4888,7 +4804,7 @@ void emitter::emitIns_R(instruction ins, emitAttr attr, regNumber reg) id->idReg1(reg); // Vex bytes - sz += emitGetAdjustedSizeEvexAware(ins, attr, insEncodeMRreg(ins, reg, attr, insCodeMR(ins))); + sz += emitGetAdjustedSize(ins, attr, insEncodeMRreg(ins, reg, attr, insCodeMR(ins))); // REX byte if (IsExtendedReg(reg, attr) || TakesRexWPrefix(ins, attr)) @@ -5030,7 +4946,7 @@ void emitter::emitIns_R_I(instruction ins, break; } - sz += emitGetAdjustedSizeEvexAware(ins, attr, insCodeMI(ins)); + sz += emitGetAdjustedSize(ins, attr, insCodeMI(ins)); // Do we need a REX prefix for AMD64? We need one if we are using any extended register (REX.R), or if we have a // 64-bit sized operand (REX.W). Note that IMUL in our encoding is special, with a "built-in", implicit, target diff --git a/src/coreclr/jit/emitxarch.h b/src/coreclr/jit/emitxarch.h index c5238685a6247e..94174885110835 100644 --- a/src/coreclr/jit/emitxarch.h +++ b/src/coreclr/jit/emitxarch.h @@ -71,7 +71,6 @@ unsigned emitGetVexPrefixSize(instruction ins, emitAttr attr); unsigned emitGetEvexPrefixSize(instruction ins); unsigned emitGetPrefixSize(code_t code, bool includeRexPrefixSize); unsigned emitGetAdjustedSize(instruction ins, emitAttr attr, code_t code); -unsigned emitGetAdjustedSizeEvexAware(instruction ins, emitAttr attr, code_t code); unsigned insEncodeReg012(instruction ins, regNumber reg, emitAttr size, code_t* code); unsigned insEncodeReg345(instruction ins, regNumber reg, emitAttr size, code_t* code); From 0932ac1e378a6b9a7352383dbba69003cdfba49e Mon Sep 17 00:00:00 2001 From: Tanner Gooding Date: Fri, 9 Dec 2022 10:01:47 -0800 Subject: [PATCH 03/15] Simplify the emitGetEvexPrefixSize and emitGetVexPrefixSize methods --- src/coreclr/jit/emitxarch.cpp | 521 ++++++++++++++++++++-------------- src/coreclr/jit/emitxarch.h | 2 +- 2 files changed, 309 insertions(+), 214 deletions(-) diff --git a/src/coreclr/jit/emitxarch.cpp b/src/coreclr/jit/emitxarch.cpp index 1f74aab45221a9..9d6a4a9cca196c 100644 --- a/src/coreclr/jit/emitxarch.cpp +++ b/src/coreclr/jit/emitxarch.cpp @@ -1280,261 +1280,354 @@ bool isPrefix(BYTE b) } //------------------------------------------------------------------------ -// emitOutputRexOrSimdPrefixIfNeeded: Outputs EVEX prefix (in case of AVX512 instructions), -// VEX prefix (in case of AVX instructions) and REX.R/X/W/B otherwise. +// emitExtractEvexPrefix: Extract the EVEX prefix // // Arguments: -// ins -- processor instruction to check. -// dst -- buffer to write prefix to. -// code -- opcode bits. -// attr -- operand size +// [In] ins -- processor instruction to check. +// [In, Out] code -- opcode bits which will no longer contain the evex prefix on return // // Return Value: -// Size of prefix. +// The extracted EVEX prefix. // -unsigned emitter::emitOutputRexOrSimdPrefixIfNeeded(instruction ins, BYTE* dst, code_t& code) +emitter::code_t emitter::emitExtractEvexPrefix(instruction ins, code_t& code) { - // TODO-XArch-AVX512: Remove redundant code and collapse into single pathway for EVEX and VEX if possible. - if (hasEvexPrefix(code)) - { - // Only AVX512 instructions should have an EVEX prefix - assert(IsEvexEncodedInstruction(ins)); + assert(IsEvexEncodedInstruction(ins)); - code_t evexPrefix = (code >> 32) & 0xFFFFFFFF; - code &= 0x00000000FFFFFFFFLL; + code_t evexPrefix = (code >> 32) & 0xFFFFFFFF; + code &= 0x00000000FFFFFFFFLL; - WORD leadingBytes = 0; - BYTE check = (code >> 24) & 0xFF; - if (check != 0) + WORD leadingBytes = 0; + BYTE check = (code >> 24) & 0xFF; + + if (check != 0) + { + // check for a prefix in the 11 position + BYTE sizePrefix = (code >> 16) & 0xFF; + + if ((sizePrefix != 0) && isPrefix(sizePrefix)) { - // check for a prefix in the 11 position - BYTE sizePrefix = (code >> 16) & 0xFF; - if ((sizePrefix != 0) && isPrefix(sizePrefix)) + // 'pp' bits in byte 1 of EVEX prefix allows us to encode SIMD size prefixes as two bits + // + // 00 - None (0F - packed float) + // 01 - 66 (66 0F - packed double) + // 10 - F3 (F3 0F - scalar float + // 11 - F2 (F2 0F - scalar double) + switch (sizePrefix) { - // 'pp' bits in byte 1 of EVEX prefix allows us to encode SIMD size prefixes as two bits - // - // 00 - None (0F - packed float) - // 01 - 66 (66 0F - packed double) - // 10 - F3 (F3 0F - scalar float - // 11 - F2 (F2 0F - scalar double) - switch (sizePrefix) + case 0x66: { - case 0x66: - // None of the existing BMI instructions should be EVEX encoded. - assert(!IsBMIInstruction(ins)); - evexPrefix |= (0x01 << 8); - break; - case 0xF3: - evexPrefix |= (0x02 << 8); - break; - case 0xF2: - evexPrefix |= (0x03 << 8); - break; - default: - assert(!"unrecognized SIMD size prefix"); - unreached(); + // None of the existing BMI instructions should be EVEX encoded. + assert(!IsBMIInstruction(ins)); + evexPrefix |= (0x01 << 8); + break; } - // Now the byte in the 22 position must be an escape byte 0F - leadingBytes = check; - assert(leadingBytes == 0x0F); + case 0xF3: + { + evexPrefix |= (0x02 << 8); + break; + } - // Get rid of both sizePrefix and escape byte - code &= 0x0000FFFFLL; + case 0xF2: + { + evexPrefix |= (0x03 << 8); + break; + } - // Check the byte in the 33 position to see if it is 3A or 38. - // In such a case escape bytes must be 0x0F3A or 0x0F38 - check = code & 0xFF; - if (check == 0x3A || check == 0x38) + default: { - leadingBytes = (leadingBytes << 8) | check; - code &= 0x0000FF00LL; + assert(!"unrecognized SIMD size prefix"); + unreached(); } } + + // Now the byte in the 22 position must be an escape byte 0F + leadingBytes = check; + assert(leadingBytes == 0x0F); + + // Get rid of both sizePrefix and escape byte + code &= 0x0000FFFFLL; + + // Check the byte in the 33 position to see if it is 3A or 38. + // In such a case escape bytes must be 0x0F3A or 0x0F38 + check = code & 0xFF; + + if ((check == 0x3A) || (check == 0x38)) + { + leadingBytes = (leadingBytes << 8) | check; + code &= 0x0000FF00LL; + } } - else + } + else + { + // 2-byte opcode with the bytes ordered as 0x0011RM22 + // the byte in position 11 must be an escape byte. + leadingBytes = (code >> 16) & 0xFF; + assert(leadingBytes == 0x0F || leadingBytes == 0x00); + code &= 0xFFFF; + } + + // If there is an escape byte it must be 0x0F or 0x0F3A or 0x0F38 + // mm bits in byte 0 of EVEX prefix allows us to encode these + // implied leading bytes. They are identical to low two bits of VEX.mmmmm + + switch (leadingBytes) + { + case 0x00: { - // 2-byte opcode with the bytes ordered as 0x0011RM22 - // the byte in position 11 must be an escape byte. - leadingBytes = (code >> 16) & 0xFF; - assert(leadingBytes == 0x0F || leadingBytes == 0x00); - code &= 0xFFFF; + // there is no leading byte + break; } - // If there is an escape byte it must be 0x0F or 0x0F3A or 0x0F38 - // mm bits in byte 0 of EVEX prefix allows us to encode these - // implied leading bytes. They are identical to low two bits of VEX.mmmmm + case 0x0F: + { + evexPrefix |= (0x01 << 16); + break; + } - switch (leadingBytes) + case 0x0F38: { - case 0x00: - // there is no leading byte - break; - case 0x0F: - evexPrefix |= (0x01 << 16); - break; - case 0x0F38: - evexPrefix |= (0x02 << 16); - break; - case 0x0F3A: - evexPrefix |= (0x03 << 16); - break; - default: - assert(!"encountered unknown leading bytes"); - unreached(); + evexPrefix |= (0x02 << 16); + break; } - // At this point - // EVEX.2211RM33 got transformed as EVEX.0000RM33 - // EVEX.0011RM22 got transformed as EVEX.0000RM22 - // - // Now output EVEX prefix leaving the 4-byte opcode - // EVEX prefix is always 4 bytes + case 0x0F3A: + { + evexPrefix |= (0x03 << 16); + break; + } - emitOutputByte(dst, ((evexPrefix >> 24) & 0xFF)); - emitOutputByte(dst + 1, ((evexPrefix >> 16) & 0xFF)); - emitOutputByte(dst + 2, (evexPrefix >> 8) & 0xFF); - emitOutputByte(dst + 3, evexPrefix & 0xFF); - return 4; + default: + { + assert(!"encountered unknown leading bytes"); + unreached(); + } } - else if (hasVexPrefix(code)) + + // At this point + // EVEX.2211RM33 got transformed as EVEX.0000RM33 + // EVEX.0011RM22 got transformed as EVEX.0000RM22 + // + // Now output EVEX prefix leaving the 4-byte opcode + // EVEX prefix is always 4 bytes + + return evexPrefix; +} + +//------------------------------------------------------------------------ +// emitExtractVexPrefix: Extract the VEX prefix +// +// Arguments: +// [In] ins -- processor instruction to check. +// [In, Out] code -- opcode bits which will no longer contain the vex prefix on return +// +// Return Value: +// The extracted VEX prefix. +// +emitter::code_t emitter::emitExtractVexPrefix(instruction ins, code_t& code) +{ + assert(IsVexEncodedInstruction(ins)); + + code_t vexPrefix = (code >> 32) & 0x00FFFFFF; + code &= 0x00000000FFFFFFFFLL; + + WORD leadingBytes = 0; + BYTE check = (code >> 24) & 0xFF; + + if (check != 0) { - // Only AVX instructions should have a VEX prefix - assert(UseVEXEncoding() && IsVexEncodedInstruction(ins)); - code_t vexPrefix = (code >> 32) & 0x00FFFFFF; - code &= 0x00000000FFFFFFFFLL; + // 3-byte opcode: with the bytes ordered as 0x2211RM33 or + // 4-byte opcode: with the bytes ordered as 0x22114433 - WORD leadingBytes = 0; - BYTE check = (code >> 24) & 0xFF; - if (check != 0) + // check for a prefix in the 11 position + BYTE sizePrefix = (code >> 16) & 0xFF; + + if ((sizePrefix != 0) && isPrefix(sizePrefix)) { - // 3-byte opcode: with the bytes ordered as 0x2211RM33 or - // 4-byte opcode: with the bytes ordered as 0x22114433 - // check for a prefix in the 11 position - BYTE sizePrefix = (code >> 16) & 0xFF; - if ((sizePrefix != 0) && isPrefix(sizePrefix)) + // 'pp' bits in byte2 of VEX prefix allows us to encode SIMD size prefixes as two bits + // + // 00 - None (0F - packed float) + // 01 - 66 (66 0F - packed double) + // 10 - F3 (F3 0F - scalar float + // 11 - F2 (F2 0F - scalar double) + switch (sizePrefix) { - // 'pp' bits in byte2 of VEX prefix allows us to encode SIMD size prefixes as two bits - // - // 00 - None (0F - packed float) - // 01 - 66 (66 0F - packed double) - // 10 - F3 (F3 0F - scalar float - // 11 - F2 (F2 0F - scalar double) - switch (sizePrefix) + case 0x66: { - case 0x66: - if (IsBMIInstruction(ins)) + if (IsBMIInstruction(ins)) + { + switch (ins) { - switch (ins) - { - case INS_rorx: - case INS_pdep: - case INS_mulx: + case INS_rorx: + case INS_pdep: + case INS_mulx: // TODO: Unblock when enabled for x86 #ifdef TARGET_AMD64 - case INS_shrx: + case INS_shrx: #endif - { - vexPrefix |= 0x03; - break; - } + { + vexPrefix |= 0x03; + break; + } - case INS_pext: + case INS_pext: // TODO: Unblock when enabled for x86 #ifdef TARGET_AMD64 - case INS_sarx: + case INS_sarx: #endif - { - vexPrefix |= 0x02; - break; - } + { + vexPrefix |= 0x02; + break; + } // TODO: Unblock when enabled for x86 #ifdef TARGET_AMD64 - case INS_shlx: - { - vexPrefix |= 0x01; - break; - } + case INS_shlx: + { + vexPrefix |= 0x01; + break; + } #endif - default: - { - vexPrefix |= 0x00; - break; - } + default: + { + vexPrefix |= 0x00; + break; } } - else - { - vexPrefix |= 0x01; - } - break; - case 0xF3: - vexPrefix |= 0x02; - break; - case 0xF2: - vexPrefix |= 0x03; - break; - default: - assert(!"unrecognized SIMD size prefix"); - unreached(); + } + else + { + vexPrefix |= 0x01; + } + break; } - // Now the byte in the 22 position must be an escape byte 0F - leadingBytes = check; - assert(leadingBytes == 0x0F); + case 0xF3: + { + vexPrefix |= 0x02; + break; + } - // Get rid of both sizePrefix and escape byte - code &= 0x0000FFFFLL; + case 0xF2: + { + vexPrefix |= 0x03; + break; + } - // Check the byte in the 33 position to see if it is 3A or 38. - // In such a case escape bytes must be 0x0F3A or 0x0F38 - check = code & 0xFF; - if (check == 0x3A || check == 0x38) + default: { - leadingBytes = (leadingBytes << 8) | check; - code &= 0x0000FF00LL; + assert(!"unrecognized SIMD size prefix"); + unreached(); } } + + // Now the byte in the 22 position must be an escape byte 0F + leadingBytes = check; + assert(leadingBytes == 0x0F); + + // Get rid of both sizePrefix and escape byte + code &= 0x0000FFFFLL; + + // Check the byte in the 33 position to see if it is 3A or 38. + // In such a case escape bytes must be 0x0F3A or 0x0F38 + check = code & 0xFF; + + if ((check == 0x3A) || (check == 0x38)) + { + leadingBytes = (leadingBytes << 8) | check; + code &= 0x0000FF00LL; + } } - else + } + else + { + // 2-byte opcode with the bytes ordered as 0x0011RM22 + // the byte in position 11 must be an escape byte. + leadingBytes = (code >> 16) & 0xFF; + assert(leadingBytes == 0x0F || leadingBytes == 0x00); + code &= 0xFFFF; + } + + // If there is an escape byte it must be 0x0F or 0x0F3A or 0x0F38 + // m-mmmmm bits in byte 1 of VEX prefix allows us to encode these + // implied leading bytes. 0x0F is supported by both the 2-byte and + // 3-byte encoding. While 0x0F3A and 0x0F38 are only supported by + // the 3-byte version. + + switch (leadingBytes) + { + case 0x00: { - // 2-byte opcode with the bytes ordered as 0x0011RM22 - // the byte in position 11 must be an escape byte. - leadingBytes = (code >> 16) & 0xFF; - assert(leadingBytes == 0x0F || leadingBytes == 0x00); - code &= 0xFFFF; + // there is no leading byte + break; } - // If there is an escape byte it must be 0x0F or 0x0F3A or 0x0F38 - // m-mmmmm bits in byte 1 of VEX prefix allows us to encode these - // implied leading bytes. 0x0F is supported by both the 2-byte and - // 3-byte encoding. While 0x0F3A and 0x0F38 are only supported by - // the 3-byte version. + case 0x0F: + { + vexPrefix |= 0x0100; + break; + } - switch (leadingBytes) + case 0x0F38: { - case 0x00: - // there is no leading byte - break; - case 0x0F: - vexPrefix |= 0x0100; - break; - case 0x0F38: - vexPrefix |= 0x0200; - break; - case 0x0F3A: - vexPrefix |= 0x0300; - break; - default: - assert(!"encountered unknown leading bytes"); - unreached(); + vexPrefix |= 0x0200; + break; } - // At this point - // VEX.2211RM33 got transformed as VEX.0000RM33 - // VEX.0011RM22 got transformed as VEX.0000RM22 - // - // Now output VEX prefix leaving the 4-byte opcode + case 0x0F3A: + { + vexPrefix |= 0x0300; + break; + } + + default: + { + assert(!"encountered unknown leading bytes"); + unreached(); + } + } + + // At this point + // VEX.2211RM33 got transformed as VEX.0000RM33 + // VEX.0011RM22 got transformed as VEX.0000RM22 + // + // Now output VEX prefix leaving the 4-byte opcode + + return vexPrefix; +} + +//------------------------------------------------------------------------ +// emitOutputRexOrSimdPrefixIfNeeded: Outputs EVEX prefix (in case of AVX512 instructions), +// VEX prefix (in case of AVX instructions) and REX.R/X/W/B otherwise. +// +// Arguments: +// ins -- processor instruction to check. +// dst -- buffer to write prefix to. +// code -- opcode bits. +// attr -- operand size +// +// Return Value: +// Size of prefix. +// +unsigned emitter::emitOutputRexOrSimdPrefixIfNeeded(instruction ins, BYTE* dst, code_t& code) +{ + // TODO-XArch-AVX512: Remove redundant code and collapse into single pathway for EVEX and VEX if possible. + if (hasEvexPrefix(code)) + { + code_t evexPrefix = emitExtractEvexPrefix(ins, code); + assert(evexPrefix != 0); + + emitOutputByte(dst, ((evexPrefix >> 24) & 0xFF)); + emitOutputByte(dst + 1, ((evexPrefix >> 16) & 0xFF)); + emitOutputByte(dst + 2, (evexPrefix >> 8) & 0xFF); + emitOutputByte(dst + 3, evexPrefix & 0xFF); + + return 4; + } + else if (hasVexPrefix(code)) + { + code_t vexPrefix = emitExtractVexPrefix(ins, code); + assert(vexPrefix != 0); // The 2-byte VEX encoding, requires that the X and B-bits are set (these // bits are inverted from the REX values so set means off), the W-bit is @@ -1555,16 +1648,19 @@ unsigned emitter::emitOutputRexOrSimdPrefixIfNeeded(instruction ins, BYTE* dst, // * X and B must be set (0x61 validates bits 5-6) // * m-mmmm must be 0-00001 (0x61 validates bits 0-4) // * W must be unset (0x00 validates bit 7) + if ((vexPrefix & 0xFFFF7F80) == 0x00C46100) { // Encoding optimization calculation is not done while estimating the instruction // size and thus over-predict instruction size by 1 byte. // If there are IGs that will be aligned, do not optimize encoding so the // estimated alignment sizes are accurate. + if (emitCurIG->igNum > emitLastAlignedIgNum) { emitOutputByte(dst, 0xC5); emitOutputByte(dst + 1, ((vexPrefix >> 8) & 0x80) | (vexPrefix & 0x7F)); + return 2; } } @@ -1572,6 +1668,7 @@ unsigned emitter::emitOutputRexOrSimdPrefixIfNeeded(instruction ins, BYTE* dst, emitOutputByte(dst, ((vexPrefix >> 16) & 0xFF)); emitOutputByte(dst + 1, ((vexPrefix >> 8) & 0xFF)); emitOutputByte(dst + 2, vexPrefix & 0xFF); + return 3; } @@ -1677,7 +1774,7 @@ unsigned emitter::emitGetRexPrefixSize(instruction ins) } //------------------------------------------------------------------------ -// emitGetEvexPrefixSize: Gets Size of Evex prefix in bytes +// emitGetEvexPrefixSize: Gets Size of EVEX prefix in bytes // // Arguments: // ins -- The instruction @@ -1687,25 +1784,23 @@ unsigned emitter::emitGetRexPrefixSize(instruction ins) // unsigned emitter::emitGetEvexPrefixSize(instruction ins) { - if (IsEvexEncodedInstruction(ins)) - { - return 4; - } - - // If not AVX512, then we don't need to encode prefix. - return 0; + assert(IsEvexEncodedInstruction(ins)); + return 4; } -// Size of vex prefix in bytes -unsigned emitter::emitGetVexPrefixSize(instruction ins, emitAttr attr) +//------------------------------------------------------------------------ +// emitGetVexPrefixSize: Gets Size of VEX prefix in bytes +// +// Arguments: +// ins -- The instruction +// +// Returns: +// Prefix size in bytes. +// +unsigned emitter::emitGetVexPrefixSize(instruction ins) { - if (IsVexEncodedInstruction(ins)) - { - return 3; - } - - // If not AVX, then we don't need to encode vex prefix. - return 0; + assert(IsVexEncodedInstruction(ins)); + return 3; } //------------------------------------------------------------------------ @@ -1791,8 +1886,8 @@ unsigned emitter::emitGetAdjustedSize(instruction ins, emitAttr attr, code_t cod // = opcodeSize + (vexPrefixSize - ExtrabytesSize) // = opcodeSize + vexPrefixAdjustedSize - unsigned simdPrefixAdjustedSize = emitGetVexPrefixSize(ins, attr); - assert(simdPrefixAdjustedSize == 3); + unsigned simdPrefixAdjustedSize = emitGetVexPrefixSize(ins); + assert((simdPrefixAdjustedSize == 2) || (simdPrefixAdjustedSize == 3)); // In this case, opcode will contains escape prefix at least one byte, // vexPrefixAdjustedSize should be minus one. diff --git a/src/coreclr/jit/emitxarch.h b/src/coreclr/jit/emitxarch.h index 94174885110835..45ed8328688d9a 100644 --- a/src/coreclr/jit/emitxarch.h +++ b/src/coreclr/jit/emitxarch.h @@ -67,7 +67,7 @@ BYTE* emitOutputLJ(insGroup* ig, BYTE* dst, instrDesc* id); unsigned emitOutputRexOrSimdPrefixIfNeeded(instruction ins, BYTE* dst, code_t& code); unsigned emitGetRexPrefixSize(instruction ins); -unsigned emitGetVexPrefixSize(instruction ins, emitAttr attr); +unsigned emitGetVexPrefixSize(instruction ins); unsigned emitGetEvexPrefixSize(instruction ins); unsigned emitGetPrefixSize(code_t code, bool includeRexPrefixSize); unsigned emitGetAdjustedSize(instruction ins, emitAttr attr, code_t code); From 7bd14bec72911408b19679a6ea496db6a3f4a9d7 Mon Sep 17 00:00:00 2001 From: Tanner Gooding Date: Fri, 9 Dec 2022 10:09:55 -0800 Subject: [PATCH 04/15] Create helper emitExtractVexPrefix and emitExtractEvexPrefix functions --- src/coreclr/jit/emitxarch.h | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/coreclr/jit/emitxarch.h b/src/coreclr/jit/emitxarch.h index 45ed8328688d9a..70d60dbc95566f 100644 --- a/src/coreclr/jit/emitxarch.h +++ b/src/coreclr/jit/emitxarch.h @@ -72,6 +72,9 @@ unsigned emitGetEvexPrefixSize(instruction ins); unsigned emitGetPrefixSize(code_t code, bool includeRexPrefixSize); unsigned emitGetAdjustedSize(instruction ins, emitAttr attr, code_t code); +code_t emitExtractVexPrefix(instruction ins, code_t& code); +code_t emitExtractEvexPrefix(instruction ins, code_t& code); + unsigned insEncodeReg012(instruction ins, regNumber reg, emitAttr size, code_t* code); unsigned insEncodeReg345(instruction ins, regNumber reg, emitAttr size, code_t* code); code_t insEncodeReg3456(instruction ins, regNumber reg, emitAttr size, code_t code); From b20a6d5bb4feb0859dbe5c8567ae02a390f9f9a9 Mon Sep 17 00:00:00 2001 From: Tanner Gooding Date: Fri, 9 Dec 2022 10:37:05 -0800 Subject: [PATCH 05/15] Update emitGetVexPrefixSize and emitGetEvexPrefixSize to take an instrDesc --- src/coreclr/jit/emitxarch.cpp | 88 +++++++++++++++++++++-------------- src/coreclr/jit/emitxarch.h | 8 ++-- 2 files changed, 58 insertions(+), 38 deletions(-) diff --git a/src/coreclr/jit/emitxarch.cpp b/src/coreclr/jit/emitxarch.cpp index 9d6a4a9cca196c..d13215efc2e949 100644 --- a/src/coreclr/jit/emitxarch.cpp +++ b/src/coreclr/jit/emitxarch.cpp @@ -1777,13 +1777,14 @@ unsigned emitter::emitGetRexPrefixSize(instruction ins) // emitGetEvexPrefixSize: Gets Size of EVEX prefix in bytes // // Arguments: -// ins -- The instruction +// ins -- The instruction descriptor // // Returns: // Prefix size in bytes. // -unsigned emitter::emitGetEvexPrefixSize(instruction ins) +unsigned emitter::emitGetEvexPrefixSize(instrDesc* id) { + instruction ins = id->idIns(); assert(IsEvexEncodedInstruction(ins)); return 4; } @@ -1792,13 +1793,14 @@ unsigned emitter::emitGetEvexPrefixSize(instruction ins) // emitGetVexPrefixSize: Gets Size of VEX prefix in bytes // // Arguments: -// ins -- The instruction +// id -- The instruction descriptor // // Returns: // Prefix size in bytes. // -unsigned emitter::emitGetVexPrefixSize(instruction ins) +unsigned emitter::emitGetVexPrefixSize(instrDesc* id) { + instruction ins = id->idIns(); assert(IsVexEncodedInstruction(ins)); return 3; } @@ -1808,15 +1810,17 @@ unsigned emitter::emitGetVexPrefixSize(instruction ins) // configuration. // // Arguments: -// ins -- The instruction being emitted -// attr -- The emit attribute +// id -- The instruction descriptor being emitted // code -- The current opcode and any known prefixes // // Returns: // Updated size. // -unsigned emitter::emitGetAdjustedSize(instruction ins, emitAttr attr, code_t code) +unsigned emitter::emitGetAdjustedSize(instrDesc* id, code_t code) { + instruction ins = id->idIns(); + emitAttr attr = id->idOpSize(); + unsigned adjustedSize = 0; // TODO-XArch-AVX512: Remove redundant code and possiblly collapse EVEX and VEX into a single pathway @@ -1839,7 +1843,7 @@ unsigned emitter::emitGetAdjustedSize(instruction ins, emitAttr attr, code_t cod // = opcodeSize + (evexPrefixSize - ExtrabytesSize) // = opcodeSize + evexPrefixAdjustedSize - unsigned evexPrefixAdjustedSize = emitGetEvexPrefixSize(ins); + unsigned evexPrefixAdjustedSize = emitGetEvexPrefixSize(id); assert(evexPrefixAdjustedSize == 4); // In this case, opcode will contains escape prefix at least one byte, @@ -1886,7 +1890,7 @@ unsigned emitter::emitGetAdjustedSize(instruction ins, emitAttr attr, code_t cod // = opcodeSize + (vexPrefixSize - ExtrabytesSize) // = opcodeSize + vexPrefixAdjustedSize - unsigned simdPrefixAdjustedSize = emitGetVexPrefixSize(ins); + unsigned simdPrefixAdjustedSize = emitGetVexPrefixSize(id); assert((simdPrefixAdjustedSize == 2) || (simdPrefixAdjustedSize == 3)); // In this case, opcode will contains escape prefix at least one byte, @@ -2853,7 +2857,7 @@ inline UNATIVE_OFFSET emitter::emitInsSizeRR(instrDesc* id, code_t code) instruction ins = id->idIns(); emitAttr attr = id->idOpSize(); - UNATIVE_OFFSET sz = emitGetAdjustedSize(ins, attr, code); + UNATIVE_OFFSET sz = emitGetAdjustedSize(id, code); bool includeRexPrefixSize = true; // REX prefix @@ -2912,8 +2916,22 @@ inline UNATIVE_OFFSET emitter::emitInsSizeRR(instrDesc* id, code_t code, int val return valSize + emitInsSizeRR(id, code); } -inline UNATIVE_OFFSET emitter::emitInsSizeRR(instruction ins, regNumber reg1, regNumber reg2, emitAttr attr) +//------------------------------------------------------------------------ +// emitInsSizeRR: Determines the code size for an instruction encoding that does not have any addressing modes +// +// Arguments: +// id -- The instruction descriptor being emitted +// +// TODO-Cleanup: This method should really be merged with emitInsSizeRR(instrDesc*, code_t). However, it has +// existed for a long time and its not trivial to determine why it exists as a separate method. +// +inline UNATIVE_OFFSET emitter::emitInsSizeRR(instrDesc* id) { + instruction ins = id->idIns(); + regNumber reg1 = id->idReg1(); + regNumber reg2 = id->idReg2(); + emitAttr attr = id->idOpSize(); + emitAttr size = EA_SIZE(attr); // If Byte 4 (which is 0xFF00) is zero, that's where the RM encoding goes. @@ -2921,7 +2939,7 @@ inline UNATIVE_OFFSET emitter::emitInsSizeRR(instruction ins, regNumber reg1, re // This would probably be better expressed as a different format or something? code_t code = insCodeRM(ins); - UNATIVE_OFFSET sz = emitGetAdjustedSize(ins, size, insCodeRM(ins)); + UNATIVE_OFFSET sz = emitGetAdjustedSize(id, code); bool includeRexPrefixSize = true; // REX prefix @@ -3143,7 +3161,7 @@ inline UNATIVE_OFFSET emitter::emitInsSizeSV(instrDesc* id, code_t code, int var assert(id->idIns() != INS_invalid); instruction ins = id->idIns(); emitAttr attrSize = id->idOpSize(); - UNATIVE_OFFSET prefix = emitGetAdjustedSize(ins, attrSize, code); + UNATIVE_OFFSET prefix = emitGetAdjustedSize(id, code); // REX prefix if (TakesRexWPrefix(ins, attrSize) || IsExtendedReg(id->idReg1(), attrSize) || @@ -3161,7 +3179,7 @@ inline UNATIVE_OFFSET emitter::emitInsSizeSV(instrDesc* id, code_t code, int var instruction ins = id->idIns(); emitAttr attrSize = id->idOpSize(); UNATIVE_OFFSET valSize = EA_SIZE_IN_BYTES(attrSize); - UNATIVE_OFFSET prefix = emitGetAdjustedSize(ins, attrSize, code); + UNATIVE_OFFSET prefix = emitGetAdjustedSize(id, code); bool valInByte = ((signed char)val == val) && (ins != INS_mov) && (ins != INS_test); #ifdef TARGET_AMD64 @@ -3294,7 +3312,7 @@ UNATIVE_OFFSET emitter::emitInsSizeAM(instrDesc* id, code_t code) size = 2; } - size += emitGetAdjustedSize(ins, attrSize, code); + size += emitGetAdjustedSize(id, code); if (hasRexPrefix(code)) { @@ -3512,7 +3530,7 @@ inline UNATIVE_OFFSET emitter::emitInsSizeCV(instrDesc* id, code_t code) // can be reached via RIP-relative addressing. UNATIVE_OFFSET size = sizeof(INT32); - size += emitGetAdjustedSize(ins, attrSize, code); + size += emitGetAdjustedSize(id, code); bool includeRexPrefixSize = true; @@ -3754,14 +3772,15 @@ void emitter::emitIns(instruction ins, emitAttr attr) insFormat fmt = IF_NONE; - sz += emitGetAdjustedSize(ins, attr, code); + id->idIns(ins); + id->idInsFmt(fmt); + + sz += emitGetAdjustedSize(id, code); if (TakesRexWPrefix(ins, attr)) { sz += emitGetRexPrefixSize(ins); } - id->idIns(ins); - id->idInsFmt(fmt); id->idCodeSize(sz); dispIns(id); @@ -4899,7 +4918,7 @@ void emitter::emitIns_R(instruction ins, emitAttr attr, regNumber reg) id->idReg1(reg); // Vex bytes - sz += emitGetAdjustedSize(ins, attr, insEncodeMRreg(ins, reg, attr, insCodeMR(ins))); + sz += emitGetAdjustedSize(id, insEncodeMRreg(ins, reg, attr, insCodeMR(ins))); // REX byte if (IsExtendedReg(reg, attr) || TakesRexWPrefix(ins, attr)) @@ -5041,7 +5060,17 @@ void emitter::emitIns_R_I(instruction ins, break; } - sz += emitGetAdjustedSize(ins, attr, insCodeMI(ins)); + id = emitNewInstrSC(attr, val); + id->idIns(ins); + id->idInsFmt(fmt); + id->idReg1(reg); + +#ifdef DEBUG + id->idDebugOnlyInfo()->idFlags = gtFlags; + id->idDebugOnlyInfo()->idMemCookie = targetHandle; +#endif + + sz += emitGetAdjustedSize(id, insCodeMI(ins)); // Do we need a REX prefix for AMD64? We need one if we are using any extended register (REX.R), or if we have a // 64-bit sized operand (REX.W). Note that IMUL in our encoding is special, with a "built-in", implicit, target @@ -5051,17 +5080,8 @@ void emitter::emitIns_R_I(instruction ins, sz += emitGetRexPrefixSize(ins); } - id = emitNewInstrSC(attr, val); - id->idIns(ins); - id->idInsFmt(fmt); - id->idReg1(reg); id->idCodeSize(sz); -#ifdef DEBUG - id->idDebugOnlyInfo()->idFlags = gtFlags; - id->idDebugOnlyInfo()->idMemCookie = targetHandle; -#endif - dispIns(id); emitCurIGsize += sz; @@ -5633,13 +5653,13 @@ void emitter::emitIns_Mov(instruction ins, emitAttr attr, regNumber dstReg, regN return; } - UNATIVE_OFFSET sz = emitInsSizeRR(ins, dstReg, srcReg, attr); - instrDesc* id = emitNewInstrSmall(attr); id->idIns(ins); id->idInsFmt(fmt); id->idReg1(dstReg); id->idReg2(srcReg); + + UNATIVE_OFFSET sz = emitInsSizeRR(id); id->idCodeSize(sz); dispIns(id); @@ -5664,8 +5684,6 @@ void emitter::emitIns_R_R(instruction ins, emitAttr attr, regNumber reg1, regNum assert(size <= EA_32BYTE); noway_assert(emitVerifyEncodable(ins, size, reg1, reg2)); - UNATIVE_OFFSET sz = emitInsSizeRR(ins, reg1, reg2, attr); - /* Special case: "XCHG" uses a different format */ insFormat fmt = (ins == INS_xchg) ? IF_RRW_RRW : emitInsModeFormat(ins, IF_RRD_RRD); @@ -5674,6 +5692,8 @@ void emitter::emitIns_R_R(instruction ins, emitAttr attr, regNumber reg1, regNum id->idInsFmt(fmt); id->idReg1(reg1); id->idReg2(reg2); + + UNATIVE_OFFSET sz = emitInsSizeRR(id); id->idCodeSize(sz); dispIns(id); diff --git a/src/coreclr/jit/emitxarch.h b/src/coreclr/jit/emitxarch.h index 70d60dbc95566f..0beff1a7ce4b63 100644 --- a/src/coreclr/jit/emitxarch.h +++ b/src/coreclr/jit/emitxarch.h @@ -44,7 +44,7 @@ UNATIVE_OFFSET emitInsSizeSV(instrDesc* id, code_t code, int var, int dsp); UNATIVE_OFFSET emitInsSizeSV(instrDesc* id, code_t code, int var, int dsp, int val); UNATIVE_OFFSET emitInsSizeRR(instrDesc* id, code_t code); UNATIVE_OFFSET emitInsSizeRR(instrDesc* id, code_t code, int val); -UNATIVE_OFFSET emitInsSizeRR(instruction ins, regNumber reg1, regNumber reg2, emitAttr attr); +UNATIVE_OFFSET emitInsSizeRR(instrDesc* id); UNATIVE_OFFSET emitInsSizeAM(instrDesc* id, code_t code); UNATIVE_OFFSET emitInsSizeAM(instrDesc* id, code_t code, int val); UNATIVE_OFFSET emitInsSizeCV(instrDesc* id, code_t code); @@ -67,10 +67,10 @@ BYTE* emitOutputLJ(insGroup* ig, BYTE* dst, instrDesc* id); unsigned emitOutputRexOrSimdPrefixIfNeeded(instruction ins, BYTE* dst, code_t& code); unsigned emitGetRexPrefixSize(instruction ins); -unsigned emitGetVexPrefixSize(instruction ins); -unsigned emitGetEvexPrefixSize(instruction ins); +unsigned emitGetVexPrefixSize(instrDesc* id); +unsigned emitGetEvexPrefixSize(instrDesc* id); unsigned emitGetPrefixSize(code_t code, bool includeRexPrefixSize); -unsigned emitGetAdjustedSize(instruction ins, emitAttr attr, code_t code); +unsigned emitGetAdjustedSize(instrDesc* id, code_t code); code_t emitExtractVexPrefix(instruction ins, code_t& code); code_t emitExtractEvexPrefix(instruction ins, code_t& code); From 4d94897f78259a5f79d9ceaac41cc13d0dee5421 Mon Sep 17 00:00:00 2001 From: Tanner Gooding Date: Fri, 9 Dec 2022 10:56:40 -0800 Subject: [PATCH 06/15] Update emitGetPrefixSize to take the instrDesc so the VEX prefix size can be computed correctly --- src/coreclr/jit/emitxarch.cpp | 53 ++++++++++++++++++++++------------- src/coreclr/jit/emitxarch.h | 4 +-- 2 files changed, 35 insertions(+), 22 deletions(-) diff --git a/src/coreclr/jit/emitxarch.cpp b/src/coreclr/jit/emitxarch.cpp index d13215efc2e949..3de335d8bf33a6 100644 --- a/src/coreclr/jit/emitxarch.cpp +++ b/src/coreclr/jit/emitxarch.cpp @@ -1949,19 +1949,20 @@ unsigned emitter::emitGetAdjustedSize(instrDesc* id, code_t code) // emitGetPrefixSize: Get size of rex or vex prefix emitted in code // // Arguments: +// id -- The instruction descriptor for which to get its prefix size // code -- The current opcode and any known prefixes // includeRexPrefixSize -- If Rex Prefix size should be included or not // -unsigned emitter::emitGetPrefixSize(code_t code, bool includeRexPrefixSize) +unsigned emitter::emitGetPrefixSize(instrDesc* id, code_t code, bool includeRexPrefixSize) { if (hasEvexPrefix(code)) { - return 4; + return emitGetEvexPrefixSize(id); } if (hasVexPrefix(code)) { - return 3; + return emitGetVexPrefixSize(id); } if (includeRexPrefixSize && hasRexPrefix(code)) @@ -2832,14 +2833,15 @@ bool emitter::emitVerifyEncodable(instruction ins, emitAttr size, regNumber reg1 // emitInsSize: Estimate the size (in bytes of generated code) of the given instruction. // // Arguments: +// id -- The instruction descriptor for which to estimate its size // code -- The current opcode and any known prefixes // includeRexPrefixSize -- If Rex Prefix size should be included or not // -inline UNATIVE_OFFSET emitter::emitInsSize(code_t code, bool includeRexPrefixSize) +inline UNATIVE_OFFSET emitter::emitInsSize(instrDesc* id, code_t code, bool includeRexPrefixSize) { UNATIVE_OFFSET size = (code & 0xFF000000) ? 4 : (code & 0x00FF0000) ? 3 : 2; #ifdef TARGET_AMD64 - size += emitGetPrefixSize(code, includeRexPrefixSize); + size += emitGetPrefixSize(id, code, includeRexPrefixSize); #endif return size; } @@ -2868,7 +2870,7 @@ inline UNATIVE_OFFSET emitter::emitInsSizeRR(instrDesc* id, code_t code) includeRexPrefixSize = !IsVexEncodedInstruction(ins); } - sz += emitInsSize(code, includeRexPrefixSize); + sz += emitInsSize(id, code, includeRexPrefixSize); return sz; } @@ -2955,11 +2957,11 @@ inline UNATIVE_OFFSET emitter::emitInsSizeRR(instrDesc* id) if ((code & 0xFF00) != 0) { - sz += IsAvx512OrPriorInstruction(ins) ? emitInsSize(code, includeRexPrefixSize) : 5; + sz += IsAvx512OrPriorInstruction(ins) ? emitInsSize(id, code, includeRexPrefixSize) : 5; } else { - sz += emitInsSize(insEncodeRMreg(ins, code), includeRexPrefixSize); + sz += emitInsSize(id, insEncodeRMreg(ins, code), includeRexPrefixSize); } return sz; @@ -2979,7 +2981,7 @@ inline UNATIVE_OFFSET emitter::emitInsSizeRR(instrDesc* id) // inline UNATIVE_OFFSET emitter::emitInsSizeSVCalcDisp(instrDesc* id, code_t code, int var, int dsp) { - UNATIVE_OFFSET size = emitInsSize(code, /* includeRexPrefixSize */ true); + UNATIVE_OFFSET size = emitInsSize(id, code, /* includeRexPrefixSize */ true); UNATIVE_OFFSET offs; bool offsIsUpperBound = true; bool EBPbased = true; @@ -3542,7 +3544,7 @@ inline UNATIVE_OFFSET emitter::emitInsSizeCV(instrDesc* id, code_t code) includeRexPrefixSize = false; } - return size + emitInsSize(code, includeRexPrefixSize); + return size + emitInsSize(id, code, includeRexPrefixSize); } inline UNATIVE_OFFSET emitter::emitInsSizeCV(instrDesc* id, code_t code, int val) @@ -4966,6 +4968,11 @@ void emitter::emitIns_R_I(instruction ins, // (it is always encoded in a byte). Let's not complicate things until this is needed. assert(ins != INS_bt); + // TODO-Cleanup: This is used to track whether a call to emitInsSize is necessary. The call + // has existed for quite some time, historically, but it is likely problematic in practice. + // However, it is non-trivial to prove it is safe to remove at this time. + bool isSimdInsAndValInByte = false; + // Figure out the size of the instruction switch (ins) { @@ -5012,16 +5019,8 @@ void emitter::emitIns_R_I(instruction ins, { if (IsAvx512OrPriorInstruction(ins)) { - bool includeRexPrefixSize = true; - // Do not get the RexSize() but just decide if it will be included down further and if yes, - // do not include it again. - if (IsExtendedReg(reg, attr) || TakesRexWPrefix(ins, size) || instrIsExtendedReg3opImul(ins)) - { - includeRexPrefixSize = false; - } - - sz = emitInsSize(insCodeMI(ins), includeRexPrefixSize); - sz += 1; + sz = 1; + isSimdInsAndValInByte = true; } else if (size == EA_1BYTE && reg == REG_EAX && !instrIs3opImul(ins)) { @@ -5070,6 +5069,20 @@ void emitter::emitIns_R_I(instruction ins, id->idDebugOnlyInfo()->idMemCookie = targetHandle; #endif + if (isSimdInsAndValInByte) + { + bool includeRexPrefixSize = true; + + // Do not get the RexSize() but just decide if it will be included down further and if yes, + // do not include it again. + if (IsExtendedReg(reg, attr) || TakesRexWPrefix(ins, size) || instrIsExtendedReg3opImul(ins)) + { + includeRexPrefixSize = false; + } + + sz += emitInsSize(id, insCodeMI(ins), includeRexPrefixSize); + } + sz += emitGetAdjustedSize(id, insCodeMI(ins)); // Do we need a REX prefix for AMD64? We need one if we are using any extended register (REX.R), or if we have a diff --git a/src/coreclr/jit/emitxarch.h b/src/coreclr/jit/emitxarch.h index 0beff1a7ce4b63..30a42f301e1d1e 100644 --- a/src/coreclr/jit/emitxarch.h +++ b/src/coreclr/jit/emitxarch.h @@ -38,7 +38,7 @@ struct CnsVal bool cnsReloc; }; -UNATIVE_OFFSET emitInsSize(code_t code, bool includeRexPrefixSize); +UNATIVE_OFFSET emitInsSize(instrDesc* id, code_t code, bool includeRexPrefixSize); UNATIVE_OFFSET emitInsSizeSVCalcDisp(instrDesc* id, code_t code, int var, int dsp); UNATIVE_OFFSET emitInsSizeSV(instrDesc* id, code_t code, int var, int dsp); UNATIVE_OFFSET emitInsSizeSV(instrDesc* id, code_t code, int var, int dsp, int val); @@ -69,7 +69,7 @@ unsigned emitOutputRexOrSimdPrefixIfNeeded(instruction ins, BYTE* dst, code_t& c unsigned emitGetRexPrefixSize(instruction ins); unsigned emitGetVexPrefixSize(instrDesc* id); unsigned emitGetEvexPrefixSize(instrDesc* id); -unsigned emitGetPrefixSize(code_t code, bool includeRexPrefixSize); +unsigned emitGetPrefixSize(instrDesc* id, code_t code, bool includeRexPrefixSize); unsigned emitGetAdjustedSize(instrDesc* id, code_t code); code_t emitExtractVexPrefix(instruction ins, code_t& code); From ae744eb74682fec40d34e55e37445de0b5d936f6 Mon Sep 17 00:00:00 2001 From: Tanner Gooding Date: Fri, 9 Dec 2022 13:08:14 -0800 Subject: [PATCH 07/15] Update emitGetVexPrefixSize to support returning 2 or 3 --- src/coreclr/jit/emitxarch.cpp | 176 ++++++++++++++++++++++++++++------ 1 file changed, 149 insertions(+), 27 deletions(-) diff --git a/src/coreclr/jit/emitxarch.cpp b/src/coreclr/jit/emitxarch.cpp index 3de335d8bf33a6..822cc064a7ed8f 100644 --- a/src/coreclr/jit/emitxarch.cpp +++ b/src/coreclr/jit/emitxarch.cpp @@ -1651,18 +1651,10 @@ unsigned emitter::emitOutputRexOrSimdPrefixIfNeeded(instruction ins, BYTE* dst, if ((vexPrefix & 0xFFFF7F80) == 0x00C46100) { - // Encoding optimization calculation is not done while estimating the instruction - // size and thus over-predict instruction size by 1 byte. - // If there are IGs that will be aligned, do not optimize encoding so the - // estimated alignment sizes are accurate. + emitOutputByte(dst, 0xC5); + emitOutputByte(dst + 1, ((vexPrefix >> 8) & 0x80) | (vexPrefix & 0x7F)); - if (emitCurIG->igNum > emitLastAlignedIgNum) - { - emitOutputByte(dst, 0xC5); - emitOutputByte(dst + 1, ((vexPrefix >> 8) & 0x80) | (vexPrefix & 0x7F)); - - return 2; - } + return 2; } emitOutputByte(dst, ((vexPrefix >> 16) & 0xFF)); @@ -1789,22 +1781,6 @@ unsigned emitter::emitGetEvexPrefixSize(instrDesc* id) return 4; } -//------------------------------------------------------------------------ -// emitGetVexPrefixSize: Gets Size of VEX prefix in bytes -// -// Arguments: -// id -- The instruction descriptor -// -// Returns: -// Prefix size in bytes. -// -unsigned emitter::emitGetVexPrefixSize(instrDesc* id) -{ - instruction ins = id->idIns(); - assert(IsVexEncodedInstruction(ins)); - return 3; -} - //------------------------------------------------------------------------ // emitGetAdjustedSize: Determines any size adjustment needed for a given instruction based on the current // configuration. @@ -2345,6 +2321,152 @@ inline bool hasCodeMR(instruction ins) return ((insCodesMR[ins] != BAD_CODE)); } +//------------------------------------------------------------------------ +// emitGetVexPrefixSize: Gets Size of VEX prefix in bytes +// +// Arguments: +// id -- The instruction descriptor +// +// Returns: +// Prefix size in bytes. +// +unsigned emitter::emitGetVexPrefixSize(instrDesc* id) +{ + instruction ins = id->idIns(); + emitAttr size = id->idOpSize(); + + assert(IsVexEncodedInstruction(ins)); + + if (EncodedBySSE38orSSE3A(ins)) + { + // When the prefix is 0x0F38 or 0x0F3A, we must use the 3-byte encoding + return 3; + } + + switch (ins) + { + case INS_crc32: +#if defined(TARGET_AMD64) + case INS_sarx: + case INS_shrx: +#endif // TARGET_AMD64 + { + // When the prefix is 0x0F38 or 0x0F3A, we must use the 3-byte encoding + // These are special cases where the pp-bit is 0xF2 or 0xF3 and not 0x66 + return 3; + } + + default: + { + break; + } + } + + if (TakesRexWPrefix(ins, size)) + { + // When the REX.W bit is present, we must use the 3-byte encoding + return 3; + } + + regNumber regFor012Bits = REG_NA; + regNumber regForSibBits = REG_NA; + + switch (id->idInsFmt()) + { + case IF_ARD: + case IF_AWR_RRD: + case IF_RRD_ARD: + case IF_RRW_ARD: + case IF_RRW_ARD_CNS: + case IF_RWR_ARD: + case IF_RWR_ARD_CNS: + case IF_RWR_RRD_ARD: + case IF_RWR_RRD_ARD_CNS: + { + regFor012Bits = id->idAddr()->iiaAddrMode.amBaseReg; + regForSibBits = id->idAddr()->iiaAddrMode.amIndxReg; + break; + } + + case IF_MRD: + case IF_MWR_RRD: + case IF_RRD_MRD: + case IF_RRD_SRD: + case IF_RRW_MRD: + case IF_RRW_MRD_CNS: + case IF_RRW_SRD: + case IF_RRW_SRD_CNS: + case IF_RWR_MRD: + case IF_RWR_MRD_CNS: + case IF_RWR_RRD_MRD: + case IF_RWR_RRD_MRD_CNS: + case IF_RWR_RRD_SRD: + case IF_RWR_RRD_SRD_CNS: + case IF_RWR_SRD: + case IF_RWR_SRD_CNS: + case IF_SRD: + case IF_SWR_RRD: + { + // Nothing is encoded in a way to prevent the 2-byte encoding + break; + } + + case IF_RRD_RRD: + case IF_RRW_RRD: + case IF_RWR_RRD: + { + regFor012Bits = id->idReg2(); + + if ((ins == INS_movd) && isFloatReg(regFor012Bits)) + { + regFor012Bits = id->idReg1(); + } + break; + } + + case IF_RRW_RRW_CNS: + { + if (hasCodeMR(ins)) + { + regFor012Bits = id->idReg1(); + } + else + { + regFor012Bits = id->idReg2(); + } + break; + } + + case IF_RWR_RRD_RRD: + case IF_RWR_RRD_RRD_CNS: + case IF_RWR_RRD_RRD_RRD: + { + regFor012Bits = id->idReg3(); + break; + } + + default: + { + assert(!"Unhandled insFmt for emitGetVexPrefixSize"); + return 3; + } + } + + if ((regForSibBits != REG_NA) && IsExtendedReg(regForSibBits)) + { + // When the REX.X bit is present, we must use the 3-byte encoding + return 3; + } + + if ((regFor012Bits != REG_NA) && IsExtendedReg(regFor012Bits)) + { + // When the REX.B bit is present, we must use the 3-byte encoding + return 3; + } + + return 2; +} + /***************************************************************************** * * Returns the "[r/m], reg" or "[r/m]" encoding of the given CPU instruction. From f2ff3c4c1bd2fd2702daf346c3dbfe23ed933ec7 Mon Sep 17 00:00:00 2001 From: Tanner Gooding Date: Fri, 9 Dec 2022 17:38:57 -0800 Subject: [PATCH 08/15] Ensure IF_RWR_CNS is handled in emitGetVexPrefixSize --- src/coreclr/jit/emitxarch.cpp | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/src/coreclr/jit/emitxarch.cpp b/src/coreclr/jit/emitxarch.cpp index 822cc064a7ed8f..5e96383afa2085 100644 --- a/src/coreclr/jit/emitxarch.cpp +++ b/src/coreclr/jit/emitxarch.cpp @@ -2411,6 +2411,14 @@ unsigned emitter::emitGetVexPrefixSize(instrDesc* id) break; } + case IF_RRD_CNS: + case IF_RRW_CNS: + case IF_RWR_CNS: + { + regFor012Bits = id->idReg1(); + break; + } + case IF_RRD_RRD: case IF_RRW_RRD: case IF_RWR_RRD: From e131b6be8e9f867e05588ff813534d4a78caa020 Mon Sep 17 00:00:00 2001 From: Tanner Gooding Date: Fri, 9 Dec 2022 21:10:45 -0800 Subject: [PATCH 09/15] Don't try to estimate the 2-byte VEX prefix when optimizations are disabled --- src/coreclr/jit/emitxarch.cpp | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/coreclr/jit/emitxarch.cpp b/src/coreclr/jit/emitxarch.cpp index 5e96383afa2085..25557af82c124e 100644 --- a/src/coreclr/jit/emitxarch.cpp +++ b/src/coreclr/jit/emitxarch.cpp @@ -2337,6 +2337,12 @@ unsigned emitter::emitGetVexPrefixSize(instrDesc* id) assert(IsVexEncodedInstruction(ins)); + if (!emitComp->opts.OptimizationEnabled()) + { + // Don't worry about saving code size when optimizations are disabled + return 3; + } + if (EncodedBySSE38orSSE3A(ins)) { // When the prefix is 0x0F38 or 0x0F3A, we must use the 3-byte encoding From ea56d5f6dc34bc0d7d583ab9ddd7d931e309c97d Mon Sep 17 00:00:00 2001 From: Tanner Gooding Date: Fri, 9 Dec 2022 21:45:58 -0800 Subject: [PATCH 10/15] Ensure we don't negatively impact estimated alignment sizes for debug code --- src/coreclr/jit/emitxarch.cpp | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/src/coreclr/jit/emitxarch.cpp b/src/coreclr/jit/emitxarch.cpp index 25557af82c124e..33c8b59e746887 100644 --- a/src/coreclr/jit/emitxarch.cpp +++ b/src/coreclr/jit/emitxarch.cpp @@ -1651,10 +1651,20 @@ unsigned emitter::emitOutputRexOrSimdPrefixIfNeeded(instruction ins, BYTE* dst, if ((vexPrefix & 0xFFFF7F80) == 0x00C46100) { - emitOutputByte(dst, 0xC5); - emitOutputByte(dst + 1, ((vexPrefix >> 8) & 0x80) | (vexPrefix & 0x7F)); + // Encoding optimization calculation when estimating the instruction size is + // only done when optimizations are enabled since its more expensive. So when + // optimizations are disabled we'll have over-predicted the instruction size + // by one byte. Due to how IG alignment is done, we only want to emit the 2-byte + // prefix if optimizations are enabled or we know we won't negatively impact the + // estimated alignment sizes. - return 2; + if (emitComp->opts.OptimizationEnabled() || (emitCurIG->igNum > emitLastAlignedIgNum)) + { + emitOutputByte(dst, 0xC5); + emitOutputByte(dst + 1, ((vexPrefix >> 8) & 0x80) | (vexPrefix & 0x7F)); + + return 2; + } } emitOutputByte(dst, ((vexPrefix >> 16) & 0xFF)); From 501435595abf597f8e8e50717e8327385148dde0 Mon Sep 17 00:00:00 2001 From: Tanner Gooding Date: Sat, 10 Dec 2022 06:11:59 -0800 Subject: [PATCH 11/15] Just check MinOpts not OptimizationsDisabled --- src/coreclr/jit/emitxarch.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/coreclr/jit/emitxarch.cpp b/src/coreclr/jit/emitxarch.cpp index 33c8b59e746887..8b3d46eb683f1d 100644 --- a/src/coreclr/jit/emitxarch.cpp +++ b/src/coreclr/jit/emitxarch.cpp @@ -1658,7 +1658,7 @@ unsigned emitter::emitOutputRexOrSimdPrefixIfNeeded(instruction ins, BYTE* dst, // prefix if optimizations are enabled or we know we won't negatively impact the // estimated alignment sizes. - if (emitComp->opts.OptimizationEnabled() || (emitCurIG->igNum > emitLastAlignedIgNum)) + if (!emitComp->opts.MinOpts() || (emitCurIG->igNum > emitLastAlignedIgNum)) { emitOutputByte(dst, 0xC5); emitOutputByte(dst + 1, ((vexPrefix >> 8) & 0x80) | (vexPrefix & 0x7F)); @@ -2347,7 +2347,7 @@ unsigned emitter::emitGetVexPrefixSize(instrDesc* id) assert(IsVexEncodedInstruction(ins)); - if (!emitComp->opts.OptimizationEnabled()) + if (emitComp->opts.MinOpts()) { // Don't worry about saving code size when optimizations are disabled return 3; From 989c1ec083aca977fdf4684f08bba51ca1dab193 Mon Sep 17 00:00:00 2001 From: Tanner Gooding Date: Sat, 10 Dec 2022 06:28:42 -0800 Subject: [PATCH 12/15] Mark some of the modified methods as const to indicate they don't mutate the emitter --- src/coreclr/jit/emitxarch.cpp | 14 +++++++------- src/coreclr/jit/emitxarch.h | 14 +++++++------- 2 files changed, 14 insertions(+), 14 deletions(-) diff --git a/src/coreclr/jit/emitxarch.cpp b/src/coreclr/jit/emitxarch.cpp index 8b3d46eb683f1d..598dc262ddd5bc 100644 --- a/src/coreclr/jit/emitxarch.cpp +++ b/src/coreclr/jit/emitxarch.cpp @@ -747,7 +747,7 @@ static bool IsDstSrcImmAvxInstruction(instruction ins) // Note that this should be true for any of the instructions in instrsXArch.h // that use the SSE38 or SSE3A macro but returns false if the VEX encoding is // in use, since that encoding does not require an additional byte. -bool emitter::Is4ByteSSEInstruction(instruction ins) +bool emitter::Is4ByteSSEInstruction(instruction ins) const { return !UseVEXEncoding() && EncodedBySSE38orSSE3A(ins); } @@ -1289,7 +1289,7 @@ bool isPrefix(BYTE b) // Return Value: // The extracted EVEX prefix. // -emitter::code_t emitter::emitExtractEvexPrefix(instruction ins, code_t& code) +emitter::code_t emitter::emitExtractEvexPrefix(instruction ins, code_t& code) const { assert(IsEvexEncodedInstruction(ins)); @@ -1425,7 +1425,7 @@ emitter::code_t emitter::emitExtractEvexPrefix(instruction ins, code_t& code) // Return Value: // The extracted VEX prefix. // -emitter::code_t emitter::emitExtractVexPrefix(instruction ins, code_t& code) +emitter::code_t emitter::emitExtractVexPrefix(instruction ins, code_t& code) const { assert(IsVexEncodedInstruction(ins)); @@ -1784,7 +1784,7 @@ unsigned emitter::emitGetRexPrefixSize(instruction ins) // Returns: // Prefix size in bytes. // -unsigned emitter::emitGetEvexPrefixSize(instrDesc* id) +unsigned emitter::emitGetEvexPrefixSize(instrDesc* id) const { instruction ins = id->idIns(); assert(IsEvexEncodedInstruction(ins)); @@ -1802,7 +1802,7 @@ unsigned emitter::emitGetEvexPrefixSize(instrDesc* id) // Returns: // Updated size. // -unsigned emitter::emitGetAdjustedSize(instrDesc* id, code_t code) +unsigned emitter::emitGetAdjustedSize(instrDesc* id, code_t code) const { instruction ins = id->idIns(); emitAttr attr = id->idOpSize(); @@ -2340,7 +2340,7 @@ inline bool hasCodeMR(instruction ins) // Returns: // Prefix size in bytes. // -unsigned emitter::emitGetVexPrefixSize(instrDesc* id) +unsigned emitter::emitGetVexPrefixSize(instrDesc* id) const { instruction ins = id->idIns(); emitAttr size = id->idOpSize(); @@ -2555,7 +2555,7 @@ inline insTupleType insTupleTypeInfo(instruction ins) } // Return true if the instruction uses the SSE38 or SSE3A macro in instrsXArch.h. -bool emitter::EncodedBySSE38orSSE3A(instruction ins) +bool emitter::EncodedBySSE38orSSE3A(instruction ins) const { const size_t SSE38 = 0x0F660038; const size_t SSE3A = 0x0F66003A; diff --git a/src/coreclr/jit/emitxarch.h b/src/coreclr/jit/emitxarch.h index 30a42f301e1d1e..dd4eec46dadb92 100644 --- a/src/coreclr/jit/emitxarch.h +++ b/src/coreclr/jit/emitxarch.h @@ -67,13 +67,13 @@ BYTE* emitOutputLJ(insGroup* ig, BYTE* dst, instrDesc* id); unsigned emitOutputRexOrSimdPrefixIfNeeded(instruction ins, BYTE* dst, code_t& code); unsigned emitGetRexPrefixSize(instruction ins); -unsigned emitGetVexPrefixSize(instrDesc* id); -unsigned emitGetEvexPrefixSize(instrDesc* id); +unsigned emitGetVexPrefixSize(instrDesc* id) const; +unsigned emitGetEvexPrefixSize(instrDesc* id) const; unsigned emitGetPrefixSize(instrDesc* id, code_t code, bool includeRexPrefixSize); -unsigned emitGetAdjustedSize(instrDesc* id, code_t code); +unsigned emitGetAdjustedSize(instrDesc* id, code_t code) const; -code_t emitExtractVexPrefix(instruction ins, code_t& code); -code_t emitExtractEvexPrefix(instruction ins, code_t& code); +code_t emitExtractVexPrefix(instruction ins, code_t& code) const; +code_t emitExtractEvexPrefix(instruction ins, code_t& code) const; unsigned insEncodeReg012(instruction ins, regNumber reg, emitAttr size, code_t* code); unsigned insEncodeReg345(instruction ins, regNumber reg, emitAttr size, code_t* code); @@ -111,8 +111,8 @@ code_t AddRexXPrefix(instruction ins, code_t code); code_t AddRexBPrefix(instruction ins, code_t code); code_t AddRexPrefix(instruction ins, code_t code); -bool EncodedBySSE38orSSE3A(instruction ins); -bool Is4ByteSSEInstruction(instruction ins); +bool EncodedBySSE38orSSE3A(instruction ins) const; +bool Is4ByteSSEInstruction(instruction ins) const; static bool IsMovInstruction(instruction ins); bool HasSideEffect(instruction ins, emitAttr size); bool IsRedundantMov( From 0f2514d19baa54ba150ccd1f87fd9ced01f12ce5 Mon Sep 17 00:00:00 2001 From: Tanner Gooding Date: Sat, 10 Dec 2022 07:12:01 -0800 Subject: [PATCH 13/15] Resolve the throughput issue by not getting optSize untill necessary --- src/coreclr/jit/emitxarch.cpp | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/src/coreclr/jit/emitxarch.cpp b/src/coreclr/jit/emitxarch.cpp index 598dc262ddd5bc..b4b038733ca444 100644 --- a/src/coreclr/jit/emitxarch.cpp +++ b/src/coreclr/jit/emitxarch.cpp @@ -1658,7 +1658,7 @@ unsigned emitter::emitOutputRexOrSimdPrefixIfNeeded(instruction ins, BYTE* dst, // prefix if optimizations are enabled or we know we won't negatively impact the // estimated alignment sizes. - if (!emitComp->opts.MinOpts() || (emitCurIG->igNum > emitLastAlignedIgNum)) + if (emitComp->opts.OptimizationEnabled() || (emitCurIG->igNum > emitLastAlignedIgNum)) { emitOutputByte(dst, 0xC5); emitOutputByte(dst + 1, ((vexPrefix >> 8) & 0x80) | (vexPrefix & 0x7F)); @@ -2342,17 +2342,16 @@ inline bool hasCodeMR(instruction ins) // unsigned emitter::emitGetVexPrefixSize(instrDesc* id) const { - instruction ins = id->idIns(); - emitAttr size = id->idOpSize(); - - assert(IsVexEncodedInstruction(ins)); + assert(IsVexEncodedInstruction(id->idIns())); - if (emitComp->opts.MinOpts()) + if (emitComp->opts.OptimizationDisabled()) { // Don't worry about saving code size when optimizations are disabled return 3; } + instruction ins = id->idIns(); + if (EncodedBySSE38orSSE3A(ins)) { // When the prefix is 0x0F38 or 0x0F3A, we must use the 3-byte encoding @@ -2378,6 +2377,8 @@ unsigned emitter::emitGetVexPrefixSize(instrDesc* id) const } } + emitAttr size = id->idOpSize(); + if (TakesRexWPrefix(ins, size)) { // When the REX.W bit is present, we must use the 3-byte encoding From 4d0c099c49af681c38bec08d0bf9d43e5c9ad087 Mon Sep 17 00:00:00 2001 From: Tanner Gooding Date: Sat, 10 Dec 2022 09:44:01 -0800 Subject: [PATCH 14/15] Switch back to doing accurate vex prefix size estimation always --- src/coreclr/jit/emitxarch.cpp | 22 +++------------------- 1 file changed, 3 insertions(+), 19 deletions(-) diff --git a/src/coreclr/jit/emitxarch.cpp b/src/coreclr/jit/emitxarch.cpp index b4b038733ca444..de4153c5a694ed 100644 --- a/src/coreclr/jit/emitxarch.cpp +++ b/src/coreclr/jit/emitxarch.cpp @@ -1651,20 +1651,10 @@ unsigned emitter::emitOutputRexOrSimdPrefixIfNeeded(instruction ins, BYTE* dst, if ((vexPrefix & 0xFFFF7F80) == 0x00C46100) { - // Encoding optimization calculation when estimating the instruction size is - // only done when optimizations are enabled since its more expensive. So when - // optimizations are disabled we'll have over-predicted the instruction size - // by one byte. Due to how IG alignment is done, we only want to emit the 2-byte - // prefix if optimizations are enabled or we know we won't negatively impact the - // estimated alignment sizes. + emitOutputByte(dst, 0xC5); + emitOutputByte(dst + 1, ((vexPrefix >> 8) & 0x80) | (vexPrefix & 0x7F)); - if (emitComp->opts.OptimizationEnabled() || (emitCurIG->igNum > emitLastAlignedIgNum)) - { - emitOutputByte(dst, 0xC5); - emitOutputByte(dst + 1, ((vexPrefix >> 8) & 0x80) | (vexPrefix & 0x7F)); - - return 2; - } + return 2; } emitOutputByte(dst, ((vexPrefix >> 16) & 0xFF)); @@ -2344,12 +2334,6 @@ unsigned emitter::emitGetVexPrefixSize(instrDesc* id) const { assert(IsVexEncodedInstruction(id->idIns())); - if (emitComp->opts.OptimizationDisabled()) - { - // Don't worry about saving code size when optimizations are disabled - return 3; - } - instruction ins = id->idIns(); if (EncodedBySSE38orSSE3A(ins)) From 57d572571676820ddbf495a1b1a723b38cb1c2db Mon Sep 17 00:00:00 2001 From: Tanner Gooding Date: Tue, 13 Dec 2022 14:09:56 -0800 Subject: [PATCH 15/15] Small cleanup to reduce regression --- src/coreclr/jit/emitxarch.cpp | 28 +++++++++++++--------------- 1 file changed, 13 insertions(+), 15 deletions(-) diff --git a/src/coreclr/jit/emitxarch.cpp b/src/coreclr/jit/emitxarch.cpp index de4153c5a694ed..14ad6263e5ad22 100644 --- a/src/coreclr/jit/emitxarch.cpp +++ b/src/coreclr/jit/emitxarch.cpp @@ -1776,8 +1776,7 @@ unsigned emitter::emitGetRexPrefixSize(instruction ins) // unsigned emitter::emitGetEvexPrefixSize(instrDesc* id) const { - instruction ins = id->idIns(); - assert(IsEvexEncodedInstruction(ins)); + assert(IsEvexEncodedInstruction(id->idIns())); return 4; } @@ -1794,10 +1793,8 @@ unsigned emitter::emitGetEvexPrefixSize(instrDesc* id) const // unsigned emitter::emitGetAdjustedSize(instrDesc* id, code_t code) const { - instruction ins = id->idIns(); - emitAttr attr = id->idOpSize(); - - unsigned adjustedSize = 0; + instruction ins = id->idIns(); + unsigned adjustedSize = 0; // TODO-XArch-AVX512: Remove redundant code and possiblly collapse EVEX and VEX into a single pathway // IsEvexEncodedInstruction(ins) is `true` for AVX/SSE instructions also which needs to be VEX encoded unless @@ -1910,6 +1907,8 @@ unsigned emitter::emitGetAdjustedSize(instrDesc* id, code_t code) const adjustedSize++; } + emitAttr attr = id->idOpSize(); + if ((attr == EA_2BYTE) && (ins != INS_movzx) && (ins != INS_movsx)) { // Most 16-bit operand instructions will need a 0x66 prefix. @@ -2322,7 +2321,7 @@ inline bool hasCodeMR(instruction ins) } //------------------------------------------------------------------------ -// emitGetVexPrefixSize: Gets Size of VEX prefix in bytes +// emitGetVexPrefixSize: Gets size of VEX prefix in bytes // // Arguments: // id -- The instruction descriptor @@ -2332,9 +2331,8 @@ inline bool hasCodeMR(instruction ins) // unsigned emitter::emitGetVexPrefixSize(instrDesc* id) const { - assert(IsVexEncodedInstruction(id->idIns())); - instruction ins = id->idIns(); + assert(IsVexEncodedInstruction(ins)); if (EncodedBySSE38orSSE3A(ins)) { @@ -3060,12 +3058,7 @@ inline UNATIVE_OFFSET emitter::emitInsSizeRR(instrDesc* id, code_t code, int val // inline UNATIVE_OFFSET emitter::emitInsSizeRR(instrDesc* id) { - instruction ins = id->idIns(); - regNumber reg1 = id->idReg1(); - regNumber reg2 = id->idReg2(); - emitAttr attr = id->idOpSize(); - - emitAttr size = EA_SIZE(attr); + instruction ins = id->idIns(); // If Byte 4 (which is 0xFF00) is zero, that's where the RM encoding goes. // Otherwise, it will be placed after the 4 byte encoding, making the total 5 bytes. @@ -3078,6 +3071,11 @@ inline UNATIVE_OFFSET emitter::emitInsSizeRR(instrDesc* id) // REX prefix if (!hasRexPrefix(code)) { + regNumber reg1 = id->idReg1(); + regNumber reg2 = id->idReg2(); + emitAttr attr = id->idOpSize(); + emitAttr size = EA_SIZE(attr); + if ((TakesRexWPrefix(ins, size) && ((ins != INS_xor) || (reg1 != reg2))) || IsExtendedReg(reg1, attr) || IsExtendedReg(reg2, attr)) {