From d5cc29784038a58a966e673667327d3aec928798 Mon Sep 17 00:00:00 2001 From: Tanner Gooding Date: Thu, 10 Jun 2021 21:46:47 -0700 Subject: [PATCH 1/3] Port IsRedundantMov to xarch --- src/coreclr/jit/emitxarch.cpp | 239 +++++++++++++++++++++++++--------- src/coreclr/jit/emitxarch.h | 1 + 2 files changed, 180 insertions(+), 60 deletions(-) diff --git a/src/coreclr/jit/emitxarch.cpp b/src/coreclr/jit/emitxarch.cpp index 96b12839ad214f..60bcb5f6e287ed 100644 --- a/src/coreclr/jit/emitxarch.cpp +++ b/src/coreclr/jit/emitxarch.cpp @@ -4253,7 +4253,6 @@ bool emitter::IsMovInstruction(instruction ins) case INS_movd: case INS_movdqa: case INS_movdqu: - case INS_movsd: case INS_movsdsse2: case INS_movss: case INS_movsx: @@ -4279,6 +4278,180 @@ bool emitter::IsMovInstruction(instruction ins) } } +//---------------------------------------------------------------------------------------- +// IsRedundantMov: +// Check if the current `mov` instruction is redundant and can be omitted. +// A `mov` is redundant in following 3 cases: +// +// 1. Move to same register on TARGET_AMD64 +// (Except 4-byte movement like "mov eax, eax" which zeros out upper bits of eax register) +// +// mov rax, rax +// +// 2. Move that is identical to last instruction emitted. +// +// mov rax, rbx # <-- last instruction +// mov rax, rbx # <-- current instruction can be omitted. +// +// 3. Opposite Move as that of last instruction emitted. +// +// mov rax, rbx # <-- last instruction +// mov rbx, rax # <-- current instruction can be omitted. +// +// Arguments: +// ins - The current instruction +// fmt - The current format +// size - Operand size of current instruction +// dst - The current destination +// src - The current source +// canSkip - The move can be skipped as it doesn't represent special semantics +// +// Return Value: +// true if the move instruction is redundant; otherwise, false. + +bool emitter::IsRedundantMov(instruction ins, insFormat fmt, emitAttr size, regNumber dst, regNumber src, bool canSkip) +{ + assert(IsMovInstruction(ins)); + + if (canSkip && (dst == src)) + { + // These elisions used to be explicit even when optimizations were disabled + + // Some instructions have a side effect and shouldn't be skipped + // however existing codepaths were skipping these instructions in + // certain scenarios and so we skip them as well for back-compat + // when canSkip is true (see below for which have a side effect). + // + // Long term, these paths should be audited and should likely be + // replaced with copies rather than extensions. + return true; + } + + if (!emitComp->opts.OptimizationEnabled()) + { + // The remaining move elisions should only happen if optimizations are enabled + return false; + } + + // TODO-XArch-CQ: There are places where the fact that an instruction zero-extends + // is not an important detail, such as when "regular" floating-point code is generated + // + // This differs from cases like HWIntrinsics that deal with the entire vector and so + // they need to be "aware" that a given move impacts the upper-bits. + // + // Ideally we can detect this difference, likely via canSkip, and allow the below + // optimizations for those scenarios as well. + + // Track whether the instruction has a zero/sign-extension or clearing of the upper-bits as a side-effect + bool hasSideEffect = false; + + switch (ins) + { + case INS_mov: + { + // non EA_PTRSIZE moves may zero-extend the source + hasSideEffect = (size != EA_PTRSIZE); + break; + } + + case INS_movapd: + case INS_movaps: + case INS_movdqa: + case INS_movdqu: + case INS_movupd: + case INS_movups: + { + // non EA_32BYTE moves clear the upper bits under VEX encoding + hasSideEffect = UseVEXEncoding() && (size != EA_32BYTE); + break; + } + + case INS_movd: + { + // Clears the upper bits + hasSideEffect = true; + break; + } + + case INS_movsdsse2: + case INS_movss: + { + // Clears the upper bits under VEX encoding + hasSideEffect = UseVEXEncoding(); + break; + } + + case INS_movsx: + case INS_movzx: + { + // Sign/Zero-extends the source + hasSideEffect = true; + break; + } + +#if defined(TARGET_AMD64) + case INS_movq: + { + // Clears the upper bits + hasSideEffect = true; + break; + } + + case INS_movsxd: + { + // Sign-extends the source + hasSideEffect = true; + break; + } +#endif // TARGET_AMD64 + + default: + { + unreached(); + } + } + + // Check if we are already in the correct register and don't have a side effect + if ((dst == src) && !hasSideEffect) + { + JITDUMP("\n -- suppressing mov because src and dst is same register and the mov has no side-effects.\n"); + return true; + } + + bool isFirstInstrInBlock = (emitCurIGinsCnt == 0) && ((emitCurIG->igFlags & IGF_EXTEND) == 0); + + // TODO-XArch-CQ: Certain instructions, such as movaps vs movups, are equivalent in + // functionality even if their actual identifier differs and we should optimize these + + if (isFirstInstrInBlock || // Don't optimize if instruction is the first instruction in IG. + (emitLastIns == nullptr) || // or if a last instruction doesn't exist + (emitLastIns->idIns() != ins) || // or if the instruction is different from the last instruction + (emitLastIns->idOpSize() != size) || // or if the operand size is different from the last instruction + (emitLastIns->idInsFmt() != fmt)) // or if the format is different from the last instruction + { + return false; + } + + regNumber lastDst = emitLastIns->idReg1(); + regNumber lastSrc = emitLastIns->idReg2(); + + // Check if we did same move in last instruction, side effects don't matter since they already happened + if ((lastDst == dst) && (lastSrc == src)) + { + JITDUMP("\n -- suppressing mov because last instruction already moved from src to dst register.\n"); + return true; + } + + // Check if we did a switched mov in the last instruction and don't have a side effect + if ((lastDst == src) && (lastSrc == dst) && !hasSideEffect) + { + JITDUMP("\n -- suppressing mov because last instruction already moved from dst to src register and the mov has no side-effects.\n"); + return true; + } + + return false; +} + //------------------------------------------------------------------------ // emitIns_Mov: Emits a move instruction // @@ -4309,7 +4482,6 @@ void emitter::emitIns_Mov(instruction ins, emitAttr attr, regNumber dstReg, regN case INS_movaps: case INS_movdqa: case INS_movdqu: - case INS_movsd: case INS_movsdsse2: case INS_movss: case INS_movupd: @@ -4351,67 +4523,14 @@ void emitter::emitIns_Mov(instruction ins, emitAttr attr, regNumber dstReg, regN assert(size <= EA_32BYTE); noway_assert(emitVerifyEncodable(ins, size, dstReg, srcReg)); - if (canSkip && (dstReg == srcReg)) - { - switch (ins) - { - case INS_mov: - { - // These instructions have no side effect and can be skipped - return; - } - - case INS_movapd: - case INS_movaps: - case INS_movdqa: - case INS_movdqu: - case INS_movupd: - case INS_movups: - { - // These instructions have no side effect and can be skipped - return; - } - - case INS_movd: - case INS_movsd: - case INS_movsdsse2: - case INS_movss: - case INS_movsx: - case INS_movzx: - { - // These instructions have a side effect and shouldn't be skipped - // however existing codepaths were skipping these instructions in - // certain scenarios and so we skip them as well for back-compat. - // - // Long term, these paths should be audited and should likely be - // replaced with copies rather than extensions. - return; - } - -#if defined(TARGET_AMD64) - case INS_movq: - case INS_movsxd: - { - // These instructions have a side effect and shouldn't be skipped - // however existing codepaths were skipping these instructions in - // certain scenarios and so we skip them as well for back-compat. - // - // Long term, these paths should be audited and should likely be - // replaced with copies rather than extensions. - return; - } -#endif // TARGET_AMD64 - - default: - { - unreached(); - } - } - } - UNATIVE_OFFSET sz = emitInsSizeRR(ins, dstReg, srcReg, attr); insFormat fmt = emitInsModeFormat(ins, IF_RRD_RRD); + if (IsRedundantMov(ins, fmt, attr, dstReg, srcReg, canSkip)) + { + return; + } + instrDesc* id = emitNewInstrSmall(attr); id->idIns(ins); id->idInsFmt(fmt); diff --git a/src/coreclr/jit/emitxarch.h b/src/coreclr/jit/emitxarch.h index cda15711c2c095..2210b09506793c 100644 --- a/src/coreclr/jit/emitxarch.h +++ b/src/coreclr/jit/emitxarch.h @@ -95,6 +95,7 @@ code_t AddRexPrefix(instruction ins, code_t code); bool EncodedBySSE38orSSE3A(instruction ins); bool Is4ByteSSEInstruction(instruction ins); static bool IsMovInstruction(instruction ins); +bool IsRedundantMov(instruction ins, insFormat fmt, emitAttr size, regNumber dst, regNumber src, bool canSkip); bool AreUpper32BitsZero(regNumber reg); From db9a6e182aae1516cb91b07e8f0c54d670941fa8 Mon Sep 17 00:00:00 2001 From: Tanner Gooding Date: Fri, 11 Jun 2021 13:48:32 -0700 Subject: [PATCH 2/3] Applying formatting patch --- src/coreclr/jit/emitxarch.cpp | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/src/coreclr/jit/emitxarch.cpp b/src/coreclr/jit/emitxarch.cpp index 60bcb5f6e287ed..7bc9acb51e7e23 100644 --- a/src/coreclr/jit/emitxarch.cpp +++ b/src/coreclr/jit/emitxarch.cpp @@ -4423,11 +4423,11 @@ bool emitter::IsRedundantMov(instruction ins, insFormat fmt, emitAttr size, regN // TODO-XArch-CQ: Certain instructions, such as movaps vs movups, are equivalent in // functionality even if their actual identifier differs and we should optimize these - if (isFirstInstrInBlock || // Don't optimize if instruction is the first instruction in IG. - (emitLastIns == nullptr) || // or if a last instruction doesn't exist - (emitLastIns->idIns() != ins) || // or if the instruction is different from the last instruction - (emitLastIns->idOpSize() != size) || // or if the operand size is different from the last instruction - (emitLastIns->idInsFmt() != fmt)) // or if the format is different from the last instruction + if (isFirstInstrInBlock || // Don't optimize if instruction is the first instruction in IG. + (emitLastIns == nullptr) || // or if a last instruction doesn't exist + (emitLastIns->idIns() != ins) || // or if the instruction is different from the last instruction + (emitLastIns->idOpSize() != size) || // or if the operand size is different from the last instruction + (emitLastIns->idInsFmt() != fmt)) // or if the format is different from the last instruction { return false; } @@ -4445,7 +4445,8 @@ bool emitter::IsRedundantMov(instruction ins, insFormat fmt, emitAttr size, regN // Check if we did a switched mov in the last instruction and don't have a side effect if ((lastDst == src) && (lastSrc == dst) && !hasSideEffect) { - JITDUMP("\n -- suppressing mov because last instruction already moved from dst to src register and the mov has no side-effects.\n"); + JITDUMP("\n -- suppressing mov because last instruction already moved from dst to src register and the mov has " + "no side-effects.\n"); return true; } From 304fbb054c2a0ad50e0d83d319e4736d65b9efd1 Mon Sep 17 00:00:00 2001 From: Tanner Gooding Date: Sat, 12 Jun 2021 07:55:26 -0700 Subject: [PATCH 3/3] Responding to PR feedback --- src/coreclr/jit/emitxarch.cpp | 38 +++++++++++++++++++++++++---------- src/coreclr/jit/emitxarch.h | 3 ++- 2 files changed, 29 insertions(+), 12 deletions(-) diff --git a/src/coreclr/jit/emitxarch.cpp b/src/coreclr/jit/emitxarch.cpp index 7bc9acb51e7e23..6578232a92996e 100644 --- a/src/coreclr/jit/emitxarch.cpp +++ b/src/coreclr/jit/emitxarch.cpp @@ -4243,6 +4243,20 @@ void emitter::emitIns_C(instruction ins, emitAttr attr, CORINFO_FIELD_HANDLE fld // Arguments: // ins -- The instruction being checked // +// Return Value: +// true if the instruction is a qualifying move instruction; otherwise, false +// +// Remarks: +// This methods covers most kinds of two operand move instructions that copy a +// value between two registers. It does not cover all move-like instructions +// and so doesn't currently cover things like movsb/movsw/movsd/movsq or cmovcc +// and doesn't currently cover cases where a value is read/written from memory. +// +// The reason it doesn't cover all instructions was namely to limit the scope +// of the initial change to that which was impactful to move elision so that +// it could be centrally managed and optimized. It may be beneficial to support +// the other move instructions in the future but that may require more extensive +// changes to ensure relevant codegen/emit paths flow and check things correctly. bool emitter::IsMovInstruction(instruction ins) { switch (ins) @@ -4299,28 +4313,30 @@ bool emitter::IsMovInstruction(instruction ins) // mov rbx, rax # <-- current instruction can be omitted. // // Arguments: -// ins - The current instruction -// fmt - The current format -// size - Operand size of current instruction -// dst - The current destination -// src - The current source -// canSkip - The move can be skipped as it doesn't represent special semantics +// ins - The current instruction +// fmt - The current format +// size - Operand size of current instruction +// dst - The current destination +// src - The current source +// canIgnoreSideEffects - The move can be skipped as it doesn't represent special semantics // // Return Value: // true if the move instruction is redundant; otherwise, false. -bool emitter::IsRedundantMov(instruction ins, insFormat fmt, emitAttr size, regNumber dst, regNumber src, bool canSkip) +bool emitter::IsRedundantMov( + instruction ins, insFormat fmt, emitAttr size, regNumber dst, regNumber src, bool canIgnoreSideEffects) { assert(IsMovInstruction(ins)); - if (canSkip && (dst == src)) + if (canIgnoreSideEffects && (dst == src)) { // These elisions used to be explicit even when optimizations were disabled // Some instructions have a side effect and shouldn't be skipped // however existing codepaths were skipping these instructions in // certain scenarios and so we skip them as well for back-compat - // when canSkip is true (see below for which have a side effect). + // when canIgnoreSideEffects is true (see below for which have a + // side effect). // // Long term, these paths should be audited and should likely be // replaced with copies rather than extensions. @@ -4339,8 +4355,8 @@ bool emitter::IsRedundantMov(instruction ins, insFormat fmt, emitAttr size, regN // This differs from cases like HWIntrinsics that deal with the entire vector and so // they need to be "aware" that a given move impacts the upper-bits. // - // Ideally we can detect this difference, likely via canSkip, and allow the below - // optimizations for those scenarios as well. + // Ideally we can detect this difference, likely via canIgnoreSideEffects, and allow + // the below optimizations for those scenarios as well. // Track whether the instruction has a zero/sign-extension or clearing of the upper-bits as a side-effect bool hasSideEffect = false; diff --git a/src/coreclr/jit/emitxarch.h b/src/coreclr/jit/emitxarch.h index 2210b09506793c..8260445686be09 100644 --- a/src/coreclr/jit/emitxarch.h +++ b/src/coreclr/jit/emitxarch.h @@ -95,7 +95,8 @@ code_t AddRexPrefix(instruction ins, code_t code); bool EncodedBySSE38orSSE3A(instruction ins); bool Is4ByteSSEInstruction(instruction ins); static bool IsMovInstruction(instruction ins); -bool IsRedundantMov(instruction ins, insFormat fmt, emitAttr size, regNumber dst, regNumber src, bool canSkip); +bool IsRedundantMov( + instruction ins, insFormat fmt, emitAttr size, regNumber dst, regNumber src, bool canIgnoreSideEffects); bool AreUpper32BitsZero(regNumber reg);