-
Notifications
You must be signed in to change notification settings - Fork 5.3k
[RyuJIT] Emit shlx, sarx, shrx on x64 #67182
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
3e6017a
4a7b404
23395b3
9c07d03
adbfc48
53181e4
28438a0
60742ee
ab2624f
2198328
f629bfd
2292e05
f85d34a
36b26a2
5b454bb
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -4403,6 +4403,34 @@ void CodeGen::genCodeForShift(GenTree* tree) | |
| inst_RV_SH(ins, size, tree->GetRegNum(), shiftByValue); | ||
| } | ||
| } | ||
| #if defined(TARGET_64BIT) | ||
| else if (tree->OperIsShift() && compiler->compOpportunisticallyDependsOn(InstructionSet_BMI2)) | ||
| { | ||
| // Try to emit shlx, sarx, shrx if BMI2 is available instead of mov+shl, mov+sar, mov+shr. | ||
| switch (tree->OperGet()) | ||
| { | ||
| case GT_LSH: | ||
| ins = INS_shlx; | ||
| break; | ||
|
|
||
| case GT_RSH: | ||
| ins = INS_sarx; | ||
| break; | ||
|
|
||
| case GT_RSZ: | ||
| ins = INS_shrx; | ||
| break; | ||
|
|
||
| default: | ||
| unreached(); | ||
| } | ||
|
|
||
| regNumber shiftByReg = shiftBy->GetRegNum(); | ||
| emitAttr size = emitTypeSize(tree); | ||
| // The order of operandReg and shiftByReg are swapped to follow shlx, sarx and shrx encoding spec. | ||
| GetEmitter()->emitIns_R_R_R(ins, size, tree->GetRegNum(), shiftByReg, operandReg); | ||
|
||
| } | ||
| #endif | ||
| else | ||
| { | ||
| // We must have the number of bits to shift stored in ECX, since we constrained this node to | ||
|
|
||
| Original file line number | Diff line number | Diff line change | ||||
|---|---|---|---|---|---|---|
|
|
@@ -749,6 +749,9 @@ bool emitter::TakesRexWPrefix(instruction ins, emitAttr attr) | |||||
| case INS_pdep: | ||||||
| case INS_pext: | ||||||
| case INS_rorx: | ||||||
| case INS_shlx: | ||||||
|
||||||
| case INS_sarx: | ||||||
| case INS_shrx: | ||||||
| return true; | ||||||
| default: | ||||||
| return false; | ||||||
|
|
@@ -987,17 +990,32 @@ unsigned emitter::emitOutputRexOrVexPrefixIfNeeded(instruction ins, BYTE* dst, c | |||||
| case INS_rorx: | ||||||
| case INS_pdep: | ||||||
| case INS_mulx: | ||||||
| // TODO: Unblock when enabled for x86 | ||||||
| #ifdef TARGET_AMD64 | ||||||
| case INS_shrx: | ||||||
|
||||||
| #endif | ||||||
| { | ||||||
| vexPrefix |= 0x03; | ||||||
| break; | ||||||
| } | ||||||
|
|
||||||
| case INS_pext: | ||||||
| // TODO: Unblock when enabled for x86 | ||||||
| #ifdef TARGET_AMD64 | ||||||
| case INS_sarx: | ||||||
| #endif | ||||||
| { | ||||||
| vexPrefix |= 0x02; | ||||||
| break; | ||||||
| } | ||||||
|
|
||||||
| // TODO: Unblock when enabled for x86 | ||||||
| #ifdef TARGET_AMD64 | ||||||
| case INS_shlx: | ||||||
| { | ||||||
| vexPrefix |= 0x01; | ||||||
| break; | ||||||
| } | ||||||
| #endif | ||||||
| default: | ||||||
| { | ||||||
| vexPrefix |= 0x00; | ||||||
|
|
@@ -1484,6 +1502,11 @@ bool emitter::emitInsCanOnlyWriteSSE2OrAVXReg(instrDesc* id) | |||||
| case INS_pextrw: | ||||||
| case INS_pextrw_sse41: | ||||||
| case INS_rorx: | ||||||
| #ifdef TARGET_AMD64 | ||||||
| case INS_shlx: | ||||||
| case INS_sarx: | ||||||
| case INS_shrx: | ||||||
| #endif | ||||||
| { | ||||||
| // These SSE instructions write to a general purpose integer register. | ||||||
| return false; | ||||||
|
|
@@ -9519,9 +9542,13 @@ void emitter::emitDispIns( | |||||
| assert(IsThreeOperandAVXInstruction(ins)); | ||||||
| regNumber reg2 = id->idReg2(); | ||||||
| regNumber reg3 = id->idReg3(); | ||||||
| if (ins == INS_bextr || ins == INS_bzhi) | ||||||
| if (ins == INS_bextr || ins == INS_bzhi | ||||||
| #ifdef TARGET_AMD64 | ||||||
| || ins == INS_shrx || ins == INS_shlx || ins == INS_sarx | ||||||
| #endif | ||||||
| ) | ||||||
| { | ||||||
| // BMI bextr and bzhi encodes the reg2 in VEX.vvvv and reg3 in modRM, | ||||||
| // BMI bextr,bzhi, shrx, shlx and sarx encode the reg2 in VEX.vvvv and reg3 in modRM, | ||||||
|
||||||
| // BMI bextr,bzhi, shrx, shlx and sarx encode the reg2 in VEX.vvvv and reg3 in modRM, | |
| // BMI bextr, bzhi, shrx, shlx and sarx encode the reg2 in VEX.vvvv and reg3 in modRM, |
We need to have similar comment where we swap the operands I pointed out.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Added comment.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
nit: You have already added a comment in codegenxarch.cpp. No need here. The above comment covers it.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Removed.
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -4850,7 +4850,7 @@ void Lowering::ContainCheckShiftRotate(GenTreeOp* node) | |
| assert(source->OperGet() == GT_LONG); | ||
| MakeSrcContained(node, source); | ||
| } | ||
| #endif // !TARGET_X86 | ||
| #endif | ||
|
||
|
|
||
| GenTree* shiftBy = node->gtOp2; | ||
| if (IsContainableImmed(node, shiftBy) && (shiftBy->AsIntConCommon()->IconValue() <= 255) && | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -925,6 +925,18 @@ int LinearScan::BuildShiftRotate(GenTree* tree) | |
| { | ||
| assert(shiftBy->OperIsConst()); | ||
| } | ||
| #if defined(TARGET_64BIT) | ||
|
||
| else if (tree->OperIsShift() && !tree->isContained() && | ||
| compiler->compOpportunisticallyDependsOn(InstructionSet_BMI2)) | ||
| { | ||
| // shlx (as opposed to mov+shl) instructions handles all register forms, but it does not handle contained form | ||
| // for memory operand. Likewise for sarx and shrx. | ||
| srcCount += BuildOperandUses(source, srcCandidates); | ||
| srcCount += BuildOperandUses(shiftBy, srcCandidates); | ||
| BuildDef(tree, dstCandidates); | ||
| return srcCount; | ||
| } | ||
| #endif | ||
| else | ||
| { | ||
| srcCandidates = allRegs(TYP_INT) & ~RBM_RCX; | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
same here...why is it only for x64?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Handling for x64 only for now.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Handling for x64 only for now.