diff --git a/src/coreclr/inc/clrconfigvalues.h b/src/coreclr/inc/clrconfigvalues.h index e4e89b29bacd02..b7e27497739ef1 100644 --- a/src/coreclr/inc/clrconfigvalues.h +++ b/src/coreclr/inc/clrconfigvalues.h @@ -757,6 +757,7 @@ RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableBMI2, W("EnableBMI2"), 1 RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableFMA, W("EnableFMA"), 1, "Allows FMA+ hardware intrinsics to be disabled") RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableLZCNT, W("EnableLZCNT"), 1, "Allows LZCNT+ hardware intrinsics to be disabled") RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnablePCLMULQDQ, W("EnablePCLMULQDQ"), 1, "Allows PCLMULQDQ+ hardware intrinsics to be disabled") +RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableMOVBE, W("EnableMOVBE"), 1, "Allows MOVBE+ hardware intrinsics to be disabled") RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnablePOPCNT, W("EnablePOPCNT"), 1, "Allows POPCNT+ hardware intrinsics to be disabled") RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableSSE, W("EnableSSE"), 1, "Allows SSE+ hardware intrinsics to be disabled") RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableSSE2, W("EnableSSE2"), 1, "Allows SSE2+ hardware intrinsics to be disabled") diff --git a/src/coreclr/inc/corinfoinstructionset.h b/src/coreclr/inc/corinfoinstructionset.h index b8ccc0af8446a5..778b1839e4afbd 100644 --- a/src/coreclr/inc/corinfoinstructionset.h +++ b/src/coreclr/inc/corinfoinstructionset.h @@ -58,23 +58,25 @@ enum CORINFO_InstructionSet InstructionSet_Vector128=17, InstructionSet_Vector256=18, InstructionSet_AVXVNNI=19, - InstructionSet_X86Base_X64=20, - InstructionSet_SSE_X64=21, - InstructionSet_SSE2_X64=22, - InstructionSet_SSE3_X64=23, - InstructionSet_SSSE3_X64=24, - InstructionSet_SSE41_X64=25, - InstructionSet_SSE42_X64=26, - InstructionSet_AVX_X64=27, - InstructionSet_AVX2_X64=28, - InstructionSet_AES_X64=29, - InstructionSet_BMI1_X64=30, - InstructionSet_BMI2_X64=31, - InstructionSet_FMA_X64=32, - InstructionSet_LZCNT_X64=33, - InstructionSet_PCLMULQDQ_X64=34, - InstructionSet_POPCNT_X64=35, - InstructionSet_AVXVNNI_X64=36, + InstructionSet_MOVBE=20, + InstructionSet_X86Base_X64=21, + InstructionSet_SSE_X64=22, + InstructionSet_SSE2_X64=23, + InstructionSet_SSE3_X64=24, + InstructionSet_SSSE3_X64=25, + InstructionSet_SSE41_X64=26, + InstructionSet_SSE42_X64=27, + InstructionSet_AVX_X64=28, + InstructionSet_AVX2_X64=29, + InstructionSet_AES_X64=30, + InstructionSet_BMI1_X64=31, + InstructionSet_BMI2_X64=32, + InstructionSet_FMA_X64=33, + InstructionSet_LZCNT_X64=34, + InstructionSet_PCLMULQDQ_X64=35, + InstructionSet_POPCNT_X64=36, + InstructionSet_AVXVNNI_X64=37, + InstructionSet_MOVBE_X64=38, #endif // TARGET_AMD64 #ifdef TARGET_X86 InstructionSet_X86Base=1, @@ -96,23 +98,25 @@ enum CORINFO_InstructionSet InstructionSet_Vector128=17, InstructionSet_Vector256=18, InstructionSet_AVXVNNI=19, - InstructionSet_X86Base_X64=20, - InstructionSet_SSE_X64=21, - InstructionSet_SSE2_X64=22, - InstructionSet_SSE3_X64=23, - InstructionSet_SSSE3_X64=24, - InstructionSet_SSE41_X64=25, - InstructionSet_SSE42_X64=26, - InstructionSet_AVX_X64=27, - InstructionSet_AVX2_X64=28, - InstructionSet_AES_X64=29, - InstructionSet_BMI1_X64=30, - InstructionSet_BMI2_X64=31, - InstructionSet_FMA_X64=32, - InstructionSet_LZCNT_X64=33, - InstructionSet_PCLMULQDQ_X64=34, - InstructionSet_POPCNT_X64=35, - InstructionSet_AVXVNNI_X64=36, + InstructionSet_MOVBE=20, + InstructionSet_X86Base_X64=21, + InstructionSet_SSE_X64=22, + InstructionSet_SSE2_X64=23, + InstructionSet_SSE3_X64=24, + InstructionSet_SSSE3_X64=25, + InstructionSet_SSE41_X64=26, + InstructionSet_SSE42_X64=27, + InstructionSet_AVX_X64=28, + InstructionSet_AVX2_X64=29, + InstructionSet_AES_X64=30, + InstructionSet_BMI1_X64=31, + InstructionSet_BMI2_X64=32, + InstructionSet_FMA_X64=33, + InstructionSet_LZCNT_X64=34, + InstructionSet_PCLMULQDQ_X64=35, + InstructionSet_POPCNT_X64=36, + InstructionSet_AVXVNNI_X64=37, + InstructionSet_MOVBE_X64=38, #endif // TARGET_X86 }; @@ -212,6 +216,8 @@ struct CORINFO_InstructionSetFlags AddInstructionSet(InstructionSet_POPCNT_X64); if (HasInstructionSet(InstructionSet_AVXVNNI)) AddInstructionSet(InstructionSet_AVXVNNI_X64); + if (HasInstructionSet(InstructionSet_MOVBE)) + AddInstructionSet(InstructionSet_MOVBE_X64); #endif // TARGET_AMD64 #ifdef TARGET_X86 #endif // TARGET_X86 @@ -357,6 +363,10 @@ inline CORINFO_InstructionSetFlags EnsureInstructionSetFlagsAreValid(CORINFO_Ins resultflags.RemoveInstructionSet(InstructionSet_AVXVNNI); if (resultflags.HasInstructionSet(InstructionSet_AVXVNNI_X64) && !resultflags.HasInstructionSet(InstructionSet_AVXVNNI)) resultflags.RemoveInstructionSet(InstructionSet_AVXVNNI_X64); + if (resultflags.HasInstructionSet(InstructionSet_MOVBE) && !resultflags.HasInstructionSet(InstructionSet_MOVBE_X64)) + resultflags.RemoveInstructionSet(InstructionSet_MOVBE); + if (resultflags.HasInstructionSet(InstructionSet_MOVBE_X64) && !resultflags.HasInstructionSet(InstructionSet_MOVBE)) + resultflags.RemoveInstructionSet(InstructionSet_MOVBE_X64); if (resultflags.HasInstructionSet(InstructionSet_SSE) && !resultflags.HasInstructionSet(InstructionSet_X86Base)) resultflags.RemoveInstructionSet(InstructionSet_SSE); if (resultflags.HasInstructionSet(InstructionSet_SSE2) && !resultflags.HasInstructionSet(InstructionSet_SSE)) @@ -393,6 +403,8 @@ inline CORINFO_InstructionSetFlags EnsureInstructionSetFlagsAreValid(CORINFO_Ins resultflags.RemoveInstructionSet(InstructionSet_Vector256); if (resultflags.HasInstructionSet(InstructionSet_AVXVNNI) && !resultflags.HasInstructionSet(InstructionSet_AVX2)) resultflags.RemoveInstructionSet(InstructionSet_AVXVNNI); + if (resultflags.HasInstructionSet(InstructionSet_MOVBE) && !resultflags.HasInstructionSet(InstructionSet_SSE42)) + resultflags.RemoveInstructionSet(InstructionSet_MOVBE); #endif // TARGET_AMD64 #ifdef TARGET_X86 if (resultflags.HasInstructionSet(InstructionSet_SSE) && !resultflags.HasInstructionSet(InstructionSet_X86Base)) @@ -431,6 +443,8 @@ inline CORINFO_InstructionSetFlags EnsureInstructionSetFlagsAreValid(CORINFO_Ins resultflags.RemoveInstructionSet(InstructionSet_Vector256); if (resultflags.HasInstructionSet(InstructionSet_AVXVNNI) && !resultflags.HasInstructionSet(InstructionSet_AVX2)) resultflags.RemoveInstructionSet(InstructionSet_AVXVNNI); + if (resultflags.HasInstructionSet(InstructionSet_MOVBE) && !resultflags.HasInstructionSet(InstructionSet_SSE42)) + resultflags.RemoveInstructionSet(InstructionSet_MOVBE); #endif // TARGET_X86 } while (!oldflags.Equals(resultflags)); @@ -563,6 +577,10 @@ inline const char *InstructionSetToString(CORINFO_InstructionSet instructionSet) return "AVXVNNI"; case InstructionSet_AVXVNNI_X64 : return "AVXVNNI_X64"; + case InstructionSet_MOVBE : + return "MOVBE"; + case InstructionSet_MOVBE_X64 : + return "MOVBE_X64"; #endif // TARGET_AMD64 #ifdef TARGET_X86 case InstructionSet_X86Base : @@ -603,6 +621,8 @@ inline const char *InstructionSetToString(CORINFO_InstructionSet instructionSet) return "Vector256"; case InstructionSet_AVXVNNI : return "AVXVNNI"; + case InstructionSet_MOVBE : + return "MOVBE"; #endif // TARGET_X86 default: @@ -652,6 +672,7 @@ inline CORINFO_InstructionSet InstructionSetFromR2RInstructionSet(ReadyToRunInst case READYTORUN_INSTRUCTION_Pclmulqdq: return InstructionSet_PCLMULQDQ; case READYTORUN_INSTRUCTION_Popcnt: return InstructionSet_POPCNT; case READYTORUN_INSTRUCTION_AvxVnni: return InstructionSet_AVXVNNI; + case READYTORUN_INSTRUCTION_Movbe: return InstructionSet_MOVBE; #endif // TARGET_AMD64 #ifdef TARGET_X86 case READYTORUN_INSTRUCTION_X86Base: return InstructionSet_X86Base; @@ -671,6 +692,7 @@ inline CORINFO_InstructionSet InstructionSetFromR2RInstructionSet(ReadyToRunInst case READYTORUN_INSTRUCTION_Pclmulqdq: return InstructionSet_PCLMULQDQ; case READYTORUN_INSTRUCTION_Popcnt: return InstructionSet_POPCNT; case READYTORUN_INSTRUCTION_AvxVnni: return InstructionSet_AVXVNNI; + case READYTORUN_INSTRUCTION_Movbe: return InstructionSet_MOVBE; #endif // TARGET_X86 default: diff --git a/src/coreclr/inc/jiteeversionguid.h b/src/coreclr/inc/jiteeversionguid.h index 013fa160ce7370..ce062f5e082b1a 100644 --- a/src/coreclr/inc/jiteeversionguid.h +++ b/src/coreclr/inc/jiteeversionguid.h @@ -43,11 +43,11 @@ typedef const GUID *LPCGUID; #define GUID_DEFINED #endif // !GUID_DEFINED -constexpr GUID JITEEVersionIdentifier = { /* 7503fe09-4852-40f6-829a-ff91402c9604 */ - 0x7503fe09, - 0x4852, - 0x40f6, - {0x82, 0x9a, 0xff, 0x91, 0x40, 0x2c, 0x96, 0x04} +constexpr GUID JITEEVersionIdentifier = { /* dfda4767-9618-4d6a-8105-a86034dc52eb */ + 0xdfda4767, + 0x9618, + 0x4d6a, + {0x81, 0x05, 0xa8, 0x60, 0x34, 0xdc, 0x52, 0xeb} }; ////////////////////////////////////////////////////////////////////////////////////////////////////////// diff --git a/src/coreclr/inc/readytoruninstructionset.h b/src/coreclr/inc/readytoruninstructionset.h index 6d7d65a651b6bb..62935f58668d6b 100644 --- a/src/coreclr/inc/readytoruninstructionset.h +++ b/src/coreclr/inc/readytoruninstructionset.h @@ -35,6 +35,7 @@ enum ReadyToRunInstructionSet READYTORUN_INSTRUCTION_Rdm=24, READYTORUN_INSTRUCTION_AvxVnni=25, READYTORUN_INSTRUCTION_Rcpc=26, + READYTORUN_INSTRUCTION_Movbe=27, }; diff --git a/src/coreclr/jit/codegenlinear.cpp b/src/coreclr/jit/codegenlinear.cpp index 2eb21176ba1d2b..6212a529fa471c 100644 --- a/src/coreclr/jit/codegenlinear.cpp +++ b/src/coreclr/jit/codegenlinear.cpp @@ -1646,7 +1646,7 @@ void CodeGen::genConsumeRegs(GenTree* tree) } #endif // FEATURE_HW_INTRINSICS #endif // TARGET_XARCH - else if (tree->OperIs(GT_BITCAST, GT_NEG, GT_CAST, GT_LSH)) + else if (tree->OperIs(GT_BITCAST, GT_NEG, GT_CAST, GT_LSH, GT_BSWAP, GT_BSWAP16)) { genConsumeRegs(tree->gtGetOp1()); } diff --git a/src/coreclr/jit/codegenxarch.cpp b/src/coreclr/jit/codegenxarch.cpp index ef6c90f3dcaefa..50da6453637044 100644 --- a/src/coreclr/jit/codegenxarch.cpp +++ b/src/coreclr/jit/codegenxarch.cpp @@ -553,35 +553,38 @@ void CodeGen::genCodeForNegNot(GenTree* tree) // void CodeGen::genCodeForBswap(GenTree* tree) { - // TODO: If we're swapping immediately after a read from memory or immediately before - // a write to memory, use the MOVBE instruction instead of the BSWAP instruction if - // the platform supports it. - assert(tree->OperIs(GT_BSWAP, GT_BSWAP16)); regNumber targetReg = tree->GetRegNum(); var_types targetType = tree->TypeGet(); GenTree* operand = tree->gtGetOp1(); - assert(operand->isUsedFromReg()); - regNumber operandReg = genConsumeReg(operand); - inst_Mov(targetType, targetReg, operandReg, /* canSkip */ true); + genConsumeRegs(operand); - if (tree->OperIs(GT_BSWAP)) + if (operand->isUsedFromReg()) { - // 32-bit and 64-bit byte swaps use "bswap reg" - inst_RV(INS_bswap, targetReg, targetType); + inst_Mov(targetType, targetReg, operand->GetRegNum(), /* canSkip */ true); + + if (tree->OperIs(GT_BSWAP)) + { + // 32-bit and 64-bit byte swaps use "bswap reg" + inst_RV(INS_bswap, targetReg, targetType); + } + else + { + // 16-bit byte swaps use "ror reg.16, 8" + inst_RV_IV(INS_ror_N, targetReg, 8 /* val */, emitAttr::EA_2BYTE); + } } else { - // 16-bit byte swaps use "ror reg.16, 8" - inst_RV_IV(INS_ror_N, targetReg, 8 /* val */, emitAttr::EA_2BYTE); + GetEmitter()->emitInsBinary(INS_movbe, emitTypeSize(operand), tree, operand); + } - if (!genCanOmitNormalizationForBswap16(tree)) - { - GetEmitter()->emitIns_Mov(INS_movzx, EA_2BYTE, targetReg, targetReg, /* canSkip */ false); - } + if (tree->OperIs(GT_BSWAP16) && !genCanOmitNormalizationForBswap16(tree)) + { + GetEmitter()->emitIns_Mov(INS_movzx, EA_2BYTE, targetReg, targetReg, /* canSkip */ false); } genProduceReg(tree); @@ -5140,7 +5143,10 @@ void CodeGen::genCodeForStoreInd(GenTreeStoreInd* tree) } else { - GetEmitter()->emitInsStoreInd(ins_Store(data->TypeGet()), emitTypeSize(tree), tree); + GetEmitter()->emitInsStoreInd(data->OperIs(GT_BSWAP, GT_BSWAP16) && data->isContained() + ? INS_movbe + : ins_Store(data->TypeGet()), + emitTypeSize(tree), tree); } } } diff --git a/src/coreclr/jit/emitxarch.cpp b/src/coreclr/jit/emitxarch.cpp index ca78ac9e94d30e..85b834cc815fa5 100644 --- a/src/coreclr/jit/emitxarch.cpp +++ b/src/coreclr/jit/emitxarch.cpp @@ -3332,6 +3332,13 @@ void emitter::emitInsStoreInd(instruction ins, emitAttr attr, GenTreeStoreInd* m GenTree* addr = mem->Addr(); GenTree* data = mem->Data(); + if (data->OperIs(GT_BSWAP, GT_BSWAP16) && data->isContained()) + { + assert(ins == INS_movbe); + + data = data->gtGetOp1(); + } + if (addr->OperGet() == GT_CLS_VAR_ADDR) { if (data->isContainedIntOrIImmed()) @@ -10415,6 +10422,13 @@ BYTE* emitter::emitOutputAM(BYTE* dst, instrDesc* id, code_t code, CnsVal* addc) // Is this a 'big' opcode? else if (code & 0xFF000000) { + if (size == EA_2BYTE) + { + assert(ins == INS_movbe); + + dst += emitOutputByte(dst, 0x66); + } + // Output the REX prefix dst += emitOutputRexOrVexPrefixIfNeeded(ins, dst, code); @@ -11199,6 +11213,13 @@ BYTE* emitter::emitOutputSV(BYTE* dst, instrDesc* id, code_t code, CnsVal* addc) // Is this a 'big' opcode? else if (code & 0xFF000000) { + if (size == EA_2BYTE) + { + assert(ins == INS_movbe); + + dst += emitOutputByte(dst, 0x66); + } + // Output the REX prefix dst += emitOutputRexOrVexPrefixIfNeeded(ins, dst, code); @@ -11669,6 +11690,13 @@ BYTE* emitter::emitOutputCV(BYTE* dst, instrDesc* id, code_t code, CnsVal* addc) // Is this a 'big' opcode? else if (code & 0xFF000000) { + if (size == EA_2BYTE) + { + assert(ins == INS_movbe); + + dst += emitOutputByte(dst, 0x66); + } + // Output the REX prefix dst += emitOutputRexOrVexPrefixIfNeeded(ins, dst, code); @@ -16275,6 +16303,20 @@ emitter::insExecutionCharacteristics emitter::getInsExecutionCharacteristics(ins break; } + case INS_movbe: + if (memAccessKind == PERFSCORE_MEMORY_READ) + { + result.insThroughput = PERFSCORE_THROUGHPUT_2X; + result.insLatency += opSize == EA_8BYTE ? PERFSCORE_LATENCY_2C : PERFSCORE_LATENCY_1C; + } + else + { + assert(memAccessKind == PERFSCORE_MEMORY_WRITE); + result.insThroughput = PERFSCORE_THROUGHPUT_1C; + result.insLatency += opSize == EA_8BYTE ? PERFSCORE_LATENCY_2C : PERFSCORE_LATENCY_1C; + } + break; + default: // unhandled instruction insFmt combination perfScoreUnhandledInstruction(id, &result); diff --git a/src/coreclr/jit/instrsxarch.h b/src/coreclr/jit/instrsxarch.h index bb76e56b6a0542..e08667e87a9f5c 100644 --- a/src/coreclr/jit/instrsxarch.h +++ b/src/coreclr/jit/instrsxarch.h @@ -167,6 +167,7 @@ INSTMUL(imul_15, "imul", IUM_RD, BAD_CODE, 0x4400003868, #define SSEDBL(c) PACK3(0xf2, 0x0f, c) #define PCKDBL(c) PACK3(0x66, 0x0f, c) #define PCKFLT(c) PACK2(0x0f,c) +#define PCKMVB(c) PACK3(0x0F, 0x38, c) // These macros encode extra byte that is implicit in the macro. #define PACK4(byte1,byte2,byte3,byte4) (((byte1) << 16) | ((byte2) << 24) | (byte3) | ((byte4) << 8)) @@ -618,6 +619,9 @@ INST3(tzcnt, "tzcnt", IUM_WR, BAD_CODE, BAD_CODE, // LZCNT INST3(lzcnt, "lzcnt", IUM_WR, BAD_CODE, BAD_CODE, SSEFLT(0xBD), Undefined_OF | Undefined_SF | Writes_ZF | Undefined_AF | Undefined_PF | Writes_CF ) +// MOVBE +INST3(movbe, "movbe", IUM_WR, PCKMVB(0xF1), BAD_CODE, PCKMVB(0xF0), INS_FLAGS_None ) + // POPCNT INST3(popcnt, "popcnt", IUM_WR, BAD_CODE, BAD_CODE, SSEFLT(0xB8), Resets_OF | Resets_SF | Writes_ZF | Resets_AF | Resets_PF | Resets_CF ) diff --git a/src/coreclr/jit/lower.cpp b/src/coreclr/jit/lower.cpp index d01a9acaedec35..b1792246c00c52 100644 --- a/src/coreclr/jit/lower.cpp +++ b/src/coreclr/jit/lower.cpp @@ -418,6 +418,13 @@ GenTree* Lowering::LowerNode(GenTree* node) } break; +#if defined(FEATURE_HW_INTRINSICS) && defined(TARGET_XARCH) + case GT_BSWAP: + case GT_BSWAP16: + LowerBswapOp(node->AsOp()); + break; +#endif // FEATURE_HW_INTRINSICS && TARGET_XARCH + default: break; } diff --git a/src/coreclr/jit/lower.h b/src/coreclr/jit/lower.h index 0512d4173ac13e..7ad3f6202d6697 100644 --- a/src/coreclr/jit/lower.h +++ b/src/coreclr/jit/lower.h @@ -350,6 +350,7 @@ class Lowering final : public Phase GenTree* TryLowerAndOpToResetLowestSetBit(GenTreeOp* andNode); GenTree* TryLowerAndOpToExtractLowestSetBit(GenTreeOp* andNode); GenTree* TryLowerAndOpToAndNot(GenTreeOp* andNode); + void LowerBswapOp(GenTreeOp* node); #elif defined(TARGET_ARM64) bool IsValidConstForMovImm(GenTreeHWIntrinsic* node); void LowerHWIntrinsicFusedMultiplyAddScalar(GenTreeHWIntrinsic* node); diff --git a/src/coreclr/jit/lowerxarch.cpp b/src/coreclr/jit/lowerxarch.cpp index f7c9a10e5a8481..6a5f5161358cc6 100644 --- a/src/coreclr/jit/lowerxarch.cpp +++ b/src/coreclr/jit/lowerxarch.cpp @@ -3945,6 +3945,33 @@ GenTree* Lowering::TryLowerAndOpToAndNot(GenTreeOp* andNode) return andnNode; } +//---------------------------------------------------------------------------------------------- +// Lowering::LowerBswapOp: Tries to contain GT_BSWAP node when possible +// +// Arguments: +// node - GT_BSWAP node to contain +// +// Notes: +// Containment is not performed when optimizations are disabled +// or when MOVBE instruction set is not found +// +void Lowering::LowerBswapOp(GenTreeOp* node) +{ + assert(node->OperIs(GT_BSWAP, GT_BSWAP16)); + + if (!comp->opts.OptimizationEnabled() || !comp->compOpportunisticallyDependsOn(InstructionSet_MOVBE)) + { + return; + } + + GenTree* operand = node->gtGetOp1(); + unsigned swapSize = node->OperIs(GT_BSWAP16) ? 2 : genTypeSize(node); + if ((swapSize == genTypeSize(operand)) && IsContainableMemoryOp(operand) && IsSafeToContainMem(node, operand)) + { + MakeSrcContained(node, operand); + } +} + #endif // FEATURE_HW_INTRINSICS //---------------------------------------------------------------------------------------------- @@ -4575,6 +4602,20 @@ void Lowering::ContainCheckStoreIndir(GenTreeStoreInd* node) MakeSrcContained(node, src); } + // If the source is a BSWAP, contain it on supported hardware to generate a MOVBE. + if (comp->opts.OptimizationEnabled() && src->OperIs(GT_BSWAP, GT_BSWAP16) && + comp->compOpportunisticallyDependsOn(InstructionSet_MOVBE)) + { + unsigned swapSize = src->OperIs(GT_BSWAP16) ? 2 : genTypeSize(src); + if ((swapSize == genTypeSize(node)) && IsSafeToContainMem(node, src)) + { + // Prefer containing in the store in case the load has been contained. + src->gtGetOp1()->ClearContained(); + + MakeSrcContained(node, src); + } + } + ContainCheckIndir(node); } diff --git a/src/coreclr/jit/lsrabuild.cpp b/src/coreclr/jit/lsrabuild.cpp index 29401b9ab67754..e97b7497766287 100644 --- a/src/coreclr/jit/lsrabuild.cpp +++ b/src/coreclr/jit/lsrabuild.cpp @@ -3123,6 +3123,10 @@ int LinearScan::BuildOperandUses(GenTree* node, regMaskTP candidates) { return BuildAddrUses(node, candidates); } + if (node->OperIs(GT_BSWAP, GT_BSWAP16)) + { + return BuildOperandUses(node->gtGetOp1(), candidates); + } #ifdef FEATURE_HW_INTRINSICS if (node->OperIsHWIntrinsic()) { diff --git a/src/coreclr/nativeaot/Runtime/IntrinsicConstants.h b/src/coreclr/nativeaot/Runtime/IntrinsicConstants.h index 70c6d6b3d7bed0..f13851f87c12b7 100644 --- a/src/coreclr/nativeaot/Runtime/IntrinsicConstants.h +++ b/src/coreclr/nativeaot/Runtime/IntrinsicConstants.h @@ -23,6 +23,7 @@ enum XArchIntrinsicConstants XArchIntrinsicConstants_Bmi2 = 0x0800, XArchIntrinsicConstants_Lzcnt = 0x1000, XArchIntrinsicConstants_AvxVnni = 0x2000, + XArchIntrinsicConstants_Movbe = 0x4000, }; #endif //HOST_X86 || HOST_AMD64 diff --git a/src/coreclr/nativeaot/Runtime/startup.cpp b/src/coreclr/nativeaot/Runtime/startup.cpp index ab5a7fe9a0d66d..eb5e08b0d6851a 100644 --- a/src/coreclr/nativeaot/Runtime/startup.cpp +++ b/src/coreclr/nativeaot/Runtime/startup.cpp @@ -178,6 +178,11 @@ bool DetectCPUFeatures() { g_cpuFeatures |= XArchIntrinsicConstants_Sse42; + if ((cpuidInfo[ECX] & (1 << 22)) != 0) // MOVBE + { + g_cpuFeatures |= XArchIntrinsicConstants_Movbe; + } + if ((cpuidInfo[ECX] & (1 << 23)) != 0) // POPCNT { g_cpuFeatures |= XArchIntrinsicConstants_Popcnt; diff --git a/src/coreclr/tools/Common/Internal/Runtime/ReadyToRunInstructionSet.cs b/src/coreclr/tools/Common/Internal/Runtime/ReadyToRunInstructionSet.cs index ae045b02cfd13b..1157bf84edf4e6 100644 --- a/src/coreclr/tools/Common/Internal/Runtime/ReadyToRunInstructionSet.cs +++ b/src/coreclr/tools/Common/Internal/Runtime/ReadyToRunInstructionSet.cs @@ -38,6 +38,7 @@ public enum ReadyToRunInstructionSet Rdm=24, AvxVnni=25, Rcpc=26, + Movbe=27, } } diff --git a/src/coreclr/tools/Common/Internal/Runtime/ReadyToRunInstructionSetHelper.cs b/src/coreclr/tools/Common/Internal/Runtime/ReadyToRunInstructionSetHelper.cs index 730e6c4f41680d..69c29c7f960707 100644 --- a/src/coreclr/tools/Common/Internal/Runtime/ReadyToRunInstructionSetHelper.cs +++ b/src/coreclr/tools/Common/Internal/Runtime/ReadyToRunInstructionSetHelper.cs @@ -89,6 +89,8 @@ public static class ReadyToRunInstructionSetHelper case InstructionSet.X64_Vector256: return null; case InstructionSet.X64_AVXVNNI: return ReadyToRunInstructionSet.AvxVnni; case InstructionSet.X64_AVXVNNI_X64: return ReadyToRunInstructionSet.AvxVnni; + case InstructionSet.X64_MOVBE: return ReadyToRunInstructionSet.Movbe; + case InstructionSet.X64_MOVBE_X64: return ReadyToRunInstructionSet.Movbe; default: throw new Exception("Unknown instruction set"); } @@ -134,6 +136,8 @@ public static class ReadyToRunInstructionSetHelper case InstructionSet.X86_Vector256: return null; case InstructionSet.X86_AVXVNNI: return ReadyToRunInstructionSet.AvxVnni; case InstructionSet.X86_AVXVNNI_X64: return null; + case InstructionSet.X86_MOVBE: return ReadyToRunInstructionSet.Movbe; + case InstructionSet.X86_MOVBE_X64: return null; default: throw new Exception("Unknown instruction set"); } diff --git a/src/coreclr/tools/Common/JitInterface/CorInfoInstructionSet.cs b/src/coreclr/tools/Common/JitInterface/CorInfoInstructionSet.cs index f90947e8697661..fc90274c415bf3 100644 --- a/src/coreclr/tools/Common/JitInterface/CorInfoInstructionSet.cs +++ b/src/coreclr/tools/Common/JitInterface/CorInfoInstructionSet.cs @@ -58,6 +58,7 @@ public enum InstructionSet X64_Vector128 = InstructionSet_X64.Vector128, X64_Vector256 = InstructionSet_X64.Vector256, X64_AVXVNNI = InstructionSet_X64.AVXVNNI, + X64_MOVBE = InstructionSet_X64.MOVBE, X64_X86Base_X64 = InstructionSet_X64.X86Base_X64, X64_SSE_X64 = InstructionSet_X64.SSE_X64, X64_SSE2_X64 = InstructionSet_X64.SSE2_X64, @@ -75,6 +76,7 @@ public enum InstructionSet X64_PCLMULQDQ_X64 = InstructionSet_X64.PCLMULQDQ_X64, X64_POPCNT_X64 = InstructionSet_X64.POPCNT_X64, X64_AVXVNNI_X64 = InstructionSet_X64.AVXVNNI_X64, + X64_MOVBE_X64 = InstructionSet_X64.MOVBE_X64, X86_X86Base = InstructionSet_X86.X86Base, X86_SSE = InstructionSet_X86.SSE, X86_SSE2 = InstructionSet_X86.SSE2, @@ -94,6 +96,7 @@ public enum InstructionSet X86_Vector128 = InstructionSet_X86.Vector128, X86_Vector256 = InstructionSet_X86.Vector256, X86_AVXVNNI = InstructionSet_X86.AVXVNNI, + X86_MOVBE = InstructionSet_X86.MOVBE, X86_X86Base_X64 = InstructionSet_X86.X86Base_X64, X86_SSE_X64 = InstructionSet_X86.SSE_X64, X86_SSE2_X64 = InstructionSet_X86.SSE2_X64, @@ -111,6 +114,7 @@ public enum InstructionSet X86_PCLMULQDQ_X64 = InstructionSet_X86.PCLMULQDQ_X64, X86_POPCNT_X64 = InstructionSet_X86.POPCNT_X64, X86_AVXVNNI_X64 = InstructionSet_X86.AVXVNNI_X64, + X86_MOVBE_X64 = InstructionSet_X86.MOVBE_X64, } public enum InstructionSet_ARM64 @@ -163,23 +167,25 @@ public enum InstructionSet_X64 Vector128 = 17, Vector256 = 18, AVXVNNI = 19, - X86Base_X64 = 20, - SSE_X64 = 21, - SSE2_X64 = 22, - SSE3_X64 = 23, - SSSE3_X64 = 24, - SSE41_X64 = 25, - SSE42_X64 = 26, - AVX_X64 = 27, - AVX2_X64 = 28, - AES_X64 = 29, - BMI1_X64 = 30, - BMI2_X64 = 31, - FMA_X64 = 32, - LZCNT_X64 = 33, - PCLMULQDQ_X64 = 34, - POPCNT_X64 = 35, - AVXVNNI_X64 = 36, + MOVBE = 20, + X86Base_X64 = 21, + SSE_X64 = 22, + SSE2_X64 = 23, + SSE3_X64 = 24, + SSSE3_X64 = 25, + SSE41_X64 = 26, + SSE42_X64 = 27, + AVX_X64 = 28, + AVX2_X64 = 29, + AES_X64 = 30, + BMI1_X64 = 31, + BMI2_X64 = 32, + FMA_X64 = 33, + LZCNT_X64 = 34, + PCLMULQDQ_X64 = 35, + POPCNT_X64 = 36, + AVXVNNI_X64 = 37, + MOVBE_X64 = 38, } public enum InstructionSet_X86 @@ -205,23 +211,25 @@ public enum InstructionSet_X86 Vector128 = 17, Vector256 = 18, AVXVNNI = 19, - X86Base_X64 = 20, - SSE_X64 = 21, - SSE2_X64 = 22, - SSE3_X64 = 23, - SSSE3_X64 = 24, - SSE41_X64 = 25, - SSE42_X64 = 26, - AVX_X64 = 27, - AVX2_X64 = 28, - AES_X64 = 29, - BMI1_X64 = 30, - BMI2_X64 = 31, - FMA_X64 = 32, - LZCNT_X64 = 33, - PCLMULQDQ_X64 = 34, - POPCNT_X64 = 35, - AVXVNNI_X64 = 36, + MOVBE = 20, + X86Base_X64 = 21, + SSE_X64 = 22, + SSE2_X64 = 23, + SSE3_X64 = 24, + SSSE3_X64 = 25, + SSE41_X64 = 26, + SSE42_X64 = 27, + AVX_X64 = 28, + AVX2_X64 = 29, + AES_X64 = 30, + BMI1_X64 = 31, + BMI2_X64 = 32, + FMA_X64 = 33, + LZCNT_X64 = 34, + PCLMULQDQ_X64 = 35, + POPCNT_X64 = 36, + AVXVNNI_X64 = 37, + MOVBE_X64 = 38, } public struct InstructionSetFlags : IEnumerable @@ -457,6 +465,10 @@ public static InstructionSetFlags ExpandInstructionSetByImplicationHelper(Target resultflags.AddInstructionSet(InstructionSet.X64_AVXVNNI_X64); if (resultflags.HasInstructionSet(InstructionSet.X64_AVXVNNI_X64)) resultflags.AddInstructionSet(InstructionSet.X64_AVXVNNI); + if (resultflags.HasInstructionSet(InstructionSet.X64_MOVBE)) + resultflags.AddInstructionSet(InstructionSet.X64_MOVBE_X64); + if (resultflags.HasInstructionSet(InstructionSet.X64_MOVBE_X64)) + resultflags.AddInstructionSet(InstructionSet.X64_MOVBE); if (resultflags.HasInstructionSet(InstructionSet.X64_SSE)) resultflags.AddInstructionSet(InstructionSet.X64_X86Base); if (resultflags.HasInstructionSet(InstructionSet.X64_SSE2)) @@ -493,6 +505,8 @@ public static InstructionSetFlags ExpandInstructionSetByImplicationHelper(Target resultflags.AddInstructionSet(InstructionSet.X64_AVX); if (resultflags.HasInstructionSet(InstructionSet.X64_AVXVNNI)) resultflags.AddInstructionSet(InstructionSet.X64_AVX2); + if (resultflags.HasInstructionSet(InstructionSet.X64_MOVBE)) + resultflags.AddInstructionSet(InstructionSet.X64_SSE42); break; case TargetArchitecture.X86: @@ -532,6 +546,8 @@ public static InstructionSetFlags ExpandInstructionSetByImplicationHelper(Target resultflags.AddInstructionSet(InstructionSet.X86_AVX); if (resultflags.HasInstructionSet(InstructionSet.X86_AVXVNNI)) resultflags.AddInstructionSet(InstructionSet.X86_AVX2); + if (resultflags.HasInstructionSet(InstructionSet.X86_MOVBE)) + resultflags.AddInstructionSet(InstructionSet.X86_SSE42); break; } } while (!oldflags.Equals(resultflags)); @@ -626,6 +642,8 @@ private static InstructionSetFlags ExpandInstructionSetByReverseImplicationHelpe resultflags.AddInstructionSet(InstructionSet.X64_POPCNT); if (resultflags.HasInstructionSet(InstructionSet.X64_AVXVNNI_X64)) resultflags.AddInstructionSet(InstructionSet.X64_AVXVNNI); + if (resultflags.HasInstructionSet(InstructionSet.X64_MOVBE_X64)) + resultflags.AddInstructionSet(InstructionSet.X64_MOVBE); if (resultflags.HasInstructionSet(InstructionSet.X64_X86Base)) resultflags.AddInstructionSet(InstructionSet.X64_SSE); if (resultflags.HasInstructionSet(InstructionSet.X64_SSE)) @@ -662,6 +680,8 @@ private static InstructionSetFlags ExpandInstructionSetByReverseImplicationHelpe resultflags.AddInstructionSet(InstructionSet.X64_Vector256); if (resultflags.HasInstructionSet(InstructionSet.X64_AVX2)) resultflags.AddInstructionSet(InstructionSet.X64_AVXVNNI); + if (resultflags.HasInstructionSet(InstructionSet.X64_SSE42)) + resultflags.AddInstructionSet(InstructionSet.X64_MOVBE); break; case TargetArchitecture.X86: @@ -701,6 +721,8 @@ private static InstructionSetFlags ExpandInstructionSetByReverseImplicationHelpe resultflags.AddInstructionSet(InstructionSet.X86_Vector256); if (resultflags.HasInstructionSet(InstructionSet.X86_AVX2)) resultflags.AddInstructionSet(InstructionSet.X86_AVXVNNI); + if (resultflags.HasInstructionSet(InstructionSet.X86_SSE42)) + resultflags.AddInstructionSet(InstructionSet.X86_MOVBE); break; } } while (!oldflags.Equals(resultflags)); @@ -765,6 +787,7 @@ public static IEnumerable ArchitectureToValidInstructionSets yield return new InstructionSetInfo("Vector128", "", InstructionSet.X64_Vector128, false); yield return new InstructionSetInfo("Vector256", "", InstructionSet.X64_Vector256, false); yield return new InstructionSetInfo("avxvnni", "AvxVnni", InstructionSet.X64_AVXVNNI, true); + yield return new InstructionSetInfo("movbe", "Movbe", InstructionSet.X64_MOVBE, true); break; case TargetArchitecture.X86: @@ -787,6 +810,7 @@ public static IEnumerable ArchitectureToValidInstructionSets yield return new InstructionSetInfo("Vector128", "", InstructionSet.X86_Vector128, false); yield return new InstructionSetInfo("Vector256", "", InstructionSet.X86_Vector256, false); yield return new InstructionSetInfo("avxvnni", "AvxVnni", InstructionSet.X86_AVXVNNI, true); + yield return new InstructionSetInfo("movbe", "Movbe", InstructionSet.X86_MOVBE, true); break; } } @@ -850,6 +874,8 @@ public void Set64BitInstructionSetVariants(TargetArchitecture architecture) AddInstructionSet(InstructionSet.X64_POPCNT_X64); if (HasInstructionSet(InstructionSet.X64_AVXVNNI)) AddInstructionSet(InstructionSet.X64_AVXVNNI_X64); + if (HasInstructionSet(InstructionSet.X64_MOVBE)) + AddInstructionSet(InstructionSet.X64_MOVBE_X64); break; case TargetArchitecture.X86: @@ -891,6 +917,7 @@ public void Set64BitInstructionSetVariantsUnconditionally(TargetArchitecture arc AddInstructionSet(InstructionSet.X64_PCLMULQDQ_X64); AddInstructionSet(InstructionSet.X64_POPCNT_X64); AddInstructionSet(InstructionSet.X64_AVXVNNI_X64); + AddInstructionSet(InstructionSet.X64_MOVBE_X64); break; case TargetArchitecture.X86: @@ -911,6 +938,7 @@ public void Set64BitInstructionSetVariantsUnconditionally(TargetArchitecture arc AddInstructionSet(InstructionSet.X86_PCLMULQDQ_X64); AddInstructionSet(InstructionSet.X86_POPCNT_X64); AddInstructionSet(InstructionSet.X86_AVXVNNI_X64); + AddInstructionSet(InstructionSet.X86_MOVBE_X64); break; } } diff --git a/src/coreclr/tools/Common/JitInterface/ThunkGenerator/InstructionSetDesc.txt b/src/coreclr/tools/Common/JitInterface/ThunkGenerator/InstructionSetDesc.txt index 1ab412ba69f147..f1db26c5235561 100644 --- a/src/coreclr/tools/Common/JitInterface/ThunkGenerator/InstructionSetDesc.txt +++ b/src/coreclr/tools/Common/JitInterface/ThunkGenerator/InstructionSetDesc.txt @@ -44,6 +44,7 @@ instructionset ,X86 ,Popcnt , ,15 ,POPCNT ,popcnt instructionset ,X86 , , , ,Vector128, instructionset ,X86 , , , ,Vector256, instructionset ,X86 ,AvxVnni , ,25 ,AVXVNNI ,avxvnni +instructionset ,X86 ,Movbe , ,27 ,MOVBE ,movbe instructionset64bit,X86 ,X86Base instructionset64bit,X86 ,SSE @@ -62,6 +63,7 @@ instructionset64bit,X86 ,LZCNT instructionset64bit,X86 ,PCLMULQDQ instructionset64bit,X86 ,POPCNT instructionset64bit,X86 ,AVXVNNI +instructionset64bit,X86 ,MOVBE vectorinstructionset,X86 ,Vector128 vectorinstructionset,X86 ,Vector256 @@ -84,6 +86,7 @@ implication ,X86 ,POPCNT ,SSE42 implication ,X86 ,Vector128 ,SSE implication ,X86 ,Vector256 ,AVX implication ,X86 ,AVXVNNI ,AVX2 +implication ,X86 ,MOVBE ,SSE42 ; Definition of X64 instruction sets definearch ,X64 ,64Bit ,X64 diff --git a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/HardwareIntrinsicHelpers.Aot.cs b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/HardwareIntrinsicHelpers.Aot.cs index 86febba0a95795..7105c956162f5d 100644 --- a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/HardwareIntrinsicHelpers.Aot.cs +++ b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/HardwareIntrinsicHelpers.Aot.cs @@ -100,6 +100,7 @@ private static class XArchIntrinsicConstants public const int Bmi2 = 0x0800; public const int Lzcnt = 0x1000; public const int AvxVnni = 0x2000; + public const int Movbe = 0x4000; public static int FromInstructionSet(InstructionSet instructionSet) { @@ -137,6 +138,8 @@ public static int FromInstructionSet(InstructionSet instructionSet) InstructionSet.X64_LZCNT_X64 => Lzcnt, InstructionSet.X64_AVXVNNI => AvxVnni, InstructionSet.X64_AVXVNNI_X64 => AvxVnni, + InstructionSet.X64_MOVBE => Movbe, + InstructionSet.X64_MOVBE_X64 => Movbe, // SSE and SSE2 are baseline ISAs - they're always available InstructionSet.X64_SSE => 0, diff --git a/src/coreclr/tools/aot/ILCompiler/Program.cs b/src/coreclr/tools/aot/ILCompiler/Program.cs index 4d30c1cd4f564a..b2cdc4c9b1ebcf 100644 --- a/src/coreclr/tools/aot/ILCompiler/Program.cs +++ b/src/coreclr/tools/aot/ILCompiler/Program.cs @@ -492,6 +492,7 @@ private int Run(string[] args) optimisticInstructionSetSupportBuilder.AddSupportedInstructionSet("sse4.2"); // Lower SSE versions included by implication optimisticInstructionSetSupportBuilder.AddSupportedInstructionSet("aes"); optimisticInstructionSetSupportBuilder.AddSupportedInstructionSet("pclmul"); + optimisticInstructionSetSupportBuilder.AddSupportedInstructionSet("movbe"); optimisticInstructionSetSupportBuilder.AddSupportedInstructionSet("popcnt"); optimisticInstructionSetSupportBuilder.AddSupportedInstructionSet("lzcnt"); diff --git a/src/coreclr/vm/codeman.cpp b/src/coreclr/vm/codeman.cpp index 155de4fcfd49c0..0a0434c29e0b3e 100644 --- a/src/coreclr/vm/codeman.cpp +++ b/src/coreclr/vm/codeman.cpp @@ -1362,6 +1362,9 @@ void EEJitManager::SetCpuInfo() // CORJIT_FLAG_USE_SSE42 if the following feature bits are set (input EAX of 1) // CORJIT_FLAG_USE_SSE41 // SSE4.2 - ECX bit 20 + // CORJIT_FLAG_USE_MOVBE if the following feature bits are set (input EAX of 1) + // CORJIT_FLAG_USE_SSE42 + // MOVBE - ECX bit 22 // CORJIT_FLAG_USE_POPCNT if the following feature bits are set (input EAX of 1) // CORJIT_FLAG_USE_SSE42 // POPCNT - ECX bit 23 @@ -1433,6 +1436,11 @@ void EEJitManager::SetCpuInfo() { CPUCompileFlags.Set(InstructionSet_SSE42); + if ((cpuidInfo[ECX] & (1 << 22)) != 0) // MOVBE + { + CPUCompileFlags.Set(InstructionSet_MOVBE); + } + if ((cpuidInfo[ECX] & (1 << 23)) != 0) // POPCNT { CPUCompileFlags.Set(InstructionSet_POPCNT); @@ -1610,6 +1618,11 @@ void EEJitManager::SetCpuInfo() CPUCompileFlags.Clear(InstructionSet_PCLMULQDQ); } + if (!CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_EnableMOVBE)) + { + CPUCompileFlags.Clear(InstructionSet_MOVBE); + } + if (!CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_EnablePOPCNT)) { CPUCompileFlags.Clear(InstructionSet_POPCNT); diff --git a/src/tests/JIT/Intrinsics/BinaryPrimitivesReverseEndianness.cs b/src/tests/JIT/Intrinsics/BinaryPrimitivesReverseEndianness.cs index e71bd36e36bd39..65908c7c1ee616 100644 --- a/src/tests/JIT/Intrinsics/BinaryPrimitivesReverseEndianness.cs +++ b/src/tests/JIT/Intrinsics/BinaryPrimitivesReverseEndianness.cs @@ -4,6 +4,7 @@ using System; using System.Buffers.Binary; +using System.Runtime.CompilerServices; using System.Runtime.InteropServices; using Internal; @@ -20,12 +21,18 @@ class Program private const uint ConstantUInt32Input = 0x98765432; private const uint ConstantUInt32Expected = 0x32547698; + private const long ConstantInt64Input = 0x1edcba9876543210; + private const long ConstantInt64Expected = 0x1032547698badc1e; + private const ulong ConstantUInt64Input = 0xfedcba9876543210; private const ulong ConstantUInt64Expected = 0x1032547698badcfe; private static readonly byte[] s_bufferLE = new byte[] { 0x32, 0x54, 0x76, 0x98 }; private static readonly byte[] s_bufferBE = new byte[] { 0x98, 0x76, 0x54, 0x32 }; + private static readonly byte[] s_bufferLESigned64 = new byte[] { 0x10, 0x32, 0x54, 0x76, 0x98, 0xba, 0xdc, 0x1e }; + private static readonly byte[] s_bufferBESigned64 = new byte[] { 0x1e, 0xdc, 0xba, 0x98, 0x76, 0x54, 0x32, 0x10 }; + static int Main(string[] args) { /* @@ -73,6 +80,14 @@ static int Main(string[] args) return Fail; } + Span spanInt64 = BitConverter.IsLittleEndian ? s_bufferLESigned64 : s_bufferBESigned64; + long swappedSpanInt64 = BinaryPrimitives.ReverseEndianness(MemoryMarshal.Read(spanInt64)); + if (swappedSpanInt64 != ConstantInt64Expected) + { + ReportError("sign-extended Int64", ConstantInt64Input, swappedSpanInt64, ConstantInt64Expected); + return Fail; + } + /* * NON-CONST VALUE TESTS */ @@ -104,6 +119,167 @@ static int Main(string[] args) return Fail; } + /* + * READ TESTS CAST + */ + + byte readCastUInt16Input = (byte)DateTime.UtcNow.Ticks; + ushort readCastUInt16Output = ByteSwapUInt16_ReadCast(ref readCastUInt16Input); + ushort readCastUInt16Expected = ByteSwapUInt16_Control(readCastUInt16Input); + if (readCastUInt16Output != readCastUInt16Expected) + { + ReportError("read cast UInt16", readCastUInt16Input, readCastUInt16Output, readCastUInt16Expected); + return Fail; + } + + ushort readCastUInt32Input = (ushort)DateTime.UtcNow.Ticks; + uint readCastUInt32Output = ByteSwapUInt32_ReadCast(ref readCastUInt32Input); + uint readCastUInt32Expected = ByteSwapUInt32_Control(readCastUInt32Input); + if (readCastUInt32Output != readCastUInt32Expected) + { + ReportError("read cast UInt32", readCastUInt32Input, readCastUInt32Output, readCastUInt32Expected); + return Fail; + } + + uint readCastUInt64Input = (uint)DateTime.UtcNow.Ticks; + ulong readCastUInt64Output = ByteSwapUInt64_ReadCast(ref readCastUInt64Input); + ulong readCastUInt64Expected = ByteSwapUInt64_Control(readCastUInt64Input); + if (readCastUInt64Output != readCastUInt64Expected) + { + ReportError("read cast UInt64", readCastUInt64Input, readCastUInt64Output, readCastUInt64Expected); + return Fail; + } + + /* + * WRITE TESTS + */ + + ushort writeUInt16Output = default; + ushort writeUInt16Input = ByteSwapUInt16_Write(ref writeUInt16Output); + ushort writeUInt16Expected = ByteSwapUInt16_Control(writeUInt16Input); + if (writeUInt16Output != writeUInt16Expected) + { + ReportError("write UInt16", writeUInt16Input, writeUInt16Output, writeUInt16Expected); + return Fail; + } + + uint writeUInt32Output = default; + uint writeUInt32Input = ByteSwapUInt32_Write(ref writeUInt32Output); + uint writeUInt32Expected = ByteSwapUInt32_Control(writeUInt32Input); + if (writeUInt32Output != writeUInt32Expected) + { + ReportError("write UInt32", writeUInt32Input, writeUInt32Output, writeUInt32Expected); + return Fail; + } + + ulong writeUInt64Output = default; + ulong writeUInt64Input = ByteSwapUInt64_Write(ref writeUInt64Output); + ulong writeUInt64Expected = ByteSwapUInt64_Control(writeUInt64Input); + if (writeUInt64Output != writeUInt64Expected) + { + ReportError("write UInt64", writeUInt64Input, writeUInt64Output, writeUInt64Expected); + return Fail; + } + + /* + * WRITE TESTS LEA + */ + + ushort writeLeaUInt16Output = default; + ushort writeLeaUInt16Input = ByteSwapUInt16_WriteLea(ref writeLeaUInt16Output, 0); + ushort writeLeaUInt16Expected = ByteSwapUInt16_Control(writeLeaUInt16Input); + if (writeLeaUInt16Output != writeLeaUInt16Expected) + { + ReportError("write lea UInt16", writeLeaUInt16Input, writeLeaUInt16Output, writeLeaUInt16Expected); + return Fail; + } + + uint writeLeaUInt32Output = default; + uint writeLeaUInt32Input = ByteSwapUInt32_WriteLea(ref writeLeaUInt32Output, 0); + uint writeLeaUInt32Expected = ByteSwapUInt32_Control(writeLeaUInt32Input); + if (writeLeaUInt32Output != writeLeaUInt32Expected) + { + ReportError("write lea UInt32", writeLeaUInt32Input, writeLeaUInt32Output, writeLeaUInt32Expected); + return Fail; + } + + ulong writeLeaUInt64Output = default; + ulong writeLeaUInt64Input = ByteSwapUInt64_WriteLea(ref writeLeaUInt64Output, 0); + ulong writeLeaUInt64Expected = ByteSwapUInt64_Control(writeLeaUInt64Input); + if (writeLeaUInt64Output != writeLeaUInt64Expected) + { + ReportError("write lea UInt64", writeLeaUInt64Input, writeLeaUInt64Output, writeLeaUInt64Expected); + return Fail; + } + + /* + * WRITE TESTS CAST + */ + + ulong writeCastUInt8Input = (ulong)DateTime.UtcNow.Ticks; + byte writeCastUInt8Output = default; + ByteSwapUInt8_WriteCast(ref writeCastUInt8Output, writeCastUInt8Input); + ulong writeCastUInt8Expected = (byte)ByteSwapUInt64_Control(writeCastUInt8Input); + if (writeCastUInt8Output != writeCastUInt8Expected) + { + ReportError("write cast UInt8", writeCastUInt8Input, writeCastUInt8Output, writeCastUInt8Expected); + return Fail; + } + + ulong writeCastUInt16Input = (ulong)DateTime.UtcNow.Ticks; + ushort writeCastUInt16Output = default; + ByteSwapUInt16_WriteCast(ref writeCastUInt16Output, writeCastUInt16Input); + ulong writeCastUInt16Expected = (ushort)ByteSwapUInt64_Control(writeCastUInt16Input); + if (writeCastUInt16Output != writeCastUInt16Expected) + { + ReportError("write cast UInt16", writeCastUInt16Input, writeCastUInt16Output, writeCastUInt16Expected); + return Fail; + } + + ulong writeCastUInt32Input = (ulong)DateTime.UtcNow.Ticks; + uint writeCastUInt32Output = default; + ByteSwapUInt32_WriteCast(ref writeCastUInt32Output, writeCastUInt32Input); + ulong writeCastUInt32Expected = (uint)ByteSwapUInt64_Control(writeCastUInt32Input); + if (writeCastUInt32Output != writeCastUInt32Expected) + { + ReportError("write cast UInt32", writeCastUInt32Input, writeCastUInt32Output, writeCastUInt32Expected); + return Fail; + } + + /* + * READ & WRITE TESTS + */ + + ushort writeBackUInt16Input = (ushort)DateTime.UtcNow.Ticks; + ushort writeBackUInt16Output = writeBackUInt16Input; + ByteSwapUInt16_WriteBack(ref writeBackUInt16Output); + ushort writeBackUInt16Expected = ByteSwapUInt16_Control(writeBackUInt16Input); + if (writeBackUInt16Output != writeBackUInt16Expected) + { + ReportError("write back UInt16", writeBackUInt16Input, writeBackUInt16Output, writeBackUInt16Expected); + return Fail; + } + + uint writeBackUInt32Input = (uint)DateTime.UtcNow.Ticks; + uint writeBackUInt32Output = writeBackUInt32Input; + ByteSwapUInt32_WriteBack(ref writeBackUInt32Output); + uint writeBackUInt32Expected = ByteSwapUInt32_Control(writeBackUInt32Input); + if (writeBackUInt32Output != writeBackUInt32Expected) + { + ReportError("write back UInt32", writeBackUInt32Input, writeBackUInt32Output, writeBackUInt32Expected); + return Fail; + } + + ulong writeBackUInt64Input = (ulong)DateTime.UtcNow.Ticks; + ulong writeBackUInt64Output = writeBackUInt64Input; + ByteSwapUInt64_WriteBack(ref writeBackUInt64Output); + ulong writeBackUInt64Expected = ByteSwapUInt64_Control(writeBackUInt64Input); + if (writeBackUInt64Output != writeBackUInt64Expected) + { + ReportError("write back UInt64", writeBackUInt64Input, writeBackUInt64Output, writeBackUInt64Expected); + return Fail; + } + return Pass; } @@ -143,6 +319,120 @@ private static ulong ByteSwapUnsigned_General(ulong value, int width) return retVal; } + [MethodImpl(MethodImplOptions.NoInlining)] + private static ushort ByteSwapUInt16_ReadCast(ref byte input) + { + return BinaryPrimitives.ReverseEndianness((ushort)input); + } + + [MethodImpl(MethodImplOptions.NoInlining)] + private static uint ByteSwapUInt32_ReadCast(ref ushort input) + { + return BinaryPrimitives.ReverseEndianness((uint)input); + } + + [MethodImpl(MethodImplOptions.NoInlining)] + private static ulong ByteSwapUInt64_ReadCast(ref uint input) + { + return BinaryPrimitives.ReverseEndianness((ulong)input); + } + + [MethodImpl(MethodImplOptions.NoInlining)] + private static ushort ByteSwapUInt16_Write(ref ushort output) + { + ushort input = (ushort)DateTime.UtcNow.Ticks; + + output = BinaryPrimitives.ReverseEndianness(input); + + return input; + } + + [MethodImpl(MethodImplOptions.NoInlining)] + private static uint ByteSwapUInt32_Write(ref uint output) + { + uint input = (uint)DateTime.UtcNow.Ticks; + + output = BinaryPrimitives.ReverseEndianness(input); + + return input; + } + + [MethodImpl(MethodImplOptions.NoInlining)] + private static ulong ByteSwapUInt64_Write(ref ulong output) + { + ulong input = (ulong)DateTime.UtcNow.Ticks; + + output = BinaryPrimitives.ReverseEndianness(input); + + return input; + } + + [MethodImpl(MethodImplOptions.NoInlining)] + private static ushort ByteSwapUInt16_WriteLea(ref ushort output, int offset) + { + ushort input = (ushort)DateTime.UtcNow.Ticks; + + Unsafe.Add(ref output, offset) = BinaryPrimitives.ReverseEndianness(input); + + return input; + } + + [MethodImpl(MethodImplOptions.NoInlining)] + private static uint ByteSwapUInt32_WriteLea(ref uint output, int offset) + { + uint input = (uint)DateTime.UtcNow.Ticks; + + Unsafe.Add(ref output, offset) = BinaryPrimitives.ReverseEndianness(input); + + return input; + } + + [MethodImpl(MethodImplOptions.NoInlining)] + private static ulong ByteSwapUInt64_WriteLea(ref ulong output, int offset) + { + ulong input = (ulong)DateTime.UtcNow.Ticks; + + Unsafe.Add(ref output, offset) = BinaryPrimitives.ReverseEndianness(input); + + return input; + } + + [MethodImpl(MethodImplOptions.NoInlining)] + private static void ByteSwapUInt16_WriteBack(ref ushort value) + { + value = BinaryPrimitives.ReverseEndianness(value); + } + + [MethodImpl(MethodImplOptions.NoInlining)] + private static void ByteSwapUInt32_WriteBack(ref uint value) + { + value = BinaryPrimitives.ReverseEndianness(value); + } + + [MethodImpl(MethodImplOptions.NoInlining)] + private static void ByteSwapUInt64_WriteBack(ref ulong value) + { + value = BinaryPrimitives.ReverseEndianness(value); + } + + [MethodImpl(MethodImplOptions.NoInlining)] + private static void ByteSwapUInt8_WriteCast(ref byte output, ulong input) + { + output = (byte)BinaryPrimitives.ReverseEndianness(input); + } + + [MethodImpl(MethodImplOptions.NoInlining)] + private static void ByteSwapUInt16_WriteCast(ref ushort output, ulong input) + { + output = (ushort)BinaryPrimitives.ReverseEndianness(input); + } + + [MethodImpl(MethodImplOptions.NoInlining)] + private static void ByteSwapUInt32_WriteCast(ref uint output, ulong input) + { + output = (uint)BinaryPrimitives.ReverseEndianness(input); + } + private static string GetHexString(T value) { if (typeof(T) == typeof(short))