From 6e6573a80ab9c06623c46250a75fb80dfeb2c5e7 Mon Sep 17 00:00:00 2001 From: Jonathan Davies Date: Tue, 13 Jan 2026 11:41:00 +0000 Subject: [PATCH 01/13] arm64: Add GT_BFI node --- src/coreclr/jit/codegen.h | 1 + src/coreclr/jit/codegenarm64.cpp | 32 ++ src/coreclr/jit/codegenarmarch.cpp | 4 + src/coreclr/jit/compiler.h | 4 + src/coreclr/jit/gentree.cpp | 10 + src/coreclr/jit/gentree.h | 35 ++ src/coreclr/jit/gtlist.h | 1 + src/coreclr/jit/gtstructs.h | 1 + src/coreclr/jit/lower.cpp | 309 +++++++++++++++++ src/coreclr/jit/lower.h | 25 ++ src/coreclr/jit/lowerarmarch.cpp | 9 + src/coreclr/jit/lsraarm64.cpp | 10 + .../CoreCLRTestLibrary.csproj | 1 + src/tests/Common/CoreCLRTestLibrary/Expect.cs | 26 ++ src/tests/JIT/opt/InstructionCombining/Bfi.cs | 310 ++++++++++++++++++ .../JIT/opt/InstructionCombining/Bfi.csproj | 18 + 16 files changed, 796 insertions(+) create mode 100644 src/tests/Common/CoreCLRTestLibrary/Expect.cs create mode 100644 src/tests/JIT/opt/InstructionCombining/Bfi.cs create mode 100644 src/tests/JIT/opt/InstructionCombining/Bfi.csproj diff --git a/src/coreclr/jit/codegen.h b/src/coreclr/jit/codegen.h index 927c04965dc04f..127fb4e12230a5 100644 --- a/src/coreclr/jit/codegen.h +++ b/src/coreclr/jit/codegen.h @@ -1256,6 +1256,7 @@ class CodeGen final : public CodeGenInterface void genCodeForJumpCompare(GenTreeOpCC* tree); void genCompareImmAndJump( GenCondition::Code cond, regNumber reg, ssize_t compareImm, emitAttr size, BasicBlock* target); + void genCodeForBfi(GenTreeOp* tree); void genCodeForBfiz(GenTreeOp* tree); #endif // TARGET_ARM64 diff --git a/src/coreclr/jit/codegenarm64.cpp b/src/coreclr/jit/codegenarm64.cpp index 17c796ae792b28..8dc5c98b4ab076 100644 --- a/src/coreclr/jit/codegenarm64.cpp +++ b/src/coreclr/jit/codegenarm64.cpp @@ -5831,6 +5831,38 @@ void CodeGen::instGen_MemoryBarrier(BarrierKind barrierKind) } } +//------------------------------------------------------------------------ +// genCodeForBfi: Generates the code sequence for a 
GenTree node that +// represents a bitfield insert. +// +// Arguments: +// tree - the bitfield insert. +// +void CodeGen::genCodeForBfi(GenTreeOp* tree) +{ + assert(tree->OperIs(GT_BFI)); + + GenTreeBfi* bfi = tree->AsBfi(); + + emitAttr size = emitActualTypeSize(tree); + unsigned regBits = emitter::getBitWidth(size); + + GenTree* base = tree->gtGetOp1(); + GenTree* src = tree->gtGetOp2(); + + genConsumeOperands(bfi); + + unsigned offset = bfi->GetOffset(); + unsigned width = bfi->GetWidth(); + + assert(width >= 1 && width <= regBits); + assert(offset < regBits && (offset + width) <= regBits); + + GetEmitter()->emitIns_R_R_I_I(INS_bfi, size, base->GetRegNum(), src->GetRegNum(), (int)offset, (int)width); + + genProduceReg(tree); +} + //------------------------------------------------------------------------ // genCodeForBfiz: Generates the code sequence for a GenTree node that // represents a bitfield insert in zero with sign/zero extension. diff --git a/src/coreclr/jit/codegenarmarch.cpp b/src/coreclr/jit/codegenarmarch.cpp index ded5f7fec2daf8..7101d9893e2db9 100644 --- a/src/coreclr/jit/codegenarmarch.cpp +++ b/src/coreclr/jit/codegenarmarch.cpp @@ -329,6 +329,10 @@ void CodeGen::genCodeForTreeNode(GenTree* treeNode) genCodeForSwap(treeNode->AsOp()); break; + case GT_BFI: + genCodeForBfi(treeNode->AsOp()); + break; + case GT_BFIZ: genCodeForBfiz(treeNode->AsOp()); break; diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h index ca75a19e7f4cfc..57b2b7d956b0f6 100644 --- a/src/coreclr/jit/compiler.h +++ b/src/coreclr/jit/compiler.h @@ -2983,6 +2983,10 @@ class Compiler GenTreeColon* gtNewColonNode(var_types type, GenTree* thenNode, GenTree* elseNode); GenTreeQmark* gtNewQmarkNode(var_types type, GenTree* cond, GenTreeColon* colon); +#if defined(TARGET_ARM64) + GenTreeBfi* gtNewBfiNode(var_types type, GenTree* base, GenTree* src, unsigned offset, unsigned width); +#endif + GenTree* gtNewLargeOperNode(genTreeOps oper, var_types type = TYP_I_IMPL, 
GenTree* op1 = nullptr, diff --git a/src/coreclr/jit/gentree.cpp b/src/coreclr/jit/gentree.cpp index ab235263b96d80..f98261f79ebe5c 100644 --- a/src/coreclr/jit/gentree.cpp +++ b/src/coreclr/jit/gentree.cpp @@ -7806,6 +7806,16 @@ GenTreeQmark* Compiler::gtNewQmarkNode(var_types type, GenTree* cond, GenTreeCol return result; } +#if defined(TARGET_ARM64) +GenTreeBfi* Compiler::gtNewBfiNode(var_types type, GenTree* base, GenTree* src, unsigned offset, unsigned width) +{ + GenTreeBfi* result = new (this, GT_BFI) GenTreeBfi(type, base, src, offset, width); + result->gtFlags |= (base->gtFlags | src->gtFlags) & (GTF_ALL_EFFECT); + result->gtFlags &= ~GTF_SET_FLAGS; + return result; +} +#endif + GenTreeIntCon* Compiler::gtNewIconNode(ssize_t value, var_types type) { assert(genActualType(type) == type); diff --git a/src/coreclr/jit/gentree.h b/src/coreclr/jit/gentree.h index 0aeae5512d44fb..28f1e7809854aa 100644 --- a/src/coreclr/jit/gentree.h +++ b/src/coreclr/jit/gentree.h @@ -5967,6 +5967,41 @@ struct GenTreeQmark : public GenTreeOp #endif }; +#ifdef TARGET_ARM64 +struct GenTreeBfi : public GenTreeOp +{ + unsigned gtOffset; + unsigned gtWidth; + + GenTreeBfi(var_types type, GenTree* base, GenTree* src, unsigned offset, unsigned width) + : GenTreeOp(GT_BFI, type, base, src) + , gtOffset(offset) + , gtWidth(width) + { + } + + unsigned GetOffset() const + { + return gtOffset; + } + unsigned GetWidth() const + { + return gtWidth; + } + unsigned GetMask() const + { + return ((~0ULL >> (64 - gtWidth)) << gtOffset); + } + +#if DEBUGGABLE_GENTREE + GenTreeBfi() + : GenTreeOp() + { + } +#endif +}; +#endif + /* gtIntrinsic -- intrinsic (possibly-binary op [NULL op2 is allowed] with an additional field) */ struct GenTreeIntrinsic : public GenTreeOp diff --git a/src/coreclr/jit/gtlist.h b/src/coreclr/jit/gtlist.h index d30b6f10c5d612..d4f88ee01c77f0 100644 --- a/src/coreclr/jit/gtlist.h +++ b/src/coreclr/jit/gtlist.h @@ -221,6 +221,7 @@ GTNODE(OR_NOT , GenTreeOp 
,0,0,GTK_BINOP|DBK_NOTHIR) GTNODE(XOR_NOT , GenTreeOp ,0,0,GTK_BINOP|DBK_NOTHIR) #ifdef TARGET_ARM64 +GTNODE(BFI , GenTreeBfi ,0,0,GTK_BINOP|GTK_EXOP|DBK_NOTHIR) // Bitfield Insert. GTNODE(BFIZ , GenTreeOp ,0,0,GTK_BINOP|DBK_NOTHIR) // Bitfield Insert in Zero. #endif diff --git a/src/coreclr/jit/gtstructs.h b/src/coreclr/jit/gtstructs.h index 6e7d62e496f038..c8bee7ca7b537d 100644 --- a/src/coreclr/jit/gtstructs.h +++ b/src/coreclr/jit/gtstructs.h @@ -95,6 +95,7 @@ GTSTRUCT_1(StoreInd , GT_STOREIND) GTSTRUCT_1(CmpXchg , GT_CMPXCHG) #ifdef TARGET_ARM64 GTSTRUCT_N(Conditional , GT_SELECT, GT_SELECT_INC, GT_SELECT_INV, GT_SELECT_NEG) +GTSTRUCT_1(Bfi , GT_BFI) #else GTSTRUCT_N(Conditional , GT_SELECT) #endif //TARGET_ARM64 diff --git a/src/coreclr/jit/lower.cpp b/src/coreclr/jit/lower.cpp index fe481c5bab7b16..40ee24ab00229a 100644 --- a/src/coreclr/jit/lower.cpp +++ b/src/coreclr/jit/lower.cpp @@ -12010,6 +12010,315 @@ bool Lowering::TryLowerAndOrToCCMP(GenTreeOp* tree, GenTree** next) return true; } +#ifdef TARGET_ARM64 +//------------------------------------------------------------------------ +// TryLowerOrToBFI : Lower OR of 2 masking operations into a BFI node +// OR op1 can be a const var, AND, BFI or OR node +// OR op2 can be a LSH & AND or a BFIZ node +// +// Arguments: +// tree - pointer to the node +// next - [out] Next node to lower if this function returns true +// +// Return Value: +// false if no changes were made +// +bool Lowering::TryLowerOrToBFI(GenTreeOp* tree, GenTree** next) +{ + assert(tree->OperIs(GT_OR)); + + if (!comp->opts.OptimizationEnabled()) + { + return false; + } + + BfiPattern bfiPattern; + if (!TryMatchOrToBfiPattern(tree, &bfiPattern)) + { + return false; + } + + unsigned regBits = genTypeSize(tree) * BITS_PER_BYTE; + uint64_t regMask = (regBits == 64) ? 
UINT64_MAX : ((1ull << regBits) - 1); + + uint64_t newMask = (bfiPattern.lowMask << bfiPattern.offset) & regMask; + uint64_t baseMask = 0; + bool baseMaskKnown = false; + + GenTree* node = bfiPattern.base; + for (int depth = 0; depth < 64 && (node != nullptr); depth++) + { + if (node->OperIs(GT_AND)) + { + GenTree* andOp1 = node->gtGetOp1(); + GenTree* andOp2 = node->gtGetOp2(); + + GenTree* valNode = andOp1; + GenTree* constNode = andOp2; + if (!constNode->IsIntegralConst()) + { + std::swap(valNode, constNode); + if (!constNode->IsIntegralConst()) + { + baseMaskKnown = false; + break; + } + } + + uint64_t c = (uint64_t)constNode->AsIntConCommon()->IntegralValue(); + c &= regMask; + baseMask |= c; + baseMaskKnown = true; + break; + } + else if (node->IsIntegralConst()) + { + uint64_t c = (uint64_t)node->AsIntConCommon()->IntegralValue(); + c &= regMask; + baseMask |= c; + baseMaskKnown = true; + break; + } + else if (node->OperIs(GT_BFI)) + { + GenTreeBfi* bfi = node->AsBfi(); + uint64_t m = bfi->GetMask() & regMask; + baseMask |= m; + baseMaskKnown = true; + + node = bfi->gtGetOp1(); + continue; + } + else if (node->OperIs(GT_OR)) + { + BfiPattern nested; + if (!TryMatchOrToBfiPattern(node->AsOp(), &nested)) + { + baseMaskKnown = false; + break; + } + + uint64_t subMask = (nested.lowMask << nested.offset) & regMask; + baseMask |= subMask; + baseMaskKnown = true; + + node = nested.base; + continue; + } + else + { + baseMaskKnown = false; + break; + } + } + + if (!baseMaskKnown || ((baseMask & newMask) != 0)) + { + return false; + } + + var_types ty = genActualType(tree->TypeGet()); + GenTreeBfi* bfi = + comp->gtNewBfiNode(ty, bfiPattern.base, bfiPattern.value, static_cast(bfiPattern.offset), + static_cast(bfiPattern.width)); + bfi->CopyCosts(tree); + + ContainCheckNode(bfi); + + BlockRange().InsertBefore(tree, bfi); + + LIR::Use use; + if (BlockRange().TryGetUse(tree, &use)) + { + use.ReplaceWith(bfi); + } + + // Remove old nodes depending on pattern kind + switch 
(bfiPattern.kind) + { + case BfiPatternKind::FromLshAndMask: + BlockRange().Remove(bfiPattern.shiftAnd); + BlockRange().Remove(bfiPattern.shiftAndConst); + BlockRange().Remove(bfiPattern.shiftConst); + BlockRange().Remove(bfiPattern.shiftNode); + break; + + case BfiPatternKind::FromBfiz: + // Remove CAST first (it is op1 of BFIZ) + if (bfiPattern.castNode != nullptr) + { + BlockRange().Remove(bfiPattern.castNode); + } + if (bfiPattern.shiftConst != nullptr) + { + BlockRange().Remove(bfiPattern.shiftConst); + } + BlockRange().Remove(bfiPattern.shiftNode); // BFIZ node + break; + + default: + return false; + } + + BlockRange().Remove(tree); + + *next = bfi->gtNext; + return true; +} + +//------------------------------------------------------------------------ +// TryMatchOrToBfiPattern : Check if the tree op2 matches the 2 valid +// BFI patterns. +// Case A: The op2 is a LSH node with a AND performing a constant mask +// Case B: The op2 is a BFIZ node with a CAST node +// +// Arguments: +// tree - pointer to the or node +// result - [out] BfiPattern struct containing pointers to nodes +// of the found pattern. +// +// Return Value: +// false if the or node doesn't match the required pattern +// +bool Lowering::TryMatchOrToBfiPattern(GenTreeOp* tree, BfiPattern* result) +{ + assert(tree->OperIs(GT_OR)); + + GenTree* op1 = tree->gtGetOp1(); + GenTree* op2 = tree->gtGetOp2(); + + if (!op1->OperIs(GT_LSH, GT_BFIZ) && !op2->OperIs(GT_LSH, GT_BFIZ)) + { + return false; + } + + GenTree* orOp1 = op1->OperIs(GT_LSH, GT_BFIZ) ? op1 : op2; + GenTree* base = (orOp1 == op1) ? op2 : op1; + + unsigned regBits = genTypeSize(tree) * BITS_PER_BYTE; + uint64_t regMask = (regBits == 64) ? 
UINT64_MAX : ((1ull << regBits) - 1); + + // Case A: OR(base, LSH(AND(value, lowMask), offset)) + if (orOp1->OperIs(GT_LSH)) + { + GenTree* shiftAnd = orOp1->gtGetOp1(); + GenTree* shiftConst = orOp1->gtGetOp2(); + + if (!shiftAnd->OperIs(GT_AND) || !shiftConst->IsIntegralConst()) + { + return false; + } + + // Allow const on either side of AND + GenTree* valueNode = shiftAnd->gtGetOp1(); + GenTree* constNode = shiftAnd->gtGetOp2(); + + if (!constNode->IsIntegralConst()) + { + std::swap(valueNode, constNode); + if (!constNode->IsIntegralConst()) + { + return false; + } + } + + ssize_t shiftVal = shiftConst->AsIntConCommon()->IntegralValue(); + if (shiftVal < 0) + { + return false; + } + + uint64_t lowMask = (uint64_t)constNode->AsIntConCommon()->IntegralValue(); + lowMask &= regMask; + + if (lowMask == 0) + { + return false; + } + + // lowMask must be contiguous from LSB + if ((lowMask & (lowMask + 1)) != 0) + { + return false; + } + + uint64_t width = (uint64_t)BitOperations::PopCount(lowMask); + uint64_t offset = (uint64_t)shiftVal; + + if (offset >= regBits || (offset + width) > regBits) + { + return false; + } + + result->kind = BfiPatternKind::FromLshAndMask; + result->base = base; + result->value = valueNode; + result->shiftNode = orOp1; + result->shiftAnd = shiftAnd; + result->shiftAndConst = constNode; + result->shiftConst = shiftConst; + result->castNode = nullptr; + result->lowMask = lowMask; + result->offset = offset; + result->width = width; + return true; + } + + // Case B: OR(base, BFIZ(CAST(...), shiftByConst)) + assert(orOp1->OperIs(GT_BFIZ)); + + GenTree* shiftConst = orOp1->gtGetOp2(); + if ((shiftConst == nullptr) || !shiftConst->IsIntegralConst()) + { + return false; + } + + ssize_t shiftVal = shiftConst->AsIntConCommon()->IntegralValue(); + if (shiftVal < 0) + { + return false; + } + + GenTree* bfizOp1 = orOp1->gtGetOp1(); + if ((bfizOp1 == nullptr) || !bfizOp1->OperIs(GT_CAST)) + { + return false; + } + + GenTreeCast* cast = bfizOp1->AsCast(); 
+ GenTree* castOp = cast->CastOp(); + + uint64_t width = (uint64_t)varTypeIsSmall(cast->CastToType()) ? genTypeSize(cast->CastToType()) * BITS_PER_BYTE + : genTypeSize(castOp) * BITS_PER_BYTE; + if ((width == 0) || (width > regBits)) + { + return false; + } + + uint64_t offset = ((uint64_t)shiftVal) & (regBits - 1); + if (offset >= regBits || (offset + width) > regBits) + { + return false; + } + + uint64_t lowMask = (width == 64) ? UINT64_MAX : ((1ull << width) - 1); + lowMask &= regMask; + + result->kind = BfiPatternKind::FromBfiz; + result->base = base; + result->value = cast->CastOp(); + result->shiftNode = orOp1; + result->shiftConst = shiftConst; + result->castNode = bfizOp1; + result->shiftAnd = nullptr; + result->shiftAndConst = nullptr; + result->lowMask = lowMask; + result->offset = offset; + result->width = width; + return true; +} +#endif + //------------------------------------------------------------------------ // ContainCheckConditionalCompare: determine whether the source of a compare within a compare chain should be contained. 
// diff --git a/src/coreclr/jit/lower.h b/src/coreclr/jit/lower.h index 6f90e68327b247..33eb39d8a3cc04 100644 --- a/src/coreclr/jit/lower.h +++ b/src/coreclr/jit/lower.h @@ -99,6 +99,31 @@ class Lowering final : public Phase bool TryLowerNegToMulLongOp(GenTreeOp* op, GenTree** next); bool TryContainingCselOp(GenTreeHWIntrinsic* parentNode, GenTreeHWIntrinsic* childNode); #endif +#if defined(TARGET_ARM64) + bool TryLowerOrToBFI(GenTreeOp* tree, GenTree** next); + + enum class BfiPatternKind + { + FromLshAndMask, + FromBfiz + }; + + struct BfiPattern + { + BfiPatternKind kind; + GenTree* base; + GenTree* value; + GenTree* shiftAnd; + GenTree* shiftAndConst; + GenTree* shiftConst; + GenTree* shiftNode; + GenTree* castNode; + uint64_t lowMask; + uint64_t offset; + uint64_t width; + }; + bool TryMatchOrToBfiPattern(GenTreeOp* orTree, BfiPattern* result); +#endif #ifdef TARGET_RISCV64 bool TryLowerShiftAddToShxadd(GenTreeOp* tree, GenTree** next); bool TryLowerZextAddToAddUw(GenTreeOp* tree, GenTree** next); diff --git a/src/coreclr/jit/lowerarmarch.cpp b/src/coreclr/jit/lowerarmarch.cpp index c8f677347601cc..8b66125ab81c25 100644 --- a/src/coreclr/jit/lowerarmarch.cpp +++ b/src/coreclr/jit/lowerarmarch.cpp @@ -661,6 +661,15 @@ GenTree* Lowering::LowerBinaryArithmetic(GenTreeOp* binOp) } } + if (binOp->OperIs(GT_OR)) + { + GenTree* next; + if (TryLowerOrToBFI(binOp, &next)) + { + return next; + } + } + if (binOp->OperIs(GT_SUB)) { // Attempt to optimize for umsubl/smsubl. 
diff --git a/src/coreclr/jit/lsraarm64.cpp b/src/coreclr/jit/lsraarm64.cpp index 2637f08b81796b..64e000850c6356 100644 --- a/src/coreclr/jit/lsraarm64.cpp +++ b/src/coreclr/jit/lsraarm64.cpp @@ -871,6 +871,16 @@ int LinearScan::BuildNode(GenTree* tree) BuildDef(tree); break; + case GT_BFI: + { + tgtPrefUse = BuildUse(tree->gtGetOp1()); + srcCount = 1; + RefPosition* srcUse = nullptr; + srcCount += BuildDelayFreeUses(tree->gtGetOp2(), tree, RBM_NONE, &srcUse); + BuildDef(tree); + break; + } + case GT_BFIZ: assert(tree->gtGetOp1()->OperIs(GT_CAST)); srcCount = BuildOperandUses(tree->gtGetOp1()->gtGetOp1()); diff --git a/src/tests/Common/CoreCLRTestLibrary/CoreCLRTestLibrary.csproj b/src/tests/Common/CoreCLRTestLibrary/CoreCLRTestLibrary.csproj index 6bbebee74d7177..915e6b1ff1c632 100644 --- a/src/tests/Common/CoreCLRTestLibrary/CoreCLRTestLibrary.csproj +++ b/src/tests/Common/CoreCLRTestLibrary/CoreCLRTestLibrary.csproj @@ -7,6 +7,7 @@ + diff --git a/src/tests/Common/CoreCLRTestLibrary/Expect.cs b/src/tests/Common/CoreCLRTestLibrary/Expect.cs new file mode 100644 index 00000000000000..b9d62483d82fff --- /dev/null +++ b/src/tests/Common/CoreCLRTestLibrary/Expect.cs @@ -0,0 +1,26 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. 
+ +using System; +using System.Linq.Expressions; +using Xunit; + +namespace TestLibrary; + +public static class Expect +{ + public static void ExpectEqual(Expression> expr, T expected, ref bool fail) + { + var compiled = expr.Compile(); + T actual = compiled(); + + // Get just the expression body text + string exprText = expr.Body.ToString(); + + if (!Equals(actual, expected)) + { + Console.WriteLine($"{exprText} = {actual}, expected {expected}"); + fail = true; + } + } +} diff --git a/src/tests/JIT/opt/InstructionCombining/Bfi.cs b/src/tests/JIT/opt/InstructionCombining/Bfi.cs new file mode 100644 index 00000000000000..06369c8f5819ef --- /dev/null +++ b/src/tests/JIT/opt/InstructionCombining/Bfi.cs @@ -0,0 +1,310 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +using System; +using System.Runtime.CompilerServices; +using static TestLibrary.Expect; +using Xunit; + +namespace TestBfi +{ + public class Program + { + [MethodImpl(MethodImplOptions.NoInlining)] + [Fact] + public static int CheckBfi() + { + bool fail = false; + + ExpectEqual(() => ComposeBits_BaseAnd_Mask0(0xB, 0x1), 0xB, ref fail); + ExpectEqual(() => ComposeBits_BaseAnd_Mask1(0xB, 0x1), 0x1B, ref fail); + ExpectEqual(() => ComposeBits_BaseAnd_Mask2(0xB, 0x2), 0x2B, ref fail); + ExpectEqual(() => ComposeBits_BaseAnd_Mask3(0xB, 0x2), 0x2B, ref fail); + ExpectEqual(() => ComposeBits_BaseAnd_MaskFF(0xB, 0xFA), 0xFAB, ref fail); + ExpectEqual(() => ComposeBits_BaseAnd_Shift0(0xE, 0x1), 0x7, ref fail); + ExpectEqual(() => ComposeBits_BaseAnd_Shift31(0xE, 0x1), 0x8000000E, ref fail); + ExpectEqual(() => ComposeBits_BaseAnd_Shift31_Mask3(0xA, 0x3), 0x8000000A, ref fail); + ExpectEqual(() => ComposeBits_BaseAnd_Shift32(0xE, 0x1), 0xF, ref fail); + ExpectEqual(() => ComposeBits_BaseAnd_Overlap(0x1, 0x3), 0xD, ref fail); + ExpectEqual(() => ComposeBits_BaseAnd_Variable(0x1000, 0xC), 0xD000, ref fail); + ExpectEqual(() 
=> ComposeBits_BaseAnd_Variables(0xF, 0xA, 0x2), 0x2A, ref fail); + ExpectEqual(() => ComposeBits_BaseConstant_Mask3(0x3), 0x3A, ref fail); + ExpectEqual(() => ComposeBits_BaseConstant_Mask4(0x7), 0x4A, ref fail); + ExpectEqual(() => ComposeBits_BaseConstant_Overlap(0x2), 0x3F, ref fail); + ExpectEqual(() => ComposeBits_BaseBfi(0xB, 0x2, 0x4C), 0x132B, ref fail); + ExpectEqual(() => ComposeBits_BaseBfi_SwapOrder(0xB, 0x2, 0x4C), 0x132B, ref fail); + ExpectEqual(() => ComposeBits_BaseBfi_BfiOverlap(0xB, 0x2, 0x4C), 0x9AB, ref fail); + ExpectEqual(() => ComposeBits_BaseBfi_OverlapBfi(0xB, 0x2, 0x4C), 0x99B, ref fail); + ExpectEqual(() => ComposeBits_BaseBfi_BfiOverlapBfi(0xB, 0x2, 0x4C, 0x5), 0x59AB, ref fail); + ExpectEqual(() => ComposeBits_BaseBfi_NoBfiAfterInvalidPattern(0xB, 0x2, 0xFC, 0xFF, 0x7), 0xFFAB, ref fail); + ExpectEqual(() => ComposeBits_Pack32Values(1,0,0,0,0,0,0,0,1,1,1,1,0,1,1,1, + 1,0,1,1,0,0,1,1,1,1,0,1,0,1,0,1), 0xABCDEF01, ref fail); + + if (fail) + { + return 101; + } + return 100; + } + + [MethodImpl(MethodImplOptions.NoInlining)] + static int ComposeBits_BaseAnd_Mask0(int a, int b) + { + //ARM64-FULL-LINE: and {{w[0-9]+}}, {{w[0-9]+}}, #15 + return (a & 0xF) | ((b & 0x0) << 4); + } + + [MethodImpl(MethodImplOptions.NoInlining)] + static int ComposeBits_BaseAnd_Mask1(int a, int b) + { + //ARM64-FULL-LINE: and {{w[0-9]+}}, {{w[0-9]+}}, #15 + //ARM64-FULL-LINE: bfi {{w[0-9]+}}, {{w[0-9]+}}, #4, #1 + return (a & 0xf) | ((b & 0x1) << 4); + } + + [MethodImpl(MethodImplOptions.NoInlining)] + static int ComposeBits_BaseAnd_Mask2(int a, int b) + { + //ARM64-FULL-LINE: and {{w[0-9]+}}, {{w[0-9]+}}, #15 + //ARM64-FULL-LINE: and {{w[0-9]+}}, {{w[0-9]+}}, #2 + //ARM64-FULL-LINE: orr {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, LSL #4 + return (a & 0xf) | ((b & 0x2) << 4); + } + + [MethodImpl(MethodImplOptions.NoInlining)] + static int ComposeBits_BaseAnd_Mask3(int a, int b) + { + //ARM64-FULL-LINE: and {{w[0-9]+}}, {{w[0-9]+}}, #15 + //ARM64-FULL-LINE: bfi 
{{w[0-9]+}}, {{w[0-9]+}}, #4, #2 + return (a & 0xf) | ((b & 0x3) << 4); + } + + [MethodImpl(MethodImplOptions.NoInlining)] + static int ComposeBits_BaseAnd_MaskFF(int a, int b) + { + //AR M64-FULL-LINE: and {{w[0-9]+}}, {{w[0-9]+}}, #15 + //AR M64-FULL-LINE: bfi {{w[0-9]+}}, {{w[0-9]+}}, #4, #2 + return (a & 0xf) | ((b & 0xFF) << 4); + } + + [MethodImpl(MethodImplOptions.NoInlining)] + static uint ComposeBits_BaseAnd_Shift0(uint a, uint b) + { + //ARM64-FULL-LINE: and {{w[0-9]+}}, {{w[0-9]+}}, #7 + //ARM64-FULL-LINE: and {{w[0-9]+}}, {{w[0-9]+}}, #1 + //ARM64-FULL-LINE: orr {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} + return (a & 0x7) | (b & 0x1); + } + + [MethodImpl(MethodImplOptions.NoInlining)] + static uint ComposeBits_BaseAnd_Shift31(uint a, uint b) + { + //ARM64-FULL-LINE: and {{w[0-9]+}}, {{w[0-9]+}}, #15 + //ARM64-FULL-LINE: bfi {{w[0-9]+}}, {{w[0-9]+}}, #31, #1 + return (a & 0xf) | ((b & 0x1) << 31); + } + + [MethodImpl(MethodImplOptions.NoInlining)] + static uint ComposeBits_BaseAnd_Shift31_Mask3(uint a, uint b) + { + //ARM64-FULL-LINE: and {{w[0-9]+}}, {{w[0-9]+}}, #15 + //ARM64-FULL-LINE: and {{w[0-9]+}}, {{w[0-9]+}}, #3 + //ARM64-FULL-LINE: orr {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, LSL #31 + return (a & 0xf) | ((b & 0x3) << 31); + } + + [MethodImpl(MethodImplOptions.NoInlining)] + static uint ComposeBits_BaseAnd_Shift32(uint a, uint b) + { + //ARM64-FULL-LINE: and {{w[0-9]+}}, {{w[0-9]+}}, #15 + //ARM64-FULL-LINE: and {{w[0-9]+}}, {{w[0-9]+}}, #1 + //ARM64-FULL-LINE: orr {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} + return (a & 0xf) | ((b & 0x1) << 32); + } + + [MethodImpl(MethodImplOptions.NoInlining)] + static int ComposeBits_BaseAnd_Overlap(int a, int b) + { + //ARM64-FULL-LINE: and {{w[0-9]+}}, {{w[0-9]+}}, #7 + //ARM64-FULL-LINE: and {{w[0-9]+}}, {{w[0-9]+}}, #3 + //ARM64-FULL-LINE: orr {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, LSL #2 + return (a & 0x7) | ((b & 0x3) << 2); + } + + [MethodImpl(MethodImplOptions.NoInlining)] + static int 
ComposeBits_BaseAnd_Variable(int a, int b) + { + //ARM64-FULL-LINE: and {{w[0-9]+}}, {{w[0-9]+}}, #15 + //ARM64-FULL-LINE: orr {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, LSL #12 + return a | ((b & 0xF) << 12); + } + + [MethodImpl(MethodImplOptions.NoInlining)] + static int ComposeBits_BaseAnd_Variables(int a, int b, int c) + { + //ARM64-FULL-LINE: and {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} + //ARM64-FULL-LINE: and {{w[0-9]+}}, {{w[0-9]+}}, #3 + //ARM64-FULL-LINE: orr {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, LSL #4 + return (a & b) | ((c & 0x3) << 4); + } + + [MethodImpl(MethodImplOptions.NoInlining)] + static int ComposeBits_BaseConstant_Mask3(int a) + { + //ARM64-FULL-LINE: mov {{w[0-9]+}}, #10 + //ARM64-FULL-LINE: bfi {{w[0-9]+}}, {{w[0-9]+}}, #4, #2 + return 0xA | ((a & 0x3) << 4); + } + + [MethodImpl(MethodImplOptions.NoInlining)] + static int ComposeBits_BaseConstant_Mask4(int a) + { + //ARM64-FULL-LINE: and {{w[0-9]+}}, {{w[0-9]+}}, #4 + //ARM64-FULL-LINE: mov {{w[0-9]+}}, #10 + //ARM64-FULL-LINE: orr {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, LSL #4 + return 0xA | ((a & 0x4) << 4); + } + + [MethodImpl(MethodImplOptions.NoInlining)] + static int ComposeBits_BaseConstant_Overlap(int a) + { + //ARM64-FULL-LINE: and {{w[0-9]+}}, {{w[0-9]+}}, #3 + //ARM64-FULL-LINE: lsl {{w[0-9]+}}, {{w[0-9]+}}, #4 + //ARM64-FULL-LINE: orr {{w[0-9]+}}, {{w[0-9]+}}, #31 + return 0x1F | ((a & 0x3) << 4); + } + + [MethodImpl(MethodImplOptions.NoInlining)] + static uint ComposeBits_BaseBfi(uint a, uint b, uint c) + { + //ARM64-FULL-LINE: and {{w[0-9]+}}, {{w[0-9]+}}, #15 + //ARM64-FULL-LINE: bfi {{w[0-9]+}}, {{w[0-9]+}}, #4, #2 + //ARM64-FULL-LINE: bfi {{w[0-9]+}}, {{w[0-9]+}}, #6, #7 + return (a & 0xf) | ((b & 0x3) << 4) | ((c & 0x7F) << 6); + } + + [MethodImpl(MethodImplOptions.NoInlining)] + static uint ComposeBits_BaseBfi_SwapOrder(uint a, uint b, uint c) + { + //ARM64-FULL-LINE: and {{w[0-9]+}}, {{w[0-9]+}}, #15 + //ARM64-FULL-LINE: bfi {{w[0-9]+}}, {{w[0-9]+}}, #6, #7 + //ARM64-FULL-LINE: 
bfi {{w[0-9]+}}, {{w[0-9]+}}, #4, #2 + return (a & 0xf) | ((c & 0x7F) << 6) | ((b & 0x3) << 4); + } + + [MethodImpl(MethodImplOptions.NoInlining)] + static uint ComposeBits_BaseBfi_BfiOverlap(uint a, uint b, uint c) + { + //ARM64-FULL-LINE: and {{w[0-9]+}}, {{w[0-9]+}}, #15 + //ARM64-FULL-LINE: bfi {{w[0-9]+}}, {{w[0-9]+}}, #4, #2 + //ARM64-FULL-LINE: and {{w[0-9]+}}, {{w[0-9]+}}, #127 + //ARM64-FULL-LINE: orr {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, LSL #5 + return (a & 0xf) | ((b & 0x3) << 4) | ((c & 0x7F) << 5); + } + + [MethodImpl(MethodImplOptions.NoInlining)] + static uint ComposeBits_BaseBfi_OverlapBfi(uint a, uint b, uint c) + { + //ARM64-FULL-LINE: and {{w[0-9]+}}, {{w[0-9]+}}, #15 + //ARM64-FULL-LINE: and {{w[0-9]+}}, {{w[0-9]+}}, #3 + //ARM64-FULL-LINE: orr {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, LSL #3 + //ARM64-FULL-LINE: bfi {{w[0-9]+}}, {{w[0-9]+}}, #5, #7 + return (a & 0xf) | ((b & 0x3) << 3) | ((c & 0x7F) << 5); + } + + [MethodImpl(MethodImplOptions.NoInlining)] + static uint ComposeBits_BaseBfi_BfiOverlapBfi(uint a, uint b, uint c, uint d) + { + //ARM64-FULL-LINE: and {{w[0-9]+}}, {{w[0-9]+}}, #15 + //ARM64-FULL-LINE: bfi {{w[0-9]+}}, {{w[0-9]+}}, #4, #2 + //ARM64-FULL-LINE: and {{w[0-9]+}}, {{w[0-9]+}}, #127 + //ARM64-FULL-LINE: orr {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, LSL #5 + //ARM64-FULL-LINE: bfi {{w[0-9]+}}, {{w[0-9]+}}, #12, #3 + return (a & 0xf) | ((b & 0x3) << 4) | ((c & 0x7F) << 5) | ((d & 0x7) << 12); + } + + [MethodImpl(MethodImplOptions.NoInlining)] + static uint ComposeBits_BaseBfi_NoBfiAfterInvalidPattern(uint a, uint b, uint c, uint d, uint e) + { + //ARM64-FULL-LINE: and {{w[0-9]+}}, {{w[0-9]+}}, #15 + //ARM64-FULL-LINE: bfi {{w[0-9]+}}, {{w[0-9]+}}, #4, #2 + //ARM64-FULL-LINE: and {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} + //ARM64-FULL-LINE: orr {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, LSL #5 + //ARM64-FULL-LINE: and {{w[0-9]+}}, {{w[0-9]+}}, #7 + //ARM64-FULL-LINE: orr {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, LSL #13 + return (a & 0xf) 
| ((b & 0x3) << 4) | ((c & d) << 5) | ((e & 0x7) << 13); + } + + [MethodImpl(MethodImplOptions.NoInlining)] + static uint ComposeBits_Pack32Values( + uint b0, uint b1, uint b2, uint b3, + uint b4, uint b5, uint b6, uint b7, + uint b8, uint b9, uint b10, uint b11, + uint b12, uint b13, uint b14, uint b15, + uint b16, uint b17, uint b18, uint b19, + uint b20, uint b21, uint b22, uint b23, + uint b24, uint b25, uint b26, uint b27, + uint b28, uint b29, uint b30, uint b31) + { + //ARM64-FULL-LINE: and {{w[0-9]+}}, {{w[0-9]+}}, #1 + //ARM64-FULL-LINE: bfi {{w[0-9]+}}, {{w[0-9]+}}, #1, #1 + //ARM64-FULL-LINE: bfi {{w[0-9]+}}, {{w[0-9]+}}, #2, #1 + //ARM64-FULL-LINE: bfi {{w[0-9]+}}, {{w[0-9]+}}, #3, #1 + //ARM64-FULL-LINE: bfi {{w[0-9]+}}, {{w[0-9]+}}, #4, #1 + //ARM64-FULL-LINE: bfi {{w[0-9]+}}, {{w[0-9]+}}, #5, #1 + //ARM64-FULL-LINE: bfi {{w[0-9]+}}, {{w[0-9]+}}, #6, #1 + //ARM64-FULL-LINE: bfi {{w[0-9]+}}, {{w[0-9]+}}, #7, #1 + //ARM64-FULL-LINE: bfi {{w[0-9]+}}, {{w[0-9]+}}, #8, #1 + //ARM64-FULL-LINE: bfi {{w[0-9]+}}, {{w[0-9]+}}, #9, #1 + //ARM64-FULL-LINE: bfi {{w[0-9]+}}, {{w[0-9]+}}, #10, #1 + //ARM64-FULL-LINE: bfi {{w[0-9]+}}, {{w[0-9]+}}, #11, #1 + //ARM64-FULL-LINE: bfi {{w[0-9]+}}, {{w[0-9]+}}, #12, #1 + //ARM64-FULL-LINE: bfi {{w[0-9]+}}, {{w[0-9]+}}, #13, #1 + //ARM64-FULL-LINE: bfi {{w[0-9]+}}, {{w[0-9]+}}, #14, #1 + //ARM64-FULL-LINE: bfi {{w[0-9]+}}, {{w[0-9]+}}, #15, #1 + //ARM64-FULL-LINE: bfi {{w[0-9]+}}, {{w[0-9]+}}, #16, #1 + //ARM64-FULL-LINE: bfi {{w[0-9]+}}, {{w[0-9]+}}, #17, #1 + //ARM64-FULL-LINE: bfi {{w[0-9]+}}, {{w[0-9]+}}, #18, #1 + //ARM64-FULL-LINE: bfi {{w[0-9]+}}, {{w[0-9]+}}, #19, #1 + //ARM64-FULL-LINE: bfi {{w[0-9]+}}, {{w[0-9]+}}, #20, #1 + //ARM64-FULL-LINE: bfi {{w[0-9]+}}, {{w[0-9]+}}, #21, #1 + //ARM64-FULL-LINE: bfi {{w[0-9]+}}, {{w[0-9]+}}, #22, #1 + //ARM64-FULL-LINE: bfi {{w[0-9]+}}, {{w[0-9]+}}, #23, #1 + //ARM64-FULL-LINE: bfi {{w[0-9]+}}, {{w[0-9]+}}, #24, #1 + //ARM64-FULL-LINE: bfi {{w[0-9]+}}, {{w[0-9]+}}, #25, #1 + 
//ARM64-FULL-LINE: bfi {{w[0-9]+}}, {{w[0-9]+}}, #26, #1 + return ((b0 & 1u) << 0) | + ((b1 & 1u) << 1) | + ((b2 & 1u) << 2) | + ((b3 & 1u) << 3) | + ((b4 & 1u) << 4) | + ((b5 & 1u) << 5) | + ((b6 & 1u) << 6) | + ((b7 & 1u) << 7) | + ((b8 & 1u) << 8) | + ((b9 & 1u) << 9) | + ((b10 & 1u) << 10) | + ((b11 & 1u) << 11) | + ((b12 & 1u) << 12) | + ((b13 & 1u) << 13) | + ((b14 & 1u) << 14) | + ((b15 & 1u) << 15) | + ((b16 & 1u) << 16) | + ((b17 & 1u) << 17) | + ((b18 & 1u) << 18) | + ((b19 & 1u) << 19) | + ((b20 & 1u) << 20) | + ((b21 & 1u) << 21) | + ((b22 & 1u) << 22) | + ((b23 & 1u) << 23) | + ((b24 & 1u) << 24) | + ((b25 & 1u) << 25) | + ((b26 & 1u) << 26) | + ((b27 & 1u) << 27) | + ((b28 & 1u) << 28) | + ((b29 & 1u) << 29) | + ((b30 & 1u) << 30) | + ((b31 & 1u) << 31); + } + } +} diff --git a/src/tests/JIT/opt/InstructionCombining/Bfi.csproj b/src/tests/JIT/opt/InstructionCombining/Bfi.csproj new file mode 100644 index 00000000000000..21cac1ee0cb80d --- /dev/null +++ b/src/tests/JIT/opt/InstructionCombining/Bfi.csproj @@ -0,0 +1,18 @@ + + + + true + + + None + True + + + + true + + + + + + From 83a60044ed8af1604b7a41dff8a41a3c2a1c35be Mon Sep 17 00:00:00 2001 From: Jonathan Davies Date: Tue, 13 Jan 2026 11:41:31 +0000 Subject: [PATCH 02/13] arm64: Add GT_BFX node --- src/coreclr/jit/codegen.h | 1 + src/coreclr/jit/codegenarm64.cpp | 40 ++- src/coreclr/jit/codegenarmarch.cpp | 4 + src/coreclr/jit/compiler.h | 3 +- src/coreclr/jit/compiler.hpp | 12 + src/coreclr/jit/gentree.cpp | 21 +- src/coreclr/jit/gentree.h | 11 +- src/coreclr/jit/gtlist.h | 3 +- src/coreclr/jit/gtstructs.h | 2 +- src/coreclr/jit/lower.cpp | 83 +++++- src/coreclr/jit/lower.h | 1 + src/coreclr/jit/lowerarmarch.cpp | 19 +- src/coreclr/jit/lsraarm64.cpp | 8 + src/tests/JIT/opt/InstructionCombining/Bfx.cs | 267 ++++++++++++++++++ .../JIT/opt/InstructionCombining/Bfx.csproj | 18 ++ 15 files changed, 465 insertions(+), 28 deletions(-) create mode 100644 src/tests/JIT/opt/InstructionCombining/Bfx.cs 
create mode 100644 src/tests/JIT/opt/InstructionCombining/Bfx.csproj diff --git a/src/coreclr/jit/codegen.h b/src/coreclr/jit/codegen.h index 127fb4e12230a5..994ded82eca765 100644 --- a/src/coreclr/jit/codegen.h +++ b/src/coreclr/jit/codegen.h @@ -1258,6 +1258,7 @@ class CodeGen final : public CodeGenInterface GenCondition::Code cond, regNumber reg, ssize_t compareImm, emitAttr size, BasicBlock* target); void genCodeForBfi(GenTreeOp* tree); void genCodeForBfiz(GenTreeOp* tree); + void genCodeForBfx(GenTreeOp* tree); #endif // TARGET_ARM64 void genEHCatchRet(BasicBlock* block); diff --git a/src/coreclr/jit/codegenarm64.cpp b/src/coreclr/jit/codegenarm64.cpp index 8dc5c98b4ab076..7f418967e56a8d 100644 --- a/src/coreclr/jit/codegenarm64.cpp +++ b/src/coreclr/jit/codegenarm64.cpp @@ -5842,7 +5842,7 @@ void CodeGen::genCodeForBfi(GenTreeOp* tree) { assert(tree->OperIs(GT_BFI)); - GenTreeBfi* bfi = tree->AsBfi(); + GenTreeBfm* bfm = tree->AsBfm(); emitAttr size = emitActualTypeSize(tree); unsigned regBits = emitter::getBitWidth(size); @@ -5850,10 +5850,10 @@ void CodeGen::genCodeForBfi(GenTreeOp* tree) GenTree* base = tree->gtGetOp1(); GenTree* src = tree->gtGetOp2(); - genConsumeOperands(bfi); + genConsumeOperands(bfm); - unsigned offset = bfi->GetOffset(); - unsigned width = bfi->GetWidth(); + unsigned offset = bfm->GetOffset(); + unsigned width = bfm->GetWidth(); assert(width >= 1 && width <= regBits); assert(offset < regBits && (offset + width) <= regBits); @@ -5890,6 +5890,38 @@ void CodeGen::genCodeForBfiz(GenTreeOp* tree) genProduceReg(tree); } +//------------------------------------------------------------------------ +// genCodeForBfx: Generates the code sequence for a GenTree node that +// represents a bitfield extract. +// +// Arguments: +// tree - the bitfield extract. 
+// +void CodeGen::genCodeForBfx(GenTreeOp* tree) +{ + assert(tree->OperIs(GT_BFX)); + + GenTreeBfm* bfm = tree->AsBfm(); + emitAttr size = emitActualTypeSize(tree); + + GenTree* src = tree->gtGetOp1(); + + const unsigned bitWidth = emitter::getBitWidth(size); + const unsigned lsb = bfm->GetOffset(); + const unsigned width = bfm->GetWidth(); + + assert((bitWidth == 32) || (bitWidth == 64)); + assert(lsb < bitWidth); + assert(width > 0); + assert((lsb + width) <= bitWidth); + + genConsumeRegs(src); + + GetEmitter()->emitIns_R_R_I_I(INS_ubfx, size, tree->GetRegNum(), src->GetRegNum(), (int)lsb, (int)width); + + genProduceReg(tree); +} + //------------------------------------------------------------------------ // JumpKindToInsCond: Convert a Jump Kind to a condition. // diff --git a/src/coreclr/jit/codegenarmarch.cpp b/src/coreclr/jit/codegenarmarch.cpp index 7101d9893e2db9..a4801d94c38d86 100644 --- a/src/coreclr/jit/codegenarmarch.cpp +++ b/src/coreclr/jit/codegenarmarch.cpp @@ -336,6 +336,10 @@ void CodeGen::genCodeForTreeNode(GenTree* treeNode) case GT_BFIZ: genCodeForBfiz(treeNode->AsOp()); break; + + case GT_BFX: + genCodeForBfx(treeNode->AsOp()); + break; #endif // TARGET_ARM64 case GT_JMP: diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h index 57b2b7d956b0f6..b1f0d792cf5ecf 100644 --- a/src/coreclr/jit/compiler.h +++ b/src/coreclr/jit/compiler.h @@ -2984,7 +2984,8 @@ class Compiler GenTreeQmark* gtNewQmarkNode(var_types type, GenTree* cond, GenTreeColon* colon); #if defined(TARGET_ARM64) - GenTreeBfi* gtNewBfiNode(var_types type, GenTree* base, GenTree* src, unsigned offset, unsigned width); + GenTreeBfm* gtNewBfiNode(var_types type, GenTree* base, GenTree* src, unsigned offset, unsigned width); + GenTreeBfm* gtNewBfxNode(var_types type, GenTree* base, unsigned offset, unsigned width); #endif GenTree* gtNewLargeOperNode(genTreeOps oper, diff --git a/src/coreclr/jit/compiler.hpp b/src/coreclr/jit/compiler.hpp index 
3465a46b8dd223..baa2dc15bb3301 100644 --- a/src/coreclr/jit/compiler.hpp +++ b/src/coreclr/jit/compiler.hpp @@ -4629,6 +4629,18 @@ GenTree::VisitResult GenTree::VisitOperands(TVisitor visitor) return visitor(cond->gtOp2); } +#ifdef TARGET_ARM64 + case GT_BFX: + { + GenTree* op1 = gtGetOp1(); + if (op1 != nullptr) + { + return visitor(op1); + } + return VisitResult::Continue; + } +#endif + // Binary nodes default: assert(this->OperIsBinary()); diff --git a/src/coreclr/jit/gentree.cpp b/src/coreclr/jit/gentree.cpp index f98261f79ebe5c..16e346fcd70b17 100644 --- a/src/coreclr/jit/gentree.cpp +++ b/src/coreclr/jit/gentree.cpp @@ -7807,13 +7807,21 @@ GenTreeQmark* Compiler::gtNewQmarkNode(var_types type, GenTree* cond, GenTreeCol } #if defined(TARGET_ARM64) -GenTreeBfi* Compiler::gtNewBfiNode(var_types type, GenTree* base, GenTree* src, unsigned offset, unsigned width) +GenTreeBfm* Compiler::gtNewBfiNode(var_types type, GenTree* base, GenTree* src, unsigned offset, unsigned width) { - GenTreeBfi* result = new (this, GT_BFI) GenTreeBfi(type, base, src, offset, width); + GenTreeBfm* result = new (this, GT_BFI) GenTreeBfm(GT_BFI, type, base, src, offset, width); result->gtFlags |= (base->gtFlags | src->gtFlags) & (GTF_ALL_EFFECT); result->gtFlags &= ~GTF_SET_FLAGS; return result; } + +GenTreeBfm* Compiler::gtNewBfxNode(var_types type, GenTree* base, unsigned offset, unsigned width) +{ + GenTreeBfm* result = new (this, GT_BFX) GenTreeBfm(GT_BFX, type, base, nullptr, offset, width); + result->gtFlags |= (base->gtFlags & GTF_ALL_EFFECT); + result->gtFlags &= ~GTF_SET_FLAGS; + return result; +} #endif GenTreeIntCon* Compiler::gtNewIconNode(ssize_t value, var_types type) @@ -10518,6 +10526,15 @@ GenTreeUseEdgeIterator::GenTreeUseEdgeIterator(GenTree* node) m_advance = &GenTreeUseEdgeIterator::Terminate; return; +#ifdef TARGET_ARM64 + case GT_BFX: + assert(m_node->AsOp()->gtOp2 == nullptr); + m_edge = &m_node->AsOp()->gtOp1; + assert(*m_edge != nullptr); + m_advance = 
&GenTreeUseEdgeIterator::Terminate; + return; +#endif + // Unary operators with an optional operand case GT_FIELD_ADDR: case GT_RETURN: diff --git a/src/coreclr/jit/gentree.h b/src/coreclr/jit/gentree.h index 28f1e7809854aa..f3b3cd13a7f743 100644 --- a/src/coreclr/jit/gentree.h +++ b/src/coreclr/jit/gentree.h @@ -5968,16 +5968,19 @@ struct GenTreeQmark : public GenTreeOp }; #ifdef TARGET_ARM64 -struct GenTreeBfi : public GenTreeOp +struct GenTreeBfm : public GenTreeOp { unsigned gtOffset; unsigned gtWidth; - GenTreeBfi(var_types type, GenTree* base, GenTree* src, unsigned offset, unsigned width) - : GenTreeOp(GT_BFI, type, base, src) + GenTreeBfm(genTreeOps oper, var_types type, GenTree* base, GenTree* src, unsigned offset, unsigned width) + : GenTreeOp(oper, type, base, src) , gtOffset(offset) , gtWidth(width) { + assert((oper == GT_BFI) || (oper == GT_BFX)); + assert((oper != GT_BFX) || (src == nullptr)); + assert((oper != GT_BFI) || (src != nullptr)); } unsigned GetOffset() const @@ -5994,7 +5997,7 @@ struct GenTreeBfi : public GenTreeOp } #if DEBUGGABLE_GENTREE - GenTreeBfi() + GenTreeBfm() : GenTreeOp() { } diff --git a/src/coreclr/jit/gtlist.h b/src/coreclr/jit/gtlist.h index d4f88ee01c77f0..ca603bb4e0963d 100644 --- a/src/coreclr/jit/gtlist.h +++ b/src/coreclr/jit/gtlist.h @@ -221,8 +221,9 @@ GTNODE(OR_NOT , GenTreeOp ,0,0,GTK_BINOP|DBK_NOTHIR) GTNODE(XOR_NOT , GenTreeOp ,0,0,GTK_BINOP|DBK_NOTHIR) #ifdef TARGET_ARM64 -GTNODE(BFI , GenTreeBfi ,0,0,GTK_BINOP|GTK_EXOP|DBK_NOTHIR) // Bitfield Insert. +GTNODE(BFI , GenTreeBfm ,0,0,GTK_BINOP|GTK_EXOP|DBK_NOTHIR) // Bitfield Insert. GTNODE(BFIZ , GenTreeOp ,0,0,GTK_BINOP|DBK_NOTHIR) // Bitfield Insert in Zero. +GTNODE(BFX , GenTreeBfm ,0,0,GTK_UNOP|GTK_EXOP|DBK_NOTHIR) // Bitfield Extract. 
#endif //----------------------------------------------------------------------------- diff --git a/src/coreclr/jit/gtstructs.h b/src/coreclr/jit/gtstructs.h index c8bee7ca7b537d..f04ae1a588a418 100644 --- a/src/coreclr/jit/gtstructs.h +++ b/src/coreclr/jit/gtstructs.h @@ -95,7 +95,7 @@ GTSTRUCT_1(StoreInd , GT_STOREIND) GTSTRUCT_1(CmpXchg , GT_CMPXCHG) #ifdef TARGET_ARM64 GTSTRUCT_N(Conditional , GT_SELECT, GT_SELECT_INC, GT_SELECT_INV, GT_SELECT_NEG) -GTSTRUCT_1(Bfi , GT_BFI) +GTSTRUCT_2(Bfm , GT_BFI, GT_BFX) #else GTSTRUCT_N(Conditional , GT_SELECT) #endif //TARGET_ARM64 diff --git a/src/coreclr/jit/lower.cpp b/src/coreclr/jit/lower.cpp index 40ee24ab00229a..794cf2e787a0ec 100644 --- a/src/coreclr/jit/lower.cpp +++ b/src/coreclr/jit/lower.cpp @@ -12081,12 +12081,12 @@ bool Lowering::TryLowerOrToBFI(GenTreeOp* tree, GenTree** next) } else if (node->OperIs(GT_BFI)) { - GenTreeBfi* bfi = node->AsBfi(); - uint64_t m = bfi->GetMask() & regMask; + GenTreeBfm* bfm = node->AsBfm(); + uint64_t m = bfm->GetMask() & regMask; baseMask |= m; baseMaskKnown = true; - node = bfi->gtGetOp1(); + node = bfm->gtGetOp1(); continue; } else if (node->OperIs(GT_OR)) @@ -12118,19 +12118,19 @@ bool Lowering::TryLowerOrToBFI(GenTreeOp* tree, GenTree** next) } var_types ty = genActualType(tree->TypeGet()); - GenTreeBfi* bfi = + GenTreeBfm* bfm = comp->gtNewBfiNode(ty, bfiPattern.base, bfiPattern.value, static_cast(bfiPattern.offset), static_cast(bfiPattern.width)); - bfi->CopyCosts(tree); + bfm->CopyCosts(tree); - ContainCheckNode(bfi); + ContainCheckNode(bfm); - BlockRange().InsertBefore(tree, bfi); + BlockRange().InsertBefore(tree, bfm); LIR::Use use; if (BlockRange().TryGetUse(tree, &use)) { - use.ReplaceWith(bfi); + use.ReplaceWith(bfm); } // Remove old nodes depending on pattern kind @@ -12162,7 +12162,7 @@ bool Lowering::TryLowerOrToBFI(GenTreeOp* tree, GenTree** next) BlockRange().Remove(tree); - *next = bfi->gtNext; + *next = bfm->gtNext; return true; } @@ -12317,6 +12317,71 @@ 
bool Lowering::TryMatchOrToBfiPattern(GenTreeOp* tree, BfiPattern* result) result->width = width; return true; } + +//------------------------------------------------------------------------ +// TryLowerOrToBFX : Lower AND of left shift and constant +// +// Arguments: +// tree - pointer to the node +// next - [out] Next node to lower if this function returns true +// +// Return Value: +// false if no changes were made +// +bool Lowering::TryLowerOrToBFX(GenTreeOp* tree, GenTree** next) +{ + assert(tree->OperIs(GT_AND)); + + if (!comp->opts.OptimizationEnabled()) + { + return false; + } + + GenTree* shift = tree->gtGetOp1(); + GenTree* andConst = tree->gtGetOp2(); + if (!shift->OperIs(GT_RSH, GT_RSZ) || !andConst->IsIntegralConst()) + { + return false; + } + + GenTree* shiftVar = shift->gtGetOp1(); + GenTree* shiftConst = shift->gtGetOp2(); + if (!shiftConst->IsIntegralConst()) + { + return false; + } + + uint64_t mask = (uint64_t)andConst->AsIntConCommon()->IntegralValue(); + uint64_t shiftVal = (uint64_t)shiftConst->AsIntConCommon()->IntegralValue(); + if ((mask & (mask + 1)) != 0) + { + return false; + } + + uint64_t width = (uint64_t)BitOperations::PopCount(mask); + uint64_t offset = (uint64_t)shiftVal; + var_types ty = genActualType(tree->TypeGet()); + GenTreeBfm* bfm = comp->gtNewBfxNode(ty, shiftVar, static_cast(offset), static_cast(width)); + bfm->CopyCosts(tree); + + ContainCheckNode(bfm); + + BlockRange().InsertBefore(tree, bfm); + + LIR::Use use; + if (BlockRange().TryGetUse(tree, &use)) + { + use.ReplaceWith(bfm); + } + + BlockRange().Remove(shiftConst); + BlockRange().Remove(shift); + BlockRange().Remove(andConst); + BlockRange().Remove(tree); + + *next = bfm->gtNext; + return true; +} #endif //------------------------------------------------------------------------ diff --git a/src/coreclr/jit/lower.h b/src/coreclr/jit/lower.h index 33eb39d8a3cc04..d4ad89b30c5808 100644 --- a/src/coreclr/jit/lower.h +++ b/src/coreclr/jit/lower.h @@ -123,6 +123,7 @@ 
class Lowering final : public Phase uint64_t width; }; bool TryMatchOrToBfiPattern(GenTreeOp* orTree, BfiPattern* result); + bool TryLowerOrToBFX(GenTreeOp* tree, GenTree** next); #endif #ifdef TARGET_RISCV64 bool TryLowerShiftAddToShxadd(GenTreeOp* tree, GenTree** next); diff --git a/src/coreclr/jit/lowerarmarch.cpp b/src/coreclr/jit/lowerarmarch.cpp index 8b66125ab81c25..1c1ce2bc02ebd6 100644 --- a/src/coreclr/jit/lowerarmarch.cpp +++ b/src/coreclr/jit/lowerarmarch.cpp @@ -659,14 +659,21 @@ GenTree* Lowering::LowerBinaryArithmetic(GenTreeOp* binOp) { return next; } - } - if (binOp->OperIs(GT_OR)) - { - GenTree* next; - if (TryLowerOrToBFI(binOp, &next)) + if (binOp->OperIs(GT_AND)) { - return next; + if (TryLowerOrToBFX(binOp, &next)) + { + return next; + } + } + else + { + assert(binOp->OperIs(GT_OR)); + if (TryLowerOrToBFI(binOp, &next)) + { + return next; + } } } diff --git a/src/coreclr/jit/lsraarm64.cpp b/src/coreclr/jit/lsraarm64.cpp index 64e000850c6356..e750af561636f8 100644 --- a/src/coreclr/jit/lsraarm64.cpp +++ b/src/coreclr/jit/lsraarm64.cpp @@ -887,6 +887,14 @@ int LinearScan::BuildNode(GenTree* tree) BuildDef(tree); break; + case GT_BFX: + { + srcCount = BuildOperandUses(tree->gtGetOp1()); + assert(dstCount == 1); + BuildDef(tree); + break; + } + case GT_RETURNTRAP: // this just turns into a compare of its child with an int // + a conditional call diff --git a/src/tests/JIT/opt/InstructionCombining/Bfx.cs b/src/tests/JIT/opt/InstructionCombining/Bfx.cs new file mode 100644 index 00000000000000..fe22a3753af2c6 --- /dev/null +++ b/src/tests/JIT/opt/InstructionCombining/Bfx.cs @@ -0,0 +1,267 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. 
+ +using System; +using System.Runtime.CompilerServices; +using static TestLibrary.Expect; +using Xunit; + +namespace TestBfx +{ + public class Program + { + [MethodImpl(MethodImplOptions.NoInlining)] + [Fact] + public static int CheckBfx() + { + bool fail = false; + + ExpectEqual(() => ExtractBits_Int_NoShift(0x7F654321), 0x1, ref fail); + ExpectEqual(() => ExtractBits_Int_Shift(0x7F654321), 0xC, ref fail); + ExpectEqual(() => ExtractBits_Int_Shift_Multiple(0x7F654321), 0x15C, ref fail); + ExpectEqual(() => ExtractBits_Int_Shift_Non_Continous_Mask(0x7F654321), 0x43, ref fail); + ExpectEqual(() => ExtractBits_Int_Shift_Mask0xFF(0x7F654321), 0xC, ref fail); + ExpectEqual(() => ExtractBits_Int_Shift_Mask0xFFFF(0x7F654321), 0x950C, ref fail); + ExpectEqual(() => ExtractBits_UInt_NoShift(0xFEDCBA98u), 0x98u, ref fail); + ExpectEqual(() => ExtractBits_UInt_Shift(0xFEDCBA98u), 0x3FBu, ref fail); + ExpectEqual(() => ExtractBits_UInt_Shift_Multiple(0xFEDCBA98u), 0x1A18u, ref fail); + ExpectEqual(() => ExtractBits_UInt_Shift_Non_Continous_Mask(0xFEDCBA98u), 0x4, ref fail); + ExpectEqual(() => ExtractBits_UInt_Shift_Mask0xFF(0xFEDCBA98u), 0xEAu, ref fail); + ExpectEqual(() => ExtractBits_UInt_Shift_Mask0xFFFF(0xFEDCBA98u), 0x72EAu, ref fail); + ExpectEqual(() => ExtractBits_Long_NoShift(0x7FFFEDCBA9876543L), 0x6543L, ref fail); + ExpectEqual(() => ExtractBits_Long_Shift(0x7FFFEDCBA9876543L), 0x1D95L, ref fail); + ExpectEqual(() => ExtractBits_Long_Shift_Multiple(0x7FFFEDCBA9876543L), 0x47F6EL, ref fail); + ExpectEqual(() => ExtractBits_Long_Shift_Non_Continous_Mask(0x7FFFEDCBA9876543L), 0x14L, ref fail); + ExpectEqual(() => ExtractBits_Long_Shift_Mask0xFF(0x7FFFEDCBA9876543L), 0x95L, ref fail); + ExpectEqual(() => ExtractBits_Long_Shift_Mask0xFFFF(0x7FFFEDCBA9876543L), 0x1D95L, ref fail); + ExpectEqual(() => ExtractBits_Long_Shift_Mask0xFFFFFFFF(0x7FFFEDCBA9876543L), 0x2EA61D95L, ref fail); + ExpectEqual(() => 
ExtractBits_Long_Shift_Mask0xFFFFFFFFFFFFFFFF(0x7FFFEDCBA9876543L), 0x1FFFFB72EA61D95L, ref fail); + ExpectEqual(() => ExtractBits_ULong_NoShift(0xFFFEEDCBA9876543UL), 0x3, ref fail); + ExpectEqual(() => ExtractBits_ULong_Shift(0xFFFEEDCBA9876543UL), 0x261D95UL, ref fail); + ExpectEqual(() => ExtractBits_ULong_Shift_Multiple(0xFFFEEDCBA9876543UL), 0x1107F6EUL, ref fail); + ExpectEqual(() => ExtractBits_ULong_Shift_Non_Continous_Mask(0xFFFEEDCBA9876543UL), 0x8UL, ref fail); + ExpectEqual(() => ExtractBits_ULong_Shift_Mask0xFF(0xFFFEEDCBA9876543UL), 0x95UL, ref fail); + ExpectEqual(() => ExtractBits_ULong_Shift_Mask0xFFFF(0xFFFEEDCBA9876543UL), 0x1D95UL, ref fail); + ExpectEqual(() => ExtractBits_ULong_Shift_Mask0xFFFFFFFF(0xFFFEEDCBA9876543UL), 0xCBA98765, ref fail); + + if (fail) + { + return 101; + } + return 100; + } + + [MethodImpl(MethodImplOptions.NoInlining)] + static int ExtractBits_Int_NoShift(int x) + { + //ARM64-FULL-LINE: and {{w[0-9]+}}, {{w[0-9]+}}, #31 + return x & 0x1F; + } + + [MethodImpl(MethodImplOptions.NoInlining)] + static int ExtractBits_Int_Shift(int x) + { + //ARM64-FULL-LINE: ubfx {{w[0-9]+}}, {{w[0-9]+}}, #6, #6 + return (x >> 6) & 0x3F; + } + + [MethodImpl(MethodImplOptions.NoInlining)] + static int ExtractBits_Int_Shift_Multiple(int x) + { + //ARM64-FULL-LINE: ubfx {{w[0-9]+}}, {{w[0-9]+}}, #6, #7 + //ARM64-FULL-LINE: ubfx {{w[0-9]+}}, {{w[0-9]+}}, #10, #9 + int a = (x >> 6) & 0x7F; + int b = (x >> 10) & 0x1FF; + return a + b; + } + + [MethodImpl(MethodImplOptions.NoInlining)] + static int ExtractBits_Int_Shift_Non_Continous_Mask(int x) + { + //ARM64-FULL-LINE: mov {{w[0-9]+}}, #243 + //ARM64-FULL-LINE: and {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, ASR #8 + return (x >> 8) & 0xF3; + } + + [MethodImpl(MethodImplOptions.NoInlining)] + static int ExtractBits_Int_Shift_Mask0xFF(int x) + { + //ARM64-FULL-LINE: asr {{w[0-9]+}}, {{w[0-9]+}}, #6 + //ARM64-FULL-LINE: uxtb {{w[0-9]+}}, {{w[0-9]+}} + return (x >> 6) & 0xFF; + } + + 
[MethodImpl(MethodImplOptions.NoInlining)] + static int ExtractBits_Int_Shift_Mask0xFFFF(int x) + { + //ARM64-FULL-LINE: lsr {{w[0-9]+}}, {{w[0-9]+}}, #6 + //ARM64-FULL-LINE: uxth {{w[0-9]+}}, {{w[0-9]+}} + return (int)(((uint)x >> 6) & 0xFFFF); + } + + [MethodImpl(MethodImplOptions.NoInlining)] + static uint ExtractBits_UInt_NoShift(uint x) + { + //ARM64-FULL-LINE: and {{w[0-9]+}}, {{w[0-9]+}}, #511 + return x & 0x1FF; + } + + [MethodImpl(MethodImplOptions.NoInlining)] + static uint ExtractBits_UInt_Shift(uint x) + { + //ARM64-FULL-LINE: ubfx {{w[0-9]+}}, {{w[0-9]+}}, #22, #10 + return (x >> 22) & 0x3FF; + } + + [MethodImpl(MethodImplOptions.NoInlining)] + static uint ExtractBits_UInt_Shift_Multiple(uint x) + { + //ARM64-FULL-LINE: ubfx {{w[0-9]+}}, {{w[0-9]+}}, #6, #12 + //ARM64-FULL-LINE: ubfx {{w[0-9]+}}, {{w[0-9]+}}, #10, #13 + uint a = (x >> 6) & 0xFFF; + uint b = (x >> 10) & 0x1FFF; + return a + b; + } + + [MethodImpl(MethodImplOptions.NoInlining)] + static uint ExtractBits_UInt_Shift_Non_Continous_Mask(uint x) + { + //ARM64-FULL-LINE: mov {{w[0-9]+}}, #5 + //ARM64-FULL-LINE: and {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, LSR #24 + return (x >> 24) & 0x5; + } + + [MethodImpl(MethodImplOptions.NoInlining)] + static uint ExtractBits_UInt_Shift_Mask0xFF(uint x) + { + //ARM64-FULL-LINE: lsr {{w[0-9]+}}, {{w[0-9]+}}, #6 + //ARM64-FULL-LINE: uxtb {{w[0-9]+}}, {{w[0-9]+}} + return (x >> 6) & 0xFF; + } + + [MethodImpl(MethodImplOptions.NoInlining)] + static uint ExtractBits_UInt_Shift_Mask0xFFFF(uint x) + { + //ARM64-FULL-LINE: lsr {{w[0-9]+}}, {{w[0-9]+}}, #6 + //ARM64-FULL-LINE: uxth {{w[0-9]+}}, {{w[0-9]+}} + return (x >> 6) & 0xFFFF; + } + + [MethodImpl(MethodImplOptions.NoInlining)] + static long ExtractBits_Long_NoShift(long x) + { + //ARM64-FULL-LINE: uxth {{w[0-9]+}}, {{w[0-9]+}} + return x & 0xFFFF; + } + + [MethodImpl(MethodImplOptions.NoInlining)] + static long ExtractBits_Long_Shift(long x) + { + //ARM64-FULL-LINE: ubfx {{x[0-9]+}}, {{x[0-9]+}}, #6, #17 + 
return (x >> 6) & 0x1FFFF; + } + + [MethodImpl(MethodImplOptions.NoInlining)] + static long ExtractBits_Long_Shift_Multiple(long x) + { + //ARM64-FULL-LINE: ubfx {{x[0-9]+}}, {{x[0-9]+}}, #6, #18 + //ARM64-FULL-LINE: ubfx {{x[0-9]+}}, {{x[0-9]+}}, #10, #19 + long a = (x >> 6) & 0x3FFFF; + long b = (x >> 10) & 0x7FFFF; + return a + b; + } + + [MethodImpl(MethodImplOptions.NoInlining)] + static long ExtractBits_Long_Shift_Non_Continous_Mask(long x) + { + //ARM64-FULL-LINE: mov {{x[0-9]+}}, #20 + //ARM64-FULL-LINE: and {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}, ASR #12 + return (x >> 12) & 0x14; + } + + [MethodImpl(MethodImplOptions.NoInlining)] + static long ExtractBits_Long_Shift_Mask0xFF(long x) + { + //ARM64-FULL-LINE: asr {{x[0-9]+}}, {{x[0-9]+}}, #6 + //ARM64-FULL-LINE: uxtb {{w[0-9]+}}, {{w[0-9]+}} + return (x >> 6) & 0xFF; + } + + [MethodImpl(MethodImplOptions.NoInlining)] + static long ExtractBits_Long_Shift_Mask0xFFFF(long x) + { + //ARM64-FULL-LINE: asr {{x[0-9]+}}, {{x[0-9]+}}, #6 + //ARM64-FULL-LINE: uxth {{w[0-9]+}}, {{w[0-9]+}} + return (x >> 6) & 0xFFFF; + } + + [MethodImpl(MethodImplOptions.NoInlining)] + static long ExtractBits_Long_Shift_Mask0xFFFFFFFF(long x) + { + //ARM64-FULL-LINE: asr {{x[0-9]+}}, {{x[0-9]+}}, #6 + return (x >> 6) & 0xFFFFFFFFL; + } + + [MethodImpl(MethodImplOptions.NoInlining)] + static long ExtractBits_Long_Shift_Mask0xFFFFFFFFFFFFFFFF(long x) + { + //ARM64-FULL-LINE: lsr {{x[0-9]+}}, {{x[0-9]+}}, #6 + return (long)(((ulong)x >> 6) & 0xFFFFFFFFFFFFFFFFUL); + } + + [MethodImpl(MethodImplOptions.NoInlining)] + static ulong ExtractBits_ULong_NoShift(ulong x) + { + //ARM64-FULL-LINE: and {{x[0-9]+}}, {{x[0-9]+}}, #15 + return x & 0xF; + } + + [MethodImpl(MethodImplOptions.NoInlining)] + static ulong ExtractBits_ULong_Shift(ulong x) + { + //ARM64-FULL-LINE: ubfx {{x[0-9]+}}, {{x[0-9]+}}, #6, #22 + return (x >> 6) & 0x3FFFFF; + } + + [MethodImpl(MethodImplOptions.NoInlining)] + static ulong ExtractBits_ULong_Shift_Multiple(ulong x) + { 
+ //ARM64-FULL-LINE: ubfx {{x[0-9]+}}, {{x[0-9]+}}, #6, #23 + //ARM64-FULL-LINE: ubfx {{x[0-9]+}}, {{x[0-9]+}}, #10, #24 + ulong a = (x >> 6) & 0x7FFFFF; + ulong b = (x >> 10) & 0xFFFFFF; + return a + b; + } + + [MethodImpl(MethodImplOptions.NoInlining)] + static ulong ExtractBits_ULong_Shift_Non_Continous_Mask(ulong x) + { + //ARM64-FULL-LINE: mov {{x[0-9]+}}, #204 + //ARM64-FULL-LINE: and {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}, LSR #5 + return (x >> 5) & 0xCC; + } + + [MethodImpl(MethodImplOptions.NoInlining)] + static ulong ExtractBits_ULong_Shift_Mask0xFF(ulong x) + { + //ARM64-FULL-LINE: lsr {{x[0-9]+}}, {{x[0-9]+}}, #6 + //ARM64-FULL-LINE: uxtb {{w[0-9]+}}, {{w[0-9]+}} + return (x >> 6) & 0xFF; + } + + [MethodImpl(MethodImplOptions.NoInlining)] + static ulong ExtractBits_ULong_Shift_Mask0xFFFF(ulong x) + { + //ARM64-FULL-LINE: lsr {{x[0-9]+}}, {{x[0-9]+}}, #6 + //ARM64-FULL-LINE: uxth {{w[0-9]+}}, {{w[0-9]+}} + return (x >> 6) & 0xFFFF; + } + + [MethodImpl(MethodImplOptions.NoInlining)] + static ulong ExtractBits_ULong_Shift_Mask0xFFFFFFFF(ulong x) + { + //ARM64-FULL-LINE: lsr {{x[0-9]+}}, {{x[0-9]+}}, #8 + return (x >> 8) & 0xFFFFFFFFUL; + } + } +} diff --git a/src/tests/JIT/opt/InstructionCombining/Bfx.csproj b/src/tests/JIT/opt/InstructionCombining/Bfx.csproj new file mode 100644 index 00000000000000..48ceb6f3570323 --- /dev/null +++ b/src/tests/JIT/opt/InstructionCombining/Bfx.csproj @@ -0,0 +1,18 @@ + + + + true + + + None + True + + + + true + + + + + + From 1f98a7992a4340adbfd415b0c0e71b7c0dd9f85f Mon Sep 17 00:00:00 2001 From: Jonathan Davies Date: Thu, 29 Jan 2026 10:39:59 +0000 Subject: [PATCH 03/13] Fix bug when creating a Bfx node if the width + offset > bitwidth - Remove Expect.cs --- src/coreclr/jit/lower.cpp | 14 +- .../CoreCLRTestLibrary.csproj | 1 - src/tests/Common/CoreCLRTestLibrary/Expect.cs | 26 - src/tests/JIT/opt/InstructionCombining/Bfi.cs | 1375 ++++++++++++++++- .../JIT/opt/InstructionCombining/Bfi.csproj | 1 - 
src/tests/JIT/opt/InstructionCombining/Bfx.cs | 267 +++- .../JIT/opt/InstructionCombining/Bfx.csproj | 1 - 7 files changed, 1555 insertions(+), 130 deletions(-) delete mode 100644 src/tests/Common/CoreCLRTestLibrary/Expect.cs diff --git a/src/coreclr/jit/lower.cpp b/src/coreclr/jit/lower.cpp index 794cf2e787a0ec..45991bdc36fbd2 100644 --- a/src/coreclr/jit/lower.cpp +++ b/src/coreclr/jit/lower.cpp @@ -12358,10 +12358,16 @@ bool Lowering::TryLowerOrToBFX(GenTreeOp* tree, GenTree** next) return false; } - uint64_t width = (uint64_t)BitOperations::PopCount(mask); - uint64_t offset = (uint64_t)shiftVal; - var_types ty = genActualType(tree->TypeGet()); - GenTreeBfm* bfm = comp->gtNewBfxNode(ty, shiftVar, static_cast(offset), static_cast(width)); + uint64_t width = (uint64_t)BitOperations::PopCount(mask); + uint64_t offset = (uint64_t)shiftVal; + var_types ty = genActualType(tree->TypeGet()); + uint64_t bitWidth = genTypeSize(ty) * BITS_PER_BYTE; + if ((width > bitWidth) || (offset >= bitWidth) || ((offset + width) > bitWidth)) + { + return false; + } + + GenTreeBfm* bfm = comp->gtNewBfxNode(ty, shiftVar, static_cast(offset), static_cast(width)); bfm->CopyCosts(tree); ContainCheckNode(bfm); diff --git a/src/tests/Common/CoreCLRTestLibrary/CoreCLRTestLibrary.csproj b/src/tests/Common/CoreCLRTestLibrary/CoreCLRTestLibrary.csproj index 915e6b1ff1c632..6bbebee74d7177 100644 --- a/src/tests/Common/CoreCLRTestLibrary/CoreCLRTestLibrary.csproj +++ b/src/tests/Common/CoreCLRTestLibrary/CoreCLRTestLibrary.csproj @@ -7,7 +7,6 @@ - diff --git a/src/tests/Common/CoreCLRTestLibrary/Expect.cs b/src/tests/Common/CoreCLRTestLibrary/Expect.cs deleted file mode 100644 index b9d62483d82fff..00000000000000 --- a/src/tests/Common/CoreCLRTestLibrary/Expect.cs +++ /dev/null @@ -1,26 +0,0 @@ -// Licensed to the .NET Foundation under one or more agreements. -// The .NET Foundation licenses this file to you under the MIT license. 
- -using System; -using System.Linq.Expressions; -using Xunit; - -namespace TestLibrary; - -public static class Expect -{ - public static void ExpectEqual(Expression> expr, T expected, ref bool fail) - { - var compiled = expr.Compile(); - T actual = compiled(); - - // Get just the expression body text - string exprText = expr.Body.ToString(); - - if (!Equals(actual, expected)) - { - Console.WriteLine($"{exprText} = {actual}, expected {expected}"); - fail = true; - } - } -} diff --git a/src/tests/JIT/opt/InstructionCombining/Bfi.cs b/src/tests/JIT/opt/InstructionCombining/Bfi.cs index 06369c8f5819ef..71ec9c9c640027 100644 --- a/src/tests/JIT/opt/InstructionCombining/Bfi.cs +++ b/src/tests/JIT/opt/InstructionCombining/Bfi.cs @@ -1,9 +1,7 @@ // Licensed to the .NET Foundation under one or more agreements. // The .NET Foundation licenses this file to you under the MIT license. -using System; using System.Runtime.CompilerServices; -using static TestLibrary.Expect; using Xunit; namespace TestBfi @@ -16,29 +14,478 @@ public static int CheckBfi() { bool fail = false; - ExpectEqual(() => ComposeBits_BaseAnd_Mask0(0xB, 0x1), 0xB, ref fail); - ExpectEqual(() => ComposeBits_BaseAnd_Mask1(0xB, 0x1), 0x1B, ref fail); - ExpectEqual(() => ComposeBits_BaseAnd_Mask2(0xB, 0x2), 0x2B, ref fail); - ExpectEqual(() => ComposeBits_BaseAnd_Mask3(0xB, 0x2), 0x2B, ref fail); - ExpectEqual(() => ComposeBits_BaseAnd_MaskFF(0xB, 0xFA), 0xFAB, ref fail); - ExpectEqual(() => ComposeBits_BaseAnd_Shift0(0xE, 0x1), 0x7, ref fail); - ExpectEqual(() => ComposeBits_BaseAnd_Shift31(0xE, 0x1), 0x8000000E, ref fail); - ExpectEqual(() => ComposeBits_BaseAnd_Shift31_Mask3(0xA, 0x3), 0x8000000A, ref fail); - ExpectEqual(() => ComposeBits_BaseAnd_Shift32(0xE, 0x1), 0xF, ref fail); - ExpectEqual(() => ComposeBits_BaseAnd_Overlap(0x1, 0x3), 0xD, ref fail); - ExpectEqual(() => ComposeBits_BaseAnd_Variable(0x1000, 0xC), 0xD000, ref fail); - ExpectEqual(() => ComposeBits_BaseAnd_Variables(0xF, 0xA, 0x2), 0x2A, ref 
fail); - ExpectEqual(() => ComposeBits_BaseConstant_Mask3(0x3), 0x3A, ref fail); - ExpectEqual(() => ComposeBits_BaseConstant_Mask4(0x7), 0x4A, ref fail); - ExpectEqual(() => ComposeBits_BaseConstant_Overlap(0x2), 0x3F, ref fail); - ExpectEqual(() => ComposeBits_BaseBfi(0xB, 0x2, 0x4C), 0x132B, ref fail); - ExpectEqual(() => ComposeBits_BaseBfi_SwapOrder(0xB, 0x2, 0x4C), 0x132B, ref fail); - ExpectEqual(() => ComposeBits_BaseBfi_BfiOverlap(0xB, 0x2, 0x4C), 0x9AB, ref fail); - ExpectEqual(() => ComposeBits_BaseBfi_OverlapBfi(0xB, 0x2, 0x4C), 0x99B, ref fail); - ExpectEqual(() => ComposeBits_BaseBfi_BfiOverlapBfi(0xB, 0x2, 0x4C, 0x5), 0x59AB, ref fail); - ExpectEqual(() => ComposeBits_BaseBfi_NoBfiAfterInvalidPattern(0xB, 0x2, 0xFC, 0xFF, 0x7), 0xFFAB, ref fail); - ExpectEqual(() => ComposeBits_Pack32Values(1,0,0,0,0,0,0,0,1,1,1,1,0,1,1,1, - 1,0,1,1,0,0,1,1,1,1,0,1,0,1,0,1), 0xABCDEF01, ref fail); + // Int + + if (ComposeBits_BaseAnd_Mask0_Int(0xB, 0x1) != 0xB) + { + fail = true; + } + + if (ComposeBits_BaseAnd_Mask1_Int(0xB, 0x1) != 0x1B) + { + fail = true; + } + + if (ComposeBits_BaseAnd_Mask2_Int(0xB, 0x2) != 0x2B) + { + fail = true; + } + + if (ComposeBits_BaseAnd_Mask3_Int(0xB, 0x2) != 0x2B) + { + fail = true; + } + + if (ComposeBits_BaseAnd_Shift0_Int(0xE, 0x1) != 0x7) + { + fail = true; + } + + if (ComposeBits_BaseAnd_Shift31_Int(0xE, 0x1) != unchecked((int)0x8000000E)) + { + fail = true; + } + + if (ComposeBits_BaseAnd_Shift31_Mask3_Int(0xA, 0x3) != unchecked((int)0x8000000A)) + { + fail = true; + } + + if (ComposeBits_BaseAnd_Shift32_Int(0xE, 0x1) != 0xF) + { + fail = true; + } + + if (ComposeBits_BaseAnd_Shift28_MaskF_MaxWidth_Int(0xE, 0xF) != unchecked((int)0xF000000E)) + { + fail = true; + } + + if (ComposeBits_BaseAnd_Shift28_Mask1F_TooWide_Int(0xE, 0x1F) != unchecked((int)0xF000000E)) + { + fail = true; + } + + if (ComposeBits_BaseAnd_Overlap_Int(0x1, 0x3) != 0xD) + { + fail = true; + } + + if (ComposeBits_BaseAnd_Variable_Int(0x1000, 0xC) != 0xD000) + 
{ + fail = true; + } + + if (ComposeBits_BaseAnd_Variables_Int(0xF, 0xA, 0x2) != 0x2A) + { + fail = true; + } + + if (ComposeBits_BaseConstant_Mask3_Int(0x3) != 0x3A) + { + fail = true; + } + + if (ComposeBits_BaseConstant_Mask4_Int(0x7) != 0x4A) + { + fail = true; + } + + if (ComposeBits_BaseConstant_Overlap_Int(0x2) != 0x3F) + { + fail = true; + } + + if (ComposeBits_BaseBfi_Int(0xB, 0x2, 0x4C) != 0x132B) + { + fail = true; + } + + if (ComposeBits_BaseBfi_SwapOrder_Int(0xB, 0x2, 0x4C) != 0x132B) + { + fail = true; + } + + if (ComposeBits_BaseBfi_BfiOverlap_Int(0xB, 0x2, 0x4C) != 0x9AB) + { + fail = true; + } + + if (ComposeBits_BaseBfi_OverlapBfi_Int(0xB, 0x2, 0x4C) != 0x99B) + { + fail = true; + } + + if (ComposeBits_BaseBfi_BfiOverlapBfi_Int(0xB, 0x2, 0x4C, 0x5) != 0x59AB) + { + fail = true; + } + + if (ComposeBits_Pack32Values_Int(1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 1, 1, 1, + 1, 0, 1, 1, 0, 0, 1, 1, 1, 1, 0, 1, 0, 1, 0, 1) != -0x543210ff) + { + fail = true; + } + + + // UInt + + if (ComposeBits_BaseAnd_Mask0_UInt(0xBu, 0x1u) != 0xBu) + { + fail = true; + } + + if (ComposeBits_BaseAnd_Mask1_UInt(0xBu, 0x1u) != 0x1Bu) + { + fail = true; + } + + if (ComposeBits_BaseAnd_Mask2_UInt(0xBu, 0x2u) != 0x2Bu) + { + fail = true; + } + + if (ComposeBits_BaseAnd_Mask3_UInt(0xBu, 0x2u) != 0x2Bu) + { + fail = true; + } + + if (ComposeBits_BaseAnd_Shift0_UInt(0xEu, 0x1u) != 0x7u) + { + fail = true; + } + + if (ComposeBits_BaseAnd_Shift31_UInt(0xEu, 0x1u) != 0x8000000Eu) + { + fail = true; + } + + if (ComposeBits_BaseAnd_Shift31_Mask3_UInt(0xAu, 0x3u) != 0x8000000Au) + { + fail = true; + } + + if (ComposeBits_BaseAnd_Shift32_UInt(0xEu, 0x1u) != 0xFu) + { + fail = true; + } + + if (ComposeBits_BaseAnd_Shift28_MaskF_MaxWidth_UInt(0xEu, 0xFu) != 0xF000000Eu) + { + fail = true; + } + + if (ComposeBits_BaseAnd_Shift28_Mask1F_TooWide_UInt(0xEu, 0x1Fu) != 0xF000000Eu) + { + fail = true; + } + + if (ComposeBits_BaseAnd_Overlap_UInt(0x1u, 0x3u) != 0xDu) + { + fail = true; + } + + if 
(ComposeBits_BaseAnd_Variable_UInt(0x1000u, 0xCu) != 0xD000u) + { + fail = true; + } + + if (ComposeBits_BaseAnd_Variables_UInt(0xFu, 0xAu, 0x2u) != 0x2Au) + { + fail = true; + } + + if (ComposeBits_BaseConstant_Mask3_UInt(0x3u) != 0x3Au) + { + fail = true; + } + + if (ComposeBits_BaseConstant_Mask4_UInt(0x7u) != 0x4Au) + { + fail = true; + } + + if (ComposeBits_BaseConstant_Overlap_UInt(0x2u) != 0x3Fu) + { + fail = true; + } + + if (ComposeBits_BaseBfi_UInt(0xBu, 0x2u, 0x4Cu) != 0x132Bu) + { + fail = true; + } + + if (ComposeBits_BaseBfi_SwapOrder_UInt(0xBu, 0x2u, 0x4Cu) != 0x132Bu) + { + fail = true; + } + + if (ComposeBits_BaseBfi_BfiOverlap_UInt(0xBu, 0x2u, 0x4Cu) != 0x9ABu) + { + fail = true; + } + + if (ComposeBits_BaseBfi_OverlapBfi_UInt(0xBu, 0x2u, 0x4Cu) != 0x99Bu) + { + fail = true; + } + + if (ComposeBits_BaseBfi_BfiOverlapBfi_UInt(0xBu, 0x2u, 0x4Cu, 0x5u) != 0x59ABu) + { + fail = true; + } + + if (ComposeBits_Pack32Values_UInt(1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 1, 1, 1, + 1, 0, 1, 1, 0, 0, 1, 1, 1, 1, 0, 1, 0, 1, 0, 1) != 0xABCDEF01u) + { + fail = true; + } + + // Long + + if (ComposeBits_BaseAnd_Mask0_Long(0xBL, 0x1L) != 0xBL) + { + fail = true; + } + + if (ComposeBits_BaseAnd_Mask1_Long(0xBL, 0x1L) != 0x1BL) + { + fail = true; + } + + if (ComposeBits_BaseAnd_Mask2_Long(0xBL, 0x2L) != 0x2BL) + { + fail = true; + } + + if (ComposeBits_BaseAnd_Mask3_Long(0xBL, 0x2L) != 0x2BL) + { + fail = true; + } + + if (ComposeBits_BaseAnd_Shift0_Long(0xEL, 0x1L) != 0x7L) + { + fail = true; + } + + if (ComposeBits_BaseAnd_Shift31_Long(0xEL, 0x1L) != 0x8000000EL) + { + fail = true; + } + + if (ComposeBits_BaseAnd_Shift31_Mask3_Long(0xAL, 0x3L) != 0x18000000AL) + { + fail = true; + } + + if (ComposeBits_BaseAnd_Shift32_Long(0xEL, 0x1L) != 0x10000000EL) + { + fail = true; + } + + if (ComposeBits_BaseAnd_Shift28_MaskF_MaxWidth_Long(0xEL, 0xFL) != 0xF000000EL) + { + fail = true; + } + + if (ComposeBits_BaseAnd_Shift28_Mask1F_TooWide_Long(0xEL, 0x1FL) != 0x1F000000EL) + 
{ + fail = true; + } + + if (ComposeBits_BaseAnd_Shift60_MaskF_MaxWidth_Long(0xEL, 0xFL) != unchecked((long)0xF00000000000000EUL)) + { + fail = true; + } + + if (ComposeBits_BaseAnd_Shift60_Mask1F_TooWide_Long(0xEL, 0x1FL) != unchecked((long)0xF00000000000000EUL)) + { + fail = true; + } + + if (ComposeBits_BaseAnd_Overlap_Long(0x1L, 0x3L) != 0xDL) + { + fail = true; + } + + if (ComposeBits_BaseAnd_Variable_Long(0x1000L, 0xCL) != 0xD000L) + { + fail = true; + } + + if (ComposeBits_BaseAnd_Variables_Long(0xFL, 0xAL, 0x2L) != 0x2AL) + { + fail = true; + } + + if (ComposeBits_BaseConstant_Mask3_Long(0x3L) != 0x3AL) + { + fail = true; + } + + if (ComposeBits_BaseConstant_Mask4_Long(0x7L) != 0x4AL) + { + fail = true; + } + + if (ComposeBits_BaseConstant_Overlap_Long(0x2L) != 0x3FL) + { + fail = true; + } + + if (ComposeBits_BaseBfi_Long(0xBL, 0x2L, 0x4CL) != 0x132BL) + { + fail = true; + } + + if (ComposeBits_BaseBfi_SwapOrder_Long(0xBL, 0x2L, 0x4CL) != 0x132BL) + { + fail = true; + } + + if (ComposeBits_BaseBfi_BfiOverlap_Long(0xBL, 0x2L, 0x4CL) != 0x9ABL) + { + fail = true; + } + + if (ComposeBits_BaseBfi_OverlapBfi_Long(0xBL, 0x2L, 0x4CL) != 0x99BL) + { + fail = true; + } + + if (ComposeBits_BaseBfi_BfiOverlapBfi_Long(0xBL, 0x2L, 0x4CL, 0x5L) != 0x59ABL) + { + fail = true; + } + + if (ComposeBits_Pack32Values_Long(1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 1, 1, 1, + 1, 0, 1, 1, 0, 0, 1, 1, 1, 1, 0, 1, 0, 1, 0, 1) != 0xABCDEF01L) + { + fail = true; + } + + // ULong + + if (ComposeBits_BaseAnd_Mask0_ULong(0xBUL, 0x1UL) != 0xBUL) + { + fail = true; + } + + if (ComposeBits_BaseAnd_Mask1_ULong(0xBUL, 0x1UL) != 0x1BUL) + { + fail = true; + } + + if (ComposeBits_BaseAnd_Mask2_ULong(0xBUL, 0x2UL) != 0x2BUL) + { + fail = true; + } + + if (ComposeBits_BaseAnd_Mask3_ULong(0xBUL, 0x2UL) != 0x2BUL) + { + fail = true; + } + + if (ComposeBits_BaseAnd_Shift0_ULong(0xEUL, 0x1UL) != 0x7UL) + { + fail = true; + } + + if (ComposeBits_BaseAnd_Shift31_ULong(0xEUL, 0x1UL) != 0x8000000EUL) + { 
+ fail = true; + } + + if (ComposeBits_BaseAnd_Shift31_Mask3_ULong(0xAUL, 0x3UL) != 0x18000000AUL) + { + fail = true; + } + + if (ComposeBits_BaseAnd_Shift32_ULong(0xEUL, 0x1UL) != 0x10000000EUL) + { + fail = true; + } + + if (ComposeBits_BaseAnd_Shift28_MaskF_MaxWidth_ULong(0xEUL, 0xFUL) != 0xF000000EUL) + { + fail = true; + } + + if (ComposeBits_BaseAnd_Shift28_Mask1F_TooWide_ULong(0xEUL, 0x1FUL) != 0x1F000000EUL) + { + fail = true; + } + + if (ComposeBits_BaseAnd_Shift60_MaskF_MaxWidth_ULong(0xEUL, 0xFUL) != 0xF00000000000000EUL) + { + fail = true; + } + + if (ComposeBits_BaseAnd_Shift60_Mask1F_TooWide_ULong(0xEUL, 0x1FUL) != 0xF00000000000000EUL) + { + fail = true; + } + + if (ComposeBits_BaseAnd_Overlap_ULong(0x1UL, 0x3UL) != 0xDUL) + { + fail = true; + } + + if (ComposeBits_BaseAnd_Variable_ULong(0x1000UL, 0xCUL) != 0xD000UL) + { + fail = true; + } + + if (ComposeBits_BaseAnd_Variables_ULong(0xFUL, 0xAUL, 0x2UL) != 0x2AUL) + { + fail = true; + } + + if (ComposeBits_BaseConstant_Mask3_ULong(0x3UL) != 0x3AUL) + { + fail = true; + } + + if (ComposeBits_BaseConstant_Mask4_ULong(0x7UL) != 0x4AUL) + { + fail = true; + } + + if (ComposeBits_BaseConstant_Overlap_ULong(0x2UL) != 0x3FUL) + { + fail = true; + } + + if (ComposeBits_BaseBfi_ULong(0xBUL, 0x2UL, 0x4CUL) != 0x132BUL) + { + fail = true; + } + + if (ComposeBits_BaseBfi_SwapOrder_ULong(0xBUL, 0x2UL, 0x4CUL) != 0x132BUL) + { + fail = true; + } + + if (ComposeBits_BaseBfi_BfiOverlap_ULong(0xBUL, 0x2UL, 0x4CUL) != 0x9ABUL) + { + fail = true; + } + + if (ComposeBits_BaseBfi_OverlapBfi_ULong(0xBUL, 0x2UL, 0x4CUL) != 0x99BUL) + { + fail = true; + } + + if (ComposeBits_BaseBfi_BfiOverlapBfi_ULong(0xBUL, 0x2UL, 0x4CUL, 0x5UL) != 0x59ABUL) + { + fail = true; + } + + if (ComposeBits_Pack32Values_ULong(1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 1, 1, 1, + 1, 0, 1, 1, 0, 0, 1, 1, 1, 1, 0, 1, 0, 1, 0, 1) != 0xABCDEF01UL) + { + fail = true; + } if (fail) { @@ -48,14 +495,271 @@ public static int CheckBfi() } 
[MethodImpl(MethodImplOptions.NoInlining)] - static int ComposeBits_BaseAnd_Mask0(int a, int b) + static int ComposeBits_BaseAnd_Mask0_Int(int a, int b) + { + //ARM64-FULL-LINE: and {{w[0-9]+}}, {{w[0-9]+}}, #15 + return (a & 0xF) | ((b & 0x0) << 4); + } + + [MethodImpl(MethodImplOptions.NoInlining)] + static int ComposeBits_BaseAnd_Mask1_Int(int a, int b) + { + //ARM64-FULL-LINE: and {{w[0-9]+}}, {{w[0-9]+}}, #15 + //ARM64-FULL-LINE: bfi {{w[0-9]+}}, {{w[0-9]+}}, #4, #1 + return (a & 0xf) | ((b & 0x1) << 4); + } + + [MethodImpl(MethodImplOptions.NoInlining)] + static int ComposeBits_BaseAnd_Mask2_Int(int a, int b) + { + //ARM64-FULL-LINE: and {{w[0-9]+}}, {{w[0-9]+}}, #15 + //ARM64-FULL-LINE: and {{w[0-9]+}}, {{w[0-9]+}}, #2 + //ARM64-FULL-LINE: orr {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, LSL #4 + return (a & 0xf) | ((b & 0x2) << 4); + } + + [MethodImpl(MethodImplOptions.NoInlining)] + static int ComposeBits_BaseAnd_Mask3_Int(int a, int b) + { + //ARM64-FULL-LINE: and {{w[0-9]+}}, {{w[0-9]+}}, #15 + //ARM64-FULL-LINE: bfi {{w[0-9]+}}, {{w[0-9]+}}, #4, #2 + return (a & 0xf) | ((b & 0x3) << 4); + } + + [MethodImpl(MethodImplOptions.NoInlining)] + static int ComposeBits_BaseAnd_Shift0_Int(int a, int b) + { + //ARM64-FULL-LINE: and {{w[0-9]+}}, {{w[0-9]+}}, #7 + //ARM64-FULL-LINE: and {{w[0-9]+}}, {{w[0-9]+}}, #1 + //ARM64-FULL-LINE: orr {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} + return (a & 0x7) | (b & 0x1); + } + + [MethodImpl(MethodImplOptions.NoInlining)] + static int ComposeBits_BaseAnd_Shift31_Int(int a, int b) + { + //ARM64-FULL-LINE: and {{w[0-9]+}}, {{w[0-9]+}}, #15 + //ARM64-FULL-LINE: bfi {{w[0-9]+}}, {{w[0-9]+}}, #31, #1 + return (a & 0xf) | ((b & 0x1) << 31); + } + + [MethodImpl(MethodImplOptions.NoInlining)] + static int ComposeBits_BaseAnd_Shift31_Mask3_Int(int a, int b) + { + //ARM64-FULL-LINE: and {{w[0-9]+}}, {{w[0-9]+}}, #15 + //ARM64-FULL-LINE: and {{w[0-9]+}}, {{w[0-9]+}}, #3 + //ARM64-FULL-LINE: orr {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, LSL #31 + 
return (a & 0xf) | ((b & 0x3) << 31); + } + + [MethodImpl(MethodImplOptions.NoInlining)] + static int ComposeBits_BaseAnd_Shift32_Int(int a, int b) + { + //ARM64-FULL-LINE: and {{w[0-9]+}}, {{w[0-9]+}}, #15 + //ARM64-FULL-LINE: and {{w[0-9]+}}, {{w[0-9]+}}, #1 + //ARM64-FULL-LINE: orr {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} + return (a & 0xf) | ((b & 0x1) << 32); + } + + [MethodImpl(MethodImplOptions.NoInlining)] + static int ComposeBits_BaseAnd_Shift28_MaskF_MaxWidth_Int(int a, int b) + { + //ARM64-FULL-LINE: and {{w[0-9]+}}, {{w[0-9]+}}, #15 + //ARM64-FULL-LINE: bfi {{w[0-9]+}}, {{w[0-9]+}}, #28, #4 + return (a & 0xf) | ((b & 0xF) << 28); + } + + [MethodImpl(MethodImplOptions.NoInlining)] + static int ComposeBits_BaseAnd_Shift28_Mask1F_TooWide_Int(int a, int b) + { + //ARM64-FULL-LINE: and {{w[0-9]+}}, {{w[0-9]+}}, #15 + //ARM64-FULL-LINE: and {{w[0-9]+}}, {{w[0-9]+}}, #31 + //ARM64-FULL-LINE: orr {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, LSL #28 + return (a & 0xf) | ((b & 0x1F) << 28); + } + + [MethodImpl(MethodImplOptions.NoInlining)] + static int ComposeBits_BaseAnd_Overlap_Int(int a, int b) + { + //ARM64-FULL-LINE: and {{w[0-9]+}}, {{w[0-9]+}}, #7 + //ARM64-FULL-LINE: and {{w[0-9]+}}, {{w[0-9]+}}, #3 + //ARM64-FULL-LINE: orr {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, LSL #2 + return (a & 0x7) | ((b & 0x3) << 2); + } + + [MethodImpl(MethodImplOptions.NoInlining)] + static int ComposeBits_BaseAnd_Variable_Int(int a, int b) + { + //ARM64-FULL-LINE: and {{w[0-9]+}}, {{w[0-9]+}}, #15 + //ARM64-FULL-LINE: orr {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, LSL #12 + return a | ((b & 0xF) << 12); + } + + [MethodImpl(MethodImplOptions.NoInlining)] + static int ComposeBits_BaseAnd_Variables_Int(int a, int b, int c) + { + //ARM64-FULL-LINE: and {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} + //ARM64-FULL-LINE: and {{w[0-9]+}}, {{w[0-9]+}}, #3 + //ARM64-FULL-LINE: orr {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, LSL #4 + return (a & b) | ((c & 0x3) << 4); + } + + 
[MethodImpl(MethodImplOptions.NoInlining)] + static int ComposeBits_BaseConstant_Mask3_Int(int a) + { + //ARM64-FULL-LINE: mov {{w[0-9]+}}, #10 + //ARM64-FULL-LINE: bfi {{w[0-9]+}}, {{w[0-9]+}}, #4, #2 + return 0xA | ((a & 0x3) << 4); + } + + [MethodImpl(MethodImplOptions.NoInlining)] + static int ComposeBits_BaseConstant_Mask4_Int(int a) + { + //ARM64-FULL-LINE: and {{w[0-9]+}}, {{w[0-9]+}}, #4 + //ARM64-FULL-LINE: mov {{w[0-9]+}}, #10 + //ARM64-FULL-LINE: orr {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, LSL #4 + return 0xA | ((a & 0x4) << 4); + } + + [MethodImpl(MethodImplOptions.NoInlining)] + static int ComposeBits_BaseConstant_Overlap_Int(int a) + { + //ARM64-FULL-LINE: and {{w[0-9]+}}, {{w[0-9]+}}, #3 + //ARM64-FULL-LINE: lsl {{w[0-9]+}}, {{w[0-9]+}}, #4 + //ARM64-FULL-LINE: orr {{w[0-9]+}}, {{w[0-9]+}}, #31 + return 0x1F | ((a & 0x3) << 4); + } + + [MethodImpl(MethodImplOptions.NoInlining)] + static int ComposeBits_BaseBfi_Int(int a, int b, int c) + { + //ARM64-FULL-LINE: and {{w[0-9]+}}, {{w[0-9]+}}, #15 + //ARM64-FULL-LINE: bfi {{w[0-9]+}}, {{w[0-9]+}}, #4, #2 + //ARM64-FULL-LINE: bfi {{w[0-9]+}}, {{w[0-9]+}}, #6, #7 + return (a & 0xf) | ((b & 0x3) << 4) | ((c & 0x7F) << 6); + } + + [MethodImpl(MethodImplOptions.NoInlining)] + static int ComposeBits_BaseBfi_SwapOrder_Int(int a, int b, int c) + { + //ARM64-FULL-LINE: and {{w[0-9]+}}, {{w[0-9]+}}, #15 + //ARM64-FULL-LINE: bfi {{w[0-9]+}}, {{w[0-9]+}}, #6, #7 + //ARM64-FULL-LINE: bfi {{w[0-9]+}}, {{w[0-9]+}}, #4, #2 + return (a & 0xf) | ((c & 0x7F) << 6) | ((b & 0x3) << 4); + } + + [MethodImpl(MethodImplOptions.NoInlining)] + static int ComposeBits_BaseBfi_BfiOverlap_Int(int a, int b, int c) + { + //ARM64-FULL-LINE: and {{w[0-9]+}}, {{w[0-9]+}}, #15 + //ARM64-FULL-LINE: bfi {{w[0-9]+}}, {{w[0-9]+}}, #4, #2 + //ARM64-FULL-LINE: and {{w[0-9]+}}, {{w[0-9]+}}, #127 + //ARM64-FULL-LINE: orr {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, LSL #5 + return (a & 0xf) | ((b & 0x3) << 4) | ((c & 0x7F) << 5); + } + + 
[MethodImpl(MethodImplOptions.NoInlining)] + static int ComposeBits_BaseBfi_OverlapBfi_Int(int a, int b, int c) + { + //ARM64-FULL-LINE: and {{w[0-9]+}}, {{w[0-9]+}}, #15 + //ARM64-FULL-LINE: and {{w[0-9]+}}, {{w[0-9]+}}, #3 + //ARM64-FULL-LINE: orr {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, LSL #3 + //ARM64-FULL-LINE: bfi {{w[0-9]+}}, {{w[0-9]+}}, #5, #7 + return (a & 0xf) | ((b & 0x3) << 3) | ((c & 0x7F) << 5); + } + + [MethodImpl(MethodImplOptions.NoInlining)] + static int ComposeBits_BaseBfi_BfiOverlapBfi_Int(int a, int b, int c, int d) + { + //ARM64-FULL-LINE: and {{w[0-9]+}}, {{w[0-9]+}}, #15 + //ARM64-FULL-LINE: bfi {{w[0-9]+}}, {{w[0-9]+}}, #4, #2 + //ARM64-FULL-LINE: and {{w[0-9]+}}, {{w[0-9]+}}, #127 + //ARM64-FULL-LINE: orr {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, LSL #5 + //ARM64-FULL-LINE: bfi {{w[0-9]+}}, {{w[0-9]+}}, #12, #3 + return (a & 0xf) | ((b & 0x3) << 4) | ((c & 0x7F) << 5) | ((d & 0x7) << 12); + } + + [MethodImpl(MethodImplOptions.NoInlining)] + static int ComposeBits_Pack32Values_Int( + int b0, int b1, int b2, int b3, + int b4, int b5, int b6, int b7, + int b8, int b9, int b10, int b11, + int b12, int b13, int b14, int b15, + int b16, int b17, int b18, int b19, + int b20, int b21, int b22, int b23, + int b24, int b25, int b26, int b27, + int b28, int b29, int b30, int b31) + { + //ARM64-FULL-LINE: and {{w[0-9]+}}, {{w[0-9]+}}, #1 + //ARM64-FULL-LINE: bfi {{w[0-9]+}}, {{w[0-9]+}}, #1, #1 + //ARM64-FULL-LINE: bfi {{w[0-9]+}}, {{w[0-9]+}}, #2, #1 + //ARM64-FULL-LINE: bfi {{w[0-9]+}}, {{w[0-9]+}}, #3, #1 + //ARM64-FULL-LINE: bfi {{w[0-9]+}}, {{w[0-9]+}}, #4, #1 + //ARM64-FULL-LINE: bfi {{w[0-9]+}}, {{w[0-9]+}}, #5, #1 + //ARM64-FULL-LINE: bfi {{w[0-9]+}}, {{w[0-9]+}}, #6, #1 + //ARM64-FULL-LINE: bfi {{w[0-9]+}}, {{w[0-9]+}}, #7, #1 + //ARM64-FULL-LINE: bfi {{w[0-9]+}}, {{w[0-9]+}}, #8, #1 + //ARM64-FULL-LINE: bfi {{w[0-9]+}}, {{w[0-9]+}}, #9, #1 + //ARM64-FULL-LINE: bfi {{w[0-9]+}}, {{w[0-9]+}}, #10, #1 + //ARM64-FULL-LINE: bfi {{w[0-9]+}}, 
{{w[0-9]+}}, #11, #1 + //ARM64-FULL-LINE: bfi {{w[0-9]+}}, {{w[0-9]+}}, #12, #1 + //ARM64-FULL-LINE: bfi {{w[0-9]+}}, {{w[0-9]+}}, #13, #1 + //ARM64-FULL-LINE: bfi {{w[0-9]+}}, {{w[0-9]+}}, #14, #1 + //ARM64-FULL-LINE: bfi {{w[0-9]+}}, {{w[0-9]+}}, #15, #1 + //ARM64-FULL-LINE: bfi {{w[0-9]+}}, {{w[0-9]+}}, #16, #1 + //ARM64-FULL-LINE: bfi {{w[0-9]+}}, {{w[0-9]+}}, #17, #1 + //ARM64-FULL-LINE: bfi {{w[0-9]+}}, {{w[0-9]+}}, #18, #1 + //ARM64-FULL-LINE: bfi {{w[0-9]+}}, {{w[0-9]+}}, #19, #1 + //ARM64-FULL-LINE: bfi {{w[0-9]+}}, {{w[0-9]+}}, #20, #1 + //ARM64-FULL-LINE: bfi {{w[0-9]+}}, {{w[0-9]+}}, #21, #1 + //ARM64-FULL-LINE: bfi {{w[0-9]+}}, {{w[0-9]+}}, #22, #1 + //ARM64-FULL-LINE: bfi {{w[0-9]+}}, {{w[0-9]+}}, #23, #1 + //ARM64-FULL-LINE: bfi {{w[0-9]+}}, {{w[0-9]+}}, #24, #1 + //ARM64-FULL-LINE: bfi {{w[0-9]+}}, {{w[0-9]+}}, #25, #1 + //ARM64-FULL-LINE: bfi {{w[0-9]+}}, {{w[0-9]+}}, #26, #1 + return ((b0 & 1) << 0) | + ((b1 & 1) << 1) | + ((b2 & 1) << 2) | + ((b3 & 1) << 3) | + ((b4 & 1) << 4) | + ((b5 & 1) << 5) | + ((b6 & 1) << 6) | + ((b7 & 1) << 7) | + ((b8 & 1) << 8) | + ((b9 & 1) << 9) | + ((b10 & 1) << 10) | + ((b11 & 1) << 11) | + ((b12 & 1) << 12) | + ((b13 & 1) << 13) | + ((b14 & 1) << 14) | + ((b15 & 1) << 15) | + ((b16 & 1) << 16) | + ((b17 & 1) << 17) | + ((b18 & 1) << 18) | + ((b19 & 1) << 19) | + ((b20 & 1) << 20) | + ((b21 & 1) << 21) | + ((b22 & 1) << 22) | + ((b23 & 1) << 23) | + ((b24 & 1) << 24) | + ((b25 & 1) << 25) | + ((b26 & 1) << 26) | + ((b27 & 1) << 27) | + ((b28 & 1) << 28) | + ((b29 & 1) << 29) | + ((b30 & 1) << 30) | + ((b31 & 1) << 31); + } + + [MethodImpl(MethodImplOptions.NoInlining)] + static uint ComposeBits_BaseAnd_Mask0_UInt(uint a, uint b) { //ARM64-FULL-LINE: and {{w[0-9]+}}, {{w[0-9]+}}, #15 return (a & 0xF) | ((b & 0x0) << 4); } [MethodImpl(MethodImplOptions.NoInlining)] - static int ComposeBits_BaseAnd_Mask1(int a, int b) + static uint ComposeBits_BaseAnd_Mask1_UInt(uint a, uint b) { //ARM64-FULL-LINE: and {{w[0-9]+}}, 
{{w[0-9]+}}, #15 //ARM64-FULL-LINE: bfi {{w[0-9]+}}, {{w[0-9]+}}, #4, #1 @@ -63,7 +767,7 @@ static int ComposeBits_BaseAnd_Mask1(int a, int b) } [MethodImpl(MethodImplOptions.NoInlining)] - static int ComposeBits_BaseAnd_Mask2(int a, int b) + static uint ComposeBits_BaseAnd_Mask2_UInt(uint a, uint b) { //ARM64-FULL-LINE: and {{w[0-9]+}}, {{w[0-9]+}}, #15 //ARM64-FULL-LINE: and {{w[0-9]+}}, {{w[0-9]+}}, #2 @@ -72,7 +776,7 @@ static int ComposeBits_BaseAnd_Mask2(int a, int b) } [MethodImpl(MethodImplOptions.NoInlining)] - static int ComposeBits_BaseAnd_Mask3(int a, int b) + static uint ComposeBits_BaseAnd_Mask3_UInt(uint a, uint b) { //ARM64-FULL-LINE: and {{w[0-9]+}}, {{w[0-9]+}}, #15 //ARM64-FULL-LINE: bfi {{w[0-9]+}}, {{w[0-9]+}}, #4, #2 @@ -80,15 +784,7 @@ static int ComposeBits_BaseAnd_Mask3(int a, int b) } [MethodImpl(MethodImplOptions.NoInlining)] - static int ComposeBits_BaseAnd_MaskFF(int a, int b) - { - //AR M64-FULL-LINE: and {{w[0-9]+}}, {{w[0-9]+}}, #15 - //AR M64-FULL-LINE: bfi {{w[0-9]+}}, {{w[0-9]+}}, #4, #2 - return (a & 0xf) | ((b & 0xFF) << 4); - } - - [MethodImpl(MethodImplOptions.NoInlining)] - static uint ComposeBits_BaseAnd_Shift0(uint a, uint b) + static uint ComposeBits_BaseAnd_Shift0_UInt(uint a, uint b) { //ARM64-FULL-LINE: and {{w[0-9]+}}, {{w[0-9]+}}, #7 //ARM64-FULL-LINE: and {{w[0-9]+}}, {{w[0-9]+}}, #1 @@ -97,7 +793,7 @@ static uint ComposeBits_BaseAnd_Shift0(uint a, uint b) } [MethodImpl(MethodImplOptions.NoInlining)] - static uint ComposeBits_BaseAnd_Shift31(uint a, uint b) + static uint ComposeBits_BaseAnd_Shift31_UInt(uint a, uint b) { //ARM64-FULL-LINE: and {{w[0-9]+}}, {{w[0-9]+}}, #15 //ARM64-FULL-LINE: bfi {{w[0-9]+}}, {{w[0-9]+}}, #31, #1 @@ -105,7 +801,7 @@ static uint ComposeBits_BaseAnd_Shift31(uint a, uint b) } [MethodImpl(MethodImplOptions.NoInlining)] - static uint ComposeBits_BaseAnd_Shift31_Mask3(uint a, uint b) + static uint ComposeBits_BaseAnd_Shift31_Mask3_UInt(uint a, uint b) { //ARM64-FULL-LINE: and {{w[0-9]+}}, 
{{w[0-9]+}}, #15 //ARM64-FULL-LINE: and {{w[0-9]+}}, {{w[0-9]+}}, #3 @@ -114,7 +810,7 @@ static uint ComposeBits_BaseAnd_Shift31_Mask3(uint a, uint b) } [MethodImpl(MethodImplOptions.NoInlining)] - static uint ComposeBits_BaseAnd_Shift32(uint a, uint b) + static uint ComposeBits_BaseAnd_Shift32_UInt(uint a, uint b) { //ARM64-FULL-LINE: and {{w[0-9]+}}, {{w[0-9]+}}, #15 //ARM64-FULL-LINE: and {{w[0-9]+}}, {{w[0-9]+}}, #1 @@ -123,7 +819,24 @@ static uint ComposeBits_BaseAnd_Shift32(uint a, uint b) } [MethodImpl(MethodImplOptions.NoInlining)] - static int ComposeBits_BaseAnd_Overlap(int a, int b) + static uint ComposeBits_BaseAnd_Shift28_MaskF_MaxWidth_UInt(uint a, uint b) + { + //ARM64-FULL-LINE: and {{w[0-9]+}}, {{w[0-9]+}}, #15 + //ARM64-FULL-LINE: bfi {{w[0-9]+}}, {{w[0-9]+}}, #28, #4 + return (a & 0xf) | ((b & 0xF) << 28); + } + + [MethodImpl(MethodImplOptions.NoInlining)] + static uint ComposeBits_BaseAnd_Shift28_Mask1F_TooWide_UInt(uint a, uint b) + { + //ARM64-FULL-LINE: and {{w[0-9]+}}, {{w[0-9]+}}, #15 + //ARM64-FULL-LINE: and {{w[0-9]+}}, {{w[0-9]+}}, #31 + //ARM64-FULL-LINE: orr {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, LSL #28 + return (a & 0xf) | ((b & 0x1F) << 28); + } + + [MethodImpl(MethodImplOptions.NoInlining)] + static uint ComposeBits_BaseAnd_Overlap_UInt(uint a, uint b) { //ARM64-FULL-LINE: and {{w[0-9]+}}, {{w[0-9]+}}, #7 //ARM64-FULL-LINE: and {{w[0-9]+}}, {{w[0-9]+}}, #3 @@ -132,7 +845,7 @@ static int ComposeBits_BaseAnd_Overlap(int a, int b) } [MethodImpl(MethodImplOptions.NoInlining)] - static int ComposeBits_BaseAnd_Variable(int a, int b) + static uint ComposeBits_BaseAnd_Variable_UInt(uint a, uint b) { //ARM64-FULL-LINE: and {{w[0-9]+}}, {{w[0-9]+}}, #15 //ARM64-FULL-LINE: orr {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, LSL #12 @@ -140,7 +853,7 @@ static int ComposeBits_BaseAnd_Variable(int a, int b) } [MethodImpl(MethodImplOptions.NoInlining)] - static int ComposeBits_BaseAnd_Variables(int a, int b, int c) + static uint 
ComposeBits_BaseAnd_Variables_UInt(uint a, uint b, uint c) { //ARM64-FULL-LINE: and {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} //ARM64-FULL-LINE: and {{w[0-9]+}}, {{w[0-9]+}}, #3 @@ -149,33 +862,33 @@ static int ComposeBits_BaseAnd_Variables(int a, int b, int c) } [MethodImpl(MethodImplOptions.NoInlining)] - static int ComposeBits_BaseConstant_Mask3(int a) + static uint ComposeBits_BaseConstant_Mask3_UInt(uint a) { //ARM64-FULL-LINE: mov {{w[0-9]+}}, #10 //ARM64-FULL-LINE: bfi {{w[0-9]+}}, {{w[0-9]+}}, #4, #2 - return 0xA | ((a & 0x3) << 4); + return 0xAu | ((a & 0x3) << 4); } [MethodImpl(MethodImplOptions.NoInlining)] - static int ComposeBits_BaseConstant_Mask4(int a) + static uint ComposeBits_BaseConstant_Mask4_UInt(uint a) { //ARM64-FULL-LINE: and {{w[0-9]+}}, {{w[0-9]+}}, #4 //ARM64-FULL-LINE: mov {{w[0-9]+}}, #10 //ARM64-FULL-LINE: orr {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, LSL #4 - return 0xA | ((a & 0x4) << 4); + return 0xAu | ((a & 0x4) << 4); } [MethodImpl(MethodImplOptions.NoInlining)] - static int ComposeBits_BaseConstant_Overlap(int a) + static uint ComposeBits_BaseConstant_Overlap_UInt(uint a) { //ARM64-FULL-LINE: and {{w[0-9]+}}, {{w[0-9]+}}, #3 //ARM64-FULL-LINE: lsl {{w[0-9]+}}, {{w[0-9]+}}, #4 //ARM64-FULL-LINE: orr {{w[0-9]+}}, {{w[0-9]+}}, #31 - return 0x1F | ((a & 0x3) << 4); + return 0x1Fu | ((a & 0x3) << 4); } [MethodImpl(MethodImplOptions.NoInlining)] - static uint ComposeBits_BaseBfi(uint a, uint b, uint c) + static uint ComposeBits_BaseBfi_UInt(uint a, uint b, uint c) { //ARM64-FULL-LINE: and {{w[0-9]+}}, {{w[0-9]+}}, #15 //ARM64-FULL-LINE: bfi {{w[0-9]+}}, {{w[0-9]+}}, #4, #2 @@ -184,7 +897,7 @@ static uint ComposeBits_BaseBfi(uint a, uint b, uint c) } [MethodImpl(MethodImplOptions.NoInlining)] - static uint ComposeBits_BaseBfi_SwapOrder(uint a, uint b, uint c) + static uint ComposeBits_BaseBfi_SwapOrder_UInt(uint a, uint b, uint c) { //ARM64-FULL-LINE: and {{w[0-9]+}}, {{w[0-9]+}}, #15 //ARM64-FULL-LINE: bfi {{w[0-9]+}}, {{w[0-9]+}}, #6, #7 @@ 
-193,7 +906,7 @@ static uint ComposeBits_BaseBfi_SwapOrder(uint a, uint b, uint c) } [MethodImpl(MethodImplOptions.NoInlining)] - static uint ComposeBits_BaseBfi_BfiOverlap(uint a, uint b, uint c) + static uint ComposeBits_BaseBfi_BfiOverlap_UInt(uint a, uint b, uint c) { //ARM64-FULL-LINE: and {{w[0-9]+}}, {{w[0-9]+}}, #15 //ARM64-FULL-LINE: bfi {{w[0-9]+}}, {{w[0-9]+}}, #4, #2 @@ -203,7 +916,7 @@ static uint ComposeBits_BaseBfi_BfiOverlap(uint a, uint b, uint c) } [MethodImpl(MethodImplOptions.NoInlining)] - static uint ComposeBits_BaseBfi_OverlapBfi(uint a, uint b, uint c) + static uint ComposeBits_BaseBfi_OverlapBfi_UInt(uint a, uint b, uint c) { //ARM64-FULL-LINE: and {{w[0-9]+}}, {{w[0-9]+}}, #15 //ARM64-FULL-LINE: and {{w[0-9]+}}, {{w[0-9]+}}, #3 @@ -213,7 +926,7 @@ static uint ComposeBits_BaseBfi_OverlapBfi(uint a, uint b, uint c) } [MethodImpl(MethodImplOptions.NoInlining)] - static uint ComposeBits_BaseBfi_BfiOverlapBfi(uint a, uint b, uint c, uint d) + static uint ComposeBits_BaseBfi_BfiOverlapBfi_UInt(uint a, uint b, uint c, uint d) { //ARM64-FULL-LINE: and {{w[0-9]+}}, {{w[0-9]+}}, #15 //ARM64-FULL-LINE: bfi {{w[0-9]+}}, {{w[0-9]+}}, #4, #2 @@ -224,19 +937,7 @@ static uint ComposeBits_BaseBfi_BfiOverlapBfi(uint a, uint b, uint c, uint d) } [MethodImpl(MethodImplOptions.NoInlining)] - static uint ComposeBits_BaseBfi_NoBfiAfterInvalidPattern(uint a, uint b, uint c, uint d, uint e) - { - //ARM64-FULL-LINE: and {{w[0-9]+}}, {{w[0-9]+}}, #15 - //ARM64-FULL-LINE: bfi {{w[0-9]+}}, {{w[0-9]+}}, #4, #2 - //ARM64-FULL-LINE: and {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} - //ARM64-FULL-LINE: orr {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, LSL #5 - //ARM64-FULL-LINE: and {{w[0-9]+}}, {{w[0-9]+}}, #7 - //ARM64-FULL-LINE: orr {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, LSL #13 - return (a & 0xf) | ((b & 0x3) << 4) | ((c & d) << 5) | ((e & 0x7) << 13); - } - - [MethodImpl(MethodImplOptions.NoInlining)] - static uint ComposeBits_Pack32Values( + static uint 
ComposeBits_Pack32Values_UInt( uint b0, uint b1, uint b2, uint b3, uint b4, uint b5, uint b6, uint b7, uint b8, uint b9, uint b10, uint b11, @@ -306,5 +1007,543 @@ static uint ComposeBits_Pack32Values( ((b30 & 1u) << 30) | ((b31 & 1u) << 31); } + + + + [MethodImpl(MethodImplOptions.NoInlining)] + static long ComposeBits_BaseAnd_Mask0_Long(long a, long b) + { + //ARM64-FULL-LINE: and {{x[0-9]+}}, {{x[0-9]+}}, #15 + return (a & 0xFL) | ((b & 0x0L) << 4); + } + + [MethodImpl(MethodImplOptions.NoInlining)] + static long ComposeBits_BaseAnd_Mask1_Long(long a, long b) + { + //ARM64-FULL-LINE: and {{x[0-9]+}}, {{x[0-9]+}}, #15 + //ARM64-FULL-LINE: bfi {{x[0-9]+}}, {{x[0-9]+}}, #4, #1 + return (a & 0xfL) | ((b & 0x1L) << 4); + } + + [MethodImpl(MethodImplOptions.NoInlining)] + static long ComposeBits_BaseAnd_Mask2_Long(long a, long b) + { + //ARM64-FULL-LINE: and {{x[0-9]+}}, {{x[0-9]+}}, #15 + //ARM64-FULL-LINE: and {{x[0-9]+}}, {{x[0-9]+}}, #2 + //ARM64-FULL-LINE: orr {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}, LSL #4 + return (a & 0xfL) | ((b & 0x2L) << 4); + } + + [MethodImpl(MethodImplOptions.NoInlining)] + static long ComposeBits_BaseAnd_Mask3_Long(long a, long b) + { + //ARM64-FULL-LINE: and {{x[0-9]+}}, {{x[0-9]+}}, #15 + //ARM64-FULL-LINE: bfi {{x[0-9]+}}, {{x[0-9]+}}, #4, #2 + return (a & 0xfL) | ((b & 0x3L) << 4); + } + + [MethodImpl(MethodImplOptions.NoInlining)] + static long ComposeBits_BaseAnd_Shift0_Long(long a, long b) + { + //ARM64-FULL-LINE: and {{x[0-9]+}}, {{x[0-9]+}}, #7 + //ARM64-FULL-LINE: and {{x[0-9]+}}, {{x[0-9]+}}, #1 + //ARM64-FULL-LINE: orr {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}} + return (a & 0x7L) | (b & 0x1L); + } + + [MethodImpl(MethodImplOptions.NoInlining)] + static long ComposeBits_BaseAnd_Shift31_Long(long a, long b) + { + //ARM64-FULL-LINE: and {{x[0-9]+}}, {{x[0-9]+}}, #15 + //ARM64-FULL-LINE: bfi {{x[0-9]+}}, {{x[0-9]+}}, #31, #1 + return (a & 0xfL) | ((b & 0x1L) << 31); + } + + [MethodImpl(MethodImplOptions.NoInlining)] + static long 
ComposeBits_BaseAnd_Shift31_Mask3_Long(long a, long b) + { + //ARM64-FULL-LINE: and {{x[0-9]+}}, {{x[0-9]+}}, #15 + //ARM64-FULL-LINE: bfi {{x[0-9]+}}, {{x[0-9]+}}, #31, #2 + return (a & 0xfL) | ((b & 0x3L) << 31); + } + + [MethodImpl(MethodImplOptions.NoInlining)] + static long ComposeBits_BaseAnd_Shift32_Long(long a, long b) + { + //ARM64-FULL-LINE: and {{x[0-9]+}}, {{x[0-9]+}}, #15 + //ARM64-FULL-LINE: bfi {{x[0-9]+}}, {{x[0-9]+}}, #32, #1 + return (a & 0xfL) | ((b & 0x1L) << 32); + } + + [MethodImpl(MethodImplOptions.NoInlining)] + static long ComposeBits_BaseAnd_Shift28_MaskF_MaxWidth_Long(long a, long b) + { + //ARM64-FULL-LINE: and {{x[0-9]+}}, {{x[0-9]+}}, #15 + //ARM64-FULL-LINE: bfi {{x[0-9]+}}, {{x[0-9]+}}, #28, #4 + return (a & 0xfL) | ((b & 0xFL) << 28); + } + + [MethodImpl(MethodImplOptions.NoInlining)] + static long ComposeBits_BaseAnd_Shift28_Mask1F_TooWide_Long(long a, long b) + { + //ARM64-FULL-LINE: and {{x[0-9]+}}, {{x[0-9]+}}, #15 + //ARM64-FULL-LINE: bfi {{x[0-9]+}}, {{x[0-9]+}}, #28, #5 + return (a & 0xfL) | ((b & 0x1FL) << 28); + } + + [MethodImpl(MethodImplOptions.NoInlining)] + static long ComposeBits_BaseAnd_Shift60_MaskF_MaxWidth_Long(long a, long b) + { + //ARM64-FULL-LINE: and {{x[0-9]+}}, {{x[0-9]+}}, #15 + //ARM64-FULL-LINE: bfi {{x[0-9]+}}, {{x[0-9]+}}, #60, #4 + return (a & 0xfL) | ((b & 0xFL) << 60); + } + + [MethodImpl(MethodImplOptions.NoInlining)] + static long ComposeBits_BaseAnd_Shift60_Mask1F_TooWide_Long(long a, long b) + { + //ARM64-FULL-LINE: and {{x[0-9]+}}, {{x[0-9]+}}, #15 + //ARM64-FULL-LINE: and {{x[0-9]+}}, {{x[0-9]+}}, #31 + //ARM64-FULL-LINE: orr {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}, LSL #60 + return (a & 0xfL) | ((b & 0x1FL) << 60); + } + + [MethodImpl(MethodImplOptions.NoInlining)] + static long ComposeBits_BaseAnd_Overlap_Long(long a, long b) + { + //ARM64-FULL-LINE: and {{x[0-9]+}}, {{x[0-9]+}}, #7 + //ARM64-FULL-LINE: and {{x[0-9]+}}, {{x[0-9]+}}, #3 + //ARM64-FULL-LINE: orr {{x[0-9]+}}, {{x[0-9]+}}, 
{{x[0-9]+}}, LSL #2 + return (a & 0x7L) | ((b & 0x3L) << 2); + } + + [MethodImpl(MethodImplOptions.NoInlining)] + static long ComposeBits_BaseAnd_Variable_Long(long a, long b) + { + //ARM64-FULL-LINE: and {{x[0-9]+}}, {{x[0-9]+}}, #15 + //ARM64-FULL-LINE: orr {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}, LSL #12 + return a | ((b & 0xFL) << 12); + } + + [MethodImpl(MethodImplOptions.NoInlining)] + static long ComposeBits_BaseAnd_Variables_Long(long a, long b, long c) + { + //ARM64-FULL-LINE: and {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}} + //ARM64-FULL-LINE: and {{x[0-9]+}}, {{x[0-9]+}}, #3 + //ARM64-FULL-LINE: orr {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}, LSL #4 + return (a & b) | ((c & 0x3L) << 4); + } + + [MethodImpl(MethodImplOptions.NoInlining)] + static long ComposeBits_BaseConstant_Mask3_Long(long a) + { + //ARM64-FULL-LINE: mov {{x[0-9]+}}, #10 + //ARM64-FULL-LINE: bfi {{x[0-9]+}}, {{x[0-9]+}}, #4, #2 + return 0xAL | ((a & 0x3L) << 4); + } + + [MethodImpl(MethodImplOptions.NoInlining)] + static long ComposeBits_BaseConstant_Mask4_Long(long a) + { + //ARM64-FULL-LINE: and {{x[0-9]+}}, {{x[0-9]+}}, #4 + //ARM64-FULL-LINE: mov {{x[0-9]+}}, #10 + //ARM64-FULL-LINE: orr {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}, LSL #4 + return 0xAL | ((a & 0x4L) << 4); + } + + [MethodImpl(MethodImplOptions.NoInlining)] + static long ComposeBits_BaseConstant_Overlap_Long(long a) + { + //ARM64-FULL-LINE: and {{x[0-9]+}}, {{x[0-9]+}}, #3 + //ARM64-FULL-LINE: lsl {{x[0-9]+}}, {{x[0-9]+}}, #4 + //ARM64-FULL-LINE: orr {{x[0-9]+}}, {{x[0-9]+}}, #31 + return 0x1FL | ((a & 0x3L) << 4); + } + + [MethodImpl(MethodImplOptions.NoInlining)] + static long ComposeBits_BaseBfi_Long(long a, long b, long c) + { + //ARM64-FULL-LINE: and {{x[0-9]+}}, {{x[0-9]+}}, #15 + //ARM64-FULL-LINE: bfi {{x[0-9]+}}, {{x[0-9]+}}, #4, #2 + //ARM64-FULL-LINE: bfi {{x[0-9]+}}, {{x[0-9]+}}, #6, #7 + return (a & 0xfL) | ((b & 0x3L) << 4) | ((c & 0x7FL) << 6); + } + + [MethodImpl(MethodImplOptions.NoInlining)] + static long 
ComposeBits_BaseBfi_SwapOrder_Long(long a, long b, long c) + { + //ARM64-FULL-LINE: and {{x[0-9]+}}, {{x[0-9]+}}, #15 + //ARM64-FULL-LINE: bfi {{x[0-9]+}}, {{x[0-9]+}}, #6, #7 + //ARM64-FULL-LINE: bfi {{x[0-9]+}}, {{x[0-9]+}}, #4, #2 + return (a & 0xfL) | ((c & 0x7FL) << 6) | ((b & 0x3L) << 4); + } + + [MethodImpl(MethodImplOptions.NoInlining)] + static long ComposeBits_BaseBfi_BfiOverlap_Long(long a, long b, long c) + { + //ARM64-FULL-LINE: and {{x[0-9]+}}, {{x[0-9]+}}, #15 + //ARM64-FULL-LINE: bfi {{x[0-9]+}}, {{x[0-9]+}}, #4, #2 + //ARM64-FULL-LINE: and {{x[0-9]+}}, {{x[0-9]+}}, #127 + //ARM64-FULL-LINE: orr {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}, LSL #5 + return (a & 0xfL) | ((b & 0x3L) << 4) | ((c & 0x7FL) << 5); + } + + [MethodImpl(MethodImplOptions.NoInlining)] + static long ComposeBits_BaseBfi_OverlapBfi_Long(long a, long b, long c) + { + //ARM64-FULL-LINE: and {{x[0-9]+}}, {{x[0-9]+}}, #15 + //ARM64-FULL-LINE: and {{x[0-9]+}}, {{x[0-9]+}}, #3 + //ARM64-FULL-LINE: orr {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}, LSL #3 + //ARM64-FULL-LINE: bfi {{x[0-9]+}}, {{x[0-9]+}}, #5, #7 + return (a & 0xfL) | ((b & 0x3L) << 3) | ((c & 0x7FL) << 5); + } + + [MethodImpl(MethodImplOptions.NoInlining)] + static long ComposeBits_BaseBfi_BfiOverlapBfi_Long(long a, long b, long c, long d) + { + //ARM64-FULL-LINE: and {{x[0-9]+}}, {{x[0-9]+}}, #15 + //ARM64-FULL-LINE: bfi {{x[0-9]+}}, {{x[0-9]+}}, #4, #2 + //ARM64-FULL-LINE: and {{x[0-9]+}}, {{x[0-9]+}}, #127 + //ARM64-FULL-LINE: orr {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}, LSL #5 + //ARM64-FULL-LINE: bfi {{x[0-9]+}}, {{x[0-9]+}}, #12, #3 + return (a & 0xfL) | ((b & 0x3L) << 4) | ((c & 0x7FL) << 5) | ((d & 0x7L) << 12); + } + + [MethodImpl(MethodImplOptions.NoInlining)] + static long ComposeBits_Pack32Values_Long( + long b0, long b1, long b2, long b3, + long b4, long b5, long b6, long b7, + long b8, long b9, long b10, long b11, + long b12, long b13, long b14, long b15, + long b16, long b17, long b18, long b19, + long b20, long b21, 
long b22, long b23, + long b24, long b25, long b26, long b27, + long b28, long b29, long b30, long b31) + { + //ARM64-FULL-LINE: and {{x[0-9]+}}, {{x[0-9]+}}, #1 + //ARM64-FULL-LINE: bfi {{x[0-9]+}}, {{x[0-9]+}}, #1, #1 + //ARM64-FULL-LINE: bfi {{x[0-9]+}}, {{x[0-9]+}}, #2, #1 + //ARM64-FULL-LINE: bfi {{x[0-9]+}}, {{x[0-9]+}}, #3, #1 + //ARM64-FULL-LINE: bfi {{x[0-9]+}}, {{x[0-9]+}}, #4, #1 + //ARM64-FULL-LINE: bfi {{x[0-9]+}}, {{x[0-9]+}}, #5, #1 + //ARM64-FULL-LINE: bfi {{x[0-9]+}}, {{x[0-9]+}}, #6, #1 + //ARM64-FULL-LINE: bfi {{x[0-9]+}}, {{x[0-9]+}}, #7, #1 + //ARM64-FULL-LINE: bfi {{x[0-9]+}}, {{x[0-9]+}}, #8, #1 + //ARM64-FULL-LINE: bfi {{x[0-9]+}}, {{x[0-9]+}}, #9, #1 + //ARM64-FULL-LINE: bfi {{x[0-9]+}}, {{x[0-9]+}}, #10, #1 + //ARM64-FULL-LINE: bfi {{x[0-9]+}}, {{x[0-9]+}}, #11, #1 + //ARM64-FULL-LINE: bfi {{x[0-9]+}}, {{x[0-9]+}}, #12, #1 + //ARM64-FULL-LINE: bfi {{x[0-9]+}}, {{x[0-9]+}}, #13, #1 + //ARM64-FULL-LINE: bfi {{x[0-9]+}}, {{x[0-9]+}}, #14, #1 + //ARM64-FULL-LINE: bfi {{x[0-9]+}}, {{x[0-9]+}}, #15, #1 + //ARM64-FULL-LINE: bfi {{x[0-9]+}}, {{x[0-9]+}}, #16, #1 + //ARM64-FULL-LINE: bfi {{x[0-9]+}}, {{x[0-9]+}}, #17, #1 + //ARM64-FULL-LINE: bfi {{x[0-9]+}}, {{x[0-9]+}}, #18, #1 + //ARM64-FULL-LINE: bfi {{x[0-9]+}}, {{x[0-9]+}}, #19, #1 + //ARM64-FULL-LINE: bfi {{x[0-9]+}}, {{x[0-9]+}}, #20, #1 + //ARM64-FULL-LINE: bfi {{x[0-9]+}}, {{x[0-9]+}}, #21, #1 + //ARM64-FULL-LINE: bfi {{x[0-9]+}}, {{x[0-9]+}}, #22, #1 + //ARM64-FULL-LINE: bfi {{x[0-9]+}}, {{x[0-9]+}}, #23, #1 + return ((b0 & 1L) << 0) | + ((b1 & 1L) << 1) | + ((b2 & 1L) << 2) | + ((b3 & 1L) << 3) | + ((b4 & 1L) << 4) | + ((b5 & 1L) << 5) | + ((b6 & 1L) << 6) | + ((b7 & 1L) << 7) | + ((b8 & 1L) << 8) | + ((b9 & 1L) << 9) | + ((b10 & 1L) << 10) | + ((b11 & 1L) << 11) | + ((b12 & 1L) << 12) | + ((b13 & 1L) << 13) | + ((b14 & 1L) << 14) | + ((b15 & 1L) << 15) | + ((b16 & 1L) << 16) | + ((b17 & 1L) << 17) | + ((b18 & 1L) << 18) | + ((b19 & 1L) << 19) | + ((b20 & 1L) << 20) | + ((b21 & 1L) << 
21) | + ((b22 & 1L) << 22) | + ((b23 & 1L) << 23) | + ((b24 & 1L) << 24) | + ((b25 & 1L) << 25) | + ((b26 & 1L) << 26) | + ((b27 & 1L) << 27) | + ((b28 & 1L) << 28) | + ((b29 & 1L) << 29) | + ((b30 & 1L) << 30) | + ((b31 & 1L) << 31); + } + + [MethodImpl(MethodImplOptions.NoInlining)] + static ulong ComposeBits_BaseAnd_Mask0_ULong(ulong a, ulong b) + { + //ARM64-FULL-LINE: and {{x[0-9]+}}, {{x[0-9]+}}, #15 + return (a & 0xFUL) | ((b & 0x0UL) << 4); + } + + [MethodImpl(MethodImplOptions.NoInlining)] + static ulong ComposeBits_BaseAnd_Mask1_ULong(ulong a, ulong b) + { + //ARM64-FULL-LINE: and {{x[0-9]+}}, {{x[0-9]+}}, #15 + //ARM64-FULL-LINE: bfi {{x[0-9]+}}, {{x[0-9]+}}, #4, #1 + return (a & 0xfUL) | ((b & 0x1UL) << 4); + } + + [MethodImpl(MethodImplOptions.NoInlining)] + static ulong ComposeBits_BaseAnd_Mask2_ULong(ulong a, ulong b) + { + //ARM64-FULL-LINE: and {{x[0-9]+}}, {{x[0-9]+}}, #15 + //ARM64-FULL-LINE: and {{x[0-9]+}}, {{x[0-9]+}}, #2 + //ARM64-FULL-LINE: orr {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}, LSL #4 + return (a & 0xfUL) | ((b & 0x2UL) << 4); + } + + [MethodImpl(MethodImplOptions.NoInlining)] + static ulong ComposeBits_BaseAnd_Mask3_ULong(ulong a, ulong b) + { + //ARM64-FULL-LINE: and {{x[0-9]+}}, {{x[0-9]+}}, #15 + //ARM64-FULL-LINE: bfi {{x[0-9]+}}, {{x[0-9]+}}, #4, #2 + return (a & 0xfUL) | ((b & 0x3UL) << 4); + } + + [MethodImpl(MethodImplOptions.NoInlining)] + static ulong ComposeBits_BaseAnd_Shift0_ULong(ulong a, ulong b) + { + //ARM64-FULL-LINE: and {{x[0-9]+}}, {{x[0-9]+}}, #7 + //ARM64-FULL-LINE: and {{x[0-9]+}}, {{x[0-9]+}}, #1 + //ARM64-FULL-LINE: orr {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}} + return (a & 0x7UL) | (b & 0x1UL); + } + + [MethodImpl(MethodImplOptions.NoInlining)] + static ulong ComposeBits_BaseAnd_Shift31_ULong(ulong a, ulong b) + { + //ARM64-FULL-LINE: and {{x[0-9]+}}, {{x[0-9]+}}, #15 + //ARM64-FULL-LINE: bfi {{x[0-9]+}}, {{x[0-9]+}}, #31, #1 + return (a & 0xfUL) | ((b & 0x1UL) << 31); + } + + 
[MethodImpl(MethodImplOptions.NoInlining)] + static ulong ComposeBits_BaseAnd_Shift31_Mask3_ULong(ulong a, ulong b) + { + //ARM64-FULL-LINE: and {{x[0-9]+}}, {{x[0-9]+}}, #15 + //ARM64-FULL-LINE: bfi {{x[0-9]+}}, {{x[0-9]+}}, #31, #2 + return (a & 0xfUL) | ((b & 0x3UL) << 31); + } + + [MethodImpl(MethodImplOptions.NoInlining)] + static ulong ComposeBits_BaseAnd_Shift32_ULong(ulong a, ulong b) + { + //ARM64-FULL-LINE: and {{x[0-9]+}}, {{x[0-9]+}}, #15 + //ARM64-FULL-LINE: bfi {{x[0-9]+}}, {{x[0-9]+}}, #32, #1 + return (a & 0xfUL) | ((b & 0x1UL) << 32); + } + + [MethodImpl(MethodImplOptions.NoInlining)] + static ulong ComposeBits_BaseAnd_Shift28_MaskF_MaxWidth_ULong(ulong a, ulong b) + { + //ARM64-FULL-LINE: and {{x[0-9]+}}, {{x[0-9]+}}, #15 + //ARM64-FULL-LINE: bfi {{x[0-9]+}}, {{x[0-9]+}}, #28, #4 + return (a & 0xfUL) | ((b & 0xFUL) << 28); + } + + [MethodImpl(MethodImplOptions.NoInlining)] + static ulong ComposeBits_BaseAnd_Shift28_Mask1F_TooWide_ULong(ulong a, ulong b) + { + //ARM64-FULL-LINE: and {{x[0-9]+}}, {{x[0-9]+}}, #15 + //ARM64-FULL-LINE: bfi {{x[0-9]+}}, {{x[0-9]+}}, #28, #5 + return (a & 0xfUL) | ((b & 0x1FUL) << 28); + } + + [MethodImpl(MethodImplOptions.NoInlining)] + static ulong ComposeBits_BaseAnd_Shift60_MaskF_MaxWidth_ULong(ulong a, ulong b) + { + //ARM64-FULL-LINE: and {{x[0-9]+}}, {{x[0-9]+}}, #15 + //ARM64-FULL-LINE: bfi {{x[0-9]+}}, {{x[0-9]+}}, #60, #4 + return (a & 0xfUL) | ((b & 0xFUL) << 60); + } + + [MethodImpl(MethodImplOptions.NoInlining)] + static ulong ComposeBits_BaseAnd_Shift60_Mask1F_TooWide_ULong(ulong a, ulong b) + { + //ARM64-FULL-LINE: and {{x[0-9]+}}, {{x[0-9]+}}, #15 + //ARM64-FULL-LINE: and {{x[0-9]+}}, {{x[0-9]+}}, #31 + //ARM64-FULL-LINE: orr {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}, LSL #60 + return (a & 0xfUL) | ((b & 0x1FUL) << 60); + } + + [MethodImpl(MethodImplOptions.NoInlining)] + static ulong ComposeBits_BaseAnd_Overlap_ULong(ulong a, ulong b) + { + //ARM64-FULL-LINE: and {{x[0-9]+}}, {{x[0-9]+}}, #7 + 
//ARM64-FULL-LINE: and {{x[0-9]+}}, {{x[0-9]+}}, #3 + //ARM64-FULL-LINE: orr {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}, LSL #2 + return (a & 0x7UL) | ((b & 0x3UL) << 2); + } + + [MethodImpl(MethodImplOptions.NoInlining)] + static ulong ComposeBits_BaseAnd_Variable_ULong(ulong a, ulong b) + { + //ARM64-FULL-LINE: and {{x[0-9]+}}, {{x[0-9]+}}, #15 + //ARM64-FULL-LINE: orr {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}, LSL #12 + return a | ((b & 0xFUL) << 12); + } + + [MethodImpl(MethodImplOptions.NoInlining)] + static ulong ComposeBits_BaseAnd_Variables_ULong(ulong a, ulong b, ulong c) + { + //ARM64-FULL-LINE: and {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}} + //ARM64-FULL-LINE: and {{x[0-9]+}}, {{x[0-9]+}}, #3 + //ARM64-FULL-LINE: orr {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}, LSL #4 + return (a & b) | ((c & 0x3UL) << 4); + } + + [MethodImpl(MethodImplOptions.NoInlining)] + static ulong ComposeBits_BaseConstant_Mask3_ULong(ulong a) + { + //ARM64-FULL-LINE: mov {{x[0-9]+}}, #10 + //ARM64-FULL-LINE: bfi {{x[0-9]+}}, {{x[0-9]+}}, #4, #2 + return 0xAUL | ((a & 0x3UL) << 4); + } + + [MethodImpl(MethodImplOptions.NoInlining)] + static ulong ComposeBits_BaseConstant_Mask4_ULong(ulong a) + { + //ARM64-FULL-LINE: and {{x[0-9]+}}, {{x[0-9]+}}, #4 + //ARM64-FULL-LINE: mov {{x[0-9]+}}, #10 + //ARM64-FULL-LINE: orr {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}, LSL #4 + return 0xAUL | ((a & 0x4UL) << 4); + } + + [MethodImpl(MethodImplOptions.NoInlining)] + static ulong ComposeBits_BaseConstant_Overlap_ULong(ulong a) + { + //ARM64-FULL-LINE: and {{x[0-9]+}}, {{x[0-9]+}}, #3 + //ARM64-FULL-LINE: lsl {{x[0-9]+}}, {{x[0-9]+}}, #4 + //ARM64-FULL-LINE: orr {{x[0-9]+}}, {{x[0-9]+}}, #31 + return 0x1FUL | ((a & 0x3UL) << 4); + } + + [MethodImpl(MethodImplOptions.NoInlining)] + static ulong ComposeBits_BaseBfi_ULong(ulong a, ulong b, ulong c) + { + //ARM64-FULL-LINE: and {{x[0-9]+}}, {{x[0-9]+}}, #15 + //ARM64-FULL-LINE: bfi {{x[0-9]+}}, {{x[0-9]+}}, #4, #2 + //ARM64-FULL-LINE: bfi {{x[0-9]+}}, {{x[0-9]+}}, #6, #7 + 
return (a & 0xfUL) | ((b & 0x3UL) << 4) | ((c & 0x7FUL) << 6); + } + + [MethodImpl(MethodImplOptions.NoInlining)] + static ulong ComposeBits_BaseBfi_SwapOrder_ULong(ulong a, ulong b, ulong c) + { + //ARM64-FULL-LINE: and {{x[0-9]+}}, {{x[0-9]+}}, #15 + //ARM64-FULL-LINE: bfi {{x[0-9]+}}, {{x[0-9]+}}, #6, #7 + //ARM64-FULL-LINE: bfi {{x[0-9]+}}, {{x[0-9]+}}, #4, #2 + return (a & 0xfUL) | ((c & 0x7FUL) << 6) | ((b & 0x3UL) << 4); + } + + [MethodImpl(MethodImplOptions.NoInlining)] + static ulong ComposeBits_BaseBfi_BfiOverlap_ULong(ulong a, ulong b, ulong c) + { + //ARM64-FULL-LINE: and {{x[0-9]+}}, {{x[0-9]+}}, #15 + //ARM64-FULL-LINE: bfi {{x[0-9]+}}, {{x[0-9]+}}, #4, #2 + //ARM64-FULL-LINE: and {{x[0-9]+}}, {{x[0-9]+}}, #127 + //ARM64-FULL-LINE: orr {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}, LSL #5 + return (a & 0xfUL) | ((b & 0x3UL) << 4) | ((c & 0x7FUL) << 5); + } + + [MethodImpl(MethodImplOptions.NoInlining)] + static ulong ComposeBits_BaseBfi_OverlapBfi_ULong(ulong a, ulong b, ulong c) + { + //ARM64-FULL-LINE: and {{x[0-9]+}}, {{x[0-9]+}}, #15 + //ARM64-FULL-LINE: and {{x[0-9]+}}, {{x[0-9]+}}, #3 + //ARM64-FULL-LINE: orr {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}, LSL #3 + //ARM64-FULL-LINE: bfi {{x[0-9]+}}, {{x[0-9]+}}, #5, #7 + return (a & 0xfUL) | ((b & 0x3UL) << 3) | ((c & 0x7FUL) << 5); + } + + [MethodImpl(MethodImplOptions.NoInlining)] + static ulong ComposeBits_BaseBfi_BfiOverlapBfi_ULong(ulong a, ulong b, ulong c, ulong d) + { + //ARM64-FULL-LINE: and {{x[0-9]+}}, {{x[0-9]+}}, #15 + //ARM64-FULL-LINE: bfi {{x[0-9]+}}, {{x[0-9]+}}, #4, #2 + //ARM64-FULL-LINE: and {{x[0-9]+}}, {{x[0-9]+}}, #127 + //ARM64-FULL-LINE: orr {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}, LSL #5 + //ARM64-FULL-LINE: bfi {{x[0-9]+}}, {{x[0-9]+}}, #12, #3 + return (a & 0xfUL) | ((b & 0x3UL) << 4) | ((c & 0x7FUL) << 5) | ((d & 0x7UL) << 12); + } + + [MethodImpl(MethodImplOptions.NoInlining)] + static ulong ComposeBits_Pack32Values_ULong( + ulong b0, ulong b1, ulong b2, ulong b3, + ulong b4, ulong 
b5, ulong b6, ulong b7, + ulong b8, ulong b9, ulong b10, ulong b11, + ulong b12, ulong b13, ulong b14, ulong b15, + ulong b16, ulong b17, ulong b18, ulong b19, + ulong b20, ulong b21, ulong b22, ulong b23, + ulong b24, ulong b25, ulong b26, ulong b27, + ulong b28, ulong b29, ulong b30, ulong b31) + { + //ARM64-FULL-LINE: and {{x[0-9]+}}, {{x[0-9]+}}, #1 + //ARM64-FULL-LINE: bfi {{x[0-9]+}}, {{x[0-9]+}}, #1, #1 + //ARM64-FULL-LINE: bfi {{x[0-9]+}}, {{x[0-9]+}}, #2, #1 + //ARM64-FULL-LINE: bfi {{x[0-9]+}}, {{x[0-9]+}}, #3, #1 + //ARM64-FULL-LINE: bfi {{x[0-9]+}}, {{x[0-9]+}}, #4, #1 + //ARM64-FULL-LINE: bfi {{x[0-9]+}}, {{x[0-9]+}}, #5, #1 + //ARM64-FULL-LINE: bfi {{x[0-9]+}}, {{x[0-9]+}}, #6, #1 + //ARM64-FULL-LINE: bfi {{x[0-9]+}}, {{x[0-9]+}}, #7, #1 + //ARM64-FULL-LINE: bfi {{x[0-9]+}}, {{x[0-9]+}}, #8, #1 + //ARM64-FULL-LINE: bfi {{x[0-9]+}}, {{x[0-9]+}}, #9, #1 + //ARM64-FULL-LINE: bfi {{x[0-9]+}}, {{x[0-9]+}}, #10, #1 + //ARM64-FULL-LINE: bfi {{x[0-9]+}}, {{x[0-9]+}}, #11, #1 + //ARM64-FULL-LINE: bfi {{x[0-9]+}}, {{x[0-9]+}}, #12, #1 + //ARM64-FULL-LINE: bfi {{x[0-9]+}}, {{x[0-9]+}}, #13, #1 + //ARM64-FULL-LINE: bfi {{x[0-9]+}}, {{x[0-9]+}}, #14, #1 + //ARM64-FULL-LINE: bfi {{x[0-9]+}}, {{x[0-9]+}}, #15, #1 + //ARM64-FULL-LINE: bfi {{x[0-9]+}}, {{x[0-9]+}}, #16, #1 + //ARM64-FULL-LINE: bfi {{x[0-9]+}}, {{x[0-9]+}}, #17, #1 + //ARM64-FULL-LINE: bfi {{x[0-9]+}}, {{x[0-9]+}}, #18, #1 + //ARM64-FULL-LINE: bfi {{x[0-9]+}}, {{x[0-9]+}}, #19, #1 + //ARM64-FULL-LINE: bfi {{x[0-9]+}}, {{x[0-9]+}}, #20, #1 + //ARM64-FULL-LINE: bfi {{x[0-9]+}}, {{x[0-9]+}}, #21, #1 + //ARM64-FULL-LINE: bfi {{x[0-9]+}}, {{x[0-9]+}}, #22, #1 + //ARM64-FULL-LINE: bfi {{x[0-9]+}}, {{x[0-9]+}}, #23, #1 + return ((b0 & 1UL) << 0) | + ((b1 & 1UL) << 1) | + ((b2 & 1UL) << 2) | + ((b3 & 1UL) << 3) | + ((b4 & 1UL) << 4) | + ((b5 & 1UL) << 5) | + ((b6 & 1UL) << 6) | + ((b7 & 1UL) << 7) | + ((b8 & 1UL) << 8) | + ((b9 & 1UL) << 9) | + ((b10 & 1UL) << 10) | + ((b11 & 1UL) << 11) | + ((b12 & 1UL) << 
12) | + ((b13 & 1UL) << 13) | + ((b14 & 1UL) << 14) | + ((b15 & 1UL) << 15) | + ((b16 & 1UL) << 16) | + ((b17 & 1UL) << 17) | + ((b18 & 1UL) << 18) | + ((b19 & 1UL) << 19) | + ((b20 & 1UL) << 20) | + ((b21 & 1UL) << 21) | + ((b22 & 1UL) << 22) | + ((b23 & 1UL) << 23) | + ((b24 & 1UL) << 24) | + ((b25 & 1UL) << 25) | + ((b26 & 1UL) << 26) | + ((b27 & 1UL) << 27) | + ((b28 & 1UL) << 28) | + ((b29 & 1UL) << 29) | + ((b30 & 1UL) << 30) | + ((b31 & 1UL) << 31); + } } } diff --git a/src/tests/JIT/opt/InstructionCombining/Bfi.csproj b/src/tests/JIT/opt/InstructionCombining/Bfi.csproj index 21cac1ee0cb80d..e78e900a98e1d7 100644 --- a/src/tests/JIT/opt/InstructionCombining/Bfi.csproj +++ b/src/tests/JIT/opt/InstructionCombining/Bfi.csproj @@ -13,6 +13,5 @@ - diff --git a/src/tests/JIT/opt/InstructionCombining/Bfx.cs b/src/tests/JIT/opt/InstructionCombining/Bfx.cs index fe22a3753af2c6..ef672c0e2f1995 100644 --- a/src/tests/JIT/opt/InstructionCombining/Bfx.cs +++ b/src/tests/JIT/opt/InstructionCombining/Bfx.cs @@ -1,9 +1,7 @@ // Licensed to the .NET Foundation under one or more agreements. // The .NET Foundation licenses this file to you under the MIT license. 
-using System; using System.Runtime.CompilerServices; -using static TestLibrary.Expect; using Xunit; namespace TestBfx @@ -16,33 +14,184 @@ public static int CheckBfx() { bool fail = false; - ExpectEqual(() => ExtractBits_Int_NoShift(0x7F654321), 0x1, ref fail); - ExpectEqual(() => ExtractBits_Int_Shift(0x7F654321), 0xC, ref fail); - ExpectEqual(() => ExtractBits_Int_Shift_Multiple(0x7F654321), 0x15C, ref fail); - ExpectEqual(() => ExtractBits_Int_Shift_Non_Continous_Mask(0x7F654321), 0x43, ref fail); - ExpectEqual(() => ExtractBits_Int_Shift_Mask0xFF(0x7F654321), 0xC, ref fail); - ExpectEqual(() => ExtractBits_Int_Shift_Mask0xFFFF(0x7F654321), 0x950C, ref fail); - ExpectEqual(() => ExtractBits_UInt_NoShift(0xFEDCBA98u), 0x98u, ref fail); - ExpectEqual(() => ExtractBits_UInt_Shift(0xFEDCBA98u), 0x3FBu, ref fail); - ExpectEqual(() => ExtractBits_UInt_Shift_Multiple(0xFEDCBA98u), 0x1A18u, ref fail); - ExpectEqual(() => ExtractBits_UInt_Shift_Non_Continous_Mask(0xFEDCBA98u), 0x4, ref fail); - ExpectEqual(() => ExtractBits_UInt_Shift_Mask0xFF(0xFEDCBA98u), 0xEAu, ref fail); - ExpectEqual(() => ExtractBits_UInt_Shift_Mask0xFFFF(0xFEDCBA98u), 0x72EAu, ref fail); - ExpectEqual(() => ExtractBits_Long_NoShift(0x7FFFEDCBA9876543L), 0x6543L, ref fail); - ExpectEqual(() => ExtractBits_Long_Shift(0x7FFFEDCBA9876543L), 0x1D95L, ref fail); - ExpectEqual(() => ExtractBits_Long_Shift_Multiple(0x7FFFEDCBA9876543L), 0x47F6EL, ref fail); - ExpectEqual(() => ExtractBits_Long_Shift_Non_Continous_Mask(0x7FFFEDCBA9876543L), 0x14L, ref fail); - ExpectEqual(() => ExtractBits_Long_Shift_Mask0xFF(0x7FFFEDCBA9876543L), 0x95L, ref fail); - ExpectEqual(() => ExtractBits_Long_Shift_Mask0xFFFF(0x7FFFEDCBA9876543L), 0x1D95L, ref fail); - ExpectEqual(() => ExtractBits_Long_Shift_Mask0xFFFFFFFF(0x7FFFEDCBA9876543L), 0x2EA61D95L, ref fail); - ExpectEqual(() => ExtractBits_Long_Shift_Mask0xFFFFFFFFFFFFFFFF(0x7FFFEDCBA9876543L), 0x1FFFFB72EA61D95L, ref fail); - ExpectEqual(() => 
ExtractBits_ULong_NoShift(0xFFFEEDCBA9876543UL), 0x3, ref fail); - ExpectEqual(() => ExtractBits_ULong_Shift(0xFFFEEDCBA9876543UL), 0x261D95UL, ref fail); - ExpectEqual(() => ExtractBits_ULong_Shift_Multiple(0xFFFEEDCBA9876543UL), 0x1107F6EUL, ref fail); - ExpectEqual(() => ExtractBits_ULong_Shift_Non_Continous_Mask(0xFFFEEDCBA9876543UL), 0x8UL, ref fail); - ExpectEqual(() => ExtractBits_ULong_Shift_Mask0xFF(0xFFFEEDCBA9876543UL), 0x95UL, ref fail); - ExpectEqual(() => ExtractBits_ULong_Shift_Mask0xFFFF(0xFFFEEDCBA9876543UL), 0x1D95UL, ref fail); - ExpectEqual(() => ExtractBits_ULong_Shift_Mask0xFFFFFFFF(0xFFFEEDCBA9876543UL), 0xCBA98765, ref fail); + // Int + if (ExtractBits_Int_NoShift(0x7F654321) != 0x1) + { + fail = true; + } + + if (ExtractBits_Int_Shift(0x7F654321) != 0xC) + { + fail = true; + } + + if (ExtractBits_Int_Shift_Multiple(0x7F654321) != 0x15C) + { + fail = true; + } + + if (ExtractBits_Int_Shift_Non_Continous_Mask(0x7F654321) != 0x43) + { + fail = true; + } + + if (ExtractBits_Int_Shift_Mask0xFF(0x7F654321) != 0xC) + { + fail = true; + } + + if (ExtractBits_Int_Shift_Mask0xFFFF(0x7F654321) != 0x950C) + { + fail = true; + } + + if (ExtractBits_Int_Shift_Mask0xF_MaxWidth(0x7F654321) != 0x7) + { + fail = true; + } + + if (ExtractBits_Int_Shift_Mask0x1F_TooWide(0x7F654321) != 0x7) + { + fail = true; + } + + // UInt + if (ExtractBits_UInt_NoShift(0xFEDCBA98u) != 0x98u) + { + fail = true; + } + + if (ExtractBits_UInt_Shift(0xFEDCBA98u) != 0x3FBu) + { + fail = true; + } + + if (ExtractBits_UInt_Shift_Multiple(0xFEDCBA98u) != 0x1A18u) + { + fail = true; + } + + if (ExtractBits_UInt_Shift_Non_Continous_Mask(0xFEDCBA98u) != 0x4u) + { + fail = true; + } + + if (ExtractBits_UInt_Shift_Mask0xFF(0xFEDCBA98u) != 0xEAu) + { + fail = true; + } + + if (ExtractBits_UInt_Shift_Mask0xFFFF(0xFEDCBA98u) != 0x72EAu) + { + fail = true; + } + + if (ExtractBits_UInt_Shift_Mask0xF_MaxWidth(0xFEDCBA98u) != 0xFu) + { + fail = true; + } + + if 
(ExtractBits_UInt_Shift_Mask0x1F_TooWide(0xFEDCBA98u) != 0xFu) + { + fail = true; + } + + // Long + if (ExtractBits_Long_NoShift(0x7FFFEDCBA9876543L) != 0x6543L) + { + fail = true; + } + + if (ExtractBits_Long_Shift(0x7FFFEDCBA9876543L) != 0x1D95L) + { + fail = true; + } + + if (ExtractBits_Long_Shift_Multiple(0x7FFFEDCBA9876543L) != 0x47F6EL) + { + fail = true; + } + + if (ExtractBits_Long_Shift_Non_Continous_Mask(0x7FFFEDCBA9876543L) != 0x14L) + { + fail = true; + } + + if (ExtractBits_Long_Shift_Mask0xFF(0x7FFFEDCBA9876543L) != 0x95L) + { + fail = true; + } + + if (ExtractBits_Long_Shift_Mask0xFFFF(0x7FFFEDCBA9876543L) != 0x1D95L) + { + fail = true; + } + + if (ExtractBits_Long_Shift_Mask0xFFFFFFFF(0x7FFFEDCBA9876543L) != 0x2EA61D95L) + { + fail = true; + } + + if (ExtractBits_Long_Shift_Mask0xFFFFFFFFFFFFFFFF(0x7FFFEDCBA9876543L) != 0x1FFFFB72EA61D95L) + { + fail = true; + } + + if (ExtractBits_Long_Shift_Mask0xF_MaxWidth(0x7FFFEDCBA9876543L) != 0x7L) + { + fail = true; + } + + if (ExtractBits_Long_Shift_Mask0x1F_TooWide(0x7FFFEDCBA9876543L) != 0x7L) + { + fail = true; + } + + // ULong + if (ExtractBits_ULong_NoShift(0xFFFEEDCBA9876543UL) != 0x3UL) + { + fail = true; + } + + if (ExtractBits_ULong_Shift(0xFFFEEDCBA9876543UL) != 0x261D95UL) + { + fail = true; + } + + if (ExtractBits_ULong_Shift_Multiple(0xFFFEEDCBA9876543UL) != 0x1107F6EUL) + { + fail = true; + } + + if (ExtractBits_ULong_Shift_Non_Continous_Mask(0xFFFEEDCBA9876543UL) != 0x8UL) + { + fail = true; + } + + if (ExtractBits_ULong_Shift_Mask0xFF(0xFFFEEDCBA9876543UL) != 0x95UL) + { + fail = true; + } + + if (ExtractBits_ULong_Shift_Mask0xFFFF(0xFFFEEDCBA9876543UL) != 0x1D95UL) + { + fail = true; + } + + if (ExtractBits_ULong_Shift_Mask0xFFFFFFFF(0xFFFEEDCBA9876543UL) != 0xCBA98765UL) + { + fail = true; + } + + if (ExtractBits_ULong_Shift_Mask0xF_MaxWidth(0xFFFEEDCBA9876543UL) != 0xFUL) + { + fail = true; + } + + if (ExtractBits_ULong_Shift_Mask0x1F_TooWide(0xFFFEEDCBA9876543UL) != 0xFUL) + { + fail = 
true; + } if (fail) { @@ -99,6 +248,21 @@ static int ExtractBits_Int_Shift_Mask0xFFFF(int x) return (int)(((uint)x >> 6) & 0xFFFF); } + [MethodImpl(MethodImplOptions.NoInlining)] + static int ExtractBits_Int_Shift_Mask0xF_MaxWidth(int x) + { + //ARM64-FULL-LINE: ubfx {{w[0-9]+}}, {{w[0-9]+}}, #28, #4 + return (x >> 28) & 0xF; + } + + [MethodImpl(MethodImplOptions.NoInlining)] + static int ExtractBits_Int_Shift_Mask0x1F_TooWide(int x) + { + //ARM64-FULL-LINE: asr {{w[0-9]+}}, {{w[0-9]+}}, #28 + //ARM64-FULL-LINE: and {{w[0-9]+}}, {{w[0-9]+}}, #31 + return (x >> 28) & 0x1F; + } + [MethodImpl(MethodImplOptions.NoInlining)] static uint ExtractBits_UInt_NoShift(uint x) { @@ -147,6 +311,21 @@ static uint ExtractBits_UInt_Shift_Mask0xFFFF(uint x) return (x >> 6) & 0xFFFF; } + [MethodImpl(MethodImplOptions.NoInlining)] + static uint ExtractBits_UInt_Shift_Mask0xF_MaxWidth(uint x) + { + //ARM64-FULL-LINE: ubfx {{w[0-9]+}}, {{w[0-9]+}}, #28, #4 + return (x >> 28) & 0xFu; + } + + [MethodImpl(MethodImplOptions.NoInlining)] + static uint ExtractBits_UInt_Shift_Mask0x1F_TooWide(uint x) + { + //ARM64-FULL-LINE: lsr {{w[0-9]+}}, {{w[0-9]+}}, #28 + //ARM64-FULL-LINE: and {{w[0-9]+}}, {{w[0-9]+}}, #31 + return (x >> 28) & 0x1Fu; + } + [MethodImpl(MethodImplOptions.NoInlining)] static long ExtractBits_Long_NoShift(long x) { @@ -209,6 +388,21 @@ static long ExtractBits_Long_Shift_Mask0xFFFFFFFFFFFFFFFF(long x) return (long)(((ulong)x >> 6) & 0xFFFFFFFFFFFFFFFFUL); } + [MethodImpl(MethodImplOptions.NoInlining)] + static long ExtractBits_Long_Shift_Mask0xF_MaxWidth(long x) + { + //ARM64-FULL-LINE: ubfx {{x[0-9]+}}, {{x[0-9]+}}, #60, #4 + return (x >> 60) & 0xF; + } + + [MethodImpl(MethodImplOptions.NoInlining)] + static long ExtractBits_Long_Shift_Mask0x1F_TooWide(long x) + { + //ARM64-FULL-LINE: asr {{x[0-9]+}}, {{x[0-9]+}}, #60 + //ARM64-FULL-LINE: and {{x[0-9]+}}, {{x[0-9]+}}, #31 + return (x >> 60) & 0x1F; + } + [MethodImpl(MethodImplOptions.NoInlining)] static ulong 
ExtractBits_ULong_NoShift(ulong x) { @@ -263,5 +457,20 @@ static ulong ExtractBits_ULong_Shift_Mask0xFFFFFFFF(ulong x) //ARM64-FULL-LINE: lsr {{x[0-9]+}}, {{x[0-9]+}}, #8 return (x >> 8) & 0xFFFFFFFFUL; } + + [MethodImpl(MethodImplOptions.NoInlining)] + static ulong ExtractBits_ULong_Shift_Mask0xF_MaxWidth(ulong x) + { + //ARM64-FULL-LINE: ubfx {{x[0-9]+}}, {{x[0-9]+}}, #60, #4 + return (x >> 60) & 0xFu; + } + + [MethodImpl(MethodImplOptions.NoInlining)] + static ulong ExtractBits_ULong_Shift_Mask0x1F_TooWide(ulong x) + { + //ARM64-FULL-LINE: lsr {{x[0-9]+}}, {{x[0-9]+}}, #60 + //ARM64-FULL-LINE: and {{x[0-9]+}}, {{x[0-9]+}}, #31 + return (x >> 60) & 0x1Fu; + } } } diff --git a/src/tests/JIT/opt/InstructionCombining/Bfx.csproj b/src/tests/JIT/opt/InstructionCombining/Bfx.csproj index 48ceb6f3570323..7642d55e5024a0 100644 --- a/src/tests/JIT/opt/InstructionCombining/Bfx.csproj +++ b/src/tests/JIT/opt/InstructionCombining/Bfx.csproj @@ -13,6 +13,5 @@ - From 4c4f3b35a278944555873999e7d52644dd5eca66 Mon Sep 17 00:00:00 2001 From: Jonathan Davies Date: Thu, 5 Feb 2026 12:33:49 +0000 Subject: [PATCH 04/13] Rename comp to m_compiler --- src/coreclr/jit/lower.cpp | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/src/coreclr/jit/lower.cpp b/src/coreclr/jit/lower.cpp index 8a495da2a2f6b2..74f9e2ec84d674 100644 --- a/src/coreclr/jit/lower.cpp +++ b/src/coreclr/jit/lower.cpp @@ -12043,7 +12043,7 @@ bool Lowering::TryLowerOrToBFI(GenTreeOp* tree, GenTree** next) { assert(tree->OperIs(GT_OR)); - if (!comp->opts.OptimizationEnabled()) + if (!m_compiler->opts.OptimizationEnabled()) { return false; } @@ -12135,8 +12135,8 @@ bool Lowering::TryLowerOrToBFI(GenTreeOp* tree, GenTree** next) var_types ty = genActualType(tree->TypeGet()); GenTreeBfm* bfm = - comp->gtNewBfiNode(ty, bfiPattern.base, bfiPattern.value, static_cast(bfiPattern.offset), - static_cast(bfiPattern.width)); + m_compiler->gtNewBfiNode(ty, bfiPattern.base, bfiPattern.value, 
static_cast(bfiPattern.offset), + static_cast(bfiPattern.width)); bfm->CopyCosts(tree); ContainCheckNode(bfm); @@ -12348,7 +12348,7 @@ bool Lowering::TryLowerOrToBFX(GenTreeOp* tree, GenTree** next) { assert(tree->OperIs(GT_AND)); - if (!comp->opts.OptimizationEnabled()) + if (!m_compiler->opts.OptimizationEnabled()) { return false; } @@ -12383,7 +12383,8 @@ bool Lowering::TryLowerOrToBFX(GenTreeOp* tree, GenTree** next) return false; } - GenTreeBfm* bfm = comp->gtNewBfxNode(ty, shiftVar, static_cast(offset), static_cast(width)); + GenTreeBfm* bfm = + m_compiler->gtNewBfxNode(ty, shiftVar, static_cast(offset), static_cast(width)); bfm->CopyCosts(tree); ContainCheckNode(bfm); From 17dab935822828de835ce795842ab3c094dd6ec7 Mon Sep 17 00:00:00 2001 From: Jonathan Davies Date: Tue, 24 Feb 2026 10:39:32 +0000 Subject: [PATCH 05/13] Handle TryGetUse() correctly for a BFX node --- src/coreclr/jit/gentree.cpp | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/src/coreclr/jit/gentree.cpp b/src/coreclr/jit/gentree.cpp index 88004c7e130b3e..b8c1f6bc5c033e 100644 --- a/src/coreclr/jit/gentree.cpp +++ b/src/coreclr/jit/gentree.cpp @@ -6899,6 +6899,16 @@ bool GenTree::TryGetUse(GenTree* operand, GenTree*** pUse) return false; #endif // FEATURE_HW_INTRINSICS +#ifdef TARGET_ARM64 + case GT_BFX: + if (operand == this->AsOp()->gtOp1) + { + *pUse = &this->AsOp()->gtOp1; + return true; + } + return false; +#endif + // Special nodes case GT_PHI: for (GenTreePhi::Use& phiUse : AsPhi()->Uses()) From 68b5f230cf545d296956524464e94b988b8b8b02 Mon Sep 17 00:00:00 2001 From: Jonathan Davies Date: Wed, 11 Mar 2026 14:43:14 +0000 Subject: [PATCH 06/13] Remove gtFlags &= ~GTF_SET_FLAGS Change-Id: I6ed28f62956404fb7d803c20bd3d1f8113996e14 --- src/coreclr/jit/gentree.cpp | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/coreclr/jit/gentree.cpp b/src/coreclr/jit/gentree.cpp index b8c1f6bc5c033e..08960a571b878e 100644 --- a/src/coreclr/jit/gentree.cpp +++ b/src/coreclr/jit/gentree.cpp 
@@ -7830,7 +7830,6 @@ GenTreeBfm* Compiler::gtNewBfiNode(var_types type, GenTree* base, GenTree* src, { GenTreeBfm* result = new (this, GT_BFI) GenTreeBfm(GT_BFI, type, base, src, offset, width); result->gtFlags |= (base->gtFlags | src->gtFlags) & (GTF_ALL_EFFECT); - result->gtFlags &= ~GTF_SET_FLAGS; return result; } @@ -7838,7 +7837,6 @@ GenTreeBfm* Compiler::gtNewBfxNode(var_types type, GenTree* base, unsigned offse { GenTreeBfm* result = new (this, GT_BFX) GenTreeBfm(GT_BFX, type, base, nullptr, offset, width); result->gtFlags |= (base->gtFlags & GTF_ALL_EFFECT); - result->gtFlags &= ~GTF_SET_FLAGS; return result; } #endif From 31f1051597d0d571aca81505f669fc82404eeb8e Mon Sep 17 00:00:00 2001 From: Jonathan Davies Date: Wed, 11 Mar 2026 14:44:55 +0000 Subject: [PATCH 07/13] Change genCodeForBfi to take a GenTreeBfm Change-Id: Ib707d9ca89d85e66f7d37ad593a95048f7ff7ced --- src/coreclr/jit/codegen.h | 2 +- src/coreclr/jit/codegenarm64.cpp | 10 ++++------ src/coreclr/jit/codegenarmarch.cpp | 2 +- 3 files changed, 6 insertions(+), 8 deletions(-) diff --git a/src/coreclr/jit/codegen.h b/src/coreclr/jit/codegen.h index 822ef9d99b9d46..98ab0d19c0ef3c 100644 --- a/src/coreclr/jit/codegen.h +++ b/src/coreclr/jit/codegen.h @@ -1261,7 +1261,7 @@ class CodeGen final : public CodeGenInterface void genCodeForJumpCompare(GenTreeOpCC* tree); void genCompareImmAndJump( GenCondition::Code cond, regNumber reg, ssize_t compareImm, emitAttr size, BasicBlock* target); - void genCodeForBfi(GenTreeOp* tree); + void genCodeForBfi(GenTreeBfm* tree); void genCodeForBfiz(GenTreeOp* tree); void genCodeForBfx(GenTreeOp* tree); #endif // TARGET_ARM64 diff --git a/src/coreclr/jit/codegenarm64.cpp b/src/coreclr/jit/codegenarm64.cpp index 79520dfc2621a9..16d99aff47d9e4 100644 --- a/src/coreclr/jit/codegenarm64.cpp +++ b/src/coreclr/jit/codegenarm64.cpp @@ -5848,22 +5848,20 @@ void CodeGen::instGen_MemoryBarrier(BarrierKind barrierKind) // Arguments: // tree - the bitfield insert. 
// -void CodeGen::genCodeForBfi(GenTreeOp* tree) +void CodeGen::genCodeForBfi(GenTreeBfm* tree) { assert(tree->OperIs(GT_BFI)); - GenTreeBfm* bfm = tree->AsBfm(); - emitAttr size = emitActualTypeSize(tree); unsigned regBits = emitter::getBitWidth(size); GenTree* base = tree->gtGetOp1(); GenTree* src = tree->gtGetOp2(); - genConsumeOperands(bfm); + genConsumeOperands(tree); - unsigned offset = bfm->GetOffset(); - unsigned width = bfm->GetWidth(); + unsigned offset = tree->GetOffset(); + unsigned width = tree->GetWidth(); assert(width >= 1 && width <= regBits); assert(offset < regBits && (offset + width) <= regBits); diff --git a/src/coreclr/jit/codegenarmarch.cpp b/src/coreclr/jit/codegenarmarch.cpp index 7f2a1c8f824a39..983e00b1626b78 100644 --- a/src/coreclr/jit/codegenarmarch.cpp +++ b/src/coreclr/jit/codegenarmarch.cpp @@ -330,7 +330,7 @@ void CodeGen::genCodeForTreeNode(GenTree* treeNode) break; case GT_BFI: - genCodeForBfi(treeNode->AsOp()); + genCodeForBfi(treeNode->AsBfm()); break; case GT_BFIZ: From 543d2de15fa01cebcc4165c754c70f27601bd9b5 Mon Sep 17 00:00:00 2001 From: Jonathan Davies Date: Wed, 11 Mar 2026 14:50:06 +0000 Subject: [PATCH 08/13] Change genCodeForBfx to take a GenTreeBfm Change-Id: Ie4d17eabc9710c88927be211863b07d4e2933650 --- src/coreclr/jit/codegen.h | 2 +- src/coreclr/jit/codegenarm64.cpp | 9 ++++----- src/coreclr/jit/codegenarmarch.cpp | 2 +- 3 files changed, 6 insertions(+), 7 deletions(-) diff --git a/src/coreclr/jit/codegen.h b/src/coreclr/jit/codegen.h index 98ab0d19c0ef3c..920315106a2718 100644 --- a/src/coreclr/jit/codegen.h +++ b/src/coreclr/jit/codegen.h @@ -1263,7 +1263,7 @@ class CodeGen final : public CodeGenInterface GenCondition::Code cond, regNumber reg, ssize_t compareImm, emitAttr size, BasicBlock* target); void genCodeForBfi(GenTreeBfm* tree); void genCodeForBfiz(GenTreeOp* tree); - void genCodeForBfx(GenTreeOp* tree); + void genCodeForBfx(GenTreeBfm* tree); #endif // TARGET_ARM64 void genEHCatchRet(BasicBlock* block); 
diff --git a/src/coreclr/jit/codegenarm64.cpp b/src/coreclr/jit/codegenarm64.cpp index 16d99aff47d9e4..752ab41947a761 100644 --- a/src/coreclr/jit/codegenarm64.cpp +++ b/src/coreclr/jit/codegenarm64.cpp @@ -5905,18 +5905,17 @@ void CodeGen::genCodeForBfiz(GenTreeOp* tree) // Arguments: // tree - the bitfield extract. // -void CodeGen::genCodeForBfx(GenTreeOp* tree) +void CodeGen::genCodeForBfx(GenTreeBfm* tree) { assert(tree->OperIs(GT_BFX)); - GenTreeBfm* bfm = tree->AsBfm(); - emitAttr size = emitActualTypeSize(tree); + emitAttr size = emitActualTypeSize(tree); GenTree* src = tree->gtGetOp1(); const unsigned bitWidth = emitter::getBitWidth(size); - const unsigned lsb = bfm->GetOffset(); - const unsigned width = bfm->GetWidth(); + const unsigned lsb = tree->GetOffset(); + const unsigned width = tree->GetWidth(); assert((bitWidth == 32) || (bitWidth == 64)); assert(lsb < bitWidth); diff --git a/src/coreclr/jit/codegenarmarch.cpp b/src/coreclr/jit/codegenarmarch.cpp index 983e00b1626b78..062b39dcfc5299 100644 --- a/src/coreclr/jit/codegenarmarch.cpp +++ b/src/coreclr/jit/codegenarmarch.cpp @@ -338,7 +338,7 @@ void CodeGen::genCodeForTreeNode(GenTree* treeNode) break; case GT_BFX: - genCodeForBfx(treeNode->AsOp()); + genCodeForBfx(treeNode->AsBfm()); break; #endif // TARGET_ARM64 From 9e760c709ad889d985feb337630612842be9e33f Mon Sep 17 00:00:00 2001 From: Jonathan Davies Date: Wed, 11 Mar 2026 14:53:49 +0000 Subject: [PATCH 09/13] Remove copying costs Change-Id: I08b0a2e4515a0fb445f646d545648e180ebdb893 --- src/coreclr/jit/lower.cpp | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/coreclr/jit/lower.cpp b/src/coreclr/jit/lower.cpp index 18cb1145672141..a1f302b23da241 100644 --- a/src/coreclr/jit/lower.cpp +++ b/src/coreclr/jit/lower.cpp @@ -12223,7 +12223,6 @@ bool Lowering::TryLowerOrToBFI(GenTreeOp* tree, GenTree** next) GenTreeBfm* bfm = m_compiler->gtNewBfiNode(ty, bfiPattern.base, bfiPattern.value, static_cast(bfiPattern.offset), 
static_cast(bfiPattern.width)); - bfm->CopyCosts(tree); ContainCheckNode(bfm); @@ -12471,7 +12470,6 @@ bool Lowering::TryLowerOrToBFX(GenTreeOp* tree, GenTree** next) GenTreeBfm* bfm = m_compiler->gtNewBfxNode(ty, shiftVar, static_cast(offset), static_cast(width)); - bfm->CopyCosts(tree); ContainCheckNode(bfm); From 95b5ab2d04b40dc9ea7e16777682b6353343d156 Mon Sep 17 00:00:00 2001 From: Jonathan Davies Date: Wed, 11 Mar 2026 14:55:53 +0000 Subject: [PATCH 10/13] Set bfm to unused value if without users Change-Id: I015989fa1653edf79fb494ec727ca12555389420 --- src/coreclr/jit/lower.cpp | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/src/coreclr/jit/lower.cpp b/src/coreclr/jit/lower.cpp index a1f302b23da241..d39e44cec4ef0d 100644 --- a/src/coreclr/jit/lower.cpp +++ b/src/coreclr/jit/lower.cpp @@ -12233,6 +12233,10 @@ bool Lowering::TryLowerOrToBFI(GenTreeOp* tree, GenTree** next) { use.ReplaceWith(bfm); } + else + { + bfm->SetUnusedValue(); + } // Remove old nodes depending on pattern kind switch (bfiPattern.kind) @@ -12480,6 +12484,10 @@ bool Lowering::TryLowerOrToBFX(GenTreeOp* tree, GenTree** next) { use.ReplaceWith(bfm); } + else + { + bfm->SetUnusedValue(); + } BlockRange().Remove(shiftConst); BlockRange().Remove(shift); From 3fb9771a4c8361790e7d0af227000ce02ecc5909 Mon Sep 17 00:00:00 2001 From: Jonathan Davies Date: Thu, 12 Mar 2026 08:05:35 +0000 Subject: [PATCH 11/13] Add IsInvariantInRange check Change-Id: I00caf785bfe1fb9ac1a56ea1f13248084200608d --- src/coreclr/jit/lower.cpp | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/coreclr/jit/lower.cpp b/src/coreclr/jit/lower.cpp index d39e44cec4ef0d..04633dbd49a4f1 100644 --- a/src/coreclr/jit/lower.cpp +++ b/src/coreclr/jit/lower.cpp @@ -12140,6 +12140,11 @@ bool Lowering::TryLowerOrToBFI(GenTreeOp* tree, GenTree** next) return false; } + if (!IsInvariantInRange(bfiPattern.value, tree)) + { + return false; + } + unsigned regBits = genTypeSize(tree) * BITS_PER_BYTE; uint64_t regMask = (regBits 
== 64) ? UINT64_MAX : ((1ull << regBits) - 1); From 432e49c8b5087f7398b08bbf99ae877fd1f31fc2 Mon Sep 17 00:00:00 2001 From: Jonathan Davies Date: Tue, 17 Mar 2026 14:44:16 +0000 Subject: [PATCH 12/13] Fix formatting --- src/coreclr/jit/lower.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/coreclr/jit/lower.cpp b/src/coreclr/jit/lower.cpp index 04633dbd49a4f1..b60cebcbf9033b 100644 --- a/src/coreclr/jit/lower.cpp +++ b/src/coreclr/jit/lower.cpp @@ -12142,7 +12142,7 @@ bool Lowering::TryLowerOrToBFI(GenTreeOp* tree, GenTree** next) if (!IsInvariantInRange(bfiPattern.value, tree)) { - return false; + return false; } unsigned regBits = genTypeSize(tree) * BITS_PER_BYTE; From aa1f1496c6a03a5e3443d3d379f36bcb91f32a26 Mon Sep 17 00:00:00 2001 From: Jonathan Davies Date: Tue, 17 Mar 2026 15:14:59 +0000 Subject: [PATCH 13/13] Fix visitor() change --- src/coreclr/jit/compiler.hpp | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/coreclr/jit/compiler.hpp b/src/coreclr/jit/compiler.hpp index d4975d6eb33724..41284e3902356d 100644 --- a/src/coreclr/jit/compiler.hpp +++ b/src/coreclr/jit/compiler.hpp @@ -4561,10 +4561,9 @@ GenTree::VisitResult GenTree::VisitOperandUses(TVisitor visitor) #ifdef TARGET_ARM64 case GT_BFX: { - GenTree* op1 = gtGetOp1(); - if (op1 != nullptr) + if (AsOp()->gtOp1 != nullptr) { - return visitor(op1); + RETURN_IF_ABORT(visitor(&AsOp()->gtOp1)); } return VisitResult::Continue; }