From 5715615daa97377d37e35094e1f16d7fd8f882f4 Mon Sep 17 00:00:00 2001 From: Alan Hayward Date: Wed, 28 Feb 2024 14:25:51 +0000 Subject: [PATCH 1/4] ARM64-SVE: Implement IF_SVE_BV_2A --- src/coreclr/jit/codegenarm64test.cpp | 40 +++++++++++ src/coreclr/jit/emitarm64.cpp | 102 ++++++++++++++++++++++++--- src/coreclr/jit/emitarm64.h | 6 ++ 3 files changed, 137 insertions(+), 11 deletions(-) diff --git a/src/coreclr/jit/codegenarm64test.cpp b/src/coreclr/jit/codegenarm64test.cpp index 3844d96a7fa077..71d03d62ad6e40 100644 --- a/src/coreclr/jit/codegenarm64test.cpp +++ b/src/coreclr/jit/codegenarm64test.cpp @@ -5365,6 +5365,46 @@ void CodeGen::genArm64EmitterUnitTestsSve() theEmitter->emitIns_R_R_R(INS_sve_zip2, EA_SCALABLE, REG_V15, REG_V16, REG_V17, INS_OPTS_SCALABLE_Q, INS_SCALABLE_OPTS_UNPREDICATED); // ZIP2 .Q, .Q, .Q + // IF_SVE_BV_2A + theEmitter->emitIns_R_R_I(INS_sve_cpy, EA_SCALABLE, REG_V15, REG_P5, 0, + INS_OPTS_SCALABLE_B); // CPY ., /Z, #{, } + theEmitter->emitIns_R_R_I(INS_sve_cpy, EA_SCALABLE, REG_V5, REG_P15, 27, + INS_OPTS_SCALABLE_B); // CPY ., /Z, #{, } + theEmitter->emitIns_R_R_I(INS_sve_cpy, EA_SCALABLE, REG_V31, REG_P0, -128, + INS_OPTS_SCALABLE_B); // CPY ., /Z, #{, } + theEmitter->emitIns_R_R_I(INS_sve_mov, EA_SCALABLE, REG_V0, REG_P5, 127, + INS_OPTS_SCALABLE_B); // MOV ., /Z, #{, } + + theEmitter->emitIns_R_R_I(INS_sve_cpy, EA_SCALABLE, REG_V15, REG_P5, 0, + INS_OPTS_SCALABLE_H); // CPY ., /Z, #{, } + theEmitter->emitIns_R_R_I(INS_sve_mov, EA_SCALABLE, REG_V23, REG_P12, 10, + INS_OPTS_SCALABLE_S); // MOV ., /Z, #{, } + theEmitter->emitIns_R_R_I(INS_sve_cpy, EA_SCALABLE, REG_V4, REG_P0, -128, + INS_OPTS_SCALABLE_D); // CPY ., /Z, #{, } + theEmitter->emitIns_R_R_I(INS_sve_mov, EA_SCALABLE, REG_V19, REG_P15, 127, + INS_OPTS_SCALABLE_H); // MOV ., /Z, #{, } + + theEmitter->emitIns_R_R_I(INS_sve_cpy, EA_SCALABLE, REG_V1, REG_P0, 256, + INS_OPTS_SCALABLE_S); // CPY ., /Z, #{, } + theEmitter->emitIns_R_R_I(INS_sve_cpy, EA_SCALABLE, REG_V1, REG_P0, 3072, + INS_OPTS_SCALABLE_D); // CPY ., /Z, #{, } + theEmitter->emitIns_R_R_I(INS_sve_cpy, EA_SCALABLE, REG_V1, REG_P0, -3072, + INS_OPTS_SCALABLE_H); // CPY ., /Z, #{, } + theEmitter->emitIns_R_R_I(INS_sve_cpy, EA_SCALABLE, REG_V1, REG_P0, -32768, + INS_OPTS_SCALABLE_S); // CPY ., /Z, #{, } + theEmitter->emitIns_R_R_I(INS_sve_mov, EA_SCALABLE, REG_P0, REG_V0, 32512, + INS_OPTS_SCALABLE_D); // MOV ., /Z, #{, } + + // IF_SVE_BV_2A_A + theEmitter->emitIns_R_R_I(INS_sve_cpy, EA_SCALABLE, REG_V1, REG_P12, 5, INS_OPTS_SCALABLE_B, + INS_SCALABLE_OPTS_PREDICATE_MERGE); // CPY ., /M, #{, } + + // IF_SVE_BV_2A_J + theEmitter->emitIns_R_R_I(INS_sve_mov, EA_SCALABLE, REG_V27, REG_P13, 5632, INS_OPTS_SCALABLE_H, + INS_SCALABLE_OPTS_PREDICATE_MERGE); // MOV ., /M, #{, } + theEmitter->emitIns_R_R_I(INS_sve_mov, EA_SCALABLE, REG_V27, REG_P13, -5632, INS_OPTS_SCALABLE_H, + INS_SCALABLE_OPTS_PREDICATE_MERGE); // MOV ., /M, #{, } + // IF_SVE_BZ_3A theEmitter->emitIns_R_R_R(INS_sve_tbl, EA_SCALABLE, REG_V0, REG_V1, REG_V2, INS_OPTS_SCALABLE_B); // TBL ., {.}, . diff --git a/src/coreclr/jit/emitarm64.cpp b/src/coreclr/jit/emitarm64.cpp index 5a98ac3d07e279..9458c68739c55c 100644 --- a/src/coreclr/jit/emitarm64.cpp +++ b/src/coreclr/jit/emitarm64.cpp @@ -1127,6 +1127,18 @@ void emitter::emitInsSanityCheck(instrDesc* id) assert(isValidUimm4From1(emitGetInsSC(id))); break; + case IF_SVE_BV_2A: // ........xx..gggg ..hiiiiiiiiddddd -- SVE copy integer immediate (predicated) + case IF_SVE_BV_2A_A: // ........xx..gggg ..hiiiiiiiiddddd -- SVE copy integer immediate (predicated) + case IF_SVE_BV_2A_J: // ........xx..gggg ..hiiiiiiiiddddd -- SVE copy integer immediate (predicated) + assert(insOptsScalableStandard(id->idInsOpt())); // xx + // Size specifier must be able to fit left-shifted immediate + assert(insOptsScalableAtLeastHalf(id->idInsOpt()) || !id->idOptionalShift()); + assert(isVectorRegister(id->idReg1())); // ddddd + assert(isPredicateRegister(id->idReg2())); // gggg + assert(isValidSimm8(emitGetInsSC(id))); // iiiiiiii + assert(isValidVectorElemsize(optGetSveElemsize(id->idInsOpt()))); // xx + break; + case IF_SVE_CI_3A: // ........xx..MMMM .......NNNN.DDDD -- SVE permute predicate elements elemsize = id->idOpSize(); assert(insOptsScalableStandard(id->idInsOpt())); @@ -9154,16 +9166,18 @@ void emitter::emitIns_R_R_I(instruction ins, insOpts opt /* = INS_OPTS_NONE */, insScalableOpts sopt /* = INS_SCALABLE_OPTS_NONE */) { - emitAttr size = EA_SIZE(attr); - emitAttr elemsize = EA_UNKNOWN; - insFormat fmt = IF_NONE; - bool isLdSt = false; - bool isLdrStr = false; - bool isSIMD = false; - bool isAddSub = false; - bool setFlags = false; - unsigned scale = 0; - bool unscaledOp = false; + emitAttr size = EA_SIZE(attr); + emitAttr elemsize = EA_UNKNOWN; + insFormat fmt = IF_NONE; + bool isLdSt = false; + bool isLdrStr = false; + bool isSIMD = false; + bool isAddSub = false; + bool setFlags = false; + unsigned scale = 0; + bool unscaledOp = false; + bool optionalShift = false; + bool hasShift = false; /* Figure out the encoding format of the instruction */ switch (ins) @@ -9746,6 +9760,31 @@ void emitter::emitIns_R_R_I(instruction ins, } break; + case INS_sve_mov: + case INS_sve_cpy: + optionalShift = true; + assert(insOptsScalableStandard(opt)); + assert(isVectorRegister(reg1)); // DDDDD + assert(isPredicateRegister(reg2)); // GGGG + if (!isValidSimm8(imm)) + { + assert(isValidSimm8_MultipleOf256(imm)); + assert(insOptsScalableAtLeastHalf(opt)); + hasShift = true; + imm = imm / 256; + } + if (sopt == INS_SCALABLE_OPTS_PREDICATE_MERGE) + { + fmt = IF_SVE_BV_2A_J; + } + else + { + fmt = IF_SVE_BV_2A; + } + // MOV is an alias for CPY, and is always the preferred disassembly. + ins = INS_sve_mov; + break; + case INS_sve_sqrshrn: case INS_sve_sqrshrun: case INS_sve_uqrshrn: @@ -10072,7 +10111,18 @@ void emitter::emitIns_R_R_I(instruction ins, assert(fmt != IF_NONE); - instrDesc* id = emitNewInstrSC(attr, imm); + instrDesc* id; + + if (!optionalShift) + { + id = emitNewInstrSC(attr, imm); + } + else + { + // Instructions with optional shifts (MOV, DUP, etc.) need larger instrDesc to store state + id = emitNewInstrCns(attr, imm); + id->idOptionalShift(hasShift); + } id->idIns(ins); id->idInsFmt(fmt); @@ -23578,6 +23628,19 @@ BYTE* emitter::emitOutput_InstrSve(BYTE* dst, instrDesc* id) dst += emitOutput_Instr(dst, code); break; + case IF_SVE_BV_2A: // ........xx..gggg ..hiiiiiiiiddddd -- SVE copy integer immediate (predicated) + case IF_SVE_BV_2A_A: // ........xx..gggg ..hiiiiiiiiddddd -- SVE copy integer immediate (predicated) + case IF_SVE_BV_2A_J: // ........xx..gggg ..hiiiiiiiiddddd -- SVE copy integer immediate (predicated) + imm = emitGetInsSC(id); + code = emitInsCodeSve(ins, fmt); + code |= insEncodeReg_V_4_to_0(id->idReg1()); // ddddd + code |= insEncodeReg_P_19_to_16(id->idReg2()); // gggg + code |= insEncodeImm8_12_to_5(imm); // iiiiiiii + code |= (id->idOptionalShift() ? 0x2000 : 0); // h + code |= insEncodeElemsize(optGetSveElemsize(id->idInsOpt())); // xx + dst += emitOutput_Instr(dst, code); + break; + case IF_SVE_CI_3A: // ........xx..MMMM .......NNNN.DDDD -- SVE permute predicate elements code = emitInsCodeSve(ins, fmt); code |= insEncodeReg_P_3_to_0(id->idReg1()); // DDDD @@ -28867,6 +28930,16 @@ void emitter::emitDispInsHelp( emitDispImm(imm, false); break; + // ., /Z, #{, } + case IF_SVE_BV_2A: // ........xx..gggg ..hiiiiiiiiddddd -- SVE copy integer immediate (predicated) + case IF_SVE_BV_2A_A: // ........xx..gggg ..hiiiiiiiiddddd -- SVE copy integer immediate (predicated) + case IF_SVE_BV_2A_J: // ........xx..gggg ..hiiiiiiiiddddd -- SVE copy integer immediate (predicated) + imm = emitGetInsSC(id); + emitDispSveReg(id->idReg1(), id->idInsOpt(), true); // ddddd + emitDispPredicateReg(id->idReg2(), insGetPredicateType(fmt), id->idInsOpt(), true); // gggg + emitDispImmOptsLSL(emitGetInsSC(id), id->idOptionalShift(), 8); // iiiiiiii, h + break; + default: printf("unexpected format %s", emitIfName(id->idInsFmt())); assert(!"unexpectedFormat"); @@ -31303,6 +31376,13 @@ emitter::insExecutionCharacteristics emitter::getInsExecutionCharacteristics(ins result.insLatency = PERFSCORE_LATENCY_2C; break; + case IF_SVE_BV_2A: // ........xx..gggg ..hiiiiiiiiddddd -- SVE copy integer immediate (predicated) + case IF_SVE_BV_2A_A: // ........xx..gggg ..hiiiiiiiiddddd -- SVE copy integer immediate (predicated) + case IF_SVE_BV_2A_J: // ........xx..gggg ..hiiiiiiiiddddd -- SVE copy integer immediate (predicated) + result.insThroughput = PERFSCORE_THROUGHPUT_2C; + result.insLatency = PERFSCORE_LATENCY_2C; + break; + case IF_SVE_CI_3A: // ........xx..MMMM .......NNNN.DDDD -- SVE permute predicate elements case IF_SVE_CJ_2A: // ........xx...... .......NNNN.DDDD -- SVE reverse predicate elements case IF_SVE_CK_2A: // ................ .......NNNN.DDDD -- SVE unpack predicate elements diff --git a/src/coreclr/jit/emitarm64.h b/src/coreclr/jit/emitarm64.h index a5dafc0c12dfd1..bc652bee454bbe 100644 --- a/src/coreclr/jit/emitarm64.h +++ b/src/coreclr/jit/emitarm64.h @@ -860,6 +860,12 @@ static bool isValidSimm8(ssize_t value) return (-0x80 <= value) && (value <= 0x7F); }; +// Returns true if 'value' is a legal signed multiple of 256 immediate 8 bit encoding (such as for MOV). +static bool isValidSimm8_MultipleOf256(ssize_t value) +{ + return (-0x8000 <= value) && (value <= 0x7f00) && (value % 256 == 0); +}; + // Returns true if 'value' is a legal unsigned immediate 12 bit encoding (such as for CMP, CMN). static bool isValidUimm12(ssize_t value) { From 2f3f38f4b6075df9f06455cc508a1dd6df24dbb5 Mon Sep 17 00:00:00 2001 From: Alan Hayward Date: Wed, 28 Feb 2024 15:37:36 +0000 Subject: [PATCH 2/4] Add TODO --- src/coreclr/jit/instr.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/coreclr/jit/instr.h b/src/coreclr/jit/instr.h index 3120b2ac87fc60..b9771edf169222 100644 --- a/src/coreclr/jit/instr.h +++ b/src/coreclr/jit/instr.h @@ -370,6 +370,8 @@ enum insScalableOpts : unsigned INS_SCALABLE_OPTS_WITH_PREDICATE_PAIR, // Variants with {., .} predicate pair (eg whilege) INS_SCALABLE_OPTS_VL_2X, // Variants with a vector length specifier of 2x (eg whilege) INS_SCALABLE_OPTS_VL_4X, // Variants with a vector length specifier of 4x (eg whilege) + + // TODO-SVE: Remove and pass the full immediate value instead. INS_SCALABLE_OPTS_SHIFT, // Variants with an optional shift operation (eg dup) INS_SCALABLE_OPTS_LSL_N, // Variants with a LSL #N (eg {.}, , [, , LSL #2]) From 1404a78b6f093a3839cf3b9389303a6192a71529 Mon Sep 17 00:00:00 2001 From: Alan Hayward Date: Thu, 29 Feb 2024 10:48:33 +0000 Subject: [PATCH 3/4] Add sopt assert --- src/coreclr/jit/emitarm64.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/coreclr/jit/emitarm64.cpp b/src/coreclr/jit/emitarm64.cpp index 9458c68739c55c..38f6024b48c810 100644 --- a/src/coreclr/jit/emitarm64.cpp +++ b/src/coreclr/jit/emitarm64.cpp @@ -9779,6 +9779,7 @@ void emitter::emitIns_R_R_I(instruction ins, } else { + assert(sopt == INS_SCALABLE_OPTS_NONE); fmt = IF_SVE_BV_2A; } // MOV is an alias for CPY, and is always the preferred disassembly. From df49130940d6513137091a1de484b2815c66f167 Mon Sep 17 00:00:00 2001 From: Alan Hayward Date: Fri, 1 Mar 2024 17:24:30 +0000 Subject: [PATCH 4/4] Remove IF_SVE_BV_2A_A handling --- src/coreclr/jit/emitarm64.cpp | 4 ---- 1 file changed, 4 deletions(-) diff --git a/src/coreclr/jit/emitarm64.cpp b/src/coreclr/jit/emitarm64.cpp index 0d86ff79e37f02..3b52f49e689837 100644 --- a/src/coreclr/jit/emitarm64.cpp +++ b/src/coreclr/jit/emitarm64.cpp @@ -1145,7 +1145,6 @@ void emitter::emitInsSanityCheck(instrDesc* id) break; case IF_SVE_BV_2A: // ........xx..gggg ..hiiiiiiiiddddd -- SVE copy integer immediate (predicated) - case IF_SVE_BV_2A_A: // ........xx..gggg ..hiiiiiiiiddddd -- SVE copy integer immediate (predicated) case IF_SVE_BV_2A_J: // ........xx..gggg ..hiiiiiiiiddddd -- SVE copy integer immediate (predicated) assert(insOptsScalableStandard(id->idInsOpt())); // xx // Size specifier must be able to fit left-shifted immediate @@ -24156,7 +24155,6 @@ BYTE* emitter::emitOutput_InstrSve(BYTE* dst, instrDesc* id) break; case IF_SVE_BV_2A: // ........xx..gggg ..hiiiiiiiiddddd -- SVE copy integer immediate (predicated) - case IF_SVE_BV_2A_A: // ........xx..gggg ..hiiiiiiiiddddd -- SVE copy integer immediate (predicated) case IF_SVE_BV_2A_J: // ........xx..gggg ..hiiiiiiiiddddd -- SVE copy integer immediate (predicated) imm = emitGetInsSC(id); code = emitInsCodeSve(ins, fmt); @@ -29710,7 +29708,6 @@ void emitter::emitDispInsHelp( // ., /Z, #{, } case IF_SVE_BV_2A: // ........xx..gggg ..hiiiiiiiiddddd -- SVE copy integer immediate (predicated) - case IF_SVE_BV_2A_A: // ........xx..gggg ..hiiiiiiiiddddd -- SVE copy integer immediate (predicated) case IF_SVE_BV_2A_J: // ........xx..gggg ..hiiiiiiiiddddd -- SVE copy integer immediate (predicated) imm = emitGetInsSC(id); emitDispSveReg(id->idReg1(), id->idInsOpt(), true); // ddddd @@ -32252,7 +32249,6 @@ emitter::insExecutionCharacteristics emitter::getInsExecutionCharacteristics(ins break; case IF_SVE_BV_2A: // ........xx..gggg ..hiiiiiiiiddddd -- SVE copy integer immediate (predicated) - case IF_SVE_BV_2A_A: // ........xx..gggg ..hiiiiiiiiddddd -- SVE copy integer immediate (predicated) case IF_SVE_BV_2A_J: // ........xx..gggg ..hiiiiiiiiddddd -- SVE copy integer immediate (predicated) result.insThroughput = PERFSCORE_THROUGHPUT_2C; result.insLatency = PERFSCORE_LATENCY_2C;