From 53e90f9a760db6418d34d515eecca1cb48102e93 Mon Sep 17 00:00:00 2001 From: "Aman Khalid (from Dev Box)" Date: Sun, 3 Mar 2024 20:32:10 -0500 Subject: [PATCH 1/9] Add BO_1A --- src/coreclr/jit/codegenarm64test.cpp | 34 ++++++++++++++ src/coreclr/jit/emitarm64.cpp | 70 ++++++++++++++++++++++++++-- 2 files changed, 101 insertions(+), 3 deletions(-) diff --git a/src/coreclr/jit/codegenarm64test.cpp b/src/coreclr/jit/codegenarm64test.cpp index 72a56b236a455c..6844da47a88e26 100644 --- a/src/coreclr/jit/codegenarm64test.cpp +++ b/src/coreclr/jit/codegenarm64test.cpp @@ -5872,6 +5872,40 @@ void CodeGen::genArm64EmitterUnitTestsSve() theEmitter->emitIns_R_PATTERN_I(INS_sve_incw, EA_SCALABLE, REG_V5, SVE_PATTERN_VL6, 16, INS_OPTS_SCALABLE_S); // INCW .S{, {, MUL #}} + // IF_SVE_BO_1A + theEmitter->emitIns_R_PATTERN_I(INS_sve_sqdecb, EA_4BYTE, REG_R0, SVE_PATTERN_POW2, + 1); // SQDECB , {, {, MUL #}} + theEmitter->emitIns_R_PATTERN_I(INS_sve_sqdecd, EA_4BYTE, REG_R1, SVE_PATTERN_VL1, + 2); // SQDECD , {, {, MUL #}} + theEmitter->emitIns_R_PATTERN_I(INS_sve_sqdech, EA_4BYTE, REG_R2, SVE_PATTERN_VL2, + 3); // SQDECH , {, {, MUL #}} + theEmitter->emitIns_R_PATTERN_I(INS_sve_sqdecw, EA_4BYTE, REG_R3, SVE_PATTERN_VL3, + 4); // SQDECW , {, {, MUL #}} + theEmitter->emitIns_R_PATTERN_I(INS_sve_sqincb, EA_4BYTE, REG_R4, SVE_PATTERN_VL4, + 5); // SQINCB , {, {, MUL #}} + theEmitter->emitIns_R_PATTERN_I(INS_sve_sqincd, EA_4BYTE, REG_R5, SVE_PATTERN_VL5, + 6); // SQINCD , {, {, MUL #}} + theEmitter->emitIns_R_PATTERN_I(INS_sve_sqinch, EA_4BYTE, REG_R6, SVE_PATTERN_VL6, + 7); // SQINCH , {, {, MUL #}} + theEmitter->emitIns_R_PATTERN_I(INS_sve_sqincw, EA_4BYTE, REG_R7, SVE_PATTERN_VL7, + 8); // SQINCW , {, {, MUL #}} + theEmitter->emitIns_R_PATTERN_I(INS_sve_uqdecb, EA_4BYTE, REG_R8, SVE_PATTERN_VL8, + 9); // UQDECB {, {, MUL #}} + theEmitter->emitIns_R_PATTERN_I(INS_sve_uqdecd, EA_4BYTE, REG_R9, SVE_PATTERN_VL16, + 10); // UQDECD {, {, MUL #}} + 
theEmitter->emitIns_R_PATTERN_I(INS_sve_uqdech, EA_4BYTE, REG_R10, SVE_PATTERN_VL32, + 11); // UQDECH <Wdn>{, <pattern>{, MUL #<imm>}} + theEmitter->emitIns_R_PATTERN_I(INS_sve_uqdecw, EA_4BYTE, REG_R11, SVE_PATTERN_VL64, + 12); // UQDECW <Wdn>{, <pattern>{, MUL #<imm>}} + theEmitter->emitIns_R_PATTERN_I(INS_sve_uqincb, EA_4BYTE, REG_R12, SVE_PATTERN_VL128, + 13); // UQINCB <Wdn>{, <pattern>{, MUL #<imm>}} + theEmitter->emitIns_R_PATTERN_I(INS_sve_uqincd, EA_4BYTE, REG_R13, SVE_PATTERN_VL256, + 14); // UQINCD <Wdn>{, <pattern>{, MUL #<imm>}} + theEmitter->emitIns_R_PATTERN_I(INS_sve_uqinch, EA_4BYTE, REG_R14, SVE_PATTERN_MUL4, + 15); // UQINCH <Wdn>{, <pattern>{, MUL #<imm>}} + theEmitter->emitIns_R_PATTERN_I(INS_sve_uqincw, EA_4BYTE, REG_R15, SVE_PATTERN_ALL, + 16); // UQINCW <Wdn>{, <pattern>{, MUL #<imm>}} + // IF_SVE_CI_3A theEmitter->emitIns_R_R_R(INS_sve_trn1, EA_SCALABLE, REG_P1, REG_P3, REG_P4, INS_OPTS_SCALABLE_B); // TRN1 <Pd>.<T>, <Pn>.<T>, <Pm>.<T> diff --git a/src/coreclr/jit/emitarm64.cpp b/src/coreclr/jit/emitarm64.cpp index f96bac7fc3f11c..7c78532e6bfb96 100644 --- a/src/coreclr/jit/emitarm64.cpp +++ b/src/coreclr/jit/emitarm64.cpp @@ -1153,6 +1153,14 @@ void emitter::emitInsSanityCheck(instrDesc* id) assert(isValidUimm4From1(emitGetInsSC(id))); break; + case IF_SVE_BO_1A: // ...........Xiiii ......pppppddddd -- SVE saturating inc/dec register by element count + elemsize = id->idOpSize(); + assert(id->idInsOpt() == INS_OPTS_NONE); + assert(isGeneralRegister(id->idReg1())); + assert(elemsize == EA_4BYTE); + assert(isValidUimm4From1(emitGetInsSC(id))); + break; + case IF_SVE_CE_2A: // ................ 
......nnnnn.DDDD -- SVE move predicate from vector assert(isPredicateRegister(id->idReg1())); // DDDD assert(isVectorRegister(id->idReg2())); // nnnnn @@ -16576,6 +16584,29 @@ void emitter::emitIns_R_PATTERN_I(instruction ins, fmt = IF_SVE_BM_1A; break; + case INS_sve_sqincb: + case INS_sve_uqincb: + case INS_sve_sqdecb: + case INS_sve_uqdecb: + case INS_sve_sqinch: + case INS_sve_uqinch: + case INS_sve_sqdech: + case INS_sve_uqdech: + case INS_sve_sqincw: + case INS_sve_uqincw: + case INS_sve_sqdecw: + case INS_sve_uqdecw: + case INS_sve_sqincd: + case INS_sve_uqincd: + case INS_sve_sqdecd: + case INS_sve_uqdecd: + assert(insOptsNone(opt)); + assert(isGeneralRegister(reg1)); // ddddd + assert(isValidUimm4From1(imm)); // iiii + assert(size == EA_4BYTE); + fmt = IF_SVE_BO_1A; + break; + default: unreached(); break; @@ -24348,6 +24379,7 @@ BYTE* emitter::emitOutput_InstrSve(BYTE* dst, instrDesc* id) // Immediate and pattern to general purpose. case IF_SVE_BL_1A: // ............iiii ......pppppddddd -- SVE element count case IF_SVE_BM_1A: // ............iiii ......pppppddddd -- SVE inc/dec register by element count + case IF_SVE_BO_1A: // ...........Xiiii ......pppppddddd -- SVE saturating inc/dec register by element count imm = emitGetInsSC(id); code = emitInsCodeSve(ins, fmt); code |= insEncodeReg_Rd(id->idReg1()); // ddddd @@ -28548,8 +28580,8 @@ void emitter::emitDispInsHelp( break; // {, {, MUL #}} - // {, {, MUL #}} case IF_SVE_BL_1A: // ............iiii ......pppppddddd -- SVE element count + // {, {, MUL #}} case IF_SVE_BM_1A: // ............iiii ......pppppddddd -- SVE inc/dec register by element count imm = emitGetInsSC(id); emitDispReg(id->idReg1(), size, true); // ddddd @@ -28557,7 +28589,7 @@ void emitter::emitDispInsHelp( if (imm > 1) { printf("mul "); - emitDispImm(emitGetInsSC(id), false, false); // iiii + emitDispImm(imm, false, false); // iiii } break; @@ -28571,7 +28603,38 @@ void emitter::emitDispInsHelp( if (imm > 1) { printf("mul "); - 
emitDispImm(emitGetInsSC(id), false, false); // iiii + emitDispImm(imm, false, false); // iiii + } + break; + + // , {, {, MUL #}} + // {, {, MUL #}} + case IF_SVE_BO_1A: // ...........Xiiii ......pppppddddd -- SVE saturating inc/dec register by element count + switch (id->idIns()) + { + case INS_sve_sqincb: + case INS_sve_sqdecb: + case INS_sve_sqinch: + case INS_sve_sqdech: + case INS_sve_sqincw: + case INS_sve_sqdecw: + case INS_sve_sqincd: + case INS_sve_sqdecd: + emitDispReg(id->idReg1(), EA_8BYTE, true); // ddddd + emitDispReg(id->idReg1(), EA_4BYTE, true); + break; + + default: + emitDispReg(id->idReg1(), EA_4BYTE, true); // ddddd + break; + } + + imm = emitGetInsSC(id); + emitDispSvePattern(id->idSvePattern(), (imm > 1)); // ppppp + if (imm > 1) + { + printf("mul "); + emitDispImm(imm, false, false); // iiii } break; @@ -32536,6 +32599,7 @@ emitter::insExecutionCharacteristics emitter::getInsExecutionCharacteristics(ins case IF_SVE_BL_1A: // ............iiii ......pppppddddd -- SVE element count case IF_SVE_BM_1A: // ............iiii ......pppppddddd -- SVE inc/dec register by element count case IF_SVE_BN_1A: // ............iiii ......pppppddddd -- SVE inc/dec vector by element count + case IF_SVE_BO_1A: // ...........Xiiii ......pppppddddd -- SVE saturating inc/dec register by element count result.insThroughput = PERFSCORE_THROUGHPUT_2C; result.insLatency = PERFSCORE_LATENCY_2C; break; From 692b5fa06d2421f0f12c1ee0d87bea604eb4c250 Mon Sep 17 00:00:00 2001 From: "Aman Khalid (from Dev Box)" Date: Sun, 3 Mar 2024 20:46:50 -0500 Subject: [PATCH 2/9] Add BP_1A --- src/coreclr/jit/codegenarm64test.cpp | 26 +++++++++++++++++++++++ src/coreclr/jit/emitarm64.cpp | 31 +++++++++++++++++++++++----- 2 files changed, 52 insertions(+), 5 deletions(-) diff --git a/src/coreclr/jit/codegenarm64test.cpp b/src/coreclr/jit/codegenarm64test.cpp index 6844da47a88e26..3de1b44d91d927 100644 --- a/src/coreclr/jit/codegenarm64test.cpp +++ b/src/coreclr/jit/codegenarm64test.cpp @@ 
-5906,6 +5906,32 @@ void CodeGen::genArm64EmitterUnitTestsSve() theEmitter->emitIns_R_PATTERN_I(INS_sve_uqincw, EA_4BYTE, REG_R15, SVE_PATTERN_ALL, 16); // UQINCW {, {, MUL #}} + // IF_SVE_BP_1A + theEmitter->emitIns_R_PATTERN_I(INS_sve_sqdecd, EA_SCALABLE, REG_V0, SVE_PATTERN_VL1, 1, + INS_OPTS_SCALABLE_H); // SQDECD .D{, {, MUL #}} + theEmitter->emitIns_R_PATTERN_I(INS_sve_sqdech, EA_SCALABLE, REG_V1, SVE_PATTERN_VL2, 2, + INS_OPTS_SCALABLE_S); // SQDECH .H{, {, MUL #}} + theEmitter->emitIns_R_PATTERN_I(INS_sve_sqdecw, EA_SCALABLE, REG_V2, SVE_PATTERN_VL3, 3, + INS_OPTS_SCALABLE_D); // SQDECW .S{, {, MUL #}} + theEmitter->emitIns_R_PATTERN_I(INS_sve_sqincd, EA_SCALABLE, REG_V3, SVE_PATTERN_VL4, 4, + INS_OPTS_SCALABLE_H); // SQINCD .D{, {, MUL #}} + theEmitter->emitIns_R_PATTERN_I(INS_sve_sqinch, EA_SCALABLE, REG_V4, SVE_PATTERN_VL5, 5, + INS_OPTS_SCALABLE_S); // SQINCH .H{, {, MUL #}} + theEmitter->emitIns_R_PATTERN_I(INS_sve_sqincw, EA_SCALABLE, REG_V5, SVE_PATTERN_VL6, 6, + INS_OPTS_SCALABLE_D); // SQINCW .S{, {, MUL #}} + theEmitter->emitIns_R_PATTERN_I(INS_sve_uqdecd, EA_SCALABLE, REG_V6, SVE_PATTERN_VL7, 7, + INS_OPTS_SCALABLE_H); // UQDECD .D{, {, MUL #}} + theEmitter->emitIns_R_PATTERN_I(INS_sve_uqdech, EA_SCALABLE, REG_V7, SVE_PATTERN_VL8, 8, + INS_OPTS_SCALABLE_S); // UQDECH .H{, {, MUL #}} + theEmitter->emitIns_R_PATTERN_I(INS_sve_uqdecw, EA_SCALABLE, REG_V8, SVE_PATTERN_VL16, 9, + INS_OPTS_SCALABLE_D); // UQDECW .S{, {, MUL #}} + theEmitter->emitIns_R_PATTERN_I(INS_sve_uqincd, EA_SCALABLE, REG_V9, SVE_PATTERN_VL32, 10, + INS_OPTS_SCALABLE_H); // UQINCD .D{, {, MUL #}} + theEmitter->emitIns_R_PATTERN_I(INS_sve_uqinch, EA_SCALABLE, REG_V10, SVE_PATTERN_POW2, 11, + INS_OPTS_SCALABLE_S); // UQINCH .H{, {, MUL #}} + theEmitter->emitIns_R_PATTERN_I(INS_sve_uqincw, EA_SCALABLE, REG_V11, SVE_PATTERN_ALL, 16, + INS_OPTS_SCALABLE_D); // UQINCW .S{, {, MUL #}} + // IF_SVE_CI_3A theEmitter->emitIns_R_R_R(INS_sve_trn1, EA_SCALABLE, REG_P1, REG_P3, REG_P4, 
INS_OPTS_SCALABLE_B); // TRN1 ., ., . diff --git a/src/coreclr/jit/emitarm64.cpp b/src/coreclr/jit/emitarm64.cpp index 7c78532e6bfb96..b73f6e2cb8ec76 100644 --- a/src/coreclr/jit/emitarm64.cpp +++ b/src/coreclr/jit/emitarm64.cpp @@ -1146,6 +1146,7 @@ void emitter::emitInsSanityCheck(instrDesc* id) break; case IF_SVE_BN_1A: // ............iiii ......pppppddddd -- SVE inc/dec vector by element count + case IF_SVE_BP_1A: // ............iiii ......pppppddddd -- SVE saturating inc/dec vector by element count elemsize = id->idOpSize(); assert(insOptsScalableAtLeastHalf(id->idInsOpt())); assert(isVectorRegister(id->idReg1())); @@ -16588,6 +16589,13 @@ void emitter::emitIns_R_PATTERN_I(instruction ins, case INS_sve_uqincb: case INS_sve_sqdecb: case INS_sve_uqdecb: + assert(insOptsNone(opt)); + assert(isGeneralRegister(reg1)); // ddddd + assert(isValidUimm4From1(imm)); // iiii + assert(size == EA_4BYTE); + fmt = IF_SVE_BO_1A; + break; + case INS_sve_sqinch: case INS_sve_uqinch: case INS_sve_sqdech: @@ -16600,11 +16608,21 @@ void emitter::emitIns_R_PATTERN_I(instruction ins, case INS_sve_uqincd: case INS_sve_sqdecd: case INS_sve_uqdecd: - assert(insOptsNone(opt)); - assert(isGeneralRegister(reg1)); // ddddd - assert(isValidUimm4From1(imm)); // iiii - assert(size == EA_4BYTE); - fmt = IF_SVE_BO_1A; + assert(isValidUimm4From1(imm)); // iiii + + if (insOptsNone(opt)) + { + assert(isGeneralRegister(reg1)); // ddddd + assert(size == EA_4BYTE); + fmt = IF_SVE_BO_1A; + } + else + { + assert(insOptsScalableAtLeastHalf(opt)); + assert(isVectorRegister(reg1)); // ddddd + assert(size == EA_SCALABLE); + fmt = IF_SVE_BP_1A; + } break; default: @@ -24389,6 +24407,7 @@ BYTE* emitter::emitOutput_InstrSve(BYTE* dst, instrDesc* id) break; case IF_SVE_BN_1A: // ............iiii ......pppppddddd -- SVE inc/dec vector by element count + case IF_SVE_BP_1A: // ............iiii ......pppppddddd -- SVE saturating inc/dec vector by element count imm = emitGetInsSC(id); code = emitInsCodeSve(ins, 
fmt); code |= insEncodeReg_V_4_to_0(id->idReg1()); // ddddd @@ -28597,6 +28616,7 @@ void emitter::emitDispInsHelp( // .H{, {, MUL #}} // .S{, {, MUL #}} case IF_SVE_BN_1A: // ............iiii ......pppppddddd -- SVE inc/dec vector by element count + case IF_SVE_BP_1A: // ............iiii ......pppppddddd -- SVE saturating inc/dec vector by element count imm = emitGetInsSC(id); emitDispSveReg(id->idReg1(), id->idInsOpt(), true); // ddddd emitDispSvePattern(id->idSvePattern(), (imm > 1)); // ppppp @@ -32600,6 +32620,7 @@ emitter::insExecutionCharacteristics emitter::getInsExecutionCharacteristics(ins case IF_SVE_BM_1A: // ............iiii ......pppppddddd -- SVE inc/dec register by element count case IF_SVE_BN_1A: // ............iiii ......pppppddddd -- SVE inc/dec vector by element count case IF_SVE_BO_1A: // ...........Xiiii ......pppppddddd -- SVE saturating inc/dec register by element count + case IF_SVE_BP_1A: // ............iiii ......pppppddddd -- SVE saturating inc/dec vector by element count result.insThroughput = PERFSCORE_THROUGHPUT_2C; result.insLatency = PERFSCORE_LATENCY_2C; break; From a701332e5760ac08f64b8e408ee9abd7ef9cf3c9 Mon Sep 17 00:00:00 2001 From: "Aman Khalid (from Dev Box)" Date: Sun, 3 Mar 2024 21:09:59 -0500 Subject: [PATCH 3/9] Add BQ_2A --- src/coreclr/jit/codegenarm64test.cpp | 10 +++++++ src/coreclr/jit/emitarm64.cpp | 45 ++++++++++++++++++++++++++++ src/coreclr/jit/emitarm64.h | 3 ++ 3 files changed, 58 insertions(+) diff --git a/src/coreclr/jit/codegenarm64test.cpp b/src/coreclr/jit/codegenarm64test.cpp index 3de1b44d91d927..ec0b529e0c4df5 100644 --- a/src/coreclr/jit/codegenarm64test.cpp +++ b/src/coreclr/jit/codegenarm64test.cpp @@ -5932,6 +5932,16 @@ void CodeGen::genArm64EmitterUnitTestsSve() theEmitter->emitIns_R_PATTERN_I(INS_sve_uqincw, EA_SCALABLE, REG_V11, SVE_PATTERN_ALL, 16, INS_OPTS_SCALABLE_D); // UQINCW .S{, {, MUL #}} + // IF_SVE_BQ_2A + theEmitter->emitIns_R_R_I(INS_sve_ext, EA_SCALABLE, REG_V0, REG_V1, 0, + 
INS_OPTS_SCALABLE_B); // EXT <Zd>.B, {<Zn1>.B, <Zn2>.B }, #<imm> + theEmitter->emitIns_R_R_I(INS_sve_ext, EA_SCALABLE, REG_V2, REG_V3, 5, + INS_OPTS_SCALABLE_B); // EXT <Zd>.B, {<Zn1>.B, <Zn2>.B }, #<imm> + theEmitter->emitIns_R_R_I(INS_sve_ext, EA_SCALABLE, REG_V4, REG_V5, 128, + INS_OPTS_SCALABLE_B); // EXT <Zd>.B, {<Zn1>.B, <Zn2>.B }, #<imm> + theEmitter->emitIns_R_R_I(INS_sve_ext, EA_SCALABLE, REG_V6, REG_FP_LAST, 255, + INS_OPTS_SCALABLE_B); // EXT <Zd>.B, {<Zn1>.B, <Zn2>.B }, #<imm> + // IF_SVE_CI_3A theEmitter->emitIns_R_R_R(INS_sve_trn1, EA_SCALABLE, REG_P1, REG_P3, REG_P4, INS_OPTS_SCALABLE_B); // TRN1 <Pd>.<T>, <Pn>.<T>, <Pm>.<T> diff --git a/src/coreclr/jit/emitarm64.cpp b/src/coreclr/jit/emitarm64.cpp index b73f6e2cb8ec76..c06803f49232db 100644 --- a/src/coreclr/jit/emitarm64.cpp +++ b/src/coreclr/jit/emitarm64.cpp @@ -1162,6 +1162,13 @@ void emitter::emitInsSanityCheck(instrDesc* id) assert(isValidUimm4From1(emitGetInsSC(id))); break; + case IF_SVE_BQ_2A: // ...........iiiii ...iiinnnnnddddd -- SVE extract vector (immediate offset, destructive) + assert(id->idInsOpt() == INS_OPTS_SCALABLE_B); + assert(isVectorRegister(id->idReg1())); // ddddd + assert(isVectorRegister(id->idReg2())); // nnnnn + assert(isValidUimm8(emitGetInsSC(id))); // iiiii iii + break; + case IF_SVE_CE_2A: // ................ 
......nnnnn.DDDD -- SVE move predicate from vector assert(isPredicateRegister(id->idReg1())); // DDDD assert(isVectorRegister(id->idReg2())); // nnnnn @@ -10311,6 +10318,14 @@ void emitter::emitIns_R_R_I(instruction ins, fmt = IF_SVE_FU_2A; break; + case INS_sve_ext: + assert(opt == INS_OPTS_SCALABLE_B); + assert(isVectorRegister(reg1)); // ddddd + assert(isVectorRegister(reg2)); // nnnnn + assert(isValidUimm8(imm)); // iiiii iii + fmt = IF_SVE_BQ_2A; + break; + case INS_sve_dupq: assert(insOptsScalableStandard(opt)); assert(insScalableOptsNone(sopt)); @@ -21923,6 +21938,17 @@ void emitter::emitIns_Call(EmitCallType callType, return (code_t)((imm & 0xFF) << 5); } +/***************************************************************************** + * + * Returns the encoding for the unsigned immediate value as 3-bits at bit locations '12-10'. + */ + +/*static*/ emitter::code_t emitter::insEncodeUimm3_12_to_10(ssize_t imm) +{ + assert(isValidUimm3(imm)); + return (code_t)imm << 10; +} + /***************************************************************************** * * Returns the encoding for the unsigned immediate value as 3-bits at bit locations '18-16'. 
@@ -24406,6 +24432,16 @@ BYTE* emitter::emitOutput_InstrSve(BYTE* dst, instrDesc* id) dst += emitOutput_Instr(dst, code); break; + case IF_SVE_BQ_2A: // ...........iiiii ...iiinnnnnddddd -- SVE extract vector (immediate offset, destructive) + imm = emitGetInsSC(id); + code = emitInsCodeSve(ins, fmt); + code |= insEncodeReg_V_4_to_0(id->idReg1()); // ddddd + code |= insEncodeReg_V_9_to_5(id->idReg2()); // nnnnn + code |= insEncodeUimm3_12_to_10(imm & 0b111); // iii + code |= insEncodeUimm5_20_to_16(imm >> 3); // iiiii + dst += emitOutput_Instr(dst, code); + break; + case IF_SVE_BN_1A: // ............iiii ......pppppddddd -- SVE inc/dec vector by element count case IF_SVE_BP_1A: // ............iiii ......pppppddddd -- SVE saturating inc/dec vector by element count imm = emitGetInsSC(id); @@ -28658,6 +28694,14 @@ void emitter::emitDispInsHelp( } break; + // .B, {.B, .B }, # + case IF_SVE_BQ_2A: // ...........iiiii ...iiinnnnnddddd -- SVE extract vector (immediate offset, destructive) + imm = emitGetInsSC(id); + emitDispSveReg(id->idReg1(), id->idInsOpt(), true); // ddddd + emitDispVectorRegList(id->idReg2(), 2, id->idInsOpt(), true); // nnnnn + emitDispImm(imm, false); // iiiii iii + break; + // ., ., .D case IF_SVE_BG_3A: // ........xx.mmmmm ......nnnnnddddd -- SVE bitwise shift by wide elements (unpredicated) emitDispSveReg(id->idReg1(), id->idInsOpt(), true); // ddddd @@ -32621,6 +32665,7 @@ emitter::insExecutionCharacteristics emitter::getInsExecutionCharacteristics(ins case IF_SVE_BN_1A: // ............iiii ......pppppddddd -- SVE inc/dec vector by element count case IF_SVE_BO_1A: // ...........Xiiii ......pppppddddd -- SVE saturating inc/dec register by element count case IF_SVE_BP_1A: // ............iiii ......pppppddddd -- SVE saturating inc/dec vector by element count + case IF_SVE_BQ_2A: // ...........iiiii ...iiinnnnnddddd -- SVE extract vector (immediate offset, destructive) result.insThroughput = PERFSCORE_THROUGHPUT_2C; result.insLatency = 
PERFSCORE_LATENCY_2C; break; diff --git a/src/coreclr/jit/emitarm64.h b/src/coreclr/jit/emitarm64.h index bb5abee8d2b5f5..b6b762441ab831 100644 --- a/src/coreclr/jit/emitarm64.h +++ b/src/coreclr/jit/emitarm64.h @@ -738,6 +738,9 @@ static code_t insEncodeUimm6_21_to_16(ssize_t imm); // Returns the encoding for the immediate value as 8-bits at bit locations '12-5'. static code_t insEncodeImm8_12_to_5(ssize_t imm); +// Returns the encoding for the unsigned immediate value as 3-bits at bit locations '12-10'. +static code_t insEncodeUimm3_12_to_10(ssize_t imm); + // Returns the encoding for the unsigned immediate value as 3-bits at bit locations '18-16'. static code_t insEncodeUimm3_18_to_16(ssize_t imm); From 6d4a60b33127706a87df9ef71567ddd020a6bbb7 Mon Sep 17 00:00:00 2001 From: "Aman Khalid (from Dev Box)" Date: Sun, 3 Mar 2024 21:32:47 -0500 Subject: [PATCH 4/9] Add BQ_2B --- src/coreclr/jit/codegenarm64test.cpp | 24 +++++++++++++++++------- src/coreclr/jit/emitarm64.cpp | 25 +++++++++++++++++++++++-- 2 files changed, 40 insertions(+), 9 deletions(-) diff --git a/src/coreclr/jit/codegenarm64test.cpp b/src/coreclr/jit/codegenarm64test.cpp index ec0b529e0c4df5..fe655981be3156 100644 --- a/src/coreclr/jit/codegenarm64test.cpp +++ b/src/coreclr/jit/codegenarm64test.cpp @@ -5933,14 +5933,24 @@ void CodeGen::genArm64EmitterUnitTestsSve() INS_OPTS_SCALABLE_D); // UQINCW .S{, {, MUL #}} // IF_SVE_BQ_2A + theEmitter->emitIns_R_R_I(INS_sve_ext, EA_SCALABLE, REG_V0, REG_V1, 0, INS_OPTS_SCALABLE_B, + INS_SCALABLE_OPTS_WITH_VECTOR_PAIR); // EXT .B, {.B, .B }, # + theEmitter->emitIns_R_R_I(INS_sve_ext, EA_SCALABLE, REG_V2, REG_V3, 5, INS_OPTS_SCALABLE_B, + INS_SCALABLE_OPTS_WITH_VECTOR_PAIR); // EXT .B, {.B, .B }, # + theEmitter->emitIns_R_R_I(INS_sve_ext, EA_SCALABLE, REG_V4, REG_V5, 128, INS_OPTS_SCALABLE_B, + INS_SCALABLE_OPTS_WITH_VECTOR_PAIR); // EXT .B, {.B, .B }, # + theEmitter->emitIns_R_R_I(INS_sve_ext, EA_SCALABLE, REG_V6, REG_FP_LAST, 255, INS_OPTS_SCALABLE_B, + 
INS_SCALABLE_OPTS_WITH_VECTOR_PAIR); // EXT <Zd>.B, {<Zn1>.B, <Zn2>.B }, #<imm> + + // IF_SVE_BQ_2B theEmitter->emitIns_R_R_I(INS_sve_ext, EA_SCALABLE, REG_V0, REG_V1, 0, - INS_OPTS_SCALABLE_B); // EXT <Zd>.B, {<Zn1>.B, <Zn2>.B }, #<imm> - theEmitter->emitIns_R_R_I(INS_sve_ext, EA_SCALABLE, REG_V2, REG_V3, 5, - INS_OPTS_SCALABLE_B); // EXT <Zd>.B, {<Zn1>.B, <Zn2>.B }, #<imm> - theEmitter->emitIns_R_R_I(INS_sve_ext, EA_SCALABLE, REG_V4, REG_V5, 128, - INS_OPTS_SCALABLE_B); // EXT <Zd>.B, {<Zn1>.B, <Zn2>.B }, #<imm> - theEmitter->emitIns_R_R_I(INS_sve_ext, EA_SCALABLE, REG_V6, REG_FP_LAST, 255, - INS_OPTS_SCALABLE_B); // EXT <Zd>.B, {<Zn1>.B, <Zn2>.B }, #<imm> + INS_OPTS_SCALABLE_B); // EXT <Zdn>.B, <Zdn>.B, <Zm>.B, #<imm> + theEmitter->emitIns_R_R_I(INS_sve_ext, EA_SCALABLE, REG_V2, REG_V3, 31, + INS_OPTS_SCALABLE_B); // EXT <Zdn>.B, <Zdn>.B, <Zm>.B, #<imm> + theEmitter->emitIns_R_R_I(INS_sve_ext, EA_SCALABLE, REG_V4, REG_V5, 64, + INS_OPTS_SCALABLE_B); // EXT <Zdn>.B, <Zdn>.B, <Zm>.B, #<imm> + theEmitter->emitIns_R_R_I(INS_sve_ext, EA_SCALABLE, REG_V6, REG_V7, 255, + INS_OPTS_SCALABLE_B); // EXT <Zdn>.B, <Zdn>.B, <Zm>.B, #<imm> // IF_SVE_CI_3A theEmitter->emitIns_R_R_R(INS_sve_trn1, EA_SCALABLE, REG_P1, REG_P3, REG_P4, diff --git a/src/coreclr/jit/emitarm64.cpp b/src/coreclr/jit/emitarm64.cpp index c06803f49232db..c1d96ee74fd3e4 100644 --- a/src/coreclr/jit/emitarm64.cpp +++ b/src/coreclr/jit/emitarm64.cpp @@ -1163,6 +1163,7 @@ void emitter::emitInsSanityCheck(instrDesc* id) break; case IF_SVE_BQ_2A: // ...........iiiii ...iiinnnnnddddd -- SVE extract vector (immediate offset, destructive) + case IF_SVE_BQ_2B: // ...........iiiii ...iiimmmmmddddd -- SVE extract vector (immediate offset, destructive) assert(id->idInsOpt() == INS_OPTS_SCALABLE_B); assert(isVectorRegister(id->idReg1())); // ddddd assert(isVectorRegister(id->idReg2())); // nnnnn @@ -10323,7 +10324,16 @@ void emitter::emitIns_R_R_I(instruction ins, assert(isVectorRegister(reg1)); // ddddd assert(isVectorRegister(reg2)); // nnnnn assert(isValidUimm8(imm)); // iiiii iii - fmt = IF_SVE_BQ_2A; + + if (sopt == INS_SCALABLE_OPTS_WITH_VECTOR_PAIR) + { + fmt = IF_SVE_BQ_2A; + } 
+ else + { + assert(insScalableOptsNone(sopt)); + fmt = IF_SVE_BQ_2B; + } break; case INS_sve_dupq: @@ -24433,10 +24443,11 @@ BYTE* emitter::emitOutput_InstrSve(BYTE* dst, instrDesc* id) break; case IF_SVE_BQ_2A: // ...........iiiii ...iiinnnnnddddd -- SVE extract vector (immediate offset, destructive) + case IF_SVE_BQ_2B: // ...........iiiii ...iiimmmmmddddd -- SVE extract vector (immediate offset, destructive) imm = emitGetInsSC(id); code = emitInsCodeSve(ins, fmt); code |= insEncodeReg_V_4_to_0(id->idReg1()); // ddddd - code |= insEncodeReg_V_9_to_5(id->idReg2()); // nnnnn + code |= insEncodeReg_V_9_to_5(id->idReg2()); // nnnnn/mmmmm code |= insEncodeUimm3_12_to_10(imm & 0b111); // iii code |= insEncodeUimm5_20_to_16(imm >> 3); // iiiii dst += emitOutput_Instr(dst, code); @@ -28702,6 +28713,15 @@ void emitter::emitDispInsHelp( emitDispImm(imm, false); // iiiii iii break; + // .B, .B, .B, # + case IF_SVE_BQ_2B: // ...........iiiii ...iiimmmmmddddd -- SVE extract vector (immediate offset, destructive) + imm = emitGetInsSC(id); + emitDispSveReg(id->idReg1(), id->idInsOpt(), true); // ddddd + emitDispSveReg(id->idReg1(), id->idInsOpt(), true); + emitDispSveReg(id->idReg2(), id->idInsOpt(), true); // mmmmm + emitDispImm(imm, false); // iiiii iii + break; + // ., ., .D case IF_SVE_BG_3A: // ........xx.mmmmm ......nnnnnddddd -- SVE bitwise shift by wide elements (unpredicated) emitDispSveReg(id->idReg1(), id->idInsOpt(), true); // ddddd @@ -32666,6 +32686,7 @@ emitter::insExecutionCharacteristics emitter::getInsExecutionCharacteristics(ins case IF_SVE_BO_1A: // ...........Xiiii ......pppppddddd -- SVE saturating inc/dec register by element count case IF_SVE_BP_1A: // ............iiii ......pppppddddd -- SVE saturating inc/dec vector by element count case IF_SVE_BQ_2A: // ...........iiiii ...iiinnnnnddddd -- SVE extract vector (immediate offset, destructive) + case IF_SVE_BQ_2B: // ...........iiiii ...iiimmmmmddddd -- SVE extract vector (immediate offset, destructive) 
result.insThroughput = PERFSCORE_THROUGHPUT_2C; result.insLatency = PERFSCORE_LATENCY_2C; break; From d119f3541f96059605faa096249297299328102b Mon Sep 17 00:00:00 2001 From: "Aman Khalid (from Dev Box)" Date: Sun, 3 Mar 2024 23:18:26 -0500 Subject: [PATCH 5/9] Add BU_2A --- src/coreclr/jit/codegenarm64test.cpp | 14 +++++++++ src/coreclr/jit/emitarm64.cpp | 43 ++++++++++++++++++++++++++++ 2 files changed, 57 insertions(+) diff --git a/src/coreclr/jit/codegenarm64test.cpp b/src/coreclr/jit/codegenarm64test.cpp index fe655981be3156..24596667fe4efe 100644 --- a/src/coreclr/jit/codegenarm64test.cpp +++ b/src/coreclr/jit/codegenarm64test.cpp @@ -5952,6 +5952,20 @@ void CodeGen::genArm64EmitterUnitTestsSve() theEmitter->emitIns_R_R_I(INS_sve_ext, EA_SCALABLE, REG_V6, REG_V7, 255, INS_OPTS_SCALABLE_B); // EXT .B, .B, .B, # + // IF_SVE_BU_2A + theEmitter->emitIns_R_R_F(INS_sve_fcpy, EA_SCALABLE, REG_V0, REG_P1, 2.0, + INS_OPTS_SCALABLE_H); // FCPY ., /M, # + theEmitter->emitIns_R_R_F(INS_sve_fcpy, EA_SCALABLE, REG_V2, REG_P3, 1.0, + INS_OPTS_SCALABLE_S); // FCPY ., /M, # + theEmitter->emitIns_R_R_F(INS_sve_fcpy, EA_SCALABLE, REG_V4, REG_P5, -10.0, + INS_OPTS_SCALABLE_D); // FCPY ., /M, # + theEmitter->emitIns_R_R_F(INS_sve_fmov, EA_SCALABLE, REG_V6, REG_P7, -0.125, + INS_OPTS_SCALABLE_H); // FMOV ., /M, # + theEmitter->emitIns_R_R_F(INS_sve_fmov, EA_SCALABLE, REG_V8, REG_P9, 31.0, + INS_OPTS_SCALABLE_S); // FMOV ., /M, # + theEmitter->emitIns_R_R_F(INS_sve_fmov, EA_SCALABLE, REG_V10, REG_P11, 0.5, + INS_OPTS_SCALABLE_D); // FMOV ., /M, # + // IF_SVE_CI_3A theEmitter->emitIns_R_R_R(INS_sve_trn1, EA_SCALABLE, REG_P1, REG_P3, REG_P4, INS_OPTS_SCALABLE_B); // TRN1 ., ., . 
diff --git a/src/coreclr/jit/emitarm64.cpp b/src/coreclr/jit/emitarm64.cpp index c1d96ee74fd3e4..f1db898dabad74 100644 --- a/src/coreclr/jit/emitarm64.cpp +++ b/src/coreclr/jit/emitarm64.cpp @@ -1170,6 +1170,15 @@ void emitter::emitInsSanityCheck(instrDesc* id) assert(isValidUimm8(emitGetInsSC(id))); // iiiii iii break; + case IF_SVE_BU_2A: // ........xx..gggg ...iiiiiiiiddddd -- SVE copy floating-point immediate (predicated) + imm = emitGetInsSC(id); + assert(insOptsScalableAtLeastHalf(id->idInsOpt())); + assert(isVectorRegister(id->idReg1())); // ddddd + assert(isValidSimm8(imm) || isValidUimm8(imm)); // iiiiiiii + assert(isPredicateRegister(id->idReg2())); // gggg + assert(isValidVectorElemsize(optGetSveElemsize(id->idInsOpt()))); // xx + break; + case IF_SVE_CE_2A: // ................ ......nnnnn.DDDD -- SVE move predicate from vector assert(isPredicateRegister(id->idReg1())); // DDDD assert(isVectorRegister(id->idReg2())); // nnnnn @@ -10559,6 +10568,22 @@ void emitter::emitIns_R_R_F( fmt = IF_SVE_HM_2A; break; + case INS_sve_fmov: + case INS_sve_fcpy: + assert(insOptsScalableAtLeastHalf(opt)); + assert(isVectorRegister(reg1)); // ddddd + assert(isPredicateRegister(reg2)); // gggg + assert(isValidVectorElemsize(optGetSveElemsize(opt))); // xx + floatImm8 fpi; + fpi.immFPIVal = 0; + canEncodeFloatImm8(immDbl, &fpi); + imm = fpi.immFPIVal; + fmt = IF_SVE_BU_2A; + + // FMOV is an alias for FCPY, and is always the preferred disassembly. 
+ ins = INS_sve_fmov; + break; + default: unreached(); break; @@ -24463,6 +24488,16 @@ BYTE* emitter::emitOutput_InstrSve(BYTE* dst, instrDesc* id) dst += emitOutput_Instr(dst, code); break; + case IF_SVE_BU_2A: // ........xx..gggg ...iiiiiiiiddddd -- SVE copy floating-point immediate (predicated) + imm = emitGetInsSC(id); + code = emitInsCodeSve(ins, fmt); + code |= insEncodeReg_V_4_to_0(id->idReg1()); // ddddd + code |= insEncodeImm8_12_to_5(imm); // iiiiiiii + code |= insEncodeReg_P_19_to_16(id->idReg2()); // gggg + code |= insEncodeElemsize(optGetSveElemsize(id->idInsOpt())); // xx + dst += emitOutput_Instr(dst, code); + break; + case IF_SVE_CE_2A: // ................ ......nnnnn.DDDD -- SVE move predicate from vector code = emitInsCodeSve(ins, fmt); code |= insEncodeReg_P_3_to_0(id->idReg1()); // DDDD @@ -28722,6 +28757,13 @@ void emitter::emitDispInsHelp( emitDispImm(imm, false); // iiiii iii break; + // ., /M, # + case IF_SVE_BU_2A: // ........xx..gggg ...iiiiiiiiddddd -- SVE copy floating-point immediate (predicated) + emitDispSveReg(id->idReg1(), id->idInsOpt(), true); // ddddd + emitDispPredicateReg(id->idReg2(), insGetPredicateType(id->idInsFmt()), INS_OPTS_NONE, true); // gggg + emitDispFloatImm(emitGetInsSC(id)); // iiiiiiii + break; + // ., ., .D case IF_SVE_BG_3A: // ........xx.mmmmm ......nnnnnddddd -- SVE bitwise shift by wide elements (unpredicated) emitDispSveReg(id->idReg1(), id->idInsOpt(), true); // ddddd @@ -32687,6 +32729,7 @@ emitter::insExecutionCharacteristics emitter::getInsExecutionCharacteristics(ins case IF_SVE_BP_1A: // ............iiii ......pppppddddd -- SVE saturating inc/dec vector by element count case IF_SVE_BQ_2A: // ...........iiiii ...iiinnnnnddddd -- SVE extract vector (immediate offset, destructive) case IF_SVE_BQ_2B: // ...........iiiii ...iiimmmmmddddd -- SVE extract vector (immediate offset, destructive) + case IF_SVE_BU_2A: // ........xx..gggg ...iiiiiiiiddddd -- SVE copy floating-point immediate (predicated) 
result.insThroughput = PERFSCORE_THROUGHPUT_2C; result.insLatency = PERFSCORE_LATENCY_2C; break; From 9fdf6f195018b392e207288d0317db8a0ac9581a Mon Sep 17 00:00:00 2001 From: "Aman Khalid (from Dev Box)" Date: Mon, 4 Mar 2024 11:34:29 -0500 Subject: [PATCH 6/9] Fix tests --- src/coreclr/jit/codegenarm64test.cpp | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/src/coreclr/jit/codegenarm64test.cpp b/src/coreclr/jit/codegenarm64test.cpp index 24596667fe4efe..9972bea9f952e6 100644 --- a/src/coreclr/jit/codegenarm64test.cpp +++ b/src/coreclr/jit/codegenarm64test.cpp @@ -5908,29 +5908,29 @@ void CodeGen::genArm64EmitterUnitTestsSve() // IF_SVE_BP_1A theEmitter->emitIns_R_PATTERN_I(INS_sve_sqdecd, EA_SCALABLE, REG_V0, SVE_PATTERN_VL1, 1, - INS_OPTS_SCALABLE_H); // SQDECD .D{, {, MUL #}} + INS_OPTS_SCALABLE_D); // SQDECD .D{, {, MUL #}} theEmitter->emitIns_R_PATTERN_I(INS_sve_sqdech, EA_SCALABLE, REG_V1, SVE_PATTERN_VL2, 2, - INS_OPTS_SCALABLE_S); // SQDECH .H{, {, MUL #}} + INS_OPTS_SCALABLE_H); // SQDECH .H{, {, MUL #}} theEmitter->emitIns_R_PATTERN_I(INS_sve_sqdecw, EA_SCALABLE, REG_V2, SVE_PATTERN_VL3, 3, - INS_OPTS_SCALABLE_D); // SQDECW .S{, {, MUL #}} + INS_OPTS_SCALABLE_S); // SQDECW .S{, {, MUL #}} theEmitter->emitIns_R_PATTERN_I(INS_sve_sqincd, EA_SCALABLE, REG_V3, SVE_PATTERN_VL4, 4, - INS_OPTS_SCALABLE_H); // SQINCD .D{, {, MUL #}} + INS_OPTS_SCALABLE_D); // SQINCD .D{, {, MUL #}} theEmitter->emitIns_R_PATTERN_I(INS_sve_sqinch, EA_SCALABLE, REG_V4, SVE_PATTERN_VL5, 5, - INS_OPTS_SCALABLE_S); // SQINCH .H{, {, MUL #}} + INS_OPTS_SCALABLE_H); // SQINCH .H{, {, MUL #}} theEmitter->emitIns_R_PATTERN_I(INS_sve_sqincw, EA_SCALABLE, REG_V5, SVE_PATTERN_VL6, 6, - INS_OPTS_SCALABLE_D); // SQINCW .S{, {, MUL #}} + INS_OPTS_SCALABLE_S); // SQINCW .S{, {, MUL #}} theEmitter->emitIns_R_PATTERN_I(INS_sve_uqdecd, EA_SCALABLE, REG_V6, SVE_PATTERN_VL7, 7, - INS_OPTS_SCALABLE_H); // UQDECD .D{, {, MUL #}} + INS_OPTS_SCALABLE_D); // 
UQDECD <Zdn>.D{, <pattern>{, MUL #<imm>}} theEmitter->emitIns_R_PATTERN_I(INS_sve_uqdech, EA_SCALABLE, REG_V7, SVE_PATTERN_VL8, 8, - INS_OPTS_SCALABLE_S); // UQDECH <Zdn>.H{, <pattern>{, MUL #<imm>}} + INS_OPTS_SCALABLE_H); // UQDECH <Zdn>.H{, <pattern>{, MUL #<imm>}} theEmitter->emitIns_R_PATTERN_I(INS_sve_uqdecw, EA_SCALABLE, REG_V8, SVE_PATTERN_VL16, 9, - INS_OPTS_SCALABLE_D); // UQDECW <Zdn>.S{, <pattern>{, MUL #<imm>}} + INS_OPTS_SCALABLE_S); // UQDECW <Zdn>.S{, <pattern>{, MUL #<imm>}} theEmitter->emitIns_R_PATTERN_I(INS_sve_uqincd, EA_SCALABLE, REG_V9, SVE_PATTERN_VL32, 10, - INS_OPTS_SCALABLE_H); // UQINCD <Zdn>.D{, <pattern>{, MUL #<imm>}} + INS_OPTS_SCALABLE_D); // UQINCD <Zdn>.D{, <pattern>{, MUL #<imm>}} theEmitter->emitIns_R_PATTERN_I(INS_sve_uqinch, EA_SCALABLE, REG_V10, SVE_PATTERN_POW2, 11, - INS_OPTS_SCALABLE_S); // UQINCH <Zdn>.H{, <pattern>{, MUL #<imm>}} + INS_OPTS_SCALABLE_H); // UQINCH <Zdn>.H{, <pattern>{, MUL #<imm>}} theEmitter->emitIns_R_PATTERN_I(INS_sve_uqincw, EA_SCALABLE, REG_V11, SVE_PATTERN_ALL, 16, - INS_OPTS_SCALABLE_D); // UQINCW <Zdn>.S{, <pattern>{, MUL #<imm>}} + INS_OPTS_SCALABLE_S); // UQINCW <Zdn>.S{, <pattern>{, MUL #<imm>}} // IF_SVE_BQ_2A theEmitter->emitIns_R_R_I(INS_sve_ext, EA_SCALABLE, REG_V0, REG_V1, 0, INS_OPTS_SCALABLE_B, From a470b778e3f65a27337c9814bf53e0e0212d7ca5 Mon Sep 17 00:00:00 2001 From: "Aman Khalid (from Dev Box)" Date: Mon, 4 Mar 2024 21:07:24 -0500 Subject: [PATCH 7/9] Fix imm8 assert --- src/coreclr/jit/emitarm64.cpp | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/coreclr/jit/emitarm64.cpp b/src/coreclr/jit/emitarm64.cpp index f1db898dabad74..b21bea4a59ffd8 100644 --- a/src/coreclr/jit/emitarm64.cpp +++ b/src/coreclr/jit/emitarm64.cpp @@ -1171,13 +1171,17 @@ void emitter::emitInsSanityCheck(instrDesc* id) break; case IF_SVE_BU_2A: // ........xx..gggg ...iiiiiiiiddddd -- SVE copy floating-point immediate (predicated) + { imm = emitGetInsSC(id); + floatImm8 fpImm; + fpImm.immFPIVal = (unsigned)imm; assert(insOptsScalableAtLeastHalf(id->idInsOpt())); assert(isVectorRegister(id->idReg1())); // ddddd - assert(isValidSimm8(imm) || isValidUimm8(imm)); // iiiiiiii + 
assert(isValidSimm8((ssize_t)emitDecodeFloatImm8(fpImm))); // iiiiiiii assert(isPredicateRegister(id->idReg2())); // gggg assert(isValidVectorElemsize(optGetSveElemsize(id->idInsOpt()))); // xx break; + } case IF_SVE_CE_2A: // ................ ......nnnnn.DDDD -- SVE move predicate from vector assert(isPredicateRegister(id->idReg1())); // DDDD From f4974d3e2444806181abcb18e2571fbdb32c38fb Mon Sep 17 00:00:00 2001 From: "Aman Khalid (from Dev Box)" Date: Mon, 4 Mar 2024 21:11:27 -0500 Subject: [PATCH 8/9] Missed a line --- src/coreclr/jit/emitarm64.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/coreclr/jit/emitarm64.cpp b/src/coreclr/jit/emitarm64.cpp index aa7c056cd57c7b..48c1f24447cc97 100644 --- a/src/coreclr/jit/emitarm64.cpp +++ b/src/coreclr/jit/emitarm64.cpp @@ -24561,6 +24561,7 @@ BYTE* emitter::emitOutput_InstrSve(BYTE* dst, instrDesc* id) code |= insEncodeImm8_12_to_5(imm); // iiiiiiii code |= (id->idOptionalShift() ? 0x2000 : 0); // h code |= insEncodeElemsize(optGetSveElemsize(id->idInsOpt())); // xx + dst += emitOutput_Instr(dst, code); break; case IF_SVE_CE_2A: // ................ 
......nnnnn.DDDD -- SVE move predicate from vector From bc910255f093e32c56fdabf3ca5c544b7b452db3 Mon Sep 17 00:00:00 2001 From: "Aman Khalid (from Dev Box)" Date: Tue, 5 Mar 2024 13:32:15 -0500 Subject: [PATCH 9/9] Add 64-bit version of BO_1A --- src/coreclr/jit/codegenarm64test.cpp | 32 ++++++------ src/coreclr/jit/emitarm64.cpp | 77 ++++++++++++++++++++-------- src/coreclr/jit/emitarm64.h | 4 ++ 3 files changed, 75 insertions(+), 38 deletions(-) diff --git a/src/coreclr/jit/codegenarm64test.cpp b/src/coreclr/jit/codegenarm64test.cpp index 1d543e4c3dfb74..f5752a90166a82 100644 --- a/src/coreclr/jit/codegenarm64test.cpp +++ b/src/coreclr/jit/codegenarm64test.cpp @@ -5915,36 +5915,36 @@ void CodeGen::genArm64EmitterUnitTestsSve() // IF_SVE_BO_1A theEmitter->emitIns_R_PATTERN_I(INS_sve_sqdecb, EA_4BYTE, REG_R0, SVE_PATTERN_POW2, 1); // SQDECB , {, {, MUL #}} - theEmitter->emitIns_R_PATTERN_I(INS_sve_sqdecd, EA_4BYTE, REG_R1, SVE_PATTERN_VL1, - 2); // SQDECD , {, {, MUL #}} + theEmitter->emitIns_R_PATTERN_I(INS_sve_sqdecd, EA_8BYTE, REG_R1, SVE_PATTERN_VL1, + 2); // SQDECD {, {, MUL #}} theEmitter->emitIns_R_PATTERN_I(INS_sve_sqdech, EA_4BYTE, REG_R2, SVE_PATTERN_VL2, 3); // SQDECH , {, {, MUL #}} - theEmitter->emitIns_R_PATTERN_I(INS_sve_sqdecw, EA_4BYTE, REG_R3, SVE_PATTERN_VL3, - 4); // SQDECW , {, {, MUL #}} + theEmitter->emitIns_R_PATTERN_I(INS_sve_sqdecw, EA_8BYTE, REG_R3, SVE_PATTERN_VL3, + 4); // SQDECW {, {, MUL #}} theEmitter->emitIns_R_PATTERN_I(INS_sve_sqincb, EA_4BYTE, REG_R4, SVE_PATTERN_VL4, 5); // SQINCB , {, {, MUL #}} - theEmitter->emitIns_R_PATTERN_I(INS_sve_sqincd, EA_4BYTE, REG_R5, SVE_PATTERN_VL5, - 6); // SQINCD , {, {, MUL #}} + theEmitter->emitIns_R_PATTERN_I(INS_sve_sqincd, EA_8BYTE, REG_R5, SVE_PATTERN_VL5, + 6); // SQINCD {, {, MUL #}} theEmitter->emitIns_R_PATTERN_I(INS_sve_sqinch, EA_4BYTE, REG_R6, SVE_PATTERN_VL6, 7); // SQINCH , {, {, MUL #}} - theEmitter->emitIns_R_PATTERN_I(INS_sve_sqincw, EA_4BYTE, REG_R7, SVE_PATTERN_VL7, - 8); // 
SQINCW , {, {, MUL #}} + theEmitter->emitIns_R_PATTERN_I(INS_sve_sqincw, EA_8BYTE, REG_R7, SVE_PATTERN_VL7, + 8); // SQINCW {, {, MUL #}} theEmitter->emitIns_R_PATTERN_I(INS_sve_uqdecb, EA_4BYTE, REG_R8, SVE_PATTERN_VL8, 9); // UQDECB {, {, MUL #}} - theEmitter->emitIns_R_PATTERN_I(INS_sve_uqdecd, EA_4BYTE, REG_R9, SVE_PATTERN_VL16, - 10); // UQDECD {, {, MUL #}} + theEmitter->emitIns_R_PATTERN_I(INS_sve_uqdecd, EA_8BYTE, REG_R9, SVE_PATTERN_VL16, + 10); // UQDECD {, {, MUL #}} theEmitter->emitIns_R_PATTERN_I(INS_sve_uqdech, EA_4BYTE, REG_R10, SVE_PATTERN_VL32, 11); // UQDECH {, {, MUL #}} - theEmitter->emitIns_R_PATTERN_I(INS_sve_uqdecw, EA_4BYTE, REG_R11, SVE_PATTERN_VL64, - 12); // UQDECW {, {, MUL #}} + theEmitter->emitIns_R_PATTERN_I(INS_sve_uqdecw, EA_8BYTE, REG_R11, SVE_PATTERN_VL64, + 12); // UQDECW {, {, MUL #}} theEmitter->emitIns_R_PATTERN_I(INS_sve_uqincb, EA_4BYTE, REG_R12, SVE_PATTERN_VL128, 13); // UQINCB {, {, MUL #}} - theEmitter->emitIns_R_PATTERN_I(INS_sve_uqincd, EA_4BYTE, REG_R13, SVE_PATTERN_VL256, - 14); // UQINCD {, {, MUL #}} + theEmitter->emitIns_R_PATTERN_I(INS_sve_uqincd, EA_8BYTE, REG_R13, SVE_PATTERN_VL256, + 14); // UQINCD {, {, MUL #}} theEmitter->emitIns_R_PATTERN_I(INS_sve_uqinch, EA_4BYTE, REG_R14, SVE_PATTERN_MUL4, 15); // UQINCH {, {, MUL #}} - theEmitter->emitIns_R_PATTERN_I(INS_sve_uqincw, EA_4BYTE, REG_R15, SVE_PATTERN_ALL, - 16); // UQINCW {, {, MUL #}} + theEmitter->emitIns_R_PATTERN_I(INS_sve_uqincw, EA_8BYTE, REG_R15, SVE_PATTERN_ALL, + 16); // UQINCW {, {, MUL #}} // IF_SVE_BP_1A theEmitter->emitIns_R_PATTERN_I(INS_sve_sqdecd, EA_SCALABLE, REG_V0, SVE_PATTERN_VL1, 1, diff --git a/src/coreclr/jit/emitarm64.cpp b/src/coreclr/jit/emitarm64.cpp index 48c1f24447cc97..da8f4d55c66498 100644 --- a/src/coreclr/jit/emitarm64.cpp +++ b/src/coreclr/jit/emitarm64.cpp @@ -1150,7 +1150,7 @@ void emitter::emitInsSanityCheck(instrDesc* id) elemsize = id->idOpSize(); assert(insOptsScalableAtLeastHalf(id->idInsOpt())); 
assert(isVectorRegister(id->idReg1())); - assert(elemsize == EA_SCALABLE); + assert(isScalableVectorSize(elemsize)); assert(isValidUimm4From1(emitGetInsSC(id))); break; @@ -1158,7 +1158,7 @@ void emitter::emitInsSanityCheck(instrDesc* id) elemsize = id->idOpSize(); assert(id->idInsOpt() == INS_OPTS_NONE); assert(isGeneralRegister(id->idReg1())); - assert(elemsize == EA_4BYTE); + assert(isValidGeneralDatasize(elemsize)); assert(isValidUimm4From1(emitGetInsSC(id))); break; @@ -16595,8 +16595,7 @@ void emitter::emitIns_R_I_FLAGS_COND( void emitter::emitIns_R_PATTERN( instruction ins, emitAttr attr, regNumber reg1, insOpts opt, insSvePattern pattern /* = SVE_PATTERN_ALL*/) { - emitAttr elemsize = EA_UNKNOWN; - insFormat fmt = IF_NONE; + insFormat fmt = IF_NONE; /* Figure out the encoding format of the instruction */ switch (ins) @@ -16641,9 +16640,8 @@ void emitter::emitIns_R_PATTERN_I(instruction ins, ssize_t imm, insOpts opt /* = INS_OPTS_NONE */) { - emitAttr size = EA_SIZE(attr); - emitAttr elemsize = EA_UNKNOWN; - insFormat fmt = IF_NONE; + emitAttr size = EA_SIZE(attr); + insFormat fmt = IF_NONE; /* Figure out the encoding format of the instruction */ switch (ins) @@ -16694,9 +16692,9 @@ void emitter::emitIns_R_PATTERN_I(instruction ins, case INS_sve_sqdecb: case INS_sve_uqdecb: assert(insOptsNone(opt)); - assert(isGeneralRegister(reg1)); // ddddd - assert(isValidUimm4From1(imm)); // iiii - assert(size == EA_4BYTE); + assert(isGeneralRegister(reg1)); // ddddd + assert(isValidUimm4From1(imm)); // iiii + assert(isValidGeneralDatasize(size)); // X fmt = IF_SVE_BO_1A; break; @@ -16716,15 +16714,15 @@ void emitter::emitIns_R_PATTERN_I(instruction ins, if (insOptsNone(opt)) { - assert(isGeneralRegister(reg1)); // ddddd - assert(size == EA_4BYTE); + assert(isGeneralRegister(reg1)); // ddddd + assert(isValidGeneralDatasize(size)); // X fmt = IF_SVE_BO_1A; } else { assert(insOptsScalableAtLeastHalf(opt)); assert(isVectorRegister(reg1)); // ddddd - assert(size == 
EA_SCALABLE); + assert(isScalableVectorSize(size)); fmt = IF_SVE_BP_1A; } break; @@ -16741,6 +16739,7 @@ void emitter::emitIns_R_PATTERN_I(instruction ins, id->idIns(ins); id->idInsFmt(fmt); id->idInsOpt(opt); + id->idOpSize(size); id->idReg1(reg1); id->idSvePattern(pattern); @@ -16763,9 +16762,8 @@ void emitter::emitIns_PRFOP_R_R_R(instruction ins, insOpts opt /* = INS_OPTS_NONE */, insScalableOpts sopt /* = INS_SCALABLE_OPTS_NONE */) { - emitAttr size = EA_SIZE(attr); - emitAttr elemsize = EA_UNKNOWN; - insFormat fmt = IF_NONE; + emitAttr size = EA_SIZE(attr); + insFormat fmt = IF_NONE; /* Figure out the encoding format of the instruction */ switch (ins) @@ -16874,9 +16872,8 @@ void emitter::emitIns_PRFOP_R_R_I(instruction ins, int imm, insOpts opt /* = INS_OPTS_NONE */) { - emitAttr size = EA_SIZE(attr); - emitAttr elemsize = EA_UNKNOWN; - insFormat fmt = IF_NONE; + emitAttr size = EA_SIZE(attr); + insFormat fmt = IF_NONE; /* Figure out the encoding format of the instruction */ switch (ins) @@ -19615,6 +19612,28 @@ void emitter::emitIns_Call(EmitCallType callType, return 0; } +/***************************************************************************** + * + * Returns the encoding to select the 4/8 byte elemsize for an Arm64 Sve vector instruction + * This specifically encodes the field 'sz' at bit location '20'. + */ + +/*static*/ emitter::code_t emitter::insEncodeSveElemsize_sz_20(emitAttr size) +{ + switch (size) + { + case EA_4BYTE: + return 0; + + case EA_8BYTE: + return (1 << 20); + + default: + assert(!"Invalid insOpt for vector register"); + } + return 0; +} + /***************************************************************************** * * Returns the encoding to select the 4/8 byte elemsize for an Arm64 Sve vector instruction @@ -24512,7 +24531,6 @@ BYTE* emitter::emitOutput_InstrSve(BYTE* dst, instrDesc* id) // Immediate and pattern to general purpose. 
case IF_SVE_BL_1A: // ............iiii ......pppppddddd -- SVE element count case IF_SVE_BM_1A: // ............iiii ......pppppddddd -- SVE inc/dec register by element count - case IF_SVE_BO_1A: // ...........Xiiii ......pppppddddd -- SVE saturating inc/dec register by element count imm = emitGetInsSC(id); code = emitInsCodeSve(ins, fmt); code |= insEncodeReg_Rd(id->idReg1()); // ddddd @@ -24521,6 +24539,16 @@ BYTE* emitter::emitOutput_InstrSve(BYTE* dst, instrDesc* id) dst += emitOutput_Instr(dst, code); break; + case IF_SVE_BO_1A: // ...........Xiiii ......pppppddddd -- SVE saturating inc/dec register by element count + imm = emitGetInsSC(id); + code = emitInsCodeSve(ins, fmt); + code |= insEncodeReg_Rd(id->idReg1()); // ddddd + code |= insEncodeSvePattern(id->idSvePattern()); // ppppp + code |= insEncodeUimm4From1_19_to_16(imm); // iiii + code |= insEncodeSveElemsize_sz_20(id->idOpSize()); // X + dst += emitOutput_Instr(dst, code); + break; + case IF_SVE_BQ_2A: // ...........iiiii ...iiinnnnnddddd -- SVE extract vector (immediate offset, destructive) case IF_SVE_BQ_2B: // ...........iiiii ...iiimmmmmddddd -- SVE extract vector (immediate offset, destructive) imm = emitGetInsSC(id); @@ -28776,6 +28804,7 @@ void emitter::emitDispInsHelp( break; // , {, {, MUL #}} + // {, {, MUL #}} // {, {, MUL #}} case IF_SVE_BO_1A: // ...........Xiiii ......pppppddddd -- SVE saturating inc/dec register by element count switch (id->idIns()) @@ -28789,11 +28818,15 @@ void emitter::emitDispInsHelp( case INS_sve_sqincd: case INS_sve_sqdecd: emitDispReg(id->idReg1(), EA_8BYTE, true); // ddddd - emitDispReg(id->idReg1(), EA_4BYTE, true); + + if (size == EA_4BYTE) + { + emitDispReg(id->idReg1(), EA_4BYTE, true); + } break; default: - emitDispReg(id->idReg1(), EA_4BYTE, true); // ddddd + emitDispReg(id->idReg1(), size, true); // ddddd break; } diff --git a/src/coreclr/jit/emitarm64.h b/src/coreclr/jit/emitarm64.h index eca230f7a5a693..d5e2d24577016f 100644 --- 
a/src/coreclr/jit/emitarm64.h +++ b/src/coreclr/jit/emitarm64.h @@ -530,6 +530,10 @@ static code_t insEncodeSveElemsize_22_to_21(emitAttr size); // This specifically encodes the size at bit locations '18-17'. static code_t insEncodeSveElemsize_18_to_17(emitAttr size); +// Returns the encoding to select the 4/8 byte elemsize for an Arm64 Sve vector instruction +// This specifically encodes the field 'sz' at bit location '20'. +static code_t insEncodeSveElemsize_sz_20(emitAttr size); + // Returns the encoding to select the 4/8 byte elemsize for an Arm64 Sve vector instruction // This specifically encodes the field 'sz' at bit location '21'. static code_t insEncodeSveElemsize_sz_21(emitAttr size);