diff --git a/src/coreclr/jit/codegenarm64test.cpp b/src/coreclr/jit/codegenarm64test.cpp
index 03edaae7366903..f7c9567a358724 100644
--- a/src/coreclr/jit/codegenarm64test.cpp
+++ b/src/coreclr/jit/codegenarm64test.cpp
@@ -5881,6 +5881,110 @@ void CodeGen::genArm64EmitterUnitTestsSve()
     theEmitter->emitIns_R_R_R_I(INS_sve_st4w, EA_SCALABLE, REG_V31, REG_P1, REG_R5, 28,
                                 INS_OPTS_SCALABLE_S); // ST4W {<Zt1>.S, <Zt2>.S, <Zt3>.S, <Zt4>.S }, <Pg>, [<Xn|SP>{, #<imm>, MUL VL}]
+
+    // IF_SVE_JD_4A
+    theEmitter->emitIns_R_R_R_R(INS_sve_st1b, EA_SCALABLE, REG_V4, REG_P1, REG_R2, REG_R0,
+                                INS_OPTS_SCALABLE_B); // ST1B {<Zt>.<T>}, <Pg>, [<Xn|SP>, <Xm>]
+    theEmitter->emitIns_R_R_R_R(INS_sve_st1b, EA_SCALABLE, REG_V4, REG_P5, REG_R6, REG_R2,
+                                INS_OPTS_SCALABLE_H); // ST1B {<Zt>.<T>}, <Pg>, [<Xn|SP>, <Xm>]
+    theEmitter->emitIns_R_R_R_R(INS_sve_st1b, EA_SCALABLE, REG_V6, REG_P5, REG_R7, REG_R4,
+                                INS_OPTS_SCALABLE_S); // ST1B {<Zt>.<T>}, <Pg>, [<Xn|SP>, <Xm>]
+    theEmitter->emitIns_R_R_R_R(INS_sve_st1b, EA_SCALABLE, REG_V4, REG_P0, REG_R1, REG_R2,
+                                INS_OPTS_SCALABLE_D); // ST1B {<Zt>.<T>}, <Pg>, [<Xn|SP>, <Xm>]
+    theEmitter->emitIns_R_R_R_R(INS_sve_st1h, EA_SCALABLE, REG_V5, REG_P6, REG_R1, REG_R2, INS_OPTS_SCALABLE_H,
+                                INS_SCALABLE_OPTS_LSL_N); // ST1H {<Zt>.<T>}, <Pg>, [<Xn|SP>, <Xm>, LSL #1]
+    theEmitter->emitIns_R_R_R_R(INS_sve_st1h, EA_SCALABLE, REG_V1, REG_P2, REG_R3, REG_R4, INS_OPTS_SCALABLE_S,
+                                INS_SCALABLE_OPTS_LSL_N); // ST1H {<Zt>.<T>}, <Pg>, [<Xn|SP>, <Xm>, LSL #1]
+    theEmitter->emitIns_R_R_R_R(INS_sve_st1h, EA_SCALABLE, REG_V3, REG_P2, REG_R4, REG_R0, INS_OPTS_SCALABLE_D,
+                                INS_SCALABLE_OPTS_LSL_N); // ST1H {<Zt>.<T>}, <Pg>, [<Xn|SP>, <Xm>, LSL #1]
+
+    // IF_SVE_JD_4B
+    theEmitter->emitIns_R_R_R_R(INS_sve_st1w, EA_SCALABLE, REG_V0, REG_P1, REG_R2, REG_R3, INS_OPTS_SCALABLE_S,
+                                INS_SCALABLE_OPTS_LSL_N); // ST1W {<Zt>.<T>}, <Pg>, [<Xn|SP>, <Xm>, LSL #2]
+    theEmitter->emitIns_R_R_R_R(INS_sve_st1w, EA_SCALABLE, REG_V2, REG_P3, REG_R4, REG_R5, INS_OPTS_SCALABLE_D,
+                                INS_SCALABLE_OPTS_LSL_N); // ST1W {<Zt>.<T>}, <Pg>, [<Xn|SP>, <Xm>, LSL #2]
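+
+    // Note on the scalar+scalar forms above (informal summary): the address is
+    // Xn + (Xm << shift), where the shift matches the element size -- none for ST1B,
+    // LSL #1 for ST1H, LSL #2 for ST1W -- hence INS_SCALABLE_OPTS_LSL_N on the
+    // ST1H/ST1W cases.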
+
+    // IF_SVE_JJ_4A
+    theEmitter->emitIns_R_R_R_R(INS_sve_st1d, EA_SCALABLE, REG_V0, REG_P1, REG_R2, REG_V3, INS_OPTS_SCALABLE_D_UXTW,
+                                INS_SCALABLE_OPTS_MOD_N); // ST1D {<Zt>.D }, <Pg>, [<Xn|SP>, <Zm>.D, <mod> #3]
+    theEmitter->emitIns_R_R_R_R(INS_sve_st1d, EA_SCALABLE, REG_V0, REG_P1, REG_R2, REG_V3, INS_OPTS_SCALABLE_D_SXTW,
+                                INS_SCALABLE_OPTS_MOD_N); // ST1D {<Zt>.D }, <Pg>, [<Xn|SP>, <Zm>.D, <mod> #3]
+    theEmitter->emitIns_R_R_R_R(INS_sve_st1h, EA_SCALABLE, REG_V3, REG_P1, REG_R5, REG_V4, INS_OPTS_SCALABLE_S_UXTW,
+                                INS_SCALABLE_OPTS_MOD_N); // ST1H {<Zt>.S }, <Pg>, [<Xn|SP>, <Zm>.S, <mod> #1]
+    theEmitter->emitIns_R_R_R_R(INS_sve_st1h, EA_SCALABLE, REG_V3, REG_P1, REG_R5, REG_V4, INS_OPTS_SCALABLE_S_SXTW,
+                                INS_SCALABLE_OPTS_MOD_N); // ST1H {<Zt>.S }, <Pg>, [<Xn|SP>, <Zm>.S, <mod> #1]
+    theEmitter->emitIns_R_R_R_R(INS_sve_st1w, EA_SCALABLE, REG_V0, REG_P3, REG_R1, REG_V2, INS_OPTS_SCALABLE_S_UXTW,
+                                INS_SCALABLE_OPTS_MOD_N); // ST1W {<Zt>.S }, <Pg>, [<Xn|SP>, <Zm>.S, <mod> #2]
+    theEmitter->emitIns_R_R_R_R(INS_sve_st1w, EA_SCALABLE, REG_V0, REG_P3, REG_R1, REG_V2, INS_OPTS_SCALABLE_S_SXTW,
+                                INS_SCALABLE_OPTS_MOD_N); // ST1W {<Zt>.S }, <Pg>, [<Xn|SP>, <Zm>.S, <mod> #2]
+
+    // IF_SVE_JJ_4A_B
+    theEmitter->emitIns_R_R_R_R(INS_sve_st1d, EA_SCALABLE, REG_V3, REG_P1, REG_R2, REG_V5,
+                                INS_OPTS_SCALABLE_D_UXTW); // ST1D {<Zt>.D }, <Pg>, [<Xn|SP>, <Zm>.D, <mod>]
+    theEmitter->emitIns_R_R_R_R(INS_sve_st1d, EA_SCALABLE, REG_V3, REG_P1, REG_R2, REG_V5,
+                                INS_OPTS_SCALABLE_D_SXTW); // ST1D {<Zt>.D }, <Pg>, [<Xn|SP>, <Zm>.D, <mod>]
+    theEmitter->emitIns_R_R_R_R(INS_sve_st1h, EA_SCALABLE, REG_V2, REG_P3, REG_R1, REG_V4, INS_OPTS_SCALABLE_D_UXTW,
+                                INS_SCALABLE_OPTS_MOD_N); // ST1H {<Zt>.D }, <Pg>, [<Xn|SP>, <Zm>.D, <mod> #1]
+    theEmitter->emitIns_R_R_R_R(INS_sve_st1h, EA_SCALABLE, REG_V2, REG_P3, REG_R1, REG_V4, INS_OPTS_SCALABLE_D_SXTW,
+                                INS_SCALABLE_OPTS_MOD_N); // ST1H {<Zt>.D }, <Pg>, [<Xn|SP>, <Zm>.D, <mod> #1]
+    theEmitter->emitIns_R_R_R_R(INS_sve_st1w, EA_SCALABLE, REG_V1, REG_P4, REG_R2, REG_V3, INS_OPTS_SCALABLE_D_UXTW,
+                                INS_SCALABLE_OPTS_MOD_N); // ST1W {<Zt>.D }, <Pg>, [<Xn|SP>, <Zm>.D, <mod> #2]
+    theEmitter->emitIns_R_R_R_R(INS_sve_st1w, EA_SCALABLE, REG_V1, REG_P4, REG_R2, REG_V3, INS_OPTS_SCALABLE_D_SXTW,
+                                INS_SCALABLE_OPTS_MOD_N); // ST1W {<Zt>.D }, <Pg>, [<Xn|SP>, <Zm>.D, <mod> #2]
+
+    // IF_SVE_JJ_4A_C
+    theEmitter->emitIns_R_R_R_R(INS_sve_st1h, EA_SCALABLE, REG_V1, REG_P5, REG_R1, REG_V3,
+                                INS_OPTS_SCALABLE_D_UXTW); // ST1H {<Zt>.D }, <Pg>, [<Xn|SP>, <Zm>.D, <mod>]
+    theEmitter->emitIns_R_R_R_R(INS_sve_st1h, EA_SCALABLE, REG_V1, REG_P5, REG_R1, REG_V3,
+                                INS_OPTS_SCALABLE_D_SXTW); // ST1H {<Zt>.D }, <Pg>, [<Xn|SP>, <Zm>.D, <mod>]
+    theEmitter->emitIns_R_R_R_R(INS_sve_st1w, EA_SCALABLE, REG_V0, REG_P2, REG_R3, REG_V4,
+                                INS_OPTS_SCALABLE_D_UXTW); // ST1W {<Zt>.D }, <Pg>, [<Xn|SP>, <Zm>.D, <mod>]
+    theEmitter->emitIns_R_R_R_R(INS_sve_st1w, EA_SCALABLE, REG_V0, REG_P2, REG_R3, REG_V4,
+                                INS_OPTS_SCALABLE_D_SXTW); // ST1W {<Zt>.D }, <Pg>, [<Xn|SP>, <Zm>.D, <mod>]
+
+    // IF_SVE_JJ_4A_D
+    theEmitter->emitIns_R_R_R_R(INS_sve_st1h, EA_SCALABLE, REG_V7, REG_P5, REG_R4, REG_V1,
+                                INS_OPTS_SCALABLE_S_UXTW); // ST1H {<Zt>.S }, <Pg>, [<Xn|SP>, <Zm>.S, <mod>]
+    theEmitter->emitIns_R_R_R_R(INS_sve_st1h, EA_SCALABLE, REG_V7, REG_P5, REG_R4, REG_V1,
+                                INS_OPTS_SCALABLE_S_SXTW); // ST1H {<Zt>.S }, <Pg>, [<Xn|SP>, <Zm>.S, <mod>]
+    theEmitter->emitIns_R_R_R_R(INS_sve_st1w, EA_SCALABLE, REG_V1, REG_P2, REG_R3, REG_V2,
+                                INS_OPTS_SCALABLE_S_UXTW); // ST1W {<Zt>.S }, <Pg>, [<Xn|SP>, <Zm>.S, <mod>]
+    theEmitter->emitIns_R_R_R_R(INS_sve_st1w, EA_SCALABLE, REG_V1, REG_P2, REG_R3, REG_V2,
+                                INS_OPTS_SCALABLE_S_SXTW); // ST1W {<Zt>.S }, <Pg>, [<Xn|SP>, <Zm>.S, <mod>]
+
+    // IF_SVE_JK_4A
+    theEmitter->emitIns_R_R_R_R(INS_sve_st1b, EA_SCALABLE, REG_V4, REG_P2, REG_R0, REG_V1,
+                                INS_OPTS_SCALABLE_D_UXTW); // ST1B {<Zt>.D }, <Pg>, [<Xn|SP>, <Zm>.D, <mod>]
+    theEmitter->emitIns_R_R_R_R(INS_sve_st1b, EA_SCALABLE, REG_V4, REG_P2, REG_R0, REG_V1,
+                                INS_OPTS_SCALABLE_D_SXTW); // ST1B {<Zt>.D }, <Pg>, [<Xn|SP>, <Zm>.D, <mod>]
+
+    // IF_SVE_JK_4A_B
+    theEmitter->emitIns_R_R_R_R(INS_sve_st1b, EA_SCALABLE, REG_V1, REG_P4, REG_R3, REG_V0,
+                                INS_OPTS_SCALABLE_S_UXTW); // ST1B {<Zt>.S }, <Pg>, [<Xn|SP>, <Zm>.S, <mod>]
+    theEmitter->emitIns_R_R_R_R(INS_sve_st1b, EA_SCALABLE, REG_V1, REG_P4, REG_R3, REG_V0,
+                                INS_OPTS_SCALABLE_S_SXTW); // ST1B {<Zt>.S }, <Pg>, [<Xn|SP>, <Zm>.S, <mod>]
+
+    // IF_SVE_JN_3A
+    theEmitter->emitIns_R_R_R_I(INS_sve_st1b, EA_SCALABLE, REG_V3, REG_P2, REG_R1, 5,
+                                INS_OPTS_SCALABLE_B); // ST1B {<Zt>.<T>}, <Pg>, [<Xn|SP>{, #<imm>, MUL VL}]
+    theEmitter->emitIns_R_R_R_I(INS_sve_st1b, EA_SCALABLE, REG_V3, REG_P2, REG_R1, 4,
+                                INS_OPTS_SCALABLE_H); // ST1B {<Zt>.<T>}, <Pg>, [<Xn|SP>{, #<imm>, MUL VL}]
+    theEmitter->emitIns_R_R_R_I(INS_sve_st1h, EA_SCALABLE, REG_V0, REG_P3, REG_R4, 3,
+                                INS_OPTS_SCALABLE_H); // ST1H {<Zt>.<T>}, <Pg>, [<Xn|SP>{, #<imm>, MUL VL}]
+    theEmitter->emitIns_R_R_R_I(INS_sve_st1b, EA_SCALABLE, REG_V3, REG_P2, REG_R1, 2,
+                                INS_OPTS_SCALABLE_S); // ST1B {<Zt>.<T>}, <Pg>, [<Xn|SP>{, #<imm>, MUL VL}]
+    theEmitter->emitIns_R_R_R_I(INS_sve_st1h, EA_SCALABLE, REG_V0, REG_P3, REG_R4, 1,
+                                INS_OPTS_SCALABLE_S); // ST1H {<Zt>.<T>}, <Pg>, [<Xn|SP>{, #<imm>, MUL VL}]
+    theEmitter->emitIns_R_R_R_I(INS_sve_st1b, EA_SCALABLE, REG_V3, REG_P2, REG_R1, 0,
+                                INS_OPTS_SCALABLE_D); // ST1B {<Zt>.<T>}, <Pg>, [<Xn|SP>{, #<imm>, MUL VL}]
+    theEmitter->emitIns_R_R_R_I(INS_sve_st1h, EA_SCALABLE, REG_V0, REG_P3, REG_R4, -2,
+                                INS_OPTS_SCALABLE_D); // ST1H {<Zt>.<T>}, <Pg>, [<Xn|SP>{, #<imm>, MUL VL}]
+
+    // IF_SVE_JN_3B
+    theEmitter->emitIns_R_R_R_I(INS_sve_st1w, EA_SCALABLE, REG_V2, REG_P1, REG_R3, 5,
+                                INS_OPTS_SCALABLE_S); // ST1W {<Zt>.<T>}, <Pg>, [<Xn|SP>{, #<imm>, MUL VL}]
+    theEmitter->emitIns_R_R_R_I(INS_sve_st1w, EA_SCALABLE, REG_V2, REG_P1, REG_R3, 1,
+                                INS_OPTS_SCALABLE_D); // ST1W {<Zt>.<T>}, <Pg>, [<Xn|SP>{, #<imm>, MUL VL}]
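+
+    // Informal note on the scalar+immediate forms above: the #<imm>, MUL VL offset is a
+    // vector-length multiple, so imm = -2 addresses Xn - 2 * VL bytes; the encodable
+    // window is the signed four-bit range [-8, 7] checked by isValidSimm4.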
 }
 #endif // defined(TARGET_ARM64) && defined(DEBUG)
diff --git a/src/coreclr/jit/emitarm64.cpp b/src/coreclr/jit/emitarm64.cpp
index 1877f0a9036e9f..42ea4d031eb810 100644
--- a/src/coreclr/jit/emitarm64.cpp
+++ b/src/coreclr/jit/emitarm64.cpp
@@ -1500,6 +1500,70 @@ void emitter::emitInsSanityCheck(instrDesc* id)
         }
         break;
 
+        case IF_SVE_JD_4A: // .........xxmmmmm ...gggnnnnnttttt -- SVE contiguous store (scalar plus scalar)
+            elemsize = id->idOpSize();
+            assert(isVectorRegister(id->idReg1()));    // ttttt
+            assert(isPredicateRegister(id->idReg2())); // ggg
+            assert(isGeneralRegister(id->idReg3()));   // nnnnn
+            assert(isGeneralRegister(id->idReg4()));   // mmmmm
+            assert(isScalableVectorSize(elemsize));    // xx
+            // scalable B is reserved for st1h, which must use at least half-word elements
+            assert((id->idIns() == INS_sve_st1h) ? insOptsScalableAtLeastHalf(id->idInsOpt())
+                                                 : insOptsScalableStandard(id->idInsOpt()));
+            break;
+
+        case IF_SVE_JD_4B: // ..........xmmmmm ...gggnnnnnttttt -- SVE contiguous store (scalar plus scalar)
+            elemsize = id->idOpSize();
+            assert(insOptsScalableWords(id->idInsOpt()));
+            assert(isVectorRegister(id->idReg1()));    // ttttt
+            assert(isPredicateRegister(id->idReg2())); // ggg
+            assert(isGeneralRegister(id->idReg3()));   // nnnnn
+            assert(isGeneralRegister(id->idReg4()));   // mmmmm
+            assert(isScalableVectorSize(elemsize));    // x
+            break;
+
+        case IF_SVE_JJ_4A: // ...........mmmmm .h.gggnnnnnttttt -- SVE 64-bit scatter store (scalar plus 64-bit
+                           // scaled offsets)
+        case IF_SVE_JJ_4A_B: // ...........mmmmm .h.gggnnnnnttttt -- SVE 64-bit scatter store (scalar plus 64-bit
+                             // scaled offsets)
+        case IF_SVE_JJ_4A_C: // ...........mmmmm .h.gggnnnnnttttt -- SVE 64-bit scatter store (scalar plus 64-bit
+                             // scaled offsets)
+        case IF_SVE_JJ_4A_D: // ...........mmmmm .h.gggnnnnnttttt -- SVE 64-bit scatter store (scalar plus 64-bit
+                             // scaled offsets)
+        case IF_SVE_JK_4A: // ...........mmmmm .h.gggnnnnnttttt -- SVE 64-bit scatter store (scalar plus 64-bit
+                           // unscaled offsets)
+        case IF_SVE_JK_4A_B: // ...........mmmmm .h.gggnnnnnttttt -- SVE 64-bit scatter store (scalar plus 64-bit
+                             // unscaled offsets)
+            elemsize = id->idOpSize();
+            assert(insOptsScalable32bitExtends(id->idInsOpt()));
+            assert(isVectorRegister(id->idReg1()));    // ttttt
+            assert(isPredicateRegister(id->idReg2())); // ggg
+            assert(isGeneralRegister(id->idReg3()));   // nnnnn
+            assert(isScalableVectorSize(elemsize));
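+            // Also check mmmmm, which these scatter formats encode as a vector register
+            // (see insEncodeReg_V_20_to_16 in emitOutputInstr):
+            assert(isVectorRegister(id->idReg4())); // mmmmm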
+            break;
+
+        case IF_SVE_JN_3A: // .........xx.iiii ...gggnnnnnttttt -- SVE contiguous store (scalar plus immediate)
+            imm      = emitGetInsSC(id);
+            elemsize = id->idOpSize();
+            assert(insOptsScalableStandard(id->idInsOpt()));
+            assert(isVectorRegister(id->idReg1()));    // ttttt
+            assert(isPredicateRegister(id->idReg2())); // ggg
+            assert(isGeneralRegister(id->idReg3()));   // nnnnn
+            assert(isScalableVectorSize(elemsize));    // xx
+            assert(isValidSimm4(imm));                 // iiii
+            break;
+
+        case IF_SVE_JN_3B: // ..........x.iiii ...gggnnnnnttttt -- SVE contiguous store (scalar plus immediate)
+            imm      = emitGetInsSC(id);
+            elemsize = id->idOpSize();
+            assert(insOptsScalableWords(id->idInsOpt()));
+            assert(isVectorRegister(id->idReg1()));    // ttttt
+            assert(isPredicateRegister(id->idReg2())); // ggg
+            assert(isGeneralRegister(id->idReg3()));   // nnnnn
+            assert(isScalableVectorSize(elemsize));    // x
+            assert(isValidSimm4(imm));                 // iiii
+            break;
+
         default:
             printf("unexpected format %s\n", emitIfName(id->idInsFmt()));
             assert(!"Unexpected format");
@@ -5587,9 +5651,13 @@ emitter::code_t emitter::emitInsCodeSve(instruction ins, insFormat fmt)
             return EA_2BYTE;
 
         case INS_OPTS_SCALABLE_S:
+        case INS_OPTS_SCALABLE_S_UXTW:
+        case INS_OPTS_SCALABLE_S_SXTW:
            return EA_4BYTE;
 
        case INS_OPTS_SCALABLE_D:
+        case INS_OPTS_SCALABLE_D_UXTW:
+        case INS_OPTS_SCALABLE_D_SXTW:
            return EA_8BYTE;
 
        case INS_OPTS_SCALABLE_Q:
@@ -10433,17 +10501,24 @@ void emitter::emitIns_R_R_R_I(instruction ins,
         }
         else
         {
-#if DEBUG
-            if (ins == INS_sve_st1w)
+            if ((ins == INS_sve_st1w) && insOptsScalableWords(opt))
             {
-                assert(opt == INS_OPTS_SCALABLE_Q);
+                fmt = IF_SVE_JN_3B;
             }
             else
             {
-                assert(opt == INS_OPTS_SCALABLE_D);
-            }
+#if DEBUG
+                if (ins == INS_sve_st1w)
+                {
+                    assert(opt == INS_OPTS_SCALABLE_Q);
+                }
+                else
+                {
+                    assert(opt == INS_OPTS_SCALABLE_D);
+                }
 #endif // DEBUG
-            fmt = IF_SVE_JN_3C;
+                fmt = IF_SVE_JN_3C;
+            }
         }
         break;
 
@@ -10528,6 +10603,17 @@ void emitter::emitIns_R_R_R_I(instruction ins,
             fmt = IF_SVE_JO_3A;
             break;
 
+        case INS_sve_st1b:
+        case INS_sve_st1h:
+            assert(isVectorRegister(reg1));
+            assert(isPredicateRegister(reg2));
+            assert(isGeneralRegister(reg3));
+            assert(isValidSimm4(imm));
+            // scalable B is reserved for st1h; st1b accepts any standard option
+            assert((ins == INS_sve_st1h) ? insOptsScalableAtLeastHalf(opt) : insOptsScalableStandard(opt));
+            fmt = IF_SVE_JN_3A;
+            break;
+
         default:
             unreached();
             break;
@@ -11057,6 +11143,166 @@ void emitter::emitIns_R_R_R_R(instruction ins,
             fmt = IF_SVE_AS_4A;
             break;
 
+        case INS_sve_st1b:
+            assert(isVectorRegister(reg1));
+            assert(isPredicateRegister(reg2));
+            assert(isGeneralRegister(reg3));
+            assert(isScalableVectorSize(size));
+            assert(insScalableOptsNone(sopt));
+            if (insOptsScalableStandard(opt))
+            {
+                assert(isGeneralRegister(reg4));
+                fmt = IF_SVE_JD_4A;
+            }
+            else
+            {
+                assert(insOptsScalable32bitExtends(opt));
+                switch (opt)
+                {
+                    case INS_OPTS_SCALABLE_S_UXTW:
+                    case INS_OPTS_SCALABLE_S_SXTW:
+                        fmt = IF_SVE_JK_4A_B;
+                        break;
+
+                    case INS_OPTS_SCALABLE_D_UXTW:
+                    case INS_OPTS_SCALABLE_D_SXTW:
+                        fmt = IF_SVE_JK_4A;
+                        break;
+
+                    default:
+                        assert(!"Invalid options for scalable");
+                        break;
+                }
+            }
+            break;
+
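+        // Informal map of the selection below: plain scalable options pick the contiguous
+        // scalar+scalar encodings (IF_SVE_JD_4A/_4B); UXTW/SXTW options pick the scatter
+        // encodings, where INS_SCALABLE_OPTS_MOD_N selects the scaled-offset (<mod> #N)
+        // variant when the element size permits one. st1b has no scaled variant because
+        // its elements are single bytes.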
+        case INS_sve_st1h:
+            assert(isVectorRegister(reg1));
+            assert(isPredicateRegister(reg2));
+            assert(isGeneralRegister(reg3));
+            assert(isScalableVectorSize(size));
+            if (insOptsScalableStandard(opt))
+            {
+                // scalable B is reserved for st1h
+                assert(insOptsScalableAtLeastHalf(opt));
+                assert(isGeneralRegister(reg4));
+                assert(sopt == INS_SCALABLE_OPTS_LSL_N);
+                fmt = IF_SVE_JD_4A;
+            }
+            else
+            {
+                assert(insOptsScalable32bitExtends(opt));
+                switch (opt)
+                {
+                    case INS_OPTS_SCALABLE_S_UXTW:
+                    case INS_OPTS_SCALABLE_S_SXTW:
+                        if (insScalableOptsNone(sopt))
+                        {
+                            fmt = IF_SVE_JJ_4A_D;
+                        }
+                        else
+                        {
+                            assert(sopt == INS_SCALABLE_OPTS_MOD_N);
+                            fmt = IF_SVE_JJ_4A;
+                        }
+                        break;
+
+                    case INS_OPTS_SCALABLE_D_UXTW:
+                    case INS_OPTS_SCALABLE_D_SXTW:
+                        if (insScalableOptsNone(sopt))
+                        {
+                            fmt = IF_SVE_JJ_4A_C;
+                        }
+                        else
+                        {
+                            assert(sopt == INS_SCALABLE_OPTS_MOD_N);
+                            fmt = IF_SVE_JJ_4A_B;
+                        }
+                        break;
+
+                    default:
+                        assert(!"Invalid options for scalable");
+                        break;
+                }
+            }
+            break;
+
+        case INS_sve_st1w:
+            assert(isVectorRegister(reg1));
+            assert(isPredicateRegister(reg2));
+            assert(isGeneralRegister(reg3));
+            assert(isScalableVectorSize(size));
+            if (insOptsScalableStandard(opt))
+            {
+                assert(isGeneralRegister(reg4));
+                assert(sopt == INS_SCALABLE_OPTS_LSL_N);
+                fmt = IF_SVE_JD_4B;
+            }
+            else
+            {
+                assert(insOptsScalable32bitExtends(opt));
+                switch (opt)
+                {
+                    case INS_OPTS_SCALABLE_S_UXTW:
+                    case INS_OPTS_SCALABLE_S_SXTW:
+                        if (insScalableOptsNone(sopt))
+                        {
+                            fmt = IF_SVE_JJ_4A_D;
+                        }
+                        else
+                        {
+                            assert(sopt == INS_SCALABLE_OPTS_MOD_N);
+                            fmt = IF_SVE_JJ_4A;
+                        }
+                        break;
+
+                    case INS_OPTS_SCALABLE_D_UXTW:
+                    case INS_OPTS_SCALABLE_D_SXTW:
+                        if (insScalableOptsNone(sopt))
+                        {
+                            fmt = IF_SVE_JJ_4A_C;
+                        }
+                        else
+                        {
+                            assert(sopt == INS_SCALABLE_OPTS_MOD_N);
+                            fmt = IF_SVE_JJ_4A_B;
+                        }
+                        break;
+
+                    default:
+                        assert(!"Invalid options for scalable");
+                        break;
+                }
+            }
+            break;
+
+        case INS_sve_st1d:
+            assert(insOptsScalable32bitExtends(opt));
+            assert(isVectorRegister(reg1));
+            assert(isPredicateRegister(reg2));
+            assert(isGeneralRegister(reg3));
+            assert(isScalableVectorSize(size));
+            switch (opt)
+            {
+                case INS_OPTS_SCALABLE_D_UXTW:
+                case INS_OPTS_SCALABLE_D_SXTW:
+                    if (sopt == INS_SCALABLE_OPTS_MOD_N)
+                    {
+                        fmt = IF_SVE_JJ_4A;
+                    }
+                    else
+                    {
+                        assert(insScalableOptsNone(sopt));
+                        fmt = IF_SVE_JJ_4A_B;
+                    }
+                    break;
+
+                default:
+                    assert(!"Invalid options for scalable");
+                    break;
+            }
+            break;
+
         default:
             unreached();
             break;
@@ -13885,6 +14131,56 @@ void emitter::emitIns_Call(EmitCallType callType,
     return 0;
 }
 
+/*****************************************************************************
+ *
+ *  Returns the encoding to select the 1/2/4/8 byte elemsize for an Arm64 SVE vector instruction
+ *  This specifically encodes the size at bit locations '22-21'.
+ */
+
+/*static*/ emitter::code_t emitter::insEncodeSveElemsize_22_to_21(emitAttr size)
+{
+    switch (size)
+    {
+        case EA_1BYTE:
+            return 0;
+
+        case EA_2BYTE:
+            return (1 << 21); // set the bit at location 21
+
+        case EA_4BYTE:
+            return (1 << 22); // set the bit at location 22
+
+        case EA_8BYTE:
+            return (1 << 22) | (1 << 21); // set the bits at locations 22 and 21
+
+        default:
+            assert(!"Invalid insOpt for vector register");
+    }
+    return 0;
+}
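+
+// Illustrative values for the encoder above (derived directly from the shifts):
+// EA_1BYTE -> 0x00000000, EA_2BYTE -> 0x00200000, EA_4BYTE -> 0x00400000,
+// EA_8BYTE -> 0x00600000; the two-bit size selector occupies bits [22:21].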
+
+/*****************************************************************************
+ *
+ *  Returns the encoding to select the 4/8 byte elemsize for an Arm64 SVE vector instruction
+ *  This specifically encodes the field 'sz' at bit location '21'.
+ */
+
+/*static*/ emitter::code_t emitter::insEncodeSveElemsize_sz_21(emitAttr size)
+{
+    switch (size)
+    {
+        case EA_4BYTE:
+            return 0;
+
+        case EA_8BYTE:
+            return (1 << 21);
+
+        default:
+            assert(!"Invalid insOpt for vector register");
+    }
+    return 0;
+}
+
 /*****************************************************************************
  *
  *  Returns the encoding to select the 1/2/4/8 byte elemsize for an Arm64 Sve vector instruction
@@ -13995,6 +14291,7 @@ void emitter::emitIns_Call(EmitCallType callType,
         case INS_sve_ldff1d:
         case INS_sve_ldff1sw:
         case INS_sve_st1b:
+        case INS_sve_st1h:
         case INS_sve_ldff1sb:
         case INS_sve_ldff1b:
         case INS_sve_ldnt1sb:
@@ -16907,6 +17204,80 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp)
             dst += emitOutput_Instr(dst, code);
             break;
 
+        case IF_SVE_JD_4A: // .........xxmmmmm ...gggnnnnnttttt -- SVE contiguous store (scalar plus scalar)
+            code = emitInsCodeSve(ins, fmt);
+            code |= insEncodeReg_V_4_to_0(id->idReg1());                              // ttttt
+            code |= insEncodeReg_P_12_to_10(id->idReg2());                            // ggg
+            code |= insEncodeReg_R_9_to_5(id->idReg3());                              // nnnnn
+            code |= insEncodeReg_R_20_to_16(id->idReg4());                            // mmmmm
+            code |= insEncodeSveElemsize_22_to_21(optGetSveElemsize(id->idInsOpt())); // xx
+            dst += emitOutput_Instr(dst, code);
+            break;
+
+        case IF_SVE_JD_4B: // ..........xmmmmm ...gggnnnnnttttt -- SVE contiguous store (scalar plus scalar)
+            code = emitInsCodeSve(ins, fmt);
+            code |= insEncodeReg_V_4_to_0(id->idReg1());                           // ttttt
+            code |= insEncodeReg_P_12_to_10(id->idReg2());                         // ggg
+            code |= insEncodeReg_R_9_to_5(id->idReg3());                           // nnnnn
+            code |= insEncodeReg_R_20_to_16(id->idReg4());                         // mmmmm
+            code |= insEncodeSveElemsize_sz_21(optGetSveElemsize(id->idInsOpt())); // x
+            dst += emitOutput_Instr(dst, code);
+            break;
+
+        case IF_SVE_JJ_4A: // ...........mmmmm .h.gggnnnnnttttt -- SVE 64-bit scatter store (scalar plus 64-bit
+                           // scaled offsets)
+        case IF_SVE_JJ_4A_B: // ...........mmmmm .h.gggnnnnnttttt -- SVE 64-bit scatter store (scalar plus 64-bit
+                             // scaled offsets)
+        case IF_SVE_JJ_4A_C: // ...........mmmmm .h.gggnnnnnttttt -- SVE 64-bit scatter store (scalar plus 64-bit
+                             // scaled offsets)
+        case IF_SVE_JJ_4A_D: // ...........mmmmm .h.gggnnnnnttttt -- SVE 64-bit scatter store (scalar plus 64-bit
+                             // scaled offsets)
+        case IF_SVE_JK_4A: // ...........mmmmm .h.gggnnnnnttttt -- SVE 64-bit scatter store (scalar plus 64-bit
+                           // unscaled offsets)
+        case IF_SVE_JK_4A_B: // ...........mmmmm .h.gggnnnnnttttt -- SVE 64-bit scatter store (scalar plus 64-bit
+                             // unscaled offsets)
+            code = emitInsCodeSve(ins, fmt);
+            code |= insEncodeReg_V_4_to_0(id->idReg1());   // ttttt
+            code |= insEncodeReg_P_12_to_10(id->idReg2()); // ggg
+            code |= insEncodeReg_R_9_to_5(id->idReg3());   // nnnnn
+            code |= insEncodeReg_V_20_to_16(id->idReg4()); // mmmmm
+
+            switch (id->idInsOpt())
+            {
+                case INS_OPTS_SCALABLE_S_SXTW:
+                case INS_OPTS_SCALABLE_D_SXTW:
+                    code |= (1 << 14); // h
+                    break;
+
+                default:
+                    break;
+            }
+
+            dst += emitOutput_Instr(dst, code);
+            break;
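+
+        // Note (informal): bit 14 -- the 'h' field in the format comments -- selects the
+        // extend type for these scatter stores: 0 encodes UXTW, 1 encodes SXTW, which is
+        // why only the SXTW options set it above.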
+
+        case IF_SVE_JN_3A: // .........xx.iiii ...gggnnnnnttttt -- SVE contiguous store (scalar plus immediate)
+            imm  = emitGetInsSC(id);
+            code = emitInsCodeSve(ins, fmt);
+            code |= insEncodeReg_V_4_to_0(id->idReg1());                              // ttttt
+            code |= insEncodeReg_P_12_to_10(id->idReg2());                            // ggg
+            code |= insEncodeReg_R_9_to_5(id->idReg3());                              // nnnnn
+            code |= insEncodeSimm4_19_to_16(imm);                                     // iiii
+            code |= insEncodeSveElemsize_22_to_21(optGetSveElemsize(id->idInsOpt())); // xx
+            dst += emitOutput_Instr(dst, code);
+            break;
+
+        case IF_SVE_JN_3B: // ..........x.iiii ...gggnnnnnttttt -- SVE contiguous store (scalar plus immediate)
+            imm  = emitGetInsSC(id);
+            code = emitInsCodeSve(ins, fmt);
+            code |= insEncodeReg_V_4_to_0(id->idReg1());                           // ttttt
+            code |= insEncodeReg_P_12_to_10(id->idReg2());                         // ggg
+            code |= insEncodeReg_R_9_to_5(id->idReg3());                           // nnnnn
+            code |= insEncodeSimm4_19_to_16(imm);                                  // iiii
+            code |= insEncodeSveElemsize_sz_21(optGetSveElemsize(id->idInsOpt())); // x
+            dst += emitOutput_Instr(dst, code);
+            break;
+
         default:
             assert(!"Unexpected format");
             break;
@@ -17330,6 +17701,31 @@ void emitter::emitDispExtendOpts(insOpts opt)
             assert(!"Bad value");
 }
 
+/*****************************************************************************
+ *
+ *  Prints the extend type (uxtw or sxtw) for SVE addressing modes
+ */
+
+void emitter::emitDispSveExtendOpts(insOpts opt)
+{
+    switch (opt)
+    {
+        case INS_OPTS_SCALABLE_S_UXTW:
+        case INS_OPTS_SCALABLE_D_UXTW:
+            printf("uxtw");
+            break;
+
+        case INS_OPTS_SCALABLE_S_SXTW:
+        case INS_OPTS_SCALABLE_D_SXTW:
+            printf("sxtw");
+            break;
+
+        default:
+            assert(!"Bad value");
+            break;
+    }
+}
+
 /*****************************************************************************
  *
  *  Prints the encoding for the Extend Type encoding in loads/stores
@@ -17368,7 +17764,7 @@ void emitter::emitDispReg(regNumber reg, emitAttr attr, bool addComma)
 //
 void emitter::emitDispSveReg(regNumber reg, insOpts opt, bool addComma)
 {
-    assert(insOptsScalable(opt));
+    assert(insOptsScalable(opt) || insOptsScalable32bitExtends(opt));
     assert(isVectorRegister(reg));
     printf(emitSveRegName(reg));
     emitDispArrangement(opt);
@@ -17598,6 +17994,8 @@ void emitter::emitDispArrangement(insOpts opt)
             str = "4s";
             break;
         case INS_OPTS_SCALABLE_S:
+        case INS_OPTS_SCALABLE_S_UXTW:
+        case INS_OPTS_SCALABLE_S_SXTW:
             str = "s";
             break;
         case INS_OPTS_1D:
@@ -17607,6 +18005,8 @@ void emitter::emitDispArrangement(insOpts opt)
             str = "2d";
             break;
         case INS_OPTS_SCALABLE_D:
+        case INS_OPTS_SCALABLE_D_UXTW:
+        case INS_OPTS_SCALABLE_D_SXTW:
             str = "d";
             break;
         case INS_OPTS_SCALABLE_Q:
@@ -19608,6 +20008,140 @@ void emitter::emitDispInsHelp(
             printf("]");
             break;
 
+        // {<Zt>.<T>}, <Pg>, [<Xn|SP>, <Xm>]
+        // {<Zt>.<T>}, <Pg>, [<Xn|SP>, <Xm>, LSL #1]
+        case IF_SVE_JD_4A: // .........xxmmmmm ...gggnnnnnttttt -- SVE contiguous store (scalar plus scalar)
+            emitDispSveConsecutiveRegList(id->idReg1(), insGetSveReg1ListSize(ins), id->idInsOpt(), true); // ttttt
+            emitDispPredicateReg(id->idReg2(), PREDICATE_NONE, id->idInsOpt(), true);                      // ggg
+            printf("[");
+            emitDispReg(id->idReg3(), EA_8BYTE, true); // nnnnn
+            if (ins == INS_sve_st1h)
+            {
+                emitDispReg(id->idReg4(), EA_8BYTE, true); // mmmmm
+                printf("lsl #1]");
+            }
+            else
+            {
+                emitDispReg(id->idReg4(), EA_8BYTE, false); // mmmmm
+                printf("]");
+            }
+            break;
+
+        // {<Zt>.<T>}, <Pg>, [<Xn|SP>, <Xm>, LSL #2]
+        case IF_SVE_JD_4B: // ..........xmmmmm ...gggnnnnnttttt -- SVE contiguous store (scalar plus scalar)
+            emitDispSveConsecutiveRegList(id->idReg1(), insGetSveReg1ListSize(ins), id->idInsOpt(), true); // ttttt
+            emitDispPredicateReg(id->idReg2(), PREDICATE_NONE, id->idInsOpt(), true);                      // ggg
+            printf("[");
+            emitDispReg(id->idReg3(), EA_8BYTE, true); // nnnnn
+            emitDispReg(id->idReg4(), EA_8BYTE, true); // mmmmm
+            printf("lsl #2]");
+            break;
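+
+        // For reference, informal examples of the disassembly these cases produce:
+        // IF_SVE_JD_4B: "st1w    {z0.s}, p1, [x2, x3, lsl #2]"
+        // IF_SVE_JJ_4A (below): "st1d    {z0.d}, p1, [x2, z3.d, uxtw #3]"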
+
+        // {<Zt>.D }, <Pg>, [<Xn|SP>, <Zm>.D, <mod> #3]
+        // {<Zt>.S }, <Pg>, [<Xn|SP>, <Zm>.S, <mod> #1]
+        // {<Zt>.S }, <Pg>, [<Xn|SP>, <Zm>.S, <mod> #2]
+        case IF_SVE_JJ_4A: // ...........mmmmm .h.gggnnnnnttttt -- SVE 64-bit scatter store (scalar plus 64-bit
+                           // scaled offsets)
+        // {<Zt>.D }, <Pg>, [<Xn|SP>, <Zm>.D, <mod>]
+        // {<Zt>.D }, <Pg>, [<Xn|SP>, <Zm>.D, <mod> #1]
+        // {<Zt>.D }, <Pg>, [<Xn|SP>, <Zm>.D, <mod> #2]
+        case IF_SVE_JJ_4A_B: // ...........mmmmm .h.gggnnnnnttttt -- SVE 64-bit scatter store (scalar plus 64-bit
+                             // scaled offsets)
+        // {<Zt>.D }, <Pg>, [<Xn|SP>, <Zm>.D, <mod>]
+        case IF_SVE_JJ_4A_C: // ...........mmmmm .h.gggnnnnnttttt -- SVE 64-bit scatter store (scalar plus 64-bit
+                             // scaled offsets)
+        // {<Zt>.S }, <Pg>, [<Xn|SP>, <Zm>.S, <mod>]
+        case IF_SVE_JJ_4A_D: // ...........mmmmm .h.gggnnnnnttttt -- SVE 64-bit scatter store (scalar plus 64-bit
+                             // scaled offsets)
+        // {<Zt>.D }, <Pg>, [<Xn|SP>, <Zm>.D, <mod>]
+        case IF_SVE_JK_4A: // ...........mmmmm .h.gggnnnnnttttt -- SVE 64-bit scatter store (scalar plus 64-bit
+                           // unscaled offsets)
+        // {<Zt>.S }, <Pg>, [<Xn|SP>, <Zm>.S, <mod>]
+        case IF_SVE_JK_4A_B: // ...........mmmmm .h.gggnnnnnttttt -- SVE 64-bit scatter store (scalar plus 64-bit
+                             // unscaled offsets)
+        {
+            emitDispSveConsecutiveRegList(id->idReg1(), insGetSveReg1ListSize(ins), id->idInsOpt(), true); // ttttt
+            emitDispPredicateReg(id->idReg2(), PREDICATE_NONE, id->idInsOpt(), true);                      // ggg
+            printf("[");
+            emitDispReg(id->idReg3(), EA_8BYTE, true);          // nnnnn
+            emitDispSveReg(id->idReg4(), id->idInsOpt(), true); // mmmmm
+            emitDispSveExtendOpts(id->idInsOpt());
+            switch (ins)
+            {
+                case INS_sve_st1b:
+                    printf("]");
+                    break;
+
+                case INS_sve_st1h:
+                    if ((fmt == IF_SVE_JJ_4A_C) || (fmt == IF_SVE_JJ_4A_D))
+                    {
+                        printf("]");
+                    }
+                    else
+                    {
+                        printf(" #1]");
+                    }
+                    break;
+
+                case INS_sve_st1w:
+                    if ((fmt == IF_SVE_JJ_4A_C) || (fmt == IF_SVE_JJ_4A_D))
+                    {
+                        printf("]");
+                    }
+                    else
+                    {
+                        printf(" #2]");
+                    }
+                    break;
+
+                case INS_sve_st1d:
+                    if (fmt == IF_SVE_JJ_4A_B)
+                    {
+                        printf("]");
+                    }
+                    else
+                    {
+                        printf(" #3]");
+                    }
+                    break;
+
+                default:
+                    assert(!"Invalid instruction");
+                    break;
+            }
+            break;
+        }
+
+        // {<Zt>.<T>}, <Pg>, [<Xn|SP>{, #<imm>, MUL VL}]
+        case IF_SVE_JN_3A: // .........xx.iiii ...gggnnnnnttttt -- SVE contiguous store (scalar plus immediate)
+            imm = emitGetInsSC(id);
+            emitDispSveConsecutiveRegList(id->idReg1(), insGetSveReg1ListSize(ins), id->idInsOpt(), true); // ttttt
+            emitDispPredicateReg(id->idReg2(), insGetPredicateType(fmt), id->idInsOpt(), true);            // ggg
+            printf("[");
+            emitDispReg(id->idReg3(), EA_8BYTE, imm != 0); // nnnnn
+            if (imm != 0)
+            {
+                emitDispImm(imm, true); // iiii
+                printf("mul vl");
+            }
+            printf("]");
+            break;
+
+        // {<Zt>.<T>}, <Pg>, [<Xn|SP>{, #<imm>, MUL VL}]
+        case IF_SVE_JN_3B: // ..........x.iiii ...gggnnnnnttttt -- SVE contiguous store (scalar plus immediate)
+            imm = emitGetInsSC(id);
+            emitDispSveConsecutiveRegList(id->idReg1(), insGetSveReg1ListSize(ins), id->idInsOpt(), true); // ttttt
+            emitDispPredicateReg(id->idReg2(), insGetPredicateType(fmt), id->idInsOpt(), true);            // ggg
+            printf("[");
+            emitDispReg(id->idReg3(), EA_8BYTE, imm != 0); // nnnnn
+            if (imm != 0)
+            {
+                emitDispImm(imm, true); // iiii
+                printf("mul vl");
+            }
+            printf("]");
+            break;
+
         default:
             printf("unexpected format %s", emitIfName(id->idInsFmt()));
             assert(!"unexpectedFormat");
@@ -22468,6 +23002,26 @@ emitter::insExecutionCharacteristics emitter::getInsExecutionCharacteristics(ins
         }
         break;
 
+        case IF_SVE_JD_4A: // .........xxmmmmm ...gggnnnnnttttt -- SVE contiguous store (scalar plus scalar)
+        case IF_SVE_JD_4B: // ..........xmmmmm ...gggnnnnnttttt -- SVE contiguous store (scalar plus scalar)
+        case IF_SVE_JJ_4A: // ...........mmmmm .h.gggnnnnnttttt -- SVE 64-bit scatter store (scalar plus 64-bit
+                           // scaled offsets)
+        case IF_SVE_JJ_4A_B: // ...........mmmmm .h.gggnnnnnttttt -- SVE 64-bit scatter store (scalar plus 64-bit
+                             // scaled offsets)
+        case IF_SVE_JJ_4A_C: // ...........mmmmm .h.gggnnnnnttttt -- SVE 64-bit scatter store (scalar plus 64-bit
+                             // scaled offsets)
+        case IF_SVE_JJ_4A_D: // ...........mmmmm .h.gggnnnnnttttt -- SVE 64-bit scatter store (scalar plus 64-bit
+                             // scaled offsets)
+        case IF_SVE_JK_4A: // ...........mmmmm .h.gggnnnnnttttt -- SVE 64-bit scatter store (scalar plus 64-bit
+                           // unscaled offsets)
+        case IF_SVE_JK_4A_B: // ...........mmmmm .h.gggnnnnnttttt -- SVE 64-bit scatter store (scalar plus 64-bit
+                             // unscaled offsets)
+        case IF_SVE_JN_3A: // .........xx.iiii ...gggnnnnnttttt -- SVE contiguous store (scalar plus immediate)
+        case IF_SVE_JN_3B: // ..........x.iiii ...gggnnnnnttttt -- SVE contiguous store (scalar plus immediate)
+            result.insThroughput = PERFSCORE_THROUGHPUT_1C;
+            result.insLatency    = PERFSCORE_LATENCY_2C;
+            break;
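+
+            // Informal note: these are placeholder-style store costs; actual throughput
+            // and latency depend on the microarchitecture.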
+
         default:
             // all other instructions
             perfScoreUnhandledInstruction(id, &result);
diff --git a/src/coreclr/jit/emitarm64.h b/src/coreclr/jit/emitarm64.h
index e76290a28a28bd..18aeb6be10c12b 100644
--- a/src/coreclr/jit/emitarm64.h
+++ b/src/coreclr/jit/emitarm64.h
@@ -46,6 +46,7 @@ void emitDispFlags(insCflags flags);
 void emitDispBarrier(insBarrier barrier);
 void emitDispShiftOpts(insOpts opt);
 void emitDispExtendOpts(insOpts opt);
+void emitDispSveExtendOpts(insOpts opt);
 void emitDispLSExtendOpts(insOpts opt);
 void emitDispReg(regNumber reg, emitAttr attr, bool addComma);
 void emitDispSveReg(regNumber reg, insOpts opt, bool addComma);
@@ -483,6 +484,14 @@ static code_t insEncodeReg3Scale(bool isScaled);
 // Returns the encoding to select the 1/2/4/8 byte elemsize for an Arm64 SVE vector instruction
 static code_t insEncodeSveElemsize(emitAttr size);
 
+// Returns the encoding to select the 1/2/4/8 byte elemsize for an Arm64 SVE vector instruction
+// This specifically encodes the size at bit locations '22-21'.
+static code_t insEncodeSveElemsize_22_to_21(emitAttr size);
+
+// Returns the encoding to select the 4/8 byte elemsize for an Arm64 SVE vector instruction
+// This specifically encodes the field 'sz' at bit location '21'.
+static code_t insEncodeSveElemsize_sz_21(emitAttr size);
+
 // Returns the encoding to select the 1/2/4/8 byte elemsize for an Arm64 SVE vector instruction
 // This specifically encodes the field 'tszh:tszl' at bit locations '22:20-19'.
 static code_t insEncodeSveElemsize_tszh_22_tszl_20_to_19(emitAttr size);
@@ -1041,6 +1050,12 @@ inline static bool insOptsScalableWide(insOpts opt)
     return ((opt == INS_OPTS_SCALABLE_B) || (opt == INS_OPTS_SCALABLE_H) || (opt == INS_OPTS_SCALABLE_S));
 }
 
+inline static bool insOptsScalable32bitExtends(insOpts opt)
+{
+    return ((opt == INS_OPTS_SCALABLE_S_UXTW) || (opt == INS_OPTS_SCALABLE_S_SXTW) ||
+            (opt == INS_OPTS_SCALABLE_D_UXTW) || (opt == INS_OPTS_SCALABLE_D_SXTW));
+}
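+
+// Informal usage note: callers such as emitIns_R_R_R_R assert
+// insOptsScalable32bitExtends(opt) once, then switch on the individual UXTW/SXTW
+// variants to choose a scatter-store format.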
+
 inline static bool insScalableOptsNone(insScalableOpts sopt)
 {
     // `sopt` is used for instructions with no extra encoding variants.
diff --git a/src/coreclr/jit/instr.h b/src/coreclr/jit/instr.h
index 5fd9dd456d65cd..a75191d1a9b3e5 100644
--- a/src/coreclr/jit/instr.h
+++ b/src/coreclr/jit/instr.h
@@ -276,6 +276,11 @@ enum insOpts : unsigned
     INS_OPTS_SCALABLE_D,
     INS_OPTS_SCALABLE_Q,
 
+    INS_OPTS_SCALABLE_S_UXTW,
+    INS_OPTS_SCALABLE_S_SXTW,
+    INS_OPTS_SCALABLE_D_UXTW,
+    INS_OPTS_SCALABLE_D_SXTW,
+
     INS_OPTS_MSL, // Vector Immediate (shifting ones variant)
 
     INS_OPTS_S_TO_4BYTE, // Single to INT32
@@ -317,6 +322,9 @@ enum insScalableOpts : unsigned
     INS_SCALABLE_OPTS_VL_2X, // Variants with a vector length specifier of 2x (eg whilege)
     INS_SCALABLE_OPTS_VL_4X, // Variants with a vector length specifier of 4x (eg whilege)
 
+    INS_SCALABLE_OPTS_LSL_N, // Variants with an LSL #N (eg {<Zt>.<T>}, <Pg>, [<Xn|SP>, <Xm>, LSL #2])
+    INS_SCALABLE_OPTS_MOD_N, // Variants with a <mod> #N (eg {<Zt>.S }, <Pg>, [<Xn|SP>, <Zm>.S, <mod> #2])
+
     // Removable once REG_V0 and REG_P0 are distinct
     INS_SCALABLE_OPTS_UNPREDICATED,      // Variants without a predicate (eg add)
     INS_SCALABLE_OPTS_UNPREDICATED_WIDE, // Variants without a predicate and wide elements (eg asr)