diff --git a/src/coreclr/jit/codegenarm64test.cpp b/src/coreclr/jit/codegenarm64test.cpp
index 72a56b236a455c..7a102104ca773c 100644
--- a/src/coreclr/jit/codegenarm64test.cpp
+++ b/src/coreclr/jit/codegenarm64test.cpp
@@ -5431,6 +5431,46 @@ void CodeGen::genArm64EmitterUnitTestsSve()
     theEmitter->emitIns_R_R_R(INS_sve_zip2, EA_SCALABLE, REG_V15, REG_V16, REG_V17, INS_OPTS_SCALABLE_Q,
                               INS_SCALABLE_OPTS_UNPREDICATED); // ZIP2 <Zd>.Q, <Zn>.Q, <Zm>.Q
 
+    // IF_SVE_BV_2A
+    theEmitter->emitIns_R_R_I(INS_sve_cpy, EA_SCALABLE, REG_V15, REG_P5, 0,
+                              INS_OPTS_SCALABLE_B); // CPY <Zd>.<T>, <Pg>/Z, #<imm>{, <shift>}
+    theEmitter->emitIns_R_R_I(INS_sve_cpy, EA_SCALABLE, REG_V5, REG_P15, 27,
+                              INS_OPTS_SCALABLE_B); // CPY <Zd>.<T>, <Pg>/Z, #<imm>{, <shift>}
+    theEmitter->emitIns_R_R_I(INS_sve_cpy, EA_SCALABLE, REG_V31, REG_P0, -128,
+                              INS_OPTS_SCALABLE_B); // CPY <Zd>.<T>, <Pg>/Z, #<imm>{, <shift>}
+    theEmitter->emitIns_R_R_I(INS_sve_mov, EA_SCALABLE, REG_V0, REG_P5, 127,
+                              INS_OPTS_SCALABLE_B); // MOV <Zd>.<T>, <Pg>/Z, #<imm>{, <shift>}
+
+    theEmitter->emitIns_R_R_I(INS_sve_cpy, EA_SCALABLE, REG_V15, REG_P5, 0,
+                              INS_OPTS_SCALABLE_H); // CPY <Zd>.<T>, <Pg>/Z, #<imm>{, <shift>}
+    theEmitter->emitIns_R_R_I(INS_sve_mov, EA_SCALABLE, REG_V23, REG_P12, 10,
+                              INS_OPTS_SCALABLE_S); // MOV <Zd>.<T>, <Pg>/Z, #<imm>{, <shift>}
+    theEmitter->emitIns_R_R_I(INS_sve_cpy, EA_SCALABLE, REG_V4, REG_P0, -128,
+                              INS_OPTS_SCALABLE_D); // CPY <Zd>.<T>, <Pg>/Z, #<imm>{, <shift>}
+    theEmitter->emitIns_R_R_I(INS_sve_mov, EA_SCALABLE, REG_V19, REG_P15, 127,
+                              INS_OPTS_SCALABLE_H); // MOV <Zd>.<T>, <Pg>/Z, #<imm>{, <shift>}
+
+    theEmitter->emitIns_R_R_I(INS_sve_cpy, EA_SCALABLE, REG_V1, REG_P0, 256,
+                              INS_OPTS_SCALABLE_S); // CPY <Zd>.<T>, <Pg>/Z, #<imm>{, <shift>}
+    theEmitter->emitIns_R_R_I(INS_sve_cpy, EA_SCALABLE, REG_V1, REG_P0, 3072,
+                              INS_OPTS_SCALABLE_D); // CPY <Zd>.<T>, <Pg>/Z, #<imm>{, <shift>}
+    theEmitter->emitIns_R_R_I(INS_sve_cpy, EA_SCALABLE, REG_V1, REG_P0, -3072,
+                              INS_OPTS_SCALABLE_H); // CPY <Zd>.<T>, <Pg>/Z, #<imm>{, <shift>}
+    theEmitter->emitIns_R_R_I(INS_sve_cpy, EA_SCALABLE, REG_V1, REG_P0, -32768,
+                              INS_OPTS_SCALABLE_S); // CPY <Zd>.<T>, <Pg>/Z, #<imm>{, <shift>}
+    theEmitter->emitIns_R_R_I(INS_sve_mov, EA_SCALABLE, REG_V0, REG_P0, 32512,
+                              INS_OPTS_SCALABLE_D); // MOV <Zd>.<T>, <Pg>/Z, #<imm>{, <shift>}
+
+    // IF_SVE_BV_2A_A
+    theEmitter->emitIns_R_R_I(INS_sve_cpy, EA_SCALABLE, REG_V1, REG_P12, 5, INS_OPTS_SCALABLE_B,
+                              INS_SCALABLE_OPTS_PREDICATE_MERGE); // CPY <Zd>.<T>, <Pg>/M, #<imm>{, <shift>}
+
+    // IF_SVE_BV_2A_J
+    theEmitter->emitIns_R_R_I(INS_sve_mov, EA_SCALABLE, REG_V27, REG_P13, 5632, INS_OPTS_SCALABLE_H,
+                              INS_SCALABLE_OPTS_PREDICATE_MERGE); // MOV <Zd>.<T>, <Pg>/M, #<imm>{, <shift>}
+    theEmitter->emitIns_R_R_I(INS_sve_mov, EA_SCALABLE, REG_V27, REG_P13, -5632, INS_OPTS_SCALABLE_H,
+                              INS_SCALABLE_OPTS_PREDICATE_MERGE); // MOV <Zd>.<T>, <Pg>/M, #<imm>{, <shift>}
+
     // IF_SVE_BZ_3A
     theEmitter->emitIns_R_R_R(INS_sve_tbl, EA_SCALABLE, REG_V0, REG_V1, REG_V2,
                               INS_OPTS_SCALABLE_B); // TBL <Zd>.<T>, {<Zn>.<T>}, <Zm>.<T>
diff --git a/src/coreclr/jit/emitarm64.cpp b/src/coreclr/jit/emitarm64.cpp
index f96bac7fc3f11c..f504f39e55b5d4 100644
--- a/src/coreclr/jit/emitarm64.cpp
+++ b/src/coreclr/jit/emitarm64.cpp
@@ -1153,6 +1153,17 @@ void emitter::emitInsSanityCheck(instrDesc* id)
             assert(isValidUimm4From1(emitGetInsSC(id)));
             break;
 
+        case IF_SVE_BV_2A:   // ........xx..gggg ..hiiiiiiiiddddd -- SVE copy integer immediate (predicated)
+        case IF_SVE_BV_2A_J: // ........xx..gggg ..hiiiiiiiiddddd -- SVE copy integer immediate (predicated)
+            assert(insOptsScalableStandard(id->idInsOpt())); // xx
+            // Size specifier must be able to fit left-shifted immediate
+            assert(insOptsScalableAtLeastHalf(id->idInsOpt()) || !id->idOptionalShift());
+            assert(isVectorRegister(id->idReg1()));                           // ddddd
+            assert(isPredicateRegister(id->idReg2()));                        // gggg
+            assert(isValidSimm8(emitGetInsSC(id)));                           // iiiiiiii
+            assert(isValidVectorElemsize(optGetSveElemsize(id->idInsOpt()))); // xx
+            break;
+
         case IF_SVE_CE_2A: // ................ ......nnnnn.DDDD -- SVE move predicate from vector
             assert(isPredicateRegister(id->idReg1())); // DDDD
             assert(isVectorRegister(id->idReg2()));    // nnnnn
@@ -9404,16 +9415,18 @@ void emitter::emitIns_R_R_I(instruction ins,
                             insOpts         opt /* = INS_OPTS_NONE */,
                             insScalableOpts sopt /* = INS_SCALABLE_OPTS_NONE */)
 {
-    emitAttr  size       = EA_SIZE(attr);
-    emitAttr  elemsize   = EA_UNKNOWN;
-    insFormat fmt        = IF_NONE;
-    bool      isLdSt     = false;
-    bool      isLdrStr   = false;
-    bool      isSIMD     = false;
-    bool      isAddSub   = false;
-    bool      setFlags   = false;
-    unsigned  scale      = 0;
-    bool      unscaledOp = false;
+    emitAttr  size          = EA_SIZE(attr);
+    emitAttr  elemsize      = EA_UNKNOWN;
+    insFormat fmt           = IF_NONE;
+    bool      isLdSt        = false;
+    bool      isLdrStr      = false;
+    bool      isSIMD        = false;
+    bool      isAddSub      = false;
+    bool      setFlags      = false;
+    unsigned  scale         = 0;
+    bool      unscaledOp    = false;
+    bool      optionalShift = false;
+    bool      hasShift      = false;
 
     /* Figure out the encoding format of the instruction */
     switch (ins)
@@ -10058,6 +10071,32 @@ void emitter::emitIns_R_R_I(instruction ins,
             fmt = IF_SVE_BB_2A;
             break;
 
+        case INS_sve_mov:
+        case INS_sve_cpy:
+            optionalShift = true;
+            assert(insOptsScalableStandard(opt));
+            assert(isVectorRegister(reg1));    // DDDDD
+            assert(isPredicateRegister(reg2)); // GGGG
+            if (!isValidSimm8(imm))
+            {
+                assert(isValidSimm8_MultipleOf256(imm));
+                assert(insOptsScalableAtLeastHalf(opt));
+                hasShift = true;
+                imm      = imm / 256; // keep only the 8-bit payload; hasShift records the LSL #8
+            }
+            if (sopt == INS_SCALABLE_OPTS_PREDICATE_MERGE)
+            {
+                fmt = IF_SVE_BV_2A_J;
+            }
+            else
+            {
+                assert(sopt == INS_SCALABLE_OPTS_NONE);
+                fmt = IF_SVE_BV_2A;
+            }
+            // MOV is an alias for CPY, and is always the preferred disassembly.
+            ins = INS_sve_mov;
+            break;
+
         case INS_sve_pmov:
             if (sopt == INS_SCALABLE_OPTS_TO_PREDICATE)
             {
@@ -10477,7 +10516,18 @@ void emitter::emitIns_R_R_I(instruction ins,
 
     assert(fmt != IF_NONE);
 
-    instrDesc* id = emitNewInstrSC(attr, imm);
+    instrDesc* id;
+
+    if (!optionalShift)
+    {
+        id = emitNewInstrSC(attr, imm);
+    }
+    else
+    {
+        // Instructions with optional shifts (MOV, DUP, etc.) need larger instrDesc to store state
+        id = emitNewInstrCns(attr, imm);
+        id->idOptionalShift(hasShift);
+    }
 
     id->idIns(ins);
     id->idInsFmt(fmt);
@@ -24365,6 +24415,18 @@ BYTE* emitter::emitOutput_InstrSve(BYTE* dst, instrDesc* id)
             dst += emitOutput_Instr(dst, code);
             break;
 
+        case IF_SVE_BV_2A:   // ........xx..gggg ..hiiiiiiiiddddd -- SVE copy integer immediate (predicated)
+        case IF_SVE_BV_2A_J: // ........xx..gggg ..hiiiiiiiiddddd -- SVE copy integer immediate (predicated)
+            imm  = emitGetInsSC(id);
+            code = emitInsCodeSve(ins, fmt);
+            code |= insEncodeReg_V_4_to_0(id->idReg1());                  // ddddd
+            code |= insEncodeReg_P_19_to_16(id->idReg2());                // gggg
+            code |= insEncodeImm8_12_to_5(imm);                           // iiiiiiii
+            code |= (id->idOptionalShift() ? 0x2000 : 0);                 // h
+            code |= insEncodeElemsize(optGetSveElemsize(id->idInsOpt())); // xx
+            dst += emitOutput_Instr(dst, code);
+            break;
+
         case IF_SVE_CE_2A: // ................ ......nnnnn.DDDD -- SVE move predicate from vector
             code = emitInsCodeSve(ins, fmt);
             code |= insEncodeReg_P_3_to_0(id->idReg1()); // DDDD
@@ -30012,6 +30074,15 @@ void emitter::emitDispInsHelp(
             emitDispImm(imm, false);
             break;
 
+        // <Zd>.<T>, <Pg>/Z, #<imm>{, <shift>}
+        case IF_SVE_BV_2A:   // ........xx..gggg ..hiiiiiiiiddddd -- SVE copy integer immediate (predicated)
+        case IF_SVE_BV_2A_J: // ........xx..gggg ..hiiiiiiiiddddd -- SVE copy integer immediate (predicated)
+            imm = emitGetInsSC(id);
+            emitDispSveReg(id->idReg1(), id->idInsOpt(), true);                                 // ddddd
+            emitDispPredicateReg(id->idReg2(), insGetPredicateType(fmt), id->idInsOpt(), true); // gggg
+            emitDispImmOptsLSL(emitGetInsSC(id), id->idOptionalShift(), 8);                     // iiiiiiii, h
+            break;
+
         // <Zd>.<T>, <Zn>.<T>[<imm>]
         case IF_SVE_BX_2A: // ...........ixxxx ......nnnnnddddd -- sve_int_perm_dupq_i
             imm = emitGetInsSC(id);
@@ -32554,6 +32625,12 @@ emitter::insExecutionCharacteristics emitter::getInsExecutionCharacteristics(ins
             result.insLatency    = PERFSCORE_LATENCY_2C;
             break;
 
+        case IF_SVE_BV_2A:   // ........xx..gggg ..hiiiiiiiiddddd -- SVE copy integer immediate (predicated)
+        case IF_SVE_BV_2A_J: // ........xx..gggg ..hiiiiiiiiddddd -- SVE copy integer immediate (predicated)
+            result.insThroughput = PERFSCORE_THROUGHPUT_2C;
+            result.insLatency    = PERFSCORE_LATENCY_2C;
+            break;
+
         case IF_SVE_CE_2A: // ................ ......nnnnn.DDDD -- SVE move predicate from vector
         case IF_SVE_CE_2B: // .........i...ii. ......nnnnn.DDDD -- SVE move predicate from vector
         case IF_SVE_CE_2C: // ..............i. ......nnnnn.DDDD -- SVE move predicate from vector
diff --git a/src/coreclr/jit/emitarm64.h b/src/coreclr/jit/emitarm64.h
index bb5abee8d2b5f5..114f7b678a6b79 100644
--- a/src/coreclr/jit/emitarm64.h
+++ b/src/coreclr/jit/emitarm64.h
@@ -936,6 +936,12 @@ static bool isValidSimm8(ssize_t value)
     return (-0x80 <= value) && (value <= 0x7F);
 };
 
+// Returns true if 'value' is a legal signed multiple of 256 immediate 8 bit encoding (such as for MOV).
+static bool isValidSimm8_MultipleOf256(ssize_t value)
+{
+    return (-0x8000 <= value) && (value <= 0x7f00) && (value % 256 == 0);
+};
+
 // Returns true if 'value' is a legal unsigned immediate 12 bit encoding (such as for CMP, CMN).
 static bool isValidUimm12(ssize_t value)
 {
diff --git a/src/coreclr/jit/instr.h b/src/coreclr/jit/instr.h
index 6d99d81cd9df6c..1c8f6110ae4135 100644
--- a/src/coreclr/jit/instr.h
+++ b/src/coreclr/jit/instr.h
@@ -370,6 +370,8 @@ enum insScalableOpts : unsigned
     INS_SCALABLE_OPTS_WITH_PREDICATE_PAIR, // Variants with {<Pd1>.<T>, <Pd2>.<T>} predicate pair (eg whilege)
     INS_SCALABLE_OPTS_VL_2X,               // Variants with a vector length specifier of 2x (eg whilege)
     INS_SCALABLE_OPTS_VL_4X,               // Variants with a vector length specifier of 4x (eg whilege)
+
+    // TODO-SVE: Remove and pass the full immediate value instead.
     INS_SCALABLE_OPTS_SHIFT,               // Variants with an optional shift operation (eg dup)
     INS_SCALABLE_OPTS_LSL_N,               // Variants with a LSL #N (eg {<Zt>.<T>}, <Pg>, [<Xn|SP>, <Xm>, LSL #2])