diff --git a/src/coreclr/jit/codegenarm64.cpp b/src/coreclr/jit/codegenarm64.cpp index 54c5a7aae710ab..b6e1162195b713 100644 --- a/src/coreclr/jit/codegenarm64.cpp +++ b/src/coreclr/jit/codegenarm64.cpp @@ -2459,7 +2459,7 @@ void CodeGen::genCodeForMulHi(GenTreeOp* treeNode) instruction ins = isUnsigned ? INS_umull : INS_smull; - regNumber r = emit->emitInsTernary(ins, EA_4BYTE, treeNode, op1, op2); + regNumber r = emit->emitInsTernary(ins, EA_8BYTE, treeNode, op1, op2); emit->emitIns_R_R_I(isUnsigned ? INS_lsr : INS_asr, EA_8BYTE, targetReg, targetReg, 32); } diff --git a/src/coreclr/jit/codegenarm64test.cpp b/src/coreclr/jit/codegenarm64test.cpp index 2c7de903fded27..10b39a53c10e60 100644 --- a/src/coreclr/jit/codegenarm64test.cpp +++ b/src/coreclr/jit/codegenarm64test.cpp @@ -68,6 +68,16 @@ void CodeGen::genArm64EmitterUnitTestsGeneral() theEmitter->emitIns_R_R_I(INS_stur, EA_8BYTE, REG_R8, REG_R9, 1); theEmitter->emitIns_R_R_I(INS_ldursw, EA_8BYTE, REG_R8, REG_R9, 1); + theEmitter->emitIns_R_R_I(INS_stlurb, EA_1BYTE, REG_R7, REG_R10, 0); + theEmitter->emitIns_R_R_I(INS_stlurh, EA_2BYTE, REG_R2, REG_R10, 154); + theEmitter->emitIns_R_R_I(INS_stlur, EA_4BYTE, REG_R30, REG_R10, -256); + theEmitter->emitIns_R_R_I(INS_stlur, EA_8BYTE, REG_R23, REG_R10, 255); + + theEmitter->emitIns_R_R_I(INS_ldapurb, EA_1BYTE, REG_R13, REG_R9, -256); + theEmitter->emitIns_R_R_I(INS_ldapurh, EA_2BYTE, REG_R7, REG_R10, 101); + theEmitter->emitIns_R_R_I(INS_ldapur, EA_4BYTE, REG_R27, REG_R11, 19); + theEmitter->emitIns_R_R_I(INS_ldapur, EA_8BYTE, REG_R2, REG_R12, -173); + // SP and ZR tests theEmitter->emitIns_R_R_I(INS_ldur, EA_8BYTE, REG_R8, REG_SP, 1); theEmitter->emitIns_R_R_I(INS_ldurb, EA_8BYTE, REG_ZR, REG_R9, 1); @@ -1220,12 +1230,6 @@ void CodeGen::genArm64EmitterUnitTestsGeneral() theEmitter->emitIns_R_R_R(INS_sdiv, EA_4BYTE, REG_R8, REG_R9, REG_R10); theEmitter->emitIns_R_R_R(INS_mul, EA_4BYTE, REG_R8, REG_R9, REG_R10); theEmitter->emitIns_R_R_R(INS_mneg, EA_4BYTE, REG_R8, REG_R9, REG_R10); - theEmitter->emitIns_R_R_R(INS_smull, EA_4BYTE, REG_R8, REG_R9, REG_R10); - theEmitter->emitIns_R_R_R(INS_smnegl, EA_4BYTE, REG_R8, REG_R9, REG_R10); - theEmitter->emitIns_R_R_R(INS_smulh, EA_4BYTE, REG_R8, REG_R9, REG_R10); - theEmitter->emitIns_R_R_R(INS_umull, EA_4BYTE, REG_R8, REG_R9, REG_R10); - theEmitter->emitIns_R_R_R(INS_umnegl, EA_4BYTE, REG_R8, REG_R9, REG_R10); - theEmitter->emitIns_R_R_R(INS_umulh, EA_4BYTE, REG_R8, REG_R9, REG_R10); theEmitter->emitIns_R_R_R(INS_lslv, EA_4BYTE, REG_R8, REG_R9, REG_R10); theEmitter->emitIns_R_R_R(INS_lsrv, EA_4BYTE, REG_R8, REG_R9, REG_R10); theEmitter->emitIns_R_R_R(INS_asrv, EA_4BYTE, REG_R8, REG_R9, REG_R10); @@ -1609,10 +1613,6 @@ void CodeGen::genArm64EmitterUnitTestsGeneral() theEmitter->emitIns_R_R_R_R(INS_madd, EA_4BYTE, REG_R0, REG_R12, REG_R27, REG_R10); theEmitter->emitIns_R_R_R_R(INS_msub, EA_4BYTE, REG_R1, REG_R13, REG_R28, REG_R11); - theEmitter->emitIns_R_R_R_R(INS_smaddl, EA_4BYTE, REG_R2, REG_R14, REG_R0, REG_R12); - theEmitter->emitIns_R_R_R_R(INS_smsubl, EA_4BYTE, REG_R3, REG_R15, REG_R1, REG_R13); - theEmitter->emitIns_R_R_R_R(INS_umaddl, EA_4BYTE, REG_R4, REG_R19, REG_R2, REG_R14); - theEmitter->emitIns_R_R_R_R(INS_umsubl, EA_4BYTE, REG_R5, REG_R20, REG_R3, REG_R15); theEmitter->emitIns_R_R_R_R(INS_madd, EA_8BYTE, REG_R6, REG_R21, REG_R4, REG_R19); theEmitter->emitIns_R_R_R_R(INS_msub, EA_8BYTE, REG_R7, REG_R22, REG_R5, REG_R20); @@ -1900,15 +1900,10 @@ void CodeGen::genArm64EmitterUnitTestsAdvSimd() theEmitter->emitIns_R_R_I(INS_stur, EA_8BYTE, REG_V7, REG_R10, 9); theEmitter->emitIns_R_R_I(INS_stur, EA_16BYTE, REG_V7, REG_R10, 17); - theEmitter->emitIns_R_R_I(INS_stlurb, EA_1BYTE, REG_V7, REG_R10, 0); - theEmitter->emitIns_R_R_I(INS_stlurh, EA_2BYTE, REG_V7, REG_R10, 0); - theEmitter->emitIns_R_R_I(INS_stlur, EA_4BYTE, REG_V7, REG_R10, 0); - theEmitter->emitIns_R_R_I(INS_stlur, EA_8BYTE, REG_V7, REG_R10, 0); - - theEmitter->emitIns_R_R_I(INS_ldapurb, EA_1BYTE, REG_V8, REG_R9, 0); - theEmitter->emitIns_R_R_I(INS_ldapurh, EA_2BYTE, REG_V8, REG_R9, 0); - theEmitter->emitIns_R_R_I(INS_ldapur, EA_4BYTE, REG_V8, REG_R9, 0); - theEmitter->emitIns_R_R_I(INS_ldapur, EA_8BYTE, REG_V8, REG_R9, 0); + theEmitter->emitIns_R_R_I(INS_ldapur, EA_1BYTE, REG_V27, REG_R19, 255); + theEmitter->emitIns_R_R_I(INS_ldapur, EA_2BYTE, REG_V15, REG_R9, -255); + theEmitter->emitIns_R_R_I(INS_ldapur, EA_4BYTE, REG_V1, REG_R25, 17); + theEmitter->emitIns_R_R_I(INS_ldapur, EA_8BYTE, REG_V8, REG_R2, -3); // load/store pair theEmitter->emitIns_R_R_R(INS_ldnp, EA_8BYTE, REG_V0, REG_V1, REG_R10); @@ -4558,6 +4553,34 @@ void CodeGen::genArm64EmitterUnitTestsAdvSimd() theEmitter->emitIns_R_R_R_R(INS_fmsub, EA_8BYTE, REG_V5, REG_V13, REG_V21, REG_V29); theEmitter->emitIns_R_R_R_R(INS_fnmadd, EA_8BYTE, REG_V6, REG_V14, REG_V22, REG_V30); theEmitter->emitIns_R_R_R_R(INS_fnmsub, EA_8BYTE, REG_V7, REG_V15, REG_V23, REG_V31); + + // IF_DV_2U + theEmitter->emitIns_R_R(INS_sha1h, EA_4BYTE, REG_V3, REG_V17); + theEmitter->emitIns_R_R(INS_sha1su1, EA_16BYTE, REG_V22, REG_V15, INS_OPTS_4S); + theEmitter->emitIns_R_R(INS_sha256su0, EA_16BYTE, REG_V31, REG_V1, INS_OPTS_4S); + + // IF_DV_2V + theEmitter->emitIns_R_R(INS_sha512su0, EA_16BYTE, REG_V31, REG_V12, INS_OPTS_2D); + theEmitter->emitIns_R_R(INS_sm4e, EA_16BYTE, REG_V12, REG_V5, INS_OPTS_4S); + + // IF_DV_3H + theEmitter->emitIns_R_R_R(INS_sha512h, EA_16BYTE, REG_V3, REG_V31, REG_V8, INS_OPTS_2D); + theEmitter->emitIns_R_R_R(INS_sha512h2, EA_16BYTE, REG_V7, REG_V1, REG_V7, INS_OPTS_2D); + theEmitter->emitIns_R_R_R(INS_sha512su1, EA_16BYTE, REG_V31, REG_V10, REG_V31, INS_OPTS_2D); + theEmitter->emitIns_R_R_R(INS_rax1, EA_16BYTE, REG_V9, REG_V18, REG_V11, INS_OPTS_2D); + theEmitter->emitIns_R_R_R(INS_sm3partw1, EA_16BYTE, REG_V1, REG_V22, REG_V17, INS_OPTS_4S); + theEmitter->emitIns_R_R_R(INS_sm3partw2, EA_16BYTE, REG_V21, REG_V21, REG_V16, INS_OPTS_4S); + theEmitter->emitIns_R_R_R(INS_sm4ekey, EA_16BYTE, REG_V19, REG_V30, REG_V3, INS_OPTS_4S); + + // IF_DV_3I + theEmitter->emitIns_R_R_R_I(INS_xar, EA_16BYTE, REG_V2, REG_V30, REG_V1, 4, INS_OPTS_2D); + theEmitter->emitIns_R_R_R_I(INS_xar, EA_16BYTE, REG_V31, REG_V12, REG_V3, 63, INS_OPTS_2D); + theEmitter->emitIns_R_R_R_I(INS_xar, EA_16BYTE, REG_V23, REG_V9, REG_V31, 54, INS_OPTS_2D); + + // IF_DV_4B + theEmitter->emitIns_R_R_R_R(INS_eor3, EA_16BYTE, REG_V31, REG_V1, REG_V19, REG_V1, INS_OPTS_16B); + theEmitter->emitIns_R_R_R_R(INS_bcax, EA_16BYTE, REG_V12, REG_V27, REG_V6, REG_V4, INS_OPTS_16B); + theEmitter->emitIns_R_R_R_R(INS_sm3ss1, EA_16BYTE, REG_V7, REG_V5, REG_V28, REG_V15, INS_OPTS_4S); } /***************************************************************************** @@ -6967,11 +6990,11 @@ void CodeGen::genArm64EmitterUnitTestsSve() // IF_SVE_EJ_3A theEmitter->emitIns_R_R_R_I(INS_sve_cdot, EA_SCALABLE, REG_V0, REG_V1, REG_V2, 0, INS_OPTS_SCALABLE_S); // CDOT ., ., ., - theEmitter->emitIns_R_R_R_I(INS_sve_cdot, EA_SCALABLE, REG_V3, REG_V4, REG_V5, 90, + theEmitter->emitIns_R_R_R_I(INS_sve_cdot, EA_SCALABLE, REG_V3, REG_V4, REG_V5, 1, INS_OPTS_SCALABLE_S); // CDOT ., ., ., - theEmitter->emitIns_R_R_R_I(INS_sve_cdot, EA_SCALABLE, REG_V6, REG_V7, REG_V8, 180, + theEmitter->emitIns_R_R_R_I(INS_sve_cdot, EA_SCALABLE, REG_V6, REG_V7, REG_V8, 2, INS_OPTS_SCALABLE_D); // CDOT ., ., ., - theEmitter->emitIns_R_R_R_I(INS_sve_cdot, EA_SCALABLE, REG_V9, REG_V10, REG_V11, 270, + theEmitter->emitIns_R_R_R_I(INS_sve_cdot, EA_SCALABLE, REG_V9, REG_V10, REG_V11, 3, INS_OPTS_SCALABLE_D); // CDOT ., ., ., // IF_SVE_EK_3A diff --git a/src/coreclr/jit/codegenarmarch.cpp b/src/coreclr/jit/codegenarmarch.cpp index 0b4916141dc9e3..f5167c9476e996 100644 --- a/src/coreclr/jit/codegenarmarch.cpp +++ b/src/coreclr/jit/codegenarmarch.cpp @@ -4120,7 +4120,7 @@ void CodeGen::genCodeForMulLong(GenTreeOp* mul) #ifdef TARGET_ARM GetEmitter()->emitIns_R_R_R_R(ins, EA_4BYTE, mul->GetRegNum(), mul->AsMultiRegOp()->gtOtherReg, srcReg1, srcReg2); #else - GetEmitter()->emitIns_R_R_R(ins, EA_4BYTE, mul->GetRegNum(), srcReg1, srcReg2); + GetEmitter()->emitIns_R_R_R(ins, EA_8BYTE, mul->GetRegNum(), srcReg1, srcReg2); #endif genProduceReg(mul); diff --git a/src/coreclr/jit/emitarm64.cpp b/src/coreclr/jit/emitarm64.cpp index 202ec72d779bf6..4bd9c489a5c714 100644 --- a/src/coreclr/jit/emitarm64.cpp +++ b/src/coreclr/jit/emitarm64.cpp @@ -624,7 +624,7 @@ void emitter::emitInsSanityCheck(instrDesc* id) case IF_DV_2A: // DV_2A .Q.......X...... ......nnnnnddddd Vd Vn (fabs, fcvt - vector) case IF_DV_2M: // DV_2M .Q......XX...... ......nnnnnddddd Vd Vn (abs, neg - vector) - case IF_DV_2P: // DV_2P ................ ......nnnnnddddd Vd Vn (aes*, sha1su1) + case IF_DV_2P: // DV_2P ................ ......nnnnnddddd Vd Vn (aes*) assert(isValidVectorDatasize(id->idOpSize())); assert(isValidArrangement(id->idOpSize(), id->idInsOpt())); assert(isVectorRegister(id->idReg1())); @@ -796,8 +796,16 @@ void emitter::emitInsSanityCheck(instrDesc* id) assert(isVectorRegister(id->idReg2())); break; - case IF_DV_2U: // DV_2U ................ ......nnnnnddddd Sd Sn (sha1h) - assert(isValidGeneralDatasize(id->idOpSize())); + case IF_DV_2U: // DV_2U ................ ......nnnnnddddd Vd Vn (sha) + case IF_DV_2V: // DV_2V ................ ......nnnnnddddd Vd Vn (vector) + if (id->idIns() == INS_sha1h) + { + assert(id->idOpSize() == EA_4BYTE); + } + else + { + assert(isValidVectorDatasize(id->idOpSize())); + } assert(isVectorRegister(id->idReg1())); assert(isVectorRegister(id->idReg2())); break; @@ -931,7 +939,22 @@ void emitter::emitInsSanityCheck(instrDesc* id) assert(isVectorRegister(id->idReg3())); break; - case IF_DV_4A: // DR_4A .........X.mmmmm .aaaaannnnnddddd Rd Rn Rm Ra (scalar) + case IF_DV_3H: // DV_3H ...........mmmmm .O....nnnnnddddd Vd Vn Vm (vector) + assert(id->idOpSize() == EA_16BYTE); + assert(isVectorRegister(id->idReg1())); + assert(isVectorRegister(id->idReg2())); + assert(isVectorRegister(id->idReg3())); + break; + + case IF_DV_3I: // DV_3I ...........mmmmm iiiiiinnnnnddddd Vd Vn Vm imm6 (vector) + assert(id->idOpSize() == EA_16BYTE); + assert(isVectorRegister(id->idReg1())); + assert(isVectorRegister(id->idReg2())); + assert(isVectorRegister(id->idReg3())); + assert(isValidUimm<6>(emitGetInsSC(id))); + break; + + case IF_DV_4A: // DV_4A .........X.mmmmm .aaaaannnnnddddd Rd Rn Rm Ra (scalar) assert(isValidGeneralDatasize(id->idOpSize())); assert(isVectorRegister(id->idReg1())); assert(isVectorRegister(id->idReg2())); @@ -939,6 +962,14 @@ void emitter::emitInsSanityCheck(instrDesc* id) assert(isVectorRegister(id->idReg4())); break; + case IF_DV_4B: // DV_4B ...........mmmmm .aaaaannnnnddddd Vd Vn Vm Va (vector) + assert(id->idOpSize() == EA_16BYTE); + assert(isVectorRegister(id->idReg1())); + assert(isVectorRegister(id->idReg2())); + assert(isVectorRegister(id->idReg3())); + assert(isVectorRegister(id->idReg4())); + break; + case IF_PC_1A: // PC_1A ................ ...........ddddd Rd assert(id->idOpSize() == EA_8BYTE); assert(isGeneralRegister(id->idReg1())); @@ -1019,7 +1050,7 @@ bool emitter::emitInsMayWriteToGCReg(instrDesc* id) case IF_DV_2K: // DV_2K .........X.mmmmm ......nnnnn..... Vn Vm (fcmp) case IF_DV_2L: // DV_2L ........XX...... ......nnnnnddddd Vd Vn (abs, neg - scalar) case IF_DV_2M: // DV_2M .Q......XX...... ......nnnnnddddd Vd Vn (abs, neg - vector) - case IF_DV_2P: // DV_2P ................ ......nnnnnddddd Vd Vn (aes*, sha1su1) - Vd both source and + case IF_DV_2P: // DV_2P ................ ......nnnnnddddd Vd Vn (aes*) - Vd both source and // destination case IF_DV_2Q: // DV_2Q .........X...... ......nnnnnddddd Sd Vn (faddp, fmaxnmp, fmaxp, fminnmp, @@ -1038,7 +1069,10 @@ bool emitter::emitInsMayWriteToGCReg(instrDesc* id) case IF_DV_3EI: // DV_3EI ........XXLMmmmm ....H.nnnnnddddd Vd Vn Vm[] (scalar by element) case IF_DV_3F: // DV_3F .Q......XX.mmmmm ......nnnnnddddd Vd Vn Vm (vector) case IF_DV_3G: // DV_3G .Q.........mmmmm .iiii.nnnnnddddd Vd Vn Vm imm (vector) + case IF_DV_3H: // DV_3H ...........mmmmm .O....nnnnnddddd Vd Vn Vm (vector) + case IF_DV_3I: // DV_3I ...........mmmmm iiiiiinnnnnddddd Vd Vn Vm imm6 (vector) case IF_DV_4A: // DV_4A .........X.mmmmm .aaaaannnnnddddd Vd Va Vn Vm (scalar) + case IF_DV_4B: // DV_4B ...........mmmmm .aaaaannnnnddddd Vd Vn Vm Va (vector) // Tracked GC pointers cannot be placed into the SIMD registers. return false; @@ -4992,6 +5026,7 @@ void emitter::emitIns_R_R(instruction ins, case INS_aese: case INS_aesmc: case INS_aesimc: + assert(size == EA_16BYTE); assert(isVectorRegister(reg1)); assert(isVectorRegister(reg2)); assert(isValidVectorDatasize(size)); @@ -5001,20 +5036,39 @@ void emitter::emitIns_R_R(instruction ins, break; case INS_sha1h: - assert(insOptsNone(opt)); + assert(size == EA_4BYTE); assert(isVectorRegister(reg1)); assert(isVectorRegister(reg2)); + assert(insOptsNone(opt)); fmt = IF_DV_2U; break; - case INS_sha256su0: case INS_sha1su1: + case INS_sha256su0: + assert(size == EA_16BYTE); assert(isVectorRegister(reg1)); assert(isVectorRegister(reg2)); - assert(isValidVectorDatasize(size)); elemsize = optGetElemsize(opt); assert(elemsize == EA_4BYTE); - fmt = IF_DV_2P; + fmt = IF_DV_2U; + break; + + case INS_sha512su0: + assert(size == EA_16BYTE); + assert(isVectorRegister(reg1)); + assert(isVectorRegister(reg2)); + elemsize = optGetElemsize(opt); + assert(elemsize == EA_8BYTE); + fmt = IF_DV_2V; + break; + + case INS_sm4e: + assert(size == EA_16BYTE); + assert(isVectorRegister(reg1)); + assert(isVectorRegister(reg2)); + elemsize = optGetElemsize(opt); + assert(elemsize == EA_4BYTE); + fmt = IF_DV_2V; break; case INS_ld2: @@ -5714,15 +5768,20 @@ void emitter::emitIns_R_R_I(instruction ins, case INS_ldurb: case INS_ldurh: - case INS_ldur: case INS_sturb: case INS_sturh: - case INS_stur: + case INS_stlurb: + case INS_stlurh: case INS_ldapurb: case INS_ldapurh: + assert(isGeneralRegisterOrZR(reg1)); + reg2 = encodingSPtoZR(reg2); + fmt = IF_LS_2C; + break; + case INS_ldapur: - case INS_stlurb: - case INS_stlurh: + case INS_ldur: + case INS_stur: case INS_stlur: reg2 = encodingSPtoZR(reg2); fmt = IF_LS_2C; @@ -6083,6 +6142,11 @@ void emitter::emitIns_R_R_R(instruction ins, assert(isGeneralRegister(reg1)); assert(isGeneralRegister(reg2)); assert(isGeneralRegister(reg3)); + if (ins == INS_smull || ins == INS_smnegl || ins == INS_smulh || ins == INS_umull || ins == INS_umnegl || + ins == INS_umulh) + { + assert(size == EA_8BYTE); + } fmt = IF_DR_3A; break; @@ -6677,6 +6741,29 @@ void emitter::emitIns_R_R_R(instruction ins, fmt = IF_DV_3A; break; + case INS_sha512h: + case INS_sha512h2: + case INS_sha512su1: + case INS_rax1: + assert(size == EA_16BYTE); + assert(opt == INS_OPTS_2D); + assert(isVectorRegister(reg1)); + assert(isVectorRegister(reg2)); + assert(isVectorRegister(reg3)); + fmt = IF_DV_3H; + break; + + case INS_sm3partw1: + case INS_sm3partw2: + case INS_sm4ekey: + assert(size == EA_16BYTE); + assert(opt == INS_OPTS_4S); + assert(isVectorRegister(reg1)); + assert(isVectorRegister(reg2)); + assert(isVectorRegister(reg3)); + fmt = IF_DV_3H; + break; + default: // fallback to emit SVE instructions. return emitInsSve_R_R_R(ins, attr, reg1, reg2, reg3, opt, sopt); @@ -7130,6 +7217,16 @@ void emitter::emitIns_R_R_R_I(instruction ins, fmt = IF_DV_3AI; break; + case INS_xar: + assert(size == EA_16BYTE); + assert(opt == INS_OPTS_2D); + assert(isVectorRegister(reg1)); + assert(isVectorRegister(reg2)); + assert(isVectorRegister(reg3)); + assert(isValidUimm<6>(imm)); + fmt = IF_DV_3I; + break; + default: // fallback to emit SVE instructions. return emitInsSve_R_R_R_I(ins, attr, reg1, reg2, reg3, imm, opt, sopt); @@ -7569,11 +7666,20 @@ void emitter::emitIns_R_R_R_R(instruction ins, { case INS_madd: case INS_msub: + assert(isValidGeneralDatasize(size)); + assert(isGeneralRegister(reg1)); + assert(isGeneralRegister(reg2)); + assert(isGeneralRegister(reg3)); + assert(isGeneralRegister(reg4)); + assert(insScalableOptsNone(sopt)); + fmt = IF_DR_4A; + break; + case INS_smaddl: case INS_smsubl: case INS_umaddl: case INS_umsubl: - assert(isValidGeneralDatasize(size)); + assert(size == EA_8BYTE); assert(isGeneralRegister(reg1)); assert(isGeneralRegister(reg2)); assert(isGeneralRegister(reg3)); @@ -7596,6 +7702,29 @@ void emitter::emitIns_R_R_R_R(instruction ins, fmt = IF_DV_4A; break; + case INS_eor3: + case INS_bcax: + assert(size == EA_16BYTE); + assert(opt == INS_OPTS_16B); + assert(isVectorRegister(reg1)); + assert(isVectorRegister(reg2)); + assert(isVectorRegister(reg3)); + assert(isVectorRegister(reg4)); + assert(insScalableOptsNone(sopt)); + fmt = IF_DV_4B; + break; + + case INS_sm3ss1: + assert(size == EA_16BYTE); + assert(opt == INS_OPTS_4S); + assert(isVectorRegister(reg1)); + assert(isVectorRegister(reg2)); + assert(isVectorRegister(reg3)); + assert(isVectorRegister(reg4)); + assert(insScalableOptsNone(sopt)); + fmt = IF_DV_4B; + break; + case INS_invalid: fmt = IF_NONE; break; @@ -9498,7 +9627,7 @@ void emitter::emitIns_Call(const EmitCallParams& params) } else { - assert(size == EA_4BYTE); + assert(size == EA_4BYTE || size == EA_2BYTE || size == EA_1BYTE); // no bits are set result = 0x00000000; } @@ -11138,8 +11267,14 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp) code |= insEncodeReg_Rt(id->idReg1()); // ttttt } code |= insEncodeIndexedOpt(id->idInsOpt()); // PP - code |= ((code_t)imm << 12); // iiiiiiiii - code |= insEncodeReg_Rn(id->idReg2()); // nnnnn + if (ins == INS_ldapur && isVectorRegister(id->idReg1())) + { + assert(insOptsNone(id->idInsOpt())); + // PP is different for vector LDAPUR + code |= 0x00000800; // set the bit at location 11 + } + code |= ((code_t)imm << 12); // iiiiiiiii + code |= insEncodeReg_Rn(id->idReg2()); // nnnnn dst += emitOutput_Instr(dst, code); // With pre or post-indexing we may have a second GC register to @@ -12001,7 +12136,7 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp) dst += emitOutput_Instr(dst, code); break; - case IF_DV_2P: // DV_2P ............... ......nnnnnddddd Vd Vn (aes*, sha1su1) + case IF_DV_2P: // DV_2P ............... ......nnnnnddddd Vd Vn (aes*) elemsize = optGetElemsize(id->idInsOpt()); code = emitInsCode(ins, fmt); code |= insEncodeReg_Vd(id->idReg1()); // ddddd @@ -12028,7 +12163,8 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp) dst += emitOutput_Instr(dst, code); break; - case IF_DV_2U: // DV_2U ................ ......nnnnnddddd Sd Sn (sha1h) + case IF_DV_2U: // DV_2U ................ ......nnnnnddddd Vd Vn (sha) + case IF_DV_2V: // DV_2V ................ ......nnnnnddddd Vd Vn (vector) code = emitInsCode(ins, fmt); code |= insEncodeReg_Vd(id->idReg1()); // ddddd code |= insEncodeReg_Vn(id->idReg2()); // nnnnn @@ -12158,6 +12294,24 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp) dst += emitOutput_Instr(dst, code); break; + case IF_DV_3H: // DV_3H. ...........mmmmm .O....nnnnnddddd Vd Vn Vm (vector) + code = emitInsCode(ins, fmt); + code |= insEncodeReg_Vd(id->idReg1()); // ddddd + code |= insEncodeReg_Vn(id->idReg2()); // nnnnn + code |= insEncodeReg_Vm(id->idReg3()); // mmmmm + dst += emitOutput_Instr(dst, code); + break; + + case IF_DV_3I: // DV_3I ...........mmmmm iiiiiinnnnnddddd Vd Vn Vm imm6 (vector) + imm = emitGetInsSC(id); + code = emitInsCode(ins, fmt); + code |= insEncodeReg_Vd(id->idReg1()); // ddddd + code |= insEncodeReg_Vn(id->idReg2()); // nnnnn + code |= insEncodeReg_Vm(id->idReg3()); // mmmmm + code |= ((code_t)imm << 10); // iiiiii + dst += emitOutput_Instr(dst, code); + break; + case IF_DV_4A: // DV_4A .........X.mmmmm .aaaaannnnnddddd Vd Va Vn Vm (scalar) code = emitInsCode(ins, fmt); elemsize = id->idOpSize(); @@ -12169,6 +12323,15 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp) dst += emitOutput_Instr(dst, code); break; + case IF_DV_4B: // DV_4B ...........mmmmm .aaaaannnnnddddd Vd Vn Vm Va (vector) + code = emitInsCode(ins, fmt); + code |= insEncodeReg_Vd(id->idReg1()); // ddddd + code |= insEncodeReg_Vn(id->idReg2()); // nnnnn + code |= insEncodeReg_Vm(id->idReg3()); // mmmmm + code |= insEncodeReg_Va(id->idReg4()); // aaaaa + dst += emitOutput_Instr(dst, code); + break; + case IF_PC_1A: // PC_1A ................ ...........ddddd Rd assert(insOptsNone(id->idInsOpt())); code = emitInsCode(ins, fmt); @@ -13787,11 +13950,28 @@ void emitter::emitDispInsHelp( break; case IF_DR_2E: // DR_2E X..........mmmmm ...........ddddd Rd Rm - case IF_DV_2U: // DV_2U ................ ......nnnnnddddd Sd Sn emitDispReg(id->idReg1(), size, true); emitDispReg(id->idReg2(), size, false); break; + case IF_DV_2U: // DV_2U ................ ......nnnnnddddd Vd Vn + if (ins == INS_sha1h) + { + emitDispReg(id->idReg1(), size, true); + emitDispReg(id->idReg2(), size, false); + } + else + { + emitDispVectorReg(id->idReg1(), id->idInsOpt(), true); + emitDispVectorReg(id->idReg2(), id->idInsOpt(), false); + } + break; + + case IF_DV_2V: // DV_2V ................ ......nnnnnddddd Vd Vn (vector) + emitDispVectorReg(id->idReg1(), id->idInsOpt(), true); + emitDispVectorReg(id->idReg2(), id->idInsOpt(), false); + break; + case IF_DR_2F: // DR_2F X.......sh.mmmmm ssssss.....ddddd Rd Rm {LSL,LSR,ASR} imm(0-63) emitDispReg(id->idReg1(), size, true); emitDispShiftedReg(id->idReg2(), id->idInsOpt(), emitGetInsSC(id), size); @@ -13828,10 +14008,14 @@ void emitter::emitDispInsHelp( break; case IF_DR_3A: // DR_3A X..........mmmmm ......nnnnnmmmmm Rd Rn Rm + { + regNumber reg3 = id->idIsLclVar() ? codeGen->rsGetRsvdReg() : id->idReg3(); + if ((ins == INS_add) || (ins == INS_sub)) { emitDispReg(encodingZRtoSP(id->idReg1()), size, true); emitDispReg(encodingZRtoSP(id->idReg2()), size, true); + emitDispReg(reg3, size, false); } else if ((ins == INS_smulh) || (ins == INS_umulh)) { @@ -13839,30 +14023,23 @@ void emitter::emitDispInsHelp( // smulh Xd, Xn, Xm emitDispReg(id->idReg1(), size, true); emitDispReg(id->idReg2(), size, true); + emitDispReg(reg3, size, false); } else if ((ins == INS_smull) || (ins == INS_umull) || (ins == INS_smnegl) || (ins == INS_umnegl)) { // smull Xd, Wn, Wm emitDispReg(id->idReg1(), EA_8BYTE, true); - size = EA_4BYTE; - emitDispReg(id->idReg2(), size, true); + emitDispReg(id->idReg2(), EA_4BYTE, true); + emitDispReg(reg3, EA_4BYTE, false); } else { emitDispReg(id->idReg1(), size, true); emitDispReg(id->idReg2(), size, true); + emitDispReg(reg3, size, false); } - - if (id->idIsLclVar()) - { - emitDispReg(codeGen->rsGetRsvdReg(), size, false); - } - else - { - emitDispReg(id->idReg3(), size, false); - } - break; + } case IF_DR_3B: // DR_3B X.......sh.mmmmm ssssssnnnnnddddd Rd Rn Rm {LSL,LSR,ASR} imm(0-63) emitDispReg(id->idReg1(), size, true); @@ -13995,7 +14172,7 @@ void emitter::emitDispInsHelp( } break; - case IF_DV_2P: // DV_2P ................ ......nnnnnddddd Vd Vn (aes*, sha1su1) + case IF_DV_2P: // DV_2P ................ ......nnnnnddddd Vd Vn (aes*) emitDispVectorReg(id->idReg1(), id->idInsOpt(), true); emitDispVectorReg(id->idReg2(), id->idInsOpt(), false); break; @@ -14392,6 +14569,27 @@ void emitter::emitDispInsHelp( emitDispImm(emitGetInsSC(id), false); break; + case IF_DV_3H: // DV_3H ...........mmmmm .O....nnnnnddddd Vd Vn Vm (vector) + if ((ins == INS_sha512h) || (ins == INS_sha512h2)) + { + emitDispReg(id->idReg1(), size, true); + emitDispReg(id->idReg2(), size, true); + } + else + { + emitDispVectorReg(id->idReg1(), id->idInsOpt(), true); + emitDispVectorReg(id->idReg2(), id->idInsOpt(), true); + } + emitDispVectorReg(id->idReg3(), id->idInsOpt(), false); + break; + + case IF_DV_3I: // DV_3I ...........mmmmm iiiiiinnnnnddddd Vd Vn Vm imm6 (vector) + emitDispVectorReg(id->idReg1(), id->idInsOpt(), true); + emitDispVectorReg(id->idReg2(), id->idInsOpt(), true); + emitDispVectorReg(id->idReg3(), id->idInsOpt(), true); + emitDispImm(emitGetInsSC(id), false); + break; + case IF_DV_4A: // DV_4A .........X.mmmmm .aaaaannnnnddddd Vd Va Vn Vm (scalar) emitDispReg(id->idReg1(), size, true); emitDispReg(id->idReg2(), size, true); @@ -14399,6 +14597,13 @@ void emitter::emitDispInsHelp( emitDispReg(id->idReg4(), size, false); break; + case IF_DV_4B: // DV_4B ...........mmmmm .aaaaannnnnddddd Vd Vn Vm Va (vector) + emitDispVectorReg(id->idReg1(), id->idInsOpt(), true); + emitDispVectorReg(id->idReg2(), id->idInsOpt(), true); + emitDispVectorReg(id->idReg3(), id->idInsOpt(), true); + emitDispVectorReg(id->idReg4(), id->idInsOpt(), false); + break; + case IF_PC_0A: // PC_0A ................ ................ case IF_SN_0A: // SN_0A ................ ................ if (ins == INS_align) @@ -15265,10 +15470,35 @@ emitter::insExecutionCharacteristics emitter::getInsExecutionCharacteristics(ins // ALU, extend, scale case IF_DR_3C: // add, adc, and, bic, eon, eor, orn, orr, sub, sbc case IF_DR_2C: // cmp - case IF_DV_2U: // sha1h result.insThroughput = PERFSCORE_THROUGHPUT_2X; result.insLatency = PERFSCORE_LATENCY_2C; break; + + case IF_DV_2U: // sha + if (ins == INS_sha1h) + { + result.insThroughput = PERFSCORE_THROUGHPUT_2X; + result.insLatency = PERFSCORE_LATENCY_2C; + } + else + { + result.insThroughput = PERFSCORE_THROUGHPUT_1C; + result.insLatency = PERFSCORE_LATENCY_2C; + } + break; + + case IF_DV_2V: + if (ins == INS_sm4e) + { + result.insThroughput = PERFSCORE_THROUGHPUT_1C; + result.insLatency = PERFSCORE_LATENCY_4C; + } + else + { + result.insThroughput = PERFSCORE_THROUGHPUT_1C; + result.insLatency = PERFSCORE_LATENCY_2C; + } + break; // ALU, Conditional select case IF_DR_1D: // cset, csetm case IF_DR_3D: // csel, csinc, csinv, csneg @@ -16119,6 +16349,11 @@ emitter::insExecutionCharacteristics emitter::getInsExecutionCharacteristics(ins result.insLatency = PERFSCORE_LATENCY_4C; break; + case IF_DV_4B: // eor3, bcax, sm3ss1 + result.insThroughput = PERFSCORE_THROUGHPUT_1C; + result.insLatency = PERFSCORE_LATENCY_2C; + break; + case IF_DV_3D: // fadd, fsub, fdiv, fmul, fmulx, fmin, fminnm, fmax, fmaxnm, fabd, fcmXX (scalar) switch (ins) { @@ -16475,6 +16710,27 @@ emitter::insExecutionCharacteristics emitter::getInsExecutionCharacteristics(ins result.insLatency = PERFSCORE_LATENCY_2C; break; + case IF_DV_3H: // sha512h, sha512h2, sha512su1, rax1, sm3partw1, sm3partw2, sm4ekey + switch (ins) + { + case INS_sm4ekey: + result.insThroughput = PERFSCORE_THROUGHPUT_1C; + result.insLatency = PERFSCORE_LATENCY_4C; + break; + + default: + // all other instructions + result.insThroughput = PERFSCORE_THROUGHPUT_1C; + result.insLatency = PERFSCORE_LATENCY_2C; + break; + } + break; + + case IF_DV_3I: // xar + result.insThroughput = PERFSCORE_THROUGHPUT_1C; + result.insLatency = PERFSCORE_LATENCY_2C; + break; + case IF_DV_2L: // abs, neg, cmeq, cmge, cmgt, cmle, cmlt (scalar) case IF_DV_2M: // (vector) // abs, neg, mvn, not, cmeq, cmge, cmgt, cmle, cmlt, @@ -16650,7 +16906,7 @@ emitter::insExecutionCharacteristics emitter::getInsExecutionCharacteristics(ins } break; - case IF_DV_2P: // aese, aesd, aesmc, aesimc, sha1su1, sha256su0 + case IF_DV_2P: // aese, aesd, aesmc, aesimc result.insThroughput = PERFSCORE_THROUGHPUT_1C; result.insLatency = PERFSCORE_LATENCY_2C; break; diff --git a/src/coreclr/jit/emitfmtsarm64.h b/src/coreclr/jit/emitfmtsarm64.h index 06e5ce1d26cde1..b710230609a8df 100644 --- a/src/coreclr/jit/emitfmtsarm64.h +++ b/src/coreclr/jit/emitfmtsarm64.h @@ -207,7 +207,8 @@ IF_DEF(DV_2Q, IS_NONE, NONE) // DV_2Q .........X...... ......nnnnnddddd S IF_DEF(DV_2R, IS_NONE, NONE) // DV_2R .Q.......X...... ......nnnnnddddd Sd Vn (fmaxnmv, fmaxv, fminnmv, fminv) IF_DEF(DV_2S, IS_NONE, NONE) // DV_2S ........XX...... ......nnnnnddddd Sd Vn (addp - scalar) IF_DEF(DV_2T, IS_NONE, NONE) // DV_2T .Q......XX...... ......nnnnnddddd Sd Vn (addv, saddlv, smaxv, sminv, uaddlv, umaxv, uminv) -IF_DEF(DV_2U, IS_NONE, NONE) // DV_2U ................ ......nnnnnddddd Sd Sn (sha1h) +IF_DEF(DV_2U, IS_NONE, NONE) // DV_2U ................ ......nnnnnddddd Vd Vn (sha) +IF_DEF(DV_2V, IS_NONE, NONE) // DV_2V ................ ......nnnnnddddd Vd Vn (vector) IF_DEF(DV_3A, IS_NONE, NONE) // DV_3A .Q......XX.mmmmm ......nnnnnddddd Vd Vn Vm (vector) IF_DEF(DV_3AI, IS_NONE, NONE) // DV_3AI .Q......XXLMmmmm ....H.nnnnnddddd Vd Vn Vm[] (vector by element) @@ -219,9 +220,12 @@ IF_DEF(DV_3DI, IS_NONE, NONE) // DV_3DI .........XLmmmmm ....H.nnnnnddddd V IF_DEF(DV_3E, IS_NONE, NONE) // DV_3E ........XX.mmmmm ......nnnnnddddd Vd Vn Vm (scalar) IF_DEF(DV_3EI, IS_NONE, NONE) // DV_3EI ........XXLMmmmm ....H.nnnnnddddd Vd Vn Vm[] (scalar by element) IF_DEF(DV_3F, IS_NONE, NONE) // DV_3F ...........mmmmm ......nnnnnddddd Qd Sn Vm (Qd used as both source and destination) -IF_DEF(DV_3G, IS_NONE, NONE) // DV_3G .Q.........mmmmm .iiii.nnnnnddddd Vd Vn Vm imm (vector) +IF_DEF(DV_3G, IS_NONE, NONE) // DV_3G .Q.........mmmmm .iiii.nnnnnddddd Vd Vn Vm imm (vector) +IF_DEF(DV_3H, IS_NONE, NONE) // DV_3H ...........mmmmm .O....nnnnnddddd Vd Vn Vm (vector) +IF_DEF(DV_3I, IS_NONE, NONE) // DV_3I ...........mmmmm iiiiiinnnnnddddd Vd Vn Vm imm (vector) IF_DEF(DV_4A, IS_NONE, NONE) // DV_4A .........X.mmmmm .aaaaannnnnddddd Vd Vn Vm Va (scalar) +IF_DEF(DV_4B, IS_NONE, NONE) // DV_4B ...........mmmmm .aaaaannnnnddddd Vd Vn Vm Va (vector) IF_DEF(SN_0A, IS_NONE, NONE) // SN_0A ................ ................ IF_DEF(SI_0A, IS_NONE, NONE) // SI_0A ...........iiiii iiiiiiiiiii..... imm16 diff --git a/src/coreclr/jit/instrsarm64.h b/src/coreclr/jit/instrsarm64.h index e0c41875453087..4f94424065bf05 100644 --- a/src/coreclr/jit/instrsarm64.h +++ b/src/coreclr/jit/instrsarm64.h @@ -1527,8 +1527,8 @@ INST1(sha1h, "sha1h", 0, IF_DV_2U, 0x5E280800) INST1(sha1su0, "sha1su0", 0, IF_DV_3F, 0x5E003000) // sha1su0 Vd.4S,Vn.4S,Vm.4S DV_3F 01011110000mmmmm 001100nnnnnddddd 5E00 3000 Vd.4S Vn.4S Vm.4S (vector) -INST1(sha1su1, "sha1su1", 0, IF_DV_2P, 0x5E281800) - // sha1su1 Vd.4S, Vn.4S DV_2P 0101111000101000 000110nnnnnddddd 5E28 1800 Vd.4S Vn.4S (vector) +INST1(sha1su1, "sha1su1", 0, IF_DV_2U, 0x5E281800) + // sha1su1 Vd.4S, Vn.4S DV_2U 0101111000101000 000110nnnnnddddd 5E28 1800 Vd.4S Vn.4S (vector) INST1(sha256h, "sha256h", 0, IF_DV_3F, 0x5E004000) // sha256h Qd,Qn,Vm.4S DV_3F 01011110000mmmmm 010000nnnnnddddd 5E00 4000 Qd Qn Vm.4S (vector) @@ -1536,8 +1536,8 @@ INST1(sha256h, "sha256h", 0, IF_DV_3F, 0x5E004000) INST1(sha256h2, "sha256h2", 0, IF_DV_3F, 0x5E005000) // sha256h Qd,Qn,Vm.4S DV_3F 01011110000mmmmm 010100nnnnnddddd 5E00 5000 Qd Qn Vm.4S (vector) -INST1(sha256su0, "sha256su0", 0, IF_DV_2P, 0x5E282800) - // sha256su0 Vd.4S,Vn.4S DV_2P 0101111000101000 001010nnnnnddddd 5E28 2800 Vd.4S Vn.4S (vector) +INST1(sha256su0, "sha256su0", 0, IF_DV_2U, 0x5E282800) + // sha256su0 Vd.4S,Vn.4S DV_2U 0101111000101000 001010nnnnnddddd 5E28 2800 Vd.4S Vn.4S (vector) INST1(sha256su1, "sha256su1", 0, IF_DV_3F, 0x5E006000) // sha256su1 Vd.4S,Vn.4S,Vm.4S DV_3F 01011110000mmmmm 011000nnnnnddddd 5E00 6000 Vd.4S Vn.4S Vm.4S (vector) @@ -2044,6 +2044,46 @@ INST1(tbx_4regs, "tbx", 0, IF_DV_3C, 0x0E007000) INST1(align, "align", 0, IF_SN_0A, BAD_CODE) // align SN_0A #endif + +INST1(eor3, "eor3", 0, IF_DV_4B, 0xCE000000) + // eor3 Vd.16B,Vn.16B,Vm.16B, Va.16B DV_4B 11001110000mmmmm 0aaaaannnnnddddd Vd Vn Vm Va (vector) + +INST1(bcax, "bcax", 0, IF_DV_4B, 0xCE200000) + // bcax Vd.16B,Vn.16B,Vm.16B, Va.16B DV_4B 11001110001mmmmm 0aaaaannnnnddddd Vd Vn Vm Va (vector) + +INST1(sm3ss1, "sm3ss1", 0, IF_DV_4B, 0xCE400000) + // sm3ss1 Vd.4S,Vn.4S,Vm.4S, Va.4S DV_4B 11001110010mmmmm 0aaaaannnnnddddd Vd Vn Vm Va (vector) + +INST1(sha512h, "sha512h", 0, IF_DV_3H, 0xCE608000) + // sha512h Qd,Qn,Vm.2D DV_3H 11001110011mmmmm 100000nnnnnddddd Vd Vn Vm (vector) + +INST1(sha512h2, "sha512h2", 0, IF_DV_3H, 0xCE608400) + // sha512h2 Qd,Qn,Vm.2D DV_3H 11001110011mmmmm 100001nnnnnddddd Vd Vn Vm (vector) + +INST1(sha512su1, "sha512su1", 0, IF_DV_3H, 0xCE608800) + // sha512su1 Vd.2D,Vn.2D,Vm.2D DV_3H 11001110011mmmmm 100010nnnnnddddd Vd Vn Vm (vector) + +INST1(rax1, "rax1", 0, IF_DV_3H, 0xCE608C00) + // rax1 Vd.2D,Vn.2D,Vm.2D DV_3H 11001110011mmmmm 100011nnnnnddddd Vd Vn Vm (vector) + +INST1(sm3partw1, "sm3partw1", 0, IF_DV_3H, 0xCE60C000) + // sm3partw1 Vd.4S,Vn.4S,Vm.4S DV_3H 11001110011mmmmm 110000nnnnnddddd Vd Vn Vm (vector) + +INST1(sm3partw2, "sm3partw2", 0, IF_DV_3H, 0xCE60C400) + // sm3partw2 Vd.4S,Vn.4S,Vm.4S DV_3H 11001110011mmmmm 110001nnnnnddddd Vd Vn Vm (vector) + +INST1(sm4ekey, "sm4ekey", 0, IF_DV_3H, 0xCE60C800) + // sm4ekey Vd.4S,Vn.4S,Vm.4S DV_3H 11001110011mmmmm 110010nnnnnddddd Vd Vn Vm (vector) + +INST1(xar, "xar", 0, IF_DV_3I, 0xCE800000) + // xar Vd.2D,Vn.2D,Vm.2D,imm6 DV_3I 11001110100mmmmm iiiiiinnnnnddddd Vd Vn Vm imm6 (vector) + +INST1(sha512su0, "sha512su0", 0, IF_DV_2V, 0xCEC08000) + // sha512su0 Vd.2D,Vn.2D DV_2V 1100111011000000 100000nnnnnddddd Vd Vn (vector) + +INST1(sm4e, "sm4e", 0, IF_DV_2V, 0xCEC08400) + // sm4e Vd.4S,Vn.4S DV_2V 1100111011000000 100001nnnnnddddd Vd Vn (vector) + // clang-format on /*****************************************************************************/