From e385de6524f4655628b18643920ab11cfd5a1e48 Mon Sep 17 00:00:00 2001 From: TIHan Date: Mon, 12 Feb 2024 12:06:12 -0800 Subject: [PATCH 01/12] Initial format boilerplate --- src/coreclr/jit/codegenarm64test.cpp | 128 ++++++++++++ src/coreclr/jit/emitarm64.cpp | 280 +++++++++++++++++++++++++++ 2 files changed, 408 insertions(+) diff --git a/src/coreclr/jit/codegenarm64test.cpp b/src/coreclr/jit/codegenarm64test.cpp index 6688ad38141718..37a0d52ea3705d 100644 --- a/src/coreclr/jit/codegenarm64test.cpp +++ b/src/coreclr/jit/codegenarm64test.cpp @@ -7415,6 +7415,134 @@ void CodeGen::genArm64EmitterUnitTestsSve() INS_SCALABLE_OPTS_UNPREDICATED); theEmitter->emitIns_R_R_I(INS_sve_str, EA_SCALABLE, REG_V2, REG_R3, 255, INS_OPTS_NONE, INS_SCALABLE_OPTS_UNPREDICATED); + + // IF_SVE_HX_3A_B + theEmitter->emitIns_R_R_R_I(INS_sve_ld1b, EA_SCALABLE, REG_V0, REG_P0, REG_V0, 5, + INS_OPTS_SCALABLE_B); // LD1B {.D }, /Z, [.D{, #}] + theEmitter->emitIns_R_R_R_I(INS_sve_ld1sb, EA_SCALABLE, REG_V0, REG_P0, REG_V0, 5, + INS_OPTS_SCALABLE_B); // LD1SB {.D }, /Z, [.D{, #}] + theEmitter->emitIns_R_R_R_I(INS_sve_ldff1b, EA_SCALABLE, REG_V0, REG_P0, REG_V0, 5, + INS_OPTS_SCALABLE_B); // LDFF1B {.D }, /Z, [.D{, #}] + theEmitter->emitIns_R_R_R_I(INS_sve_ldff1sb, EA_SCALABLE, REG_V0, REG_P0, REG_V0, 5, + INS_OPTS_SCALABLE_B); // LDFF1SB {.D }, /Z, [.D{, #}] + + // IF_SVE_HX_3A_E + theEmitter->emitIns_R_R_R_I(INS_sve_ld1h, EA_SCALABLE, REG_V0, REG_P0, REG_V0, 5, + INS_OPTS_SCALABLE_B); // LD1H {.D }, /Z, [.D{, #}] + theEmitter->emitIns_R_R_R_I(INS_sve_ld1sh, EA_SCALABLE, REG_V0, REG_P0, REG_V0, 5, + INS_OPTS_SCALABLE_B); // LD1SH {.D }, /Z, [.D{, #}] + theEmitter->emitIns_R_R_R_I(INS_sve_ld1w, EA_SCALABLE, REG_V0, REG_P0, REG_V0, 5, + INS_OPTS_SCALABLE_B); // LD1W {.D }, /Z, [.D{, #}] + theEmitter->emitIns_R_R_R_I(INS_sve_ldff1h, EA_SCALABLE, REG_V0, REG_P0, REG_V0, 5, + INS_OPTS_SCALABLE_B); // LDFF1H {.D }, /Z, [.D{, #}] + theEmitter->emitIns_R_R_R_I(INS_sve_ldff1sh, EA_SCALABLE, REG_V0, 
REG_P0, REG_V0, 5, + INS_OPTS_SCALABLE_B); // LDFF1SH {.D }, /Z, [.D{, #}] + theEmitter->emitIns_R_R_R_I(INS_sve_ldff1w, EA_SCALABLE, REG_V0, REG_P0, REG_V0, 5, + INS_OPTS_SCALABLE_B); // LDFF1W {.D }, /Z, [.D{, #}] + + // IF_SVE_IV_3A + theEmitter->emitIns_R_R_R_I(INS_sve_ld1d, EA_SCALABLE, REG_V0, REG_P0, REG_V0, 5, + INS_OPTS_SCALABLE_B); // LD1D {.D }, /Z, [.D{, #}] + theEmitter->emitIns_R_R_R_I(INS_sve_ld1sw, EA_SCALABLE, REG_V0, REG_P0, REG_V0, 5, + INS_OPTS_SCALABLE_B); // LD1SW {.D }, /Z, [.D{, #}] + theEmitter->emitIns_R_R_R_I(INS_sve_ldff1d, EA_SCALABLE, REG_V0, REG_P0, REG_V0, 5, + INS_OPTS_SCALABLE_B); // LDFF1D {.D }, /Z, [.D{, #}] + theEmitter->emitIns_R_R_R_I(INS_sve_ldff1sw, EA_SCALABLE, REG_V0, REG_P0, REG_V0, 5, + INS_OPTS_SCALABLE_B); // LDFF1SW {.D }, /Z, [.D{, #}] + + // IF_SVE_JI_3A_A + theEmitter->emitIns_R_R_R_I(INS_sve_st1b, EA_SCALABLE, REG_V0, REG_P0, REG_V0, 5, + INS_OPTS_SCALABLE_B); // ST1B {.D }, , [.D{, #}] + theEmitter->emitIns_R_R_R_I(INS_sve_st1h, EA_SCALABLE, REG_V0, REG_P0, REG_V0, 5, + INS_OPTS_SCALABLE_B); // ST1H {.D }, , [.D{, #}] + theEmitter->emitIns_R_R_R_I(INS_sve_st1w, EA_SCALABLE, REG_V0, REG_P0, REG_V0, 5, + INS_OPTS_SCALABLE_B); // ST1W {.D }, , [.D{, #}] + + // IF_SVE_JL_3A + theEmitter->emitIns_R_R_R_I(INS_sve_st1d, EA_SCALABLE, REG_V0, REG_P0, REG_V0, 5, + INS_OPTS_SCALABLE_B); // ST1D {.D }, , [.D{, #}] + + // IF_SVE_HY_3A + theEmitter->emitIns_R_R_R(INS_sve_prfb, EA_SCALABLE, REG_V0, REG_P0, REG_R0, + INS_OPTS_SCALABLE_B); // PRFB , , [, .S, ] + theEmitter->emitIns_R_R_R(INS_sve_prfd, EA_SCALABLE, REG_V0, REG_P0, REG_R0, + INS_OPTS_SCALABLE_B); // PRFD , , [, .S, #3] + theEmitter->emitIns_R_R_R(INS_sve_prfh, EA_SCALABLE, REG_V0, REG_P0, REG_R0, + INS_OPTS_SCALABLE_B); // PRFH , , [, .S, #1] + theEmitter->emitIns_R_R_R(INS_sve_prfw, EA_SCALABLE, REG_V0, REG_P0, REG_R0, + INS_OPTS_SCALABLE_B); // PRFW , , [, .S, #2] + + // IF_SVE_HY_3A_A + theEmitter->emitIns_R_R_R(INS_sve_prfb, EA_SCALABLE, REG_V0, REG_P0, 
REG_R0, + INS_OPTS_SCALABLE_B); // PRFB , , [, .D, ] + theEmitter->emitIns_R_R_R(INS_sve_prfd, EA_SCALABLE, REG_V0, REG_P0, REG_R0, + INS_OPTS_SCALABLE_B); // PRFD , , [, .D, #3] + theEmitter->emitIns_R_R_R(INS_sve_prfh, EA_SCALABLE, REG_V0, REG_P0, REG_R0, + INS_OPTS_SCALABLE_B); // PRFH , , [, .D, #1] + theEmitter->emitIns_R_R_R(INS_sve_prfw, EA_SCALABLE, REG_V0, REG_P0, REG_R0, + INS_OPTS_SCALABLE_B); // PRFW , , [, .D, #2] + + // IF_SVE_HY_3B + theEmitter->emitIns_R_R_R(INS_sve_prfb, EA_SCALABLE, REG_V0, REG_P0, REG_R0, + INS_OPTS_SCALABLE_B); // PRFB , , [, .D] + theEmitter->emitIns_R_R_R(INS_sve_prfd, EA_SCALABLE, REG_V0, REG_P0, REG_R0, + INS_OPTS_SCALABLE_B); // PRFD , , [, .D, LSL #3] + theEmitter->emitIns_R_R_R(INS_sve_prfh, EA_SCALABLE, REG_V0, REG_P0, REG_R0, + INS_OPTS_SCALABLE_B); // PRFH , , [, .D, LSL #1] + theEmitter->emitIns_R_R_R(INS_sve_prfw, EA_SCALABLE, REG_V0, REG_P0, REG_R0, + INS_OPTS_SCALABLE_B); // PRFW , , [, .D, LSL #2] + + // IF_SVE_IB_3A + theEmitter->emitIns_R_R_R(INS_sve_prfb, EA_SCALABLE, REG_V0, REG_P0, REG_R0, + INS_OPTS_SCALABLE_B); // PRFB , , [, ] + theEmitter->emitIns_R_R_R(INS_sve_prfd, EA_SCALABLE, REG_V0, REG_P0, REG_R0, + INS_OPTS_SCALABLE_B); // PRFD , , [, , LSL #3] + theEmitter->emitIns_R_R_R(INS_sve_prfh, EA_SCALABLE, REG_V0, REG_P0, REG_R0, + INS_OPTS_SCALABLE_B); // PRFH , , [, , LSL #1] + theEmitter->emitIns_R_R_R(INS_sve_prfw, EA_SCALABLE, REG_V0, REG_P0, REG_R0, + INS_OPTS_SCALABLE_B); // PRFW , , [, , LSL #2] + + // IF_SVE_HZ_2A_B + theEmitter->emitIns_R_R_I(INS_sve_prfb, EA_SCALABLE, REG_V0, REG_P0, 5, + INS_OPTS_SCALABLE_B); // PRFB , , [.D{, #}] + theEmitter->emitIns_R_R_I(INS_sve_prfd, EA_SCALABLE, REG_V0, REG_P0, 5, + INS_OPTS_SCALABLE_B); // PRFD , , [.D{, #}] + theEmitter->emitIns_R_R_I(INS_sve_prfh, EA_SCALABLE, REG_V0, REG_P0, 5, + INS_OPTS_SCALABLE_B); // PRFH , , [.D{, #}] + theEmitter->emitIns_R_R_I(INS_sve_prfw, EA_SCALABLE, REG_V0, REG_P0, 5, + INS_OPTS_SCALABLE_B); // PRFW , , [.D{, #}] + + // 
IF_SVE_IA_2A + theEmitter->emitIns_R_R_I(INS_sve_prfb, EA_SCALABLE, REG_P0, REG_R0, 5, + INS_OPTS_SCALABLE_B); // PRFB , , [{, #, MUL VL}] + theEmitter->emitIns_R_R_I(INS_sve_prfd, EA_SCALABLE, REG_P0, REG_R0, 5, + INS_OPTS_SCALABLE_B); // PRFD , , [{, #, MUL VL}] + theEmitter->emitIns_R_R_I(INS_sve_prfh, EA_SCALABLE, REG_P0, REG_R0, 5, + INS_OPTS_SCALABLE_B); // PRFH , , [{, #, MUL VL}] + theEmitter->emitIns_R_R_I(INS_sve_prfw, EA_SCALABLE, REG_P0, REG_R0, 5, + INS_OPTS_SCALABLE_B); // PRFW , , [{, #, MUL VL}] + + // IF_SVE_IC_3A + theEmitter->emitIns_R_R_R_I(INS_sve_ld1rd, EA_SCALABLE, REG_P0, REG_V0, REG_R0, 5, + INS_OPTS_SCALABLE_B); // LD1RD {.D }, /Z, [{, #}] + theEmitter->emitIns_R_R_R_I(INS_sve_ld1rsw, EA_SCALABLE, REG_P0, REG_V0, REG_R0, 5, + INS_OPTS_SCALABLE_B); // LD1RSW {.D }, /Z, [{, #}] + + // IF_SVE_IC_3A_A + theEmitter->emitIns_R_R_R_I(INS_sve_ld1rsh, EA_SCALABLE, REG_P0, REG_V0, REG_R0, 5, + INS_OPTS_SCALABLE_B); // LD1RSH {.D }, /Z, [{, #}] + theEmitter->emitIns_R_R_R_I(INS_sve_ld1rw, EA_SCALABLE, REG_P0, REG_V0, REG_R0, 5, + INS_OPTS_SCALABLE_B); // LD1RW {.D }, /Z, [{, #}] + + // IF_SVE_IC_3A_B + theEmitter->emitIns_R_R_R_I(INS_sve_ld1rh, EA_SCALABLE, REG_P0, REG_V0, REG_R0, 5, + INS_OPTS_SCALABLE_B); // LD1RH {.D }, /Z, [{, #}] + theEmitter->emitIns_R_R_R_I(INS_sve_ld1rsb, EA_SCALABLE, REG_P0, REG_V0, REG_R0, 5, + INS_OPTS_SCALABLE_B); // LD1RSB {.D }, /Z, [{, #}] + + // IF_SVE_IC_3A_C + theEmitter->emitIns_R_R_R_I(INS_sve_ld1rb, EA_SCALABLE, REG_P0, REG_V0, REG_R0, 5, + INS_OPTS_SCALABLE_B); // LD1RB {.D }, /Z, [{, #}] } #endif // defined(TARGET_ARM64) && defined(DEBUG) diff --git a/src/coreclr/jit/emitarm64.cpp b/src/coreclr/jit/emitarm64.cpp index f460833a28e31c..fa14ec0872804e 100644 --- a/src/coreclr/jit/emitarm64.cpp +++ b/src/coreclr/jit/emitarm64.cpp @@ -2202,6 +2202,36 @@ void emitter::emitInsSanityCheck(instrDesc* id) // iiiiii break; + case IF_SVE_HX_3A_B: // ...........iiiii ...gggnnnnnttttt -- SVE 32-bit gather load (vector plus 
immediate) + case IF_SVE_HX_3A_E: // ...........iiiii ...gggnnnnnttttt -- SVE 32-bit gather load (vector plus immediate) + case IF_SVE_IV_3A: // ...........iiiii ...gggnnnnnttttt -- SVE 64-bit gather load (vector plus immediate) + case IF_SVE_JI_3A_A: // ...........iiiii ...gggnnnnnttttt -- SVE 32-bit scatter store (vector plus immediate) + case IF_SVE_JL_3A: // ...........iiiii ...gggnnnnnttttt -- SVE 64-bit scatter store (vector plus immediate) + break; + + case IF_SVE_HY_3A: // .........h.mmmmm ...gggnnnnn.oooo -- SVE 32-bit gather prefetch (scalar plus 32-bit scaled + // offsets) + case IF_SVE_HY_3A_A: // .........h.mmmmm ...gggnnnnn.oooo -- SVE 32-bit gather prefetch (scalar plus 32-bit + // scaled offsets) + break; + + case IF_SVE_HY_3B: // ...........mmmmm ...gggnnnnn.oooo -- SVE 32-bit gather prefetch (scalar plus 32-bit scaled + // offsets) + case IF_SVE_IB_3A: // ...........mmmmm ...gggnnnnn.oooo -- SVE contiguous prefetch (scalar plus scalar) + break; + + case IF_SVE_HZ_2A_B: // ...........iiiii ...gggnnnnn.oooo -- SVE 32-bit gather prefetch (vector plus immediate) + break; + + case IF_SVE_IA_2A: // ..........iiiiii ...gggnnnnn.oooo -- SVE contiguous prefetch (scalar plus immediate) + break; + + case IF_SVE_IC_3A: // ..........iiiiii ...gggnnnnnttttt -- SVE load and broadcast element + case IF_SVE_IC_3A_A: // ..........iiiiii ...gggnnnnnttttt -- SVE load and broadcast element + case IF_SVE_IC_3A_B: // ..........iiiiii ...gggnnnnnttttt -- SVE load and broadcast element + case IF_SVE_IC_3A_C: // ..........iiiiii ...gggnnnnnttttt -- SVE load and broadcast element + break; + default: printf("unexpected format %s\n", emitIfName(id->idInsFmt())); assert(!"Unexpected format"); @@ -22013,6 +22043,48 @@ BYTE* emitter::emitOutput_InstrSve(BYTE* dst, instrDesc* id) dst += emitOutput_Instr(dst, code); break; + case IF_SVE_HX_3A_B: // ...........iiiii ...gggnnnnnttttt -- SVE 32-bit gather load (vector plus immediate) + case IF_SVE_HX_3A_E: // ...........iiiii 
...gggnnnnnttttt -- SVE 32-bit gather load (vector plus immediate) + case IF_SVE_IV_3A: // ...........iiiii ...gggnnnnnttttt -- SVE 64-bit gather load (vector plus immediate) + case IF_SVE_JI_3A_A: // ...........iiiii ...gggnnnnnttttt -- SVE 32-bit scatter store (vector plus immediate) + case IF_SVE_JL_3A: // ...........iiiii ...gggnnnnnttttt -- SVE 64-bit scatter store (vector plus immediate) + code = emitInsCodeSve(ins, fmt); + dst += emitOutput_Instr(dst, code); + break; + + case IF_SVE_HY_3A: // .........h.mmmmm ...gggnnnnn.oooo -- SVE 32-bit gather prefetch (scalar plus 32-bit scaled + // offsets) + case IF_SVE_HY_3A_A: // .........h.mmmmm ...gggnnnnn.oooo -- SVE 32-bit gather prefetch (scalar plus 32-bit + // scaled offsets) + code = emitInsCodeSve(ins, fmt); + dst += emitOutput_Instr(dst, code); + break; + + case IF_SVE_HY_3B: // ...........mmmmm ...gggnnnnn.oooo -- SVE 32-bit gather prefetch (scalar plus 32-bit scaled + // offsets) + case IF_SVE_IB_3A: // ...........mmmmm ...gggnnnnn.oooo -- SVE contiguous prefetch (scalar plus scalar) + code = emitInsCodeSve(ins, fmt); + dst += emitOutput_Instr(dst, code); + break; + + case IF_SVE_HZ_2A_B: // ...........iiiii ...gggnnnnn.oooo -- SVE 32-bit gather prefetch (vector plus immediate) + code = emitInsCodeSve(ins, fmt); + dst += emitOutput_Instr(dst, code); + break; + + case IF_SVE_IA_2A: // ..........iiiiii ...gggnnnnn.oooo -- SVE contiguous prefetch (scalar plus immediate) + code = emitInsCodeSve(ins, fmt); + dst += emitOutput_Instr(dst, code); + break; + + case IF_SVE_IC_3A: // ..........iiiiii ...gggnnnnnttttt -- SVE load and broadcast element + case IF_SVE_IC_3A_A: // ..........iiiiii ...gggnnnnnttttt -- SVE load and broadcast element + case IF_SVE_IC_3A_B: // ..........iiiiii ...gggnnnnnttttt -- SVE load and broadcast element + case IF_SVE_IC_3A_C: // ..........iiiiii ...gggnnnnnttttt -- SVE load and broadcast element + code = emitInsCodeSve(ins, fmt); + dst += emitOutput_Instr(dst, code); + break; + 
default: assert(!"Unexpected format"); break; @@ -25410,6 +25482,36 @@ void emitter::emitDispInsHelp( emitDispSveReg(id->idReg4(), id->idInsOpt(), false); break; + case IF_SVE_HX_3A_B: // ...........iiiii ...gggnnnnnttttt -- SVE 32-bit gather load (vector plus immediate) + case IF_SVE_HX_3A_E: // ...........iiiii ...gggnnnnnttttt -- SVE 32-bit gather load (vector plus immediate) + case IF_SVE_IV_3A: // ...........iiiii ...gggnnnnnttttt -- SVE 64-bit gather load (vector plus immediate) + case IF_SVE_JI_3A_A: // ...........iiiii ...gggnnnnnttttt -- SVE 32-bit scatter store (vector plus immediate) + case IF_SVE_JL_3A: // ...........iiiii ...gggnnnnnttttt -- SVE 64-bit scatter store (vector plus immediate) + break; + + case IF_SVE_HY_3A: // .........h.mmmmm ...gggnnnnn.oooo -- SVE 32-bit gather prefetch (scalar plus 32-bit scaled + // offsets) + case IF_SVE_HY_3A_A: // .........h.mmmmm ...gggnnnnn.oooo -- SVE 32-bit gather prefetch (scalar plus 32-bit + // scaled offsets) + break; + + case IF_SVE_HY_3B: // ...........mmmmm ...gggnnnnn.oooo -- SVE 32-bit gather prefetch (scalar plus 32-bit scaled + // offsets) + case IF_SVE_IB_3A: // ...........mmmmm ...gggnnnnn.oooo -- SVE contiguous prefetch (scalar plus scalar) + break; + + case IF_SVE_HZ_2A_B: // ...........iiiii ...gggnnnnn.oooo -- SVE 32-bit gather prefetch (vector plus immediate) + break; + + case IF_SVE_IA_2A: // ..........iiiiii ...gggnnnnn.oooo -- SVE contiguous prefetch (scalar plus immediate) + break; + + case IF_SVE_IC_3A: // ..........iiiiii ...gggnnnnnttttt -- SVE load and broadcast element + case IF_SVE_IC_3A_A: // ..........iiiiii ...gggnnnnnttttt -- SVE load and broadcast element + case IF_SVE_IC_3A_B: // ..........iiiiii ...gggnnnnnttttt -- SVE load and broadcast element + case IF_SVE_IC_3A_C: // ..........iiiiii ...gggnnnnnttttt -- SVE load and broadcast element + break; + default: printf("unexpected format %s", emitIfName(id->idInsFmt())); assert(!"unexpectedFormat"); @@ -28965,6 +29067,184 @@ 
emitter::insExecutionCharacteristics emitter::getInsExecutionCharacteristics(ins result.insLatency = PERFSCORE_LATENCY_2C; break; + case IF_SVE_HX_3A_B: // ...........iiiii ...gggnnnnnttttt -- SVE 32-bit gather load (vector plus immediate) + case IF_SVE_HX_3A_E: // ...........iiiii ...gggnnnnnttttt -- SVE 32-bit gather load (vector plus immediate) + case IF_SVE_IV_3A: // ...........iiiii ...gggnnnnnttttt -- SVE 64-bit gather load (vector plus immediate) + result.insThroughput = PERFSCORE_THROUGHPUT_1C; + result.insLatency = PERFSCORE_LATENCY_9C; + break; + + case IF_SVE_JI_3A_A: // ...........iiiii ...gggnnnnnttttt -- SVE 32-bit scatter store (vector plus immediate) + case IF_SVE_JL_3A: // ...........iiiii ...gggnnnnnttttt -- SVE 64-bit scatter store (vector plus immediate) + result.insThroughput = PERFSCORE_THROUGHPUT_1C; + result.insLatency = PERFSCORE_LATENCY_2C; + break; + + case IF_SVE_HY_3A: // .........h.mmmmm ...gggnnnnn.oooo -- SVE 32-bit gather prefetch (scalar plus 32-bit scaled + // offsets) + switch (ins) + { + case INS_sve_prfb: + result.insThroughput = PERFSCORE_THROUGHPUT_1C; // need to fix + result.insLatency = PERFSCORE_LATENCY_1C; // need to fix + break; + case INS_sve_prfh: + result.insThroughput = PERFSCORE_THROUGHPUT_1C; // need to fix + result.insLatency = PERFSCORE_LATENCY_1C; // need to fix + break; + case INS_sve_prfw: + result.insThroughput = PERFSCORE_THROUGHPUT_1C; // need to fix + result.insLatency = PERFSCORE_LATENCY_1C; // need to fix + break; + case INS_sve_prfd: + result.insThroughput = PERFSCORE_THROUGHPUT_1C; // need to fix + result.insLatency = PERFSCORE_LATENCY_1C; // need to fix + break; + default: + // all other instructions + perfScoreUnhandledInstruction(id, &result); + break; + } + + case IF_SVE_HY_3A_A: // .........h.mmmmm ...gggnnnnn.oooo -- SVE 32-bit gather prefetch (scalar plus 32-bit + // scaled offsets) + switch (ins) + { + case INS_sve_prfb: + result.insThroughput = PERFSCORE_THROUGHPUT_1C; // need to fix + 
result.insLatency = PERFSCORE_LATENCY_1C; // need to fix + break; + case INS_sve_prfh: + result.insThroughput = PERFSCORE_THROUGHPUT_1C; // need to fix + result.insLatency = PERFSCORE_LATENCY_1C; // need to fix + break; + case INS_sve_prfw: + result.insThroughput = PERFSCORE_THROUGHPUT_1C; // need to fix + result.insLatency = PERFSCORE_LATENCY_1C; // need to fix + break; + case INS_sve_prfd: + result.insThroughput = PERFSCORE_THROUGHPUT_1C; // need to fix + result.insLatency = PERFSCORE_LATENCY_1C; // need to fix + break; + default: + // all other instructions + perfScoreUnhandledInstruction(id, &result); + break; + } + break; + + case IF_SVE_HY_3B: // ...........mmmmm ...gggnnnnn.oooo -- SVE 32-bit gather prefetch (scalar plus 32-bit scaled + // offsets) + switch (ins) + { + case INS_sve_prfb: + result.insThroughput = PERFSCORE_THROUGHPUT_1C; // need to fix + result.insLatency = PERFSCORE_LATENCY_1C; // need to fix + break; + case INS_sve_prfh: + result.insThroughput = PERFSCORE_THROUGHPUT_1C; // need to fix + result.insLatency = PERFSCORE_LATENCY_1C; // need to fix + break; + case INS_sve_prfw: + result.insThroughput = PERFSCORE_THROUGHPUT_1C; // need to fix + result.insLatency = PERFSCORE_LATENCY_1C; // need to fix + break; + case INS_sve_prfd: + result.insThroughput = PERFSCORE_THROUGHPUT_1C; // need to fix + result.insLatency = PERFSCORE_LATENCY_1C; // need to fix + break; + default: + // all other instructions + perfScoreUnhandledInstruction(id, &result); + break; + } + + case IF_SVE_IB_3A: // ...........mmmmm ...gggnnnnn.oooo -- SVE contiguous prefetch (scalar plus scalar) + switch (ins) + { + case INS_sve_prfb: + result.insThroughput = PERFSCORE_THROUGHPUT_1C; // need to fix + result.insLatency = PERFSCORE_LATENCY_1C; // need to fix + break; + case INS_sve_prfh: + result.insThroughput = PERFSCORE_THROUGHPUT_1C; // need to fix + result.insLatency = PERFSCORE_LATENCY_1C; // need to fix + break; + case INS_sve_prfw: + result.insThroughput = 
PERFSCORE_THROUGHPUT_1C; // need to fix + result.insLatency = PERFSCORE_LATENCY_1C; // need to fix + break; + case INS_sve_prfd: + result.insThroughput = PERFSCORE_THROUGHPUT_1C; // need to fix + result.insLatency = PERFSCORE_LATENCY_1C; // need to fix + break; + default: + // all other instructions + perfScoreUnhandledInstruction(id, &result); + break; + } + break; + + case IF_SVE_HZ_2A_B: // ...........iiiii ...gggnnnnn.oooo -- SVE 32-bit gather prefetch (vector plus immediate) + switch (ins) + { + case INS_sve_prfb: + result.insThroughput = PERFSCORE_THROUGHPUT_1C; // need to fix + result.insLatency = PERFSCORE_LATENCY_1C; // need to fix + break; + case INS_sve_prfh: + result.insThroughput = PERFSCORE_THROUGHPUT_1C; // need to fix + result.insLatency = PERFSCORE_LATENCY_1C; // need to fix + break; + case INS_sve_prfw: + result.insThroughput = PERFSCORE_THROUGHPUT_1C; // need to fix + result.insLatency = PERFSCORE_LATENCY_1C; // need to fix + break; + case INS_sve_prfd: + result.insThroughput = PERFSCORE_THROUGHPUT_1C; // need to fix + result.insLatency = PERFSCORE_LATENCY_1C; // need to fix + break; + default: + // all other instructions + perfScoreUnhandledInstruction(id, &result); + break; + } + break; + + case IF_SVE_IA_2A: // ..........iiiiii ...gggnnnnn.oooo -- SVE contiguous prefetch (scalar plus immediate) + switch (ins) + { + case INS_sve_prfb: + result.insThroughput = PERFSCORE_THROUGHPUT_1C; // need to fix + result.insLatency = PERFSCORE_LATENCY_1C; // need to fix + break; + case INS_sve_prfh: + result.insThroughput = PERFSCORE_THROUGHPUT_1C; // need to fix + result.insLatency = PERFSCORE_LATENCY_1C; // need to fix + break; + case INS_sve_prfw: + result.insThroughput = PERFSCORE_THROUGHPUT_1C; // need to fix + result.insLatency = PERFSCORE_LATENCY_1C; // need to fix + break; + case INS_sve_prfd: + result.insThroughput = PERFSCORE_THROUGHPUT_1C; // need to fix + result.insLatency = PERFSCORE_LATENCY_1C; // need to fix + break; + default: + // all other 
instructions + perfScoreUnhandledInstruction(id, &result); + break; + } + break; + + case IF_SVE_IC_3A: // ..........iiiiii ...gggnnnnnttttt -- SVE load and broadcast element + case IF_SVE_IC_3A_A: // ..........iiiiii ...gggnnnnnttttt -- SVE load and broadcast element + case IF_SVE_IC_3A_B: // ..........iiiiii ...gggnnnnnttttt -- SVE load and broadcast element + case IF_SVE_IC_3A_C: // ..........iiiiii ...gggnnnnnttttt -- SVE load and broadcast element + result.insThroughput = PERFSCORE_THROUGHPUT_3C; + result.insLatency = PERFSCORE_LATENCY_6C; + break; + default: // all other instructions perfScoreUnhandledInstruction(id, &result); From 7445edfc87d1427bd72a98affa2a1b5165967ece Mon Sep 17 00:00:00 2001 From: TIHan Date: Mon, 12 Feb 2024 12:52:15 -0800 Subject: [PATCH 02/12] SVE_HX_3A_B format working --- src/coreclr/jit/codegenarm64test.cpp | 260 ++++++++++++++------------- src/coreclr/jit/emitarm64.cpp | 127 ++++++++++++- src/coreclr/jit/emitarm64.h | 4 + src/coreclr/jit/instrsarm64sve.h | 8 +- 4 files changed, 262 insertions(+), 137 deletions(-) diff --git a/src/coreclr/jit/codegenarm64test.cpp b/src/coreclr/jit/codegenarm64test.cpp index 37a0d52ea3705d..d1011f1d117cee 100644 --- a/src/coreclr/jit/codegenarm64test.cpp +++ b/src/coreclr/jit/codegenarm64test.cpp @@ -7417,132 +7417,140 @@ void CodeGen::genArm64EmitterUnitTestsSve() INS_SCALABLE_OPTS_UNPREDICATED); // IF_SVE_HX_3A_B - theEmitter->emitIns_R_R_R_I(INS_sve_ld1b, EA_SCALABLE, REG_V0, REG_P0, REG_V0, 5, - INS_OPTS_SCALABLE_B); // LD1B {.D }, /Z, [.D{, #}] - theEmitter->emitIns_R_R_R_I(INS_sve_ld1sb, EA_SCALABLE, REG_V0, REG_P0, REG_V0, 5, - INS_OPTS_SCALABLE_B); // LD1SB {.D }, /Z, [.D{, #}] - theEmitter->emitIns_R_R_R_I(INS_sve_ldff1b, EA_SCALABLE, REG_V0, REG_P0, REG_V0, 5, - INS_OPTS_SCALABLE_B); // LDFF1B {.D }, /Z, [.D{, #}] - theEmitter->emitIns_R_R_R_I(INS_sve_ldff1sb, EA_SCALABLE, REG_V0, REG_P0, REG_V0, 5, - INS_OPTS_SCALABLE_B); // LDFF1SB {.D }, /Z, [.D{, #}] - - // IF_SVE_HX_3A_E - 
theEmitter->emitIns_R_R_R_I(INS_sve_ld1h, EA_SCALABLE, REG_V0, REG_P0, REG_V0, 5, - INS_OPTS_SCALABLE_B); // LD1H {.D }, /Z, [.D{, #}] - theEmitter->emitIns_R_R_R_I(INS_sve_ld1sh, EA_SCALABLE, REG_V0, REG_P0, REG_V0, 5, - INS_OPTS_SCALABLE_B); // LD1SH {.D }, /Z, [.D{, #}] - theEmitter->emitIns_R_R_R_I(INS_sve_ld1w, EA_SCALABLE, REG_V0, REG_P0, REG_V0, 5, - INS_OPTS_SCALABLE_B); // LD1W {.D }, /Z, [.D{, #}] - theEmitter->emitIns_R_R_R_I(INS_sve_ldff1h, EA_SCALABLE, REG_V0, REG_P0, REG_V0, 5, - INS_OPTS_SCALABLE_B); // LDFF1H {.D }, /Z, [.D{, #}] - theEmitter->emitIns_R_R_R_I(INS_sve_ldff1sh, EA_SCALABLE, REG_V0, REG_P0, REG_V0, 5, - INS_OPTS_SCALABLE_B); // LDFF1SH {.D }, /Z, [.D{, #}] - theEmitter->emitIns_R_R_R_I(INS_sve_ldff1w, EA_SCALABLE, REG_V0, REG_P0, REG_V0, 5, - INS_OPTS_SCALABLE_B); // LDFF1W {.D }, /Z, [.D{, #}] - - // IF_SVE_IV_3A - theEmitter->emitIns_R_R_R_I(INS_sve_ld1d, EA_SCALABLE, REG_V0, REG_P0, REG_V0, 5, - INS_OPTS_SCALABLE_B); // LD1D {.D }, /Z, [.D{, #}] - theEmitter->emitIns_R_R_R_I(INS_sve_ld1sw, EA_SCALABLE, REG_V0, REG_P0, REG_V0, 5, - INS_OPTS_SCALABLE_B); // LD1SW {.D }, /Z, [.D{, #}] - theEmitter->emitIns_R_R_R_I(INS_sve_ldff1d, EA_SCALABLE, REG_V0, REG_P0, REG_V0, 5, - INS_OPTS_SCALABLE_B); // LDFF1D {.D }, /Z, [.D{, #}] - theEmitter->emitIns_R_R_R_I(INS_sve_ldff1sw, EA_SCALABLE, REG_V0, REG_P0, REG_V0, 5, - INS_OPTS_SCALABLE_B); // LDFF1SW {.D }, /Z, [.D{, #}] - - // IF_SVE_JI_3A_A - theEmitter->emitIns_R_R_R_I(INS_sve_st1b, EA_SCALABLE, REG_V0, REG_P0, REG_V0, 5, - INS_OPTS_SCALABLE_B); // ST1B {.D }, , [.D{, #}] - theEmitter->emitIns_R_R_R_I(INS_sve_st1h, EA_SCALABLE, REG_V0, REG_P0, REG_V0, 5, - INS_OPTS_SCALABLE_B); // ST1H {.D }, , [.D{, #}] - theEmitter->emitIns_R_R_R_I(INS_sve_st1w, EA_SCALABLE, REG_V0, REG_P0, REG_V0, 5, - INS_OPTS_SCALABLE_B); // ST1W {.D }, , [.D{, #}] - - // IF_SVE_JL_3A - theEmitter->emitIns_R_R_R_I(INS_sve_st1d, EA_SCALABLE, REG_V0, REG_P0, REG_V0, 5, - INS_OPTS_SCALABLE_B); // ST1D {.D }, , [.D{, #}] - 
- // IF_SVE_HY_3A - theEmitter->emitIns_R_R_R(INS_sve_prfb, EA_SCALABLE, REG_V0, REG_P0, REG_R0, - INS_OPTS_SCALABLE_B); // PRFB , , [, .S, ] - theEmitter->emitIns_R_R_R(INS_sve_prfd, EA_SCALABLE, REG_V0, REG_P0, REG_R0, - INS_OPTS_SCALABLE_B); // PRFD , , [, .S, #3] - theEmitter->emitIns_R_R_R(INS_sve_prfh, EA_SCALABLE, REG_V0, REG_P0, REG_R0, - INS_OPTS_SCALABLE_B); // PRFH , , [, .S, #1] - theEmitter->emitIns_R_R_R(INS_sve_prfw, EA_SCALABLE, REG_V0, REG_P0, REG_R0, - INS_OPTS_SCALABLE_B); // PRFW , , [, .S, #2] - - // IF_SVE_HY_3A_A - theEmitter->emitIns_R_R_R(INS_sve_prfb, EA_SCALABLE, REG_V0, REG_P0, REG_R0, - INS_OPTS_SCALABLE_B); // PRFB , , [, .D, ] - theEmitter->emitIns_R_R_R(INS_sve_prfd, EA_SCALABLE, REG_V0, REG_P0, REG_R0, - INS_OPTS_SCALABLE_B); // PRFD , , [, .D, #3] - theEmitter->emitIns_R_R_R(INS_sve_prfh, EA_SCALABLE, REG_V0, REG_P0, REG_R0, - INS_OPTS_SCALABLE_B); // PRFH , , [, .D, #1] - theEmitter->emitIns_R_R_R(INS_sve_prfw, EA_SCALABLE, REG_V0, REG_P0, REG_R0, - INS_OPTS_SCALABLE_B); // PRFW , , [, .D, #2] - - // IF_SVE_HY_3B - theEmitter->emitIns_R_R_R(INS_sve_prfb, EA_SCALABLE, REG_V0, REG_P0, REG_R0, - INS_OPTS_SCALABLE_B); // PRFB , , [, .D] - theEmitter->emitIns_R_R_R(INS_sve_prfd, EA_SCALABLE, REG_V0, REG_P0, REG_R0, - INS_OPTS_SCALABLE_B); // PRFD , , [, .D, LSL #3] - theEmitter->emitIns_R_R_R(INS_sve_prfh, EA_SCALABLE, REG_V0, REG_P0, REG_R0, - INS_OPTS_SCALABLE_B); // PRFH , , [, .D, LSL #1] - theEmitter->emitIns_R_R_R(INS_sve_prfw, EA_SCALABLE, REG_V0, REG_P0, REG_R0, - INS_OPTS_SCALABLE_B); // PRFW , , [, .D, LSL #2] - - // IF_SVE_IB_3A - theEmitter->emitIns_R_R_R(INS_sve_prfb, EA_SCALABLE, REG_V0, REG_P0, REG_R0, - INS_OPTS_SCALABLE_B); // PRFB , , [, ] - theEmitter->emitIns_R_R_R(INS_sve_prfd, EA_SCALABLE, REG_V0, REG_P0, REG_R0, - INS_OPTS_SCALABLE_B); // PRFD , , [, , LSL #3] - theEmitter->emitIns_R_R_R(INS_sve_prfh, EA_SCALABLE, REG_V0, REG_P0, REG_R0, - INS_OPTS_SCALABLE_B); // PRFH , , [, , LSL #1] - 
theEmitter->emitIns_R_R_R(INS_sve_prfw, EA_SCALABLE, REG_V0, REG_P0, REG_R0, - INS_OPTS_SCALABLE_B); // PRFW , , [, , LSL #2] - - // IF_SVE_HZ_2A_B - theEmitter->emitIns_R_R_I(INS_sve_prfb, EA_SCALABLE, REG_V0, REG_P0, 5, - INS_OPTS_SCALABLE_B); // PRFB , , [.D{, #}] - theEmitter->emitIns_R_R_I(INS_sve_prfd, EA_SCALABLE, REG_V0, REG_P0, 5, - INS_OPTS_SCALABLE_B); // PRFD , , [.D{, #}] - theEmitter->emitIns_R_R_I(INS_sve_prfh, EA_SCALABLE, REG_V0, REG_P0, 5, - INS_OPTS_SCALABLE_B); // PRFH , , [.D{, #}] - theEmitter->emitIns_R_R_I(INS_sve_prfw, EA_SCALABLE, REG_V0, REG_P0, 5, - INS_OPTS_SCALABLE_B); // PRFW , , [.D{, #}] - - // IF_SVE_IA_2A - theEmitter->emitIns_R_R_I(INS_sve_prfb, EA_SCALABLE, REG_P0, REG_R0, 5, - INS_OPTS_SCALABLE_B); // PRFB , , [{, #, MUL VL}] - theEmitter->emitIns_R_R_I(INS_sve_prfd, EA_SCALABLE, REG_P0, REG_R0, 5, - INS_OPTS_SCALABLE_B); // PRFD , , [{, #, MUL VL}] - theEmitter->emitIns_R_R_I(INS_sve_prfh, EA_SCALABLE, REG_P0, REG_R0, 5, - INS_OPTS_SCALABLE_B); // PRFH , , [{, #, MUL VL}] - theEmitter->emitIns_R_R_I(INS_sve_prfw, EA_SCALABLE, REG_P0, REG_R0, 5, - INS_OPTS_SCALABLE_B); // PRFW , , [{, #, MUL VL}] - - // IF_SVE_IC_3A - theEmitter->emitIns_R_R_R_I(INS_sve_ld1rd, EA_SCALABLE, REG_P0, REG_V0, REG_R0, 5, - INS_OPTS_SCALABLE_B); // LD1RD {.D }, /Z, [{, #}] - theEmitter->emitIns_R_R_R_I(INS_sve_ld1rsw, EA_SCALABLE, REG_P0, REG_V0, REG_R0, 5, - INS_OPTS_SCALABLE_B); // LD1RSW {.D }, /Z, [{, #}] - - // IF_SVE_IC_3A_A - theEmitter->emitIns_R_R_R_I(INS_sve_ld1rsh, EA_SCALABLE, REG_P0, REG_V0, REG_R0, 5, - INS_OPTS_SCALABLE_B); // LD1RSH {.D }, /Z, [{, #}] - theEmitter->emitIns_R_R_R_I(INS_sve_ld1rw, EA_SCALABLE, REG_P0, REG_V0, REG_R0, 5, - INS_OPTS_SCALABLE_B); // LD1RW {.D }, /Z, [{, #}] - - // IF_SVE_IC_3A_B - theEmitter->emitIns_R_R_R_I(INS_sve_ld1rh, EA_SCALABLE, REG_P0, REG_V0, REG_R0, 5, - INS_OPTS_SCALABLE_B); // LD1RH {.D }, /Z, [{, #}] - theEmitter->emitIns_R_R_R_I(INS_sve_ld1rsb, EA_SCALABLE, REG_P0, REG_V0, REG_R0, 5, - 
INS_OPTS_SCALABLE_B); // LD1RSB {.D }, /Z, [{, #}] - - // IF_SVE_IC_3A_C - theEmitter->emitIns_R_R_R_I(INS_sve_ld1rb, EA_SCALABLE, REG_P0, REG_V0, REG_R0, 5, - INS_OPTS_SCALABLE_B); // LD1RB {.D }, /Z, [{, #}] + theEmitter->emitIns_R_R_R_I(INS_sve_ld1b, EA_SCALABLE, REG_V0, REG_P0, REG_V1, 0, + INS_OPTS_SCALABLE_S); // LD1B {.S }, /Z, [.S{, #}] + theEmitter->emitIns_R_R_R_I(INS_sve_ld1sb, EA_SCALABLE, REG_V2, REG_P7, REG_V3, 5, + INS_OPTS_SCALABLE_S); // LD1SB {.S }, /Z, [.S{, #}] + theEmitter->emitIns_R_R_R_I(INS_sve_ldff1b, EA_SCALABLE, REG_V4, REG_P3, REG_V1, 5, + INS_OPTS_SCALABLE_S); // LDFF1B {.S }, /Z, [.S{, #}] + theEmitter->emitIns_R_R_R_I(INS_sve_ldff1sb, EA_SCALABLE, REG_V2, REG_P6, REG_V0, 31, + INS_OPTS_SCALABLE_S); // LDFF1SB {.S }, /Z, [.S{, #}] + theEmitter->emitIns_R_R_R_I(INS_sve_ld1b, EA_SCALABLE, REG_V0, REG_P0, REG_V1, 0, + INS_OPTS_SCALABLE_D); // LD1B {.D }, /Z, [.D{, #}] + theEmitter->emitIns_R_R_R_I(INS_sve_ld1sb, EA_SCALABLE, REG_V2, REG_P7, REG_V3, 5, + INS_OPTS_SCALABLE_D); // LD1SB {.D }, /Z, [.D{, #}] + theEmitter->emitIns_R_R_R_I(INS_sve_ldff1b, EA_SCALABLE, REG_V4, REG_P3, REG_V1, 5, + INS_OPTS_SCALABLE_D); // LDFF1B {.D }, /Z, [.D{, #}] + theEmitter->emitIns_R_R_R_I(INS_sve_ldff1sb, EA_SCALABLE, REG_V2, REG_P6, REG_V0, 31, + INS_OPTS_SCALABLE_D); // LDFF1SB {.D }, /Z, [.D{, #}] + + //// IF_SVE_HX_3A_E + //theEmitter->emitIns_R_R_R_I(INS_sve_ld1h, EA_SCALABLE, REG_V0, REG_P0, REG_V0, 5, + // INS_OPTS_SCALABLE_B); // LD1H {.D }, /Z, [.D{, #}] + //theEmitter->emitIns_R_R_R_I(INS_sve_ld1sh, EA_SCALABLE, REG_V0, REG_P0, REG_V0, 5, + // INS_OPTS_SCALABLE_B); // LD1SH {.D }, /Z, [.D{, #}] + //theEmitter->emitIns_R_R_R_I(INS_sve_ld1w, EA_SCALABLE, REG_V0, REG_P0, REG_V0, 5, + // INS_OPTS_SCALABLE_B); // LD1W {.D }, /Z, [.D{, #}] + //theEmitter->emitIns_R_R_R_I(INS_sve_ldff1h, EA_SCALABLE, REG_V0, REG_P0, REG_V0, 5, + // INS_OPTS_SCALABLE_B); // LDFF1H {.D }, /Z, [.D{, #}] + //theEmitter->emitIns_R_R_R_I(INS_sve_ldff1sh, EA_SCALABLE, REG_V0, 
REG_P0, REG_V0, 5, + // INS_OPTS_SCALABLE_B); // LDFF1SH {.D }, /Z, [.D{, #}] + //theEmitter->emitIns_R_R_R_I(INS_sve_ldff1w, EA_SCALABLE, REG_V0, REG_P0, REG_V0, 5, + // INS_OPTS_SCALABLE_B); // LDFF1W {.D }, /Z, [.D{, #}] + + //// IF_SVE_IV_3A + //theEmitter->emitIns_R_R_R_I(INS_sve_ld1d, EA_SCALABLE, REG_V0, REG_P0, REG_V0, 5, + // INS_OPTS_SCALABLE_B); // LD1D {.D }, /Z, [.D{, #}] + //theEmitter->emitIns_R_R_R_I(INS_sve_ld1sw, EA_SCALABLE, REG_V0, REG_P0, REG_V0, 5, + // INS_OPTS_SCALABLE_B); // LD1SW {.D }, /Z, [.D{, #}] + //theEmitter->emitIns_R_R_R_I(INS_sve_ldff1d, EA_SCALABLE, REG_V0, REG_P0, REG_V0, 5, + // INS_OPTS_SCALABLE_B); // LDFF1D {.D }, /Z, [.D{, #}] + //theEmitter->emitIns_R_R_R_I(INS_sve_ldff1sw, EA_SCALABLE, REG_V0, REG_P0, REG_V0, 5, + // INS_OPTS_SCALABLE_B); // LDFF1SW {.D }, /Z, [.D{, #}] + + //// IF_SVE_JI_3A_A + //theEmitter->emitIns_R_R_R_I(INS_sve_st1b, EA_SCALABLE, REG_V0, REG_P0, REG_V0, 5, + // INS_OPTS_SCALABLE_B); // ST1B {.D }, , [.D{, #}] + //theEmitter->emitIns_R_R_R_I(INS_sve_st1h, EA_SCALABLE, REG_V0, REG_P0, REG_V0, 5, + // INS_OPTS_SCALABLE_B); // ST1H {.D }, , [.D{, #}] + //theEmitter->emitIns_R_R_R_I(INS_sve_st1w, EA_SCALABLE, REG_V0, REG_P0, REG_V0, 5, + // INS_OPTS_SCALABLE_B); // ST1W {.D }, , [.D{, #}] + + //// IF_SVE_JL_3A + //theEmitter->emitIns_R_R_R_I(INS_sve_st1d, EA_SCALABLE, REG_V0, REG_P0, REG_V0, 5, + // INS_OPTS_SCALABLE_B); // ST1D {.D }, , [.D{, #}] + + //// IF_SVE_HY_3A + //theEmitter->emitIns_R_R_R(INS_sve_prfb, EA_SCALABLE, REG_V0, REG_P0, REG_R0, + // INS_OPTS_SCALABLE_B); // PRFB , , [, .S, ] + //theEmitter->emitIns_R_R_R(INS_sve_prfd, EA_SCALABLE, REG_V0, REG_P0, REG_R0, + // INS_OPTS_SCALABLE_B); // PRFD , , [, .S, #3] + //theEmitter->emitIns_R_R_R(INS_sve_prfh, EA_SCALABLE, REG_V0, REG_P0, REG_R0, + // INS_OPTS_SCALABLE_B); // PRFH , , [, .S, #1] + //theEmitter->emitIns_R_R_R(INS_sve_prfw, EA_SCALABLE, REG_V0, REG_P0, REG_R0, + // INS_OPTS_SCALABLE_B); // PRFW , , [, .S, #2] + + //// IF_SVE_HY_3A_A 
+ //theEmitter->emitIns_R_R_R(INS_sve_prfb, EA_SCALABLE, REG_V0, REG_P0, REG_R0, + // INS_OPTS_SCALABLE_B); // PRFB , , [, .D, ] + //theEmitter->emitIns_R_R_R(INS_sve_prfd, EA_SCALABLE, REG_V0, REG_P0, REG_R0, + // INS_OPTS_SCALABLE_B); // PRFD , , [, .D, #3] + //theEmitter->emitIns_R_R_R(INS_sve_prfh, EA_SCALABLE, REG_V0, REG_P0, REG_R0, + // INS_OPTS_SCALABLE_B); // PRFH , , [, .D, #1] + //theEmitter->emitIns_R_R_R(INS_sve_prfw, EA_SCALABLE, REG_V0, REG_P0, REG_R0, + // INS_OPTS_SCALABLE_B); // PRFW , , [, .D, #2] + + //// IF_SVE_HY_3B + //theEmitter->emitIns_R_R_R(INS_sve_prfb, EA_SCALABLE, REG_V0, REG_P0, REG_R0, + // INS_OPTS_SCALABLE_B); // PRFB , , [, .D] + //theEmitter->emitIns_R_R_R(INS_sve_prfd, EA_SCALABLE, REG_V0, REG_P0, REG_R0, + // INS_OPTS_SCALABLE_B); // PRFD , , [, .D, LSL #3] + //theEmitter->emitIns_R_R_R(INS_sve_prfh, EA_SCALABLE, REG_V0, REG_P0, REG_R0, + // INS_OPTS_SCALABLE_B); // PRFH , , [, .D, LSL #1] + //theEmitter->emitIns_R_R_R(INS_sve_prfw, EA_SCALABLE, REG_V0, REG_P0, REG_R0, + // INS_OPTS_SCALABLE_B); // PRFW , , [, .D, LSL #2] + + //// IF_SVE_IB_3A + //theEmitter->emitIns_R_R_R(INS_sve_prfb, EA_SCALABLE, REG_V0, REG_P0, REG_R0, + // INS_OPTS_SCALABLE_B); // PRFB , , [, ] + //theEmitter->emitIns_R_R_R(INS_sve_prfd, EA_SCALABLE, REG_V0, REG_P0, REG_R0, + // INS_OPTS_SCALABLE_B); // PRFD , , [, , LSL #3] + //theEmitter->emitIns_R_R_R(INS_sve_prfh, EA_SCALABLE, REG_V0, REG_P0, REG_R0, + // INS_OPTS_SCALABLE_B); // PRFH , , [, , LSL #1] + //theEmitter->emitIns_R_R_R(INS_sve_prfw, EA_SCALABLE, REG_V0, REG_P0, REG_R0, + // INS_OPTS_SCALABLE_B); // PRFW , , [, , LSL #2] + + //// IF_SVE_HZ_2A_B + //theEmitter->emitIns_R_R_I(INS_sve_prfb, EA_SCALABLE, REG_V0, REG_P0, 5, + // INS_OPTS_SCALABLE_B); // PRFB , , [.D{, #}] + //theEmitter->emitIns_R_R_I(INS_sve_prfd, EA_SCALABLE, REG_V0, REG_P0, 5, + // INS_OPTS_SCALABLE_B); // PRFD , , [.D{, #}] + //theEmitter->emitIns_R_R_I(INS_sve_prfh, EA_SCALABLE, REG_V0, REG_P0, 5, + // INS_OPTS_SCALABLE_B); 
// PRFH , , [.D{, #}] + //theEmitter->emitIns_R_R_I(INS_sve_prfw, EA_SCALABLE, REG_V0, REG_P0, 5, + // INS_OPTS_SCALABLE_B); // PRFW , , [.D{, #}] + + //// IF_SVE_IA_2A + //theEmitter->emitIns_R_R_I(INS_sve_prfb, EA_SCALABLE, REG_P0, REG_R0, 5, + // INS_OPTS_SCALABLE_B); // PRFB , , [{, #, MUL VL}] + //theEmitter->emitIns_R_R_I(INS_sve_prfd, EA_SCALABLE, REG_P0, REG_R0, 5, + // INS_OPTS_SCALABLE_B); // PRFD , , [{, #, MUL VL}] + //theEmitter->emitIns_R_R_I(INS_sve_prfh, EA_SCALABLE, REG_P0, REG_R0, 5, + // INS_OPTS_SCALABLE_B); // PRFH , , [{, #, MUL VL}] + //theEmitter->emitIns_R_R_I(INS_sve_prfw, EA_SCALABLE, REG_P0, REG_R0, 5, + // INS_OPTS_SCALABLE_B); // PRFW , , [{, #, MUL VL}] + + //// IF_SVE_IC_3A + //theEmitter->emitIns_R_R_R_I(INS_sve_ld1rd, EA_SCALABLE, REG_P0, REG_V0, REG_R0, 5, + // INS_OPTS_SCALABLE_B); // LD1RD {.D }, /Z, [{, #}] + //theEmitter->emitIns_R_R_R_I(INS_sve_ld1rsw, EA_SCALABLE, REG_P0, REG_V0, REG_R0, 5, + // INS_OPTS_SCALABLE_B); // LD1RSW {.D }, /Z, [{, #}] + + //// IF_SVE_IC_3A_A + //theEmitter->emitIns_R_R_R_I(INS_sve_ld1rsh, EA_SCALABLE, REG_P0, REG_V0, REG_R0, 5, + // INS_OPTS_SCALABLE_B); // LD1RSH {.D }, /Z, [{, #}] + //theEmitter->emitIns_R_R_R_I(INS_sve_ld1rw, EA_SCALABLE, REG_P0, REG_V0, REG_R0, 5, + // INS_OPTS_SCALABLE_B); // LD1RW {.D }, /Z, [{, #}] + + //// IF_SVE_IC_3A_B + //theEmitter->emitIns_R_R_R_I(INS_sve_ld1rh, EA_SCALABLE, REG_P0, REG_V0, REG_R0, 5, + // INS_OPTS_SCALABLE_B); // LD1RH {.D }, /Z, [{, #}] + //theEmitter->emitIns_R_R_R_I(INS_sve_ld1rsb, EA_SCALABLE, REG_P0, REG_V0, REG_R0, 5, + // INS_OPTS_SCALABLE_B); // LD1RSB {.D }, /Z, [{, #}] + + //// IF_SVE_IC_3A_C + //theEmitter->emitIns_R_R_R_I(INS_sve_ld1rb, EA_SCALABLE, REG_P0, REG_V0, REG_R0, 5, + // INS_OPTS_SCALABLE_B); // LD1RB {.D }, /Z, [{, #}] } #endif // defined(TARGET_ARM64) && defined(DEBUG) diff --git a/src/coreclr/jit/emitarm64.cpp b/src/coreclr/jit/emitarm64.cpp index fa14ec0872804e..22c0759f068c89 100644 --- a/src/coreclr/jit/emitarm64.cpp +++ 
b/src/coreclr/jit/emitarm64.cpp @@ -2207,6 +2207,13 @@ void emitter::emitInsSanityCheck(instrDesc* id) case IF_SVE_IV_3A: // ...........iiiii ...gggnnnnnttttt -- SVE 64-bit gather load (vector plus immediate) case IF_SVE_JI_3A_A: // ...........iiiii ...gggnnnnnttttt -- SVE 32-bit scatter store (vector plus immediate) case IF_SVE_JL_3A: // ...........iiiii ...gggnnnnnttttt -- SVE 64-bit scatter store (vector plus immediate) + elemsize = id->idOpSize(); + assert(insOptsScalableWords(id->idInsOpt())); + assert(isScalableVectorSize(elemsize)); + assert(isVectorRegister(id->idReg1())); + assert(isLowPredicateRegister(id->idReg2())); + assert(isVectorRegister(id->idReg3())); + assert(isValidUimm5(emitGetInsSC(id))); break; case IF_SVE_HY_3A: // .........h.mmmmm ...gggnnnnn.oooo -- SVE 32-bit gather prefetch (scalar plus 32-bit scaled @@ -11594,18 +11601,49 @@ void emitter::emitIns_R_R_R_I(instruction ins, assert(insOptsScalableAtLeastHalf(opt)); assert(isVectorRegister(reg1)); assert(isPredicateRegister(reg2)); - assert(isGeneralRegister(reg3)); - assert(isValidSimm4(imm)); - fmt = IF_SVE_IJ_3A_D; + + if (isGeneralRegister(reg3)) + { + assert(isGeneralRegister(reg3)); + assert(isValidSimm4(imm)); + fmt = IF_SVE_IJ_3A_D; + } + else + { + assert(insOptsScalableWords(opt)); + assert(isVectorRegister(reg3)); + assert(isValidUimm5(imm)); + fmt = IF_SVE_HX_3A_B; + } break; case INS_sve_ld1b: assert(insOptsScalableStandard(opt)); assert(isVectorRegister(reg1)); - assert(isPredicateRegister(reg2)); - assert(isGeneralRegister(reg3)); - assert(isValidSimm4(imm)); - fmt = IF_SVE_IJ_3A_E; + assert(isLowPredicateRegister(reg2)); + + if (isGeneralRegister(reg3)) + { + assert(isValidSimm4(imm)); + fmt = IF_SVE_IJ_3A_E; + } + else + { + assert(insOptsScalableWords(opt)); + assert(isVectorRegister(reg3)); + assert(isValidUimm5(imm)); + fmt = IF_SVE_HX_3A_B; + } + break; + + case INS_sve_ldff1b: + case INS_sve_ldff1sb: + assert(insOptsScalableWords(opt)); + assert(isVectorRegister(reg1)); 
+ assert(isLowPredicateRegister(reg2)); + assert(isVectorRegister(reg3)); + assert(isValidUimm5(imm)); + fmt = IF_SVE_HX_3A_B; break; case INS_sve_ld1sh: @@ -18740,6 +18778,17 @@ void emitter::emitIns_Call(EmitCallType callType, return (code_t)(imm - 1) << 16; } +/***************************************************************************** + * + * Returns the encoding for the immediate value as 5-bits at bit locations '20-16'. + */ + +/*static*/ emitter::code_t emitter::insEncodeUimm5_20_to_16(ssize_t imm) +{ + assert(isValidUimm5(imm)); + return (code_t)imm << 16; +} + /***************************************************************************** * * Returns the encoding for the immediate value as 8-bits at bit locations '12-5'. @@ -22048,7 +22097,20 @@ BYTE* emitter::emitOutput_InstrSve(BYTE* dst, instrDesc* id) case IF_SVE_IV_3A: // ...........iiiii ...gggnnnnnttttt -- SVE 64-bit gather load (vector plus immediate) case IF_SVE_JI_3A_A: // ...........iiiii ...gggnnnnnttttt -- SVE 32-bit scatter store (vector plus immediate) case IF_SVE_JL_3A: // ...........iiiii ...gggnnnnnttttt -- SVE 64-bit scatter store (vector plus immediate) + imm = emitGetInsSC(id); code = emitInsCodeSve(ins, fmt); + code |= insEncodeReg_V_4_to_0(id->idReg1()); // ttttt + code |= insEncodeReg_P_12_to_10(id->idReg2()); // ggg + code |= insEncodeReg_V_9_to_5(id->idReg3()); // nnnnn + code |= insEncodeUimm5_20_to_16(imm); // iiiii + + if (id->idInsOpt() == INS_OPTS_SCALABLE_D) + { + // set bit '30' to make this instruction a double-word + // this is only special for these formats + code |= (1 << 30); + } + dst += emitOutput_Instr(dst, code); break; @@ -22499,6 +22561,21 @@ void emitter::emitDispSveImmMulVl(regNumber reg1, ssize_t imm) printf("]"); } +/***************************************************************************** + * + * Prints the encoding for format [.D{, #}] + */ +void emitter::emitDispSveImmIndex(regNumber reg1, insOpts opt, ssize_t imm) +{ + printf("["); + 
emitDispSveReg(reg1, opt, imm != 0); + if (imm != 0) + { + emitDispImm(imm, false); + } + printf("]"); +} + /***************************************************************************** * * Prints the encoding for the Extend Type encoding in loads/stores @@ -25482,33 +25559,69 @@ void emitter::emitDispInsHelp( emitDispSveReg(id->idReg4(), id->idInsOpt(), false); break; + // {.S }, /Z, [.S{, #}] + // {.D }, /Z, [.D{, #}] case IF_SVE_HX_3A_B: // ...........iiiii ...gggnnnnnttttt -- SVE 32-bit gather load (vector plus immediate) + // {.S }, /Z, [.S{, #}] + // {.D }, /Z, [.D{, #}] case IF_SVE_HX_3A_E: // ...........iiiii ...gggnnnnnttttt -- SVE 32-bit gather load (vector plus immediate) + // {.S }, /Z, [.S{, #}] + // {.D }, /Z, [.D{, #}] case IF_SVE_IV_3A: // ...........iiiii ...gggnnnnnttttt -- SVE 64-bit gather load (vector plus immediate) + // {.S }, , [.S{, #}] + // {.D }, , [.D{, #}] case IF_SVE_JI_3A_A: // ...........iiiii ...gggnnnnnttttt -- SVE 32-bit scatter store (vector plus immediate) + // {.S }, , [.S{, #}] + // {.D }, , [.D{, #}] case IF_SVE_JL_3A: // ...........iiiii ...gggnnnnnttttt -- SVE 64-bit scatter store (vector plus immediate) + imm = emitGetInsSC(id); + emitDispSveConsecutiveRegList(id->idReg1(), insGetSveReg1ListSize(id->idIns()), id->idInsOpt(), true); + emitDispPredicateReg(id->idReg2(), insGetPredicateType(fmt), id->idInsOpt(), true); + emitDispSveImmIndex(id->idReg3(), id->idInsOpt(), imm); break; + // , , [, .S, ] + // , , [, .S, #1] + // , , [, .S, #2] + // , , [, .S, #3] case IF_SVE_HY_3A: // .........h.mmmmm ...gggnnnnn.oooo -- SVE 32-bit gather prefetch (scalar plus 32-bit scaled // offsets) + // , , [, .D, ] + // , , [, .D, #1] + // , , [, .D, #2] + // , , [, .D, #3] case IF_SVE_HY_3A_A: // .........h.mmmmm ...gggnnnnn.oooo -- SVE 32-bit gather prefetch (scalar plus 32-bit // scaled offsets) break; + // , , [, .D] + // , , [, .D, LSL #1] + // , , [, .D, LSL #2] + // , , [, .D, LSL #3] case IF_SVE_HY_3B: // ...........mmmmm 
...gggnnnnn.oooo -- SVE 32-bit gather prefetch (scalar plus 32-bit scaled // offsets) + // , , [, ] + // , , [, , LSL #1] + // , , [, , LSL #2] + // , , [, , LSL #3] case IF_SVE_IB_3A: // ...........mmmmm ...gggnnnnn.oooo -- SVE contiguous prefetch (scalar plus scalar) break; + // , , [.D{, #}] case IF_SVE_HZ_2A_B: // ...........iiiii ...gggnnnnn.oooo -- SVE 32-bit gather prefetch (vector plus immediate) break; + // , , [{, #, MUL VL}] case IF_SVE_IA_2A: // ..........iiiiii ...gggnnnnn.oooo -- SVE contiguous prefetch (scalar plus immediate) break; + // {.D }, /Z, [{, #}] case IF_SVE_IC_3A: // ..........iiiiii ...gggnnnnnttttt -- SVE load and broadcast element + // {.D }, /Z, [{, #}] case IF_SVE_IC_3A_A: // ..........iiiiii ...gggnnnnnttttt -- SVE load and broadcast element + // {.D }, /Z, [{, #}] case IF_SVE_IC_3A_B: // ..........iiiiii ...gggnnnnnttttt -- SVE load and broadcast element + // {.D }, /Z, [{, #}] case IF_SVE_IC_3A_C: // ..........iiiiii ...gggnnnnnttttt -- SVE load and broadcast element break; diff --git a/src/coreclr/jit/emitarm64.h b/src/coreclr/jit/emitarm64.h index abc745e9421c53..a69f1938482a26 100644 --- a/src/coreclr/jit/emitarm64.h +++ b/src/coreclr/jit/emitarm64.h @@ -54,6 +54,7 @@ void emitDispSveExtendOpts(insOpts opt); void emitDispSveExtendOptsModN(insOpts opt, int n); void emitDispSveModAddr(instruction ins, regNumber reg1, regNumber reg2, insOpts opt, insFormat fmt); void emitDispSveImmMulVl(regNumber reg1, ssize_t imm); +void emitDispSveImmIndex(regNumber reg1, insOpts opt, ssize_t imm); void emitDispLSExtendOpts(insOpts opt); void emitDispReg(regNumber reg, emitAttr attr, bool addComma); void emitDispSveReg(regNumber reg, insOpts opt, bool addComma); @@ -621,6 +622,9 @@ static code_t insEncodeUimm7_20_to_14(ssize_t imm); // Returns the encoding for the immediate value as 4-bits starting from 1, at bit locations '19-16'. 
static code_t insEncodeUimm4From1_19_to_16(ssize_t imm); +// Returns the encoding for the immediate value as 5-bits at bit locations '20-16'. +static code_t insEncodeUimm5_20_to_16(ssize_t imm); + // Returns the encoding for the immediate value as 8-bits at bit locations '12-5'. static code_t insEncodeImm8_12_to_5(ssize_t imm); diff --git a/src/coreclr/jit/instrsarm64sve.h b/src/coreclr/jit/instrsarm64sve.h index 710928a35eaffe..27ea6aa381e189 100644 --- a/src/coreclr/jit/instrsarm64sve.h +++ b/src/coreclr/jit/instrsarm64sve.h @@ -265,7 +265,7 @@ INST6(ld1sb, "ld1sb", 0, IF_SV // LD1SB {.D }, /Z, [, .D, ] SVE_HW_4A 110001000h0mmmmm 000gggnnnnnttttt C400 0000 // LD1SB {.S }, /Z, [, .S, ] SVE_HW_4A_A 100001000h0mmmmm 000gggnnnnnttttt 8400 0000 // LD1SB {.D }, /Z, [, .D] SVE_HW_4B 11000100010mmmmm 100gggnnnnnttttt C440 8000 - // LD1SB {.D }, /Z, [.D{, #}] SVE_HX_3A_B 10000100001iiiii 100gggnnnnnttttt 8420 8000 + // LD1SB {.S }, /Z, [.S{, #}] SVE_HX_3A_B 10000100001iiiii 100gggnnnnnttttt 8420 8000 // LD1SB {.D }, /Z, [{, #, MUL VL}] SVE_IJ_3A_D 101001011000iiii 101gggnnnnnttttt A580 A000 // LD1SB {.D }, /Z, [, ] SVE_IK_4A_F 10100101100mmmmm 010gggnnnnnttttt A580 4000 @@ -275,7 +275,7 @@ INST6(ld1b, "ld1b", 0, IF_SV // LD1B {.D }, /Z, [, .D, ] SVE_HW_4A 110001000h0mmmmm 010gggnnnnnttttt C400 4000 // LD1B {.S }, /Z, [, .S, ] SVE_HW_4A_A 100001000h0mmmmm 010gggnnnnnttttt 8400 4000 // LD1B {.D }, /Z, [, .D] SVE_HW_4B 11000100010mmmmm 110gggnnnnnttttt C440 C000 - // LD1B {.D }, /Z, [.D{, #}] SVE_HX_3A_B 10000100001iiiii 110gggnnnnnttttt 8420 C000 + // LD1B {.S }, /Z, [.S{, #}] SVE_HX_3A_B 10000100001iiiii 110gggnnnnnttttt 8420 C000 // LD1B {.B }, /Z, [{, #, MUL VL}] SVE_IJ_3A_E 101001000000iiii 101gggnnnnnttttt A400 A000 // LD1B {.B }, /Z, [, ] SVE_IK_4A_H 10100100000mmmmm 010gggnnnnnttttt A400 4000 @@ -395,7 +395,7 @@ INST5(ldff1sb, "ldff1sb", 0, IF_SV // LDFF1SB {.D }, /Z, [, .D, ] SVE_HW_4A 110001000h0mmmmm 001gggnnnnnttttt C400 2000 // LDFF1SB {.S }, /Z, [, .S, ] 
SVE_HW_4A_A 100001000h0mmmmm 001gggnnnnnttttt 8400 2000 // LDFF1SB {.D }, /Z, [, .D] SVE_HW_4B 11000100010mmmmm 101gggnnnnnttttt C440 A000 - // LDFF1SB {.D }, /Z, [.D{, #}] SVE_HX_3A_B 10000100001iiiii 101gggnnnnnttttt 8420 A000 + // LDFF1SB {.S }, /Z, [.S{, #}] SVE_HX_3A_B 10000100001iiiii 101gggnnnnnttttt 8420 A000 // LDFF1SB {.D }, /Z, [{, }] SVE_IG_4A_D 10100101100mmmmm 011gggnnnnnttttt A580 6000 @@ -404,7 +404,7 @@ INST5(ldff1b, "ldff1b", 0, IF_SV // LDFF1B {.D }, /Z, [, .D, ] SVE_HW_4A 110001000h0mmmmm 011gggnnnnnttttt C400 6000 // LDFF1B {.S }, /Z, [, .S, ] SVE_HW_4A_A 100001000h0mmmmm 011gggnnnnnttttt 8400 6000 // LDFF1B {.D }, /Z, [, .D] SVE_HW_4B 11000100010mmmmm 111gggnnnnnttttt C440 E000 - // LDFF1B {.D }, /Z, [.D{, #}] SVE_HX_3A_B 10000100001iiiii 111gggnnnnnttttt 8420 E000 + // LDFF1B {.S }, /Z, [.S{, #}] SVE_HX_3A_B 10000100001iiiii 111gggnnnnnttttt 8420 E000 // LDFF1B {.B }, /Z, [{, }] SVE_IG_4A_E 10100100000mmmmm 011gggnnnnnttttt A400 6000 From 5b53feef4840d95f7d04b15295ffc6d052092ca9 Mon Sep 17 00:00:00 2001 From: TIHan Date: Mon, 12 Feb 2024 13:30:13 -0800 Subject: [PATCH 03/12] SVE_HX_3A_E working --- src/coreclr/jit/codegenarm64test.cpp | 38 ++++-- src/coreclr/jit/emitarm64.cpp | 170 ++++++++++++++++++++++++--- src/coreclr/jit/emitarm64.h | 23 ++++ 3 files changed, 199 insertions(+), 32 deletions(-) diff --git a/src/coreclr/jit/codegenarm64test.cpp b/src/coreclr/jit/codegenarm64test.cpp index d1011f1d117cee..d056fc41d18f8f 100644 --- a/src/coreclr/jit/codegenarm64test.cpp +++ b/src/coreclr/jit/codegenarm64test.cpp @@ -7434,19 +7434,31 @@ void CodeGen::genArm64EmitterUnitTestsSve() theEmitter->emitIns_R_R_R_I(INS_sve_ldff1sb, EA_SCALABLE, REG_V2, REG_P6, REG_V0, 31, INS_OPTS_SCALABLE_D); // LDFF1SB {.D }, /Z, [.D{, #}] - //// IF_SVE_HX_3A_E - //theEmitter->emitIns_R_R_R_I(INS_sve_ld1h, EA_SCALABLE, REG_V0, REG_P0, REG_V0, 5, - // INS_OPTS_SCALABLE_B); // LD1H {.D }, /Z, [.D{, #}] - //theEmitter->emitIns_R_R_R_I(INS_sve_ld1sh, EA_SCALABLE, 
REG_V0, REG_P0, REG_V0, 5, - // INS_OPTS_SCALABLE_B); // LD1SH {.D }, /Z, [.D{, #}] - //theEmitter->emitIns_R_R_R_I(INS_sve_ld1w, EA_SCALABLE, REG_V0, REG_P0, REG_V0, 5, - // INS_OPTS_SCALABLE_B); // LD1W {.D }, /Z, [.D{, #}] - //theEmitter->emitIns_R_R_R_I(INS_sve_ldff1h, EA_SCALABLE, REG_V0, REG_P0, REG_V0, 5, - // INS_OPTS_SCALABLE_B); // LDFF1H {.D }, /Z, [.D{, #}] - //theEmitter->emitIns_R_R_R_I(INS_sve_ldff1sh, EA_SCALABLE, REG_V0, REG_P0, REG_V0, 5, - // INS_OPTS_SCALABLE_B); // LDFF1SH {.D }, /Z, [.D{, #}] - //theEmitter->emitIns_R_R_R_I(INS_sve_ldff1w, EA_SCALABLE, REG_V0, REG_P0, REG_V0, 5, - // INS_OPTS_SCALABLE_B); // LDFF1W {.D }, /Z, [.D{, #}] + // IF_SVE_HX_3A_E + theEmitter->emitIns_R_R_R_I(INS_sve_ld1h, EA_SCALABLE, REG_V1, REG_P0, REG_V2, 0, + INS_OPTS_SCALABLE_S); // LD1H {.S }, /Z, [.S{, #}] + theEmitter->emitIns_R_R_R_I(INS_sve_ld1sh, EA_SCALABLE, REG_V2, REG_P4, REG_V3, 2, + INS_OPTS_SCALABLE_S); // LD1SH {.S }, /Z, [.S{, #}] + theEmitter->emitIns_R_R_R_I(INS_sve_ld1w, EA_SCALABLE, REG_V1, REG_P2, REG_V9, 124, + INS_OPTS_SCALABLE_S); // LD1W {.S }, /Z, [.S{, #}] + theEmitter->emitIns_R_R_R_I(INS_sve_ldff1h, EA_SCALABLE, REG_V4, REG_P7, REG_V3, 6, + INS_OPTS_SCALABLE_S); // LDFF1H {.S }, /Z, [.S{, #}] + theEmitter->emitIns_R_R_R_I(INS_sve_ldff1sh, EA_SCALABLE, REG_V3, REG_P5, REG_V4, 62, + INS_OPTS_SCALABLE_S); // LDFF1SH {.S }, /Z, [.S{, #}] + theEmitter->emitIns_R_R_R_I(INS_sve_ldff1w, EA_SCALABLE, REG_V2, REG_P1, REG_V3, 124, + INS_OPTS_SCALABLE_S); // LDFF1W {.S }, /Z, [.S{, #}] + theEmitter->emitIns_R_R_R_I(INS_sve_ld1h, EA_SCALABLE, REG_V1, REG_P0, REG_V2, 0, + INS_OPTS_SCALABLE_D); // LD1H {.D }, /Z, [.D{, #}] + theEmitter->emitIns_R_R_R_I(INS_sve_ld1sh, EA_SCALABLE, REG_V2, REG_P4, REG_V3, 2, + INS_OPTS_SCALABLE_D); // LD1SH {.D }, /Z, [.D{, #}] + theEmitter->emitIns_R_R_R_I(INS_sve_ld1w, EA_SCALABLE, REG_V1, REG_P2, REG_V9, 124, + INS_OPTS_SCALABLE_D); // LD1W {.D }, /Z, [.D{, #}] + theEmitter->emitIns_R_R_R_I(INS_sve_ldff1h, 
EA_SCALABLE, REG_V4, REG_P7, REG_V3, 6, + INS_OPTS_SCALABLE_D); // LDFF1H {.D }, /Z, [.D{, #}] + theEmitter->emitIns_R_R_R_I(INS_sve_ldff1sh, EA_SCALABLE, REG_V3, REG_P5, REG_V4, 62, + INS_OPTS_SCALABLE_D); // LDFF1SH {.D }, /Z, [.D{, #}] + theEmitter->emitIns_R_R_R_I(INS_sve_ldff1w, EA_SCALABLE, REG_V2, REG_P1, REG_V3, 124, + INS_OPTS_SCALABLE_D); // LDFF1W {.D }, /Z, [.D{, #}] //// IF_SVE_IV_3A //theEmitter->emitIns_R_R_R_I(INS_sve_ld1d, EA_SCALABLE, REG_V0, REG_P0, REG_V0, 5, diff --git a/src/coreclr/jit/emitarm64.cpp b/src/coreclr/jit/emitarm64.cpp index 22c0759f068c89..65a24ca8c343e3 100644 --- a/src/coreclr/jit/emitarm64.cpp +++ b/src/coreclr/jit/emitarm64.cpp @@ -2203,6 +2203,15 @@ void emitter::emitInsSanityCheck(instrDesc* id) break; case IF_SVE_HX_3A_B: // ...........iiiii ...gggnnnnnttttt -- SVE 32-bit gather load (vector plus immediate) + elemsize = id->idOpSize(); + assert(insOptsScalableWords(id->idInsOpt())); + assert(isScalableVectorSize(elemsize)); + assert(isVectorRegister(id->idReg1())); + assert(isLowPredicateRegister(id->idReg2())); + assert(isVectorRegister(id->idReg3())); + assert(isValidUimm5(emitGetInsSC(id))); + break; + case IF_SVE_HX_3A_E: // ...........iiiii ...gggnnnnnttttt -- SVE 32-bit gather load (vector plus immediate) case IF_SVE_IV_3A: // ...........iiiii ...gggnnnnnttttt -- SVE 64-bit gather load (vector plus immediate) case IF_SVE_JI_3A_A: // ...........iiiii ...gggnnnnnttttt -- SVE 32-bit scatter store (vector plus immediate) @@ -2213,7 +2222,6 @@ void emitter::emitInsSanityCheck(instrDesc* id) assert(isVectorRegister(id->idReg1())); assert(isLowPredicateRegister(id->idReg2())); assert(isVectorRegister(id->idReg3())); - assert(isValidUimm5(emitGetInsSC(id))); break; case IF_SVE_HY_3A: // .........h.mmmmm ...gggnnnnn.oooo -- SVE 32-bit gather prefetch (scalar plus 32-bit scaled @@ -11583,9 +11591,18 @@ void emitter::emitIns_R_R_R_I(instruction ins, assert(insOptsScalableWordsOrQuadwords(opt)); assert(isVectorRegister(reg1)); 
assert(isPredicateRegister(reg2)); - assert(isGeneralRegister(reg3)); - assert(isValidSimm4(imm)); - fmt = IF_SVE_IH_3A_F; + + if (isGeneralRegister(reg3)) + { + assert(isValidSimm4(imm)); + fmt = IF_SVE_IH_3A_F; + } + else + { + assert(isVectorRegister(reg3)); + assert(isValidUimm5_MultipleOf4(imm)); + fmt = IF_SVE_HX_3A_E; + } break; case INS_sve_ld1sw: @@ -11650,18 +11667,55 @@ void emitter::emitIns_R_R_R_I(instruction ins, assert(insOptsScalableWords(opt)); assert(isVectorRegister(reg1)); assert(isPredicateRegister(reg2)); - assert(isGeneralRegister(reg3)); - assert(isValidSimm4(imm)); - fmt = IF_SVE_IJ_3A_F; + + if (isGeneralRegister(reg3)) + { + assert(isValidSimm4(imm)); + fmt = IF_SVE_IJ_3A_F; + } + else + { + assert(isVectorRegister(reg3)); + assert(isValidUimm5_MultipleOf2(imm)); + fmt = IF_SVE_HX_3A_E; + } break; case INS_sve_ld1h: assert(insOptsScalableAtLeastHalf(opt)); assert(isVectorRegister(reg1)); assert(isPredicateRegister(reg2)); - assert(isGeneralRegister(reg3)); - assert(isValidSimm4(imm)); - fmt = IF_SVE_IJ_3A_G; + + if (isGeneralRegister(reg3)) + { + assert(isValidSimm4(imm)); + fmt = IF_SVE_IJ_3A_G; + } + else + { + assert(isVectorRegister(reg3)); + assert(isValidUimm5_MultipleOf2(imm)); + fmt = IF_SVE_HX_3A_E; + } + break; + + case INS_sve_ldff1h: + case INS_sve_ldff1sh: + assert(insOptsScalableAtLeastHalf(opt)); + assert(isVectorRegister(reg1)); + assert(isPredicateRegister(reg2)); + assert(isVectorRegister(reg3)); + assert(isValidUimm5_MultipleOf2(imm)); + fmt = IF_SVE_HX_3A_E; + break; + + case INS_sve_ldff1w: + assert(insOptsScalableAtLeastHalf(opt)); + assert(isVectorRegister(reg1)); + assert(isPredicateRegister(reg2)); + assert(isVectorRegister(reg3)); + assert(isValidUimm5_MultipleOf4(imm)); + fmt = IF_SVE_HX_3A_E; break; case INS_sve_ldnf1sw: @@ -17017,6 +17071,44 @@ void emitter::emitIns_Call(EmitCallType callType, return 0; } +/***************************************************************************** + * + * Returns the encoding to
select the 4/8 byte elemsize for an Arm64 Sve vector instruction at bit location '30'. + * This only works on select formats. + */ + +/*static*/ emitter::code_t emitter::insEncodeSveElemsize_30(insFormat fmt, emitAttr size) +{ + switch (fmt) + { + case IF_SVE_HX_3A_B: + case IF_SVE_HX_3A_E: + case IF_SVE_IV_3A: + case IF_SVE_JI_3A_A: + case IF_SVE_JL_3A: + switch (size) + { + case EA_4BYTE: + return 0; + + case EA_8BYTE: + return (1 << 30); + + default: + break; + } + + assert(!"Invalid size for vector register"); + return 0; + + default: + break; + } + + assert(!"Unexpected instruction format"); + return 0; +} + /***************************************************************************** * * Returns the encoding to select the elemsize for an Arm64 SVE vector instruction plus an immediate. @@ -18686,6 +18778,29 @@ void emitter::emitIns_Call(EmitCallType callType, return insEncodeSimm4_19_to_16(imm / 32); } +/***************************************************************************** + * + * Returns the encoding for the immediate value that is a multiple of 2 as 5-bits at bit locations '20-16'. + */ + +/*static*/ emitter::code_t emitter::insEncodeUimm5_MultipleOf2_20_to_16(ssize_t imm) +{ + assert(isValidUimm5_MultipleOf2(imm)); + return insEncodeUimm5_20_to_16(imm / 2); +} + +/***************************************************************************** + * + * Returns the encoding for the immediate value that is a multiple of 4 as 5-bits at bit locations '20-16'. + */ + +/*static*/ emitter::code_t emitter::insEncodeUimm5_MultipleOf4_20_to_16(ssize_t imm) +{ + assert(isValidUimm5_MultipleOf4(imm)); + return insEncodeUimm5_20_to_16(imm / 4); +} + + /***************************************************************************** * * Returns the encoding for the immediate value as 5-bits at bit locations '20-16'.
@@ -22093,22 +22208,37 @@ BYTE* emitter::emitOutput_InstrSve(BYTE* dst, instrDesc* id) break; case IF_SVE_HX_3A_B: // ...........iiiii ...gggnnnnnttttt -- SVE 32-bit gather load (vector plus immediate) - case IF_SVE_HX_3A_E: // ...........iiiii ...gggnnnnnttttt -- SVE 32-bit gather load (vector plus immediate) - case IF_SVE_IV_3A: // ...........iiiii ...gggnnnnnttttt -- SVE 64-bit gather load (vector plus immediate) - case IF_SVE_JI_3A_A: // ...........iiiii ...gggnnnnnttttt -- SVE 32-bit scatter store (vector plus immediate) - case IF_SVE_JL_3A: // ...........iiiii ...gggnnnnnttttt -- SVE 64-bit scatter store (vector plus immediate) imm = emitGetInsSC(id); code = emitInsCodeSve(ins, fmt); code |= insEncodeReg_V_4_to_0(id->idReg1()); // ttttt code |= insEncodeReg_P_12_to_10(id->idReg2()); // ggg code |= insEncodeReg_V_9_to_5(id->idReg3()); // nnnnn code |= insEncodeUimm5_20_to_16(imm); // iiiii + code |= insEncodeSveElemsize_30(fmt, optGetSveElemsize(id->idInsOpt())); + dst += emitOutput_Instr(dst, code); + break; - if (id->idInsOpt() == INS_OPTS_SCALABLE_D) + case IF_SVE_HX_3A_E: // ...........iiiii ...gggnnnnnttttt -- SVE 32-bit gather load (vector plus immediate) + case IF_SVE_IV_3A: // ...........iiiii ...gggnnnnnttttt -- SVE 64-bit gather load (vector plus immediate) + case IF_SVE_JI_3A_A: // ...........iiiii ...gggnnnnnttttt -- SVE 32-bit scatter store (vector plus immediate) + case IF_SVE_JL_3A: // ...........iiiii ...gggnnnnnttttt -- SVE 64-bit scatter store (vector plus immediate) + imm = emitGetInsSC(id); + code = emitInsCodeSve(ins, fmt); + code |= insEncodeReg_V_4_to_0(id->idReg1()); // ttttt + code |= insEncodeReg_P_12_to_10(id->idReg2()); // ggg + code |= insEncodeReg_V_9_to_5(id->idReg3()); // nnnnn + code |= insEncodeSveElemsize_30(fmt, optGetSveElemsize(id->idInsOpt())); + + switch (ins) { - // set bit '30' to make this instruction a double-word - // this is only special for these formats - code |= (1 << 30); + case INS_sve_ld1w: + case
INS_sve_ldff1w: + code |= insEncodeUimm5_MultipleOf4_20_to_16(imm); // iiiii + break; + + default: + code |= insEncodeUimm5_MultipleOf2_20_to_16(imm); // iiiii + break; } dst += emitOutput_Instr(dst, code); @@ -22571,7 +22701,10 @@ void emitter::emitDispSveImmIndex(regNumber reg1, insOpts opt, ssize_t imm) emitDispSveReg(reg1, opt, imm != 0); if (imm != 0) { - emitDispImm(imm, false); + // This does not have to be printed as hex. + // We only do it because the capstone disassembly displays this immediate as hex. + // We could not modify capstone without affecting other cases. + emitDispImm(imm, false, /* alwaysHex */ (imm > 8)); } printf("]"); } diff --git a/src/coreclr/jit/emitarm64.h b/src/coreclr/jit/emitarm64.h index a69f1938482a26..428b08e436ed94 100644 --- a/src/coreclr/jit/emitarm64.h +++ b/src/coreclr/jit/emitarm64.h @@ -534,6 +534,10 @@ static code_t insEncodeSveElemsize_sz_21(emitAttr size); // This specifically encodes the field 'tszh:tszl' at bit locations '22:20-19'. static code_t insEncodeSveElemsize_tszh_22_tszl_20_to_19(emitAttr size); +// Returns the encoding to select the 4/8 byte elemsize for an Arm64 Sve vector instruction at bit location '30'. +// This only works on select formats. +static code_t insEncodeSveElemsize_30(insFormat fmt, emitAttr size); + // Returns the encoding to select the constant values 90 or 270 for an Arm64 SVE vector instruction // This specifically encode the field 'rot' at bit location '16'. static code_t insEncodeSveImm90_or_270_rot(ssize_t imm); @@ -598,6 +602,12 @@ static code_t insEncodeSimm4_MultipleOf16_19_to_16(ssize_t imm); // Returns the encoding for the immediate value that is a multiple of 32 as 4-bits at bit locations '19-16'. static code_t insEncodeSimm4_MultipleOf32_19_to_16(ssize_t imm); +// Returns the encoding for the immediate value that is a multiple of 2 as 5-bits at bit locations '20-16'.
+static code_t insEncodeUimm5_MultipleOf2_20_to_16(ssize_t imm); + +// Returns the encoding for the immediate value that is a multiple of 4 as 5-bits at bit locations '20-16'. +static code_t insEncodeUimm5_MultipleOf4_20_to_16(ssize_t imm); + // Returns the encoding for the immediate value as 5-bits at bit locations '20-16'. static code_t insEncodeSimm5_20_to_16(ssize_t imm); @@ -696,6 +706,19 @@ static bool isValidSimm4_MultipleOf32(ssize_t value) return (-256 <= value) && (value <= 224) && (value % 32 == 0); }; +// Returns true if 'value' is a legal unsigned multiple of 2 immediate 5 bit encoding (such as for LD1H). +static bool isValidUimm5_MultipleOf2(ssize_t value) +{ + return (0 <= value) && (value <= 62) && (value % 2 == 0); +}; + +// Returns true if 'value' is a legal unsigned multiple of 4 immediate 5 bit encoding (such as for LD1W). +static bool isValidUimm5_MultipleOf4(ssize_t value) +{ + return (0 <= value) && (value <= 124) && (value % 4 == 0); +}; + + // Returns true if 'value' is a legal immediate 1 bit encoding (such as for PEXT).
static bool isValidImm1(ssize_t value) { From e9a27d3a58e9551ee8ede9de10f554e9628192c9 Mon Sep 17 00:00:00 2001 From: TIHan Date: Mon, 12 Feb 2024 14:00:33 -0800 Subject: [PATCH 04/12] SVE_IV_3A working --- src/coreclr/jit/codegenarm64test.cpp | 18 ++-- src/coreclr/jit/emitarm64.cpp | 151 +++++++++++++++++++++------ src/coreclr/jit/emitarm64.h | 10 +- 3 files changed, 137 insertions(+), 42 deletions(-) diff --git a/src/coreclr/jit/codegenarm64test.cpp b/src/coreclr/jit/codegenarm64test.cpp index d056fc41d18f8f..4ac16d94755683 100644 --- a/src/coreclr/jit/codegenarm64test.cpp +++ b/src/coreclr/jit/codegenarm64test.cpp @@ -7460,15 +7460,15 @@ void CodeGen::genArm64EmitterUnitTestsSve() theEmitter->emitIns_R_R_R_I(INS_sve_ldff1w, EA_SCALABLE, REG_V2, REG_P1, REG_V3, 124, INS_OPTS_SCALABLE_D); // LDFF1W {.D }, /Z, [.D{, #}] - //// IF_SVE_IV_3A - //theEmitter->emitIns_R_R_R_I(INS_sve_ld1d, EA_SCALABLE, REG_V0, REG_P0, REG_V0, 5, - // INS_OPTS_SCALABLE_B); // LD1D {.D }, /Z, [.D{, #}] - //theEmitter->emitIns_R_R_R_I(INS_sve_ld1sw, EA_SCALABLE, REG_V0, REG_P0, REG_V0, 5, - // INS_OPTS_SCALABLE_B); // LD1SW {.D }, /Z, [.D{, #}] - //theEmitter->emitIns_R_R_R_I(INS_sve_ldff1d, EA_SCALABLE, REG_V0, REG_P0, REG_V0, 5, - // INS_OPTS_SCALABLE_B); // LDFF1D {.D }, /Z, [.D{, #}] - //theEmitter->emitIns_R_R_R_I(INS_sve_ldff1sw, EA_SCALABLE, REG_V0, REG_P0, REG_V0, 5, - // INS_OPTS_SCALABLE_B); // LDFF1SW {.D }, /Z, [.D{, #}] + // IF_SVE_IV_3A + theEmitter->emitIns_R_R_R_I(INS_sve_ld1d, EA_SCALABLE, REG_V1, REG_P2, REG_V3, 0, + INS_OPTS_SCALABLE_D); // LD1D {.D }, /Z, [.D{, #}] + theEmitter->emitIns_R_R_R_I(INS_sve_ld1sw, EA_SCALABLE, REG_V6, REG_P5, REG_V4, 0, + INS_OPTS_SCALABLE_D); // LD1SW {.D }, /Z, [.D{, #}] + theEmitter->emitIns_R_R_R_I(INS_sve_ldff1d, EA_SCALABLE, REG_V7, REG_P3, REG_V1, 248, + INS_OPTS_SCALABLE_D); // LDFF1D {.D }, /Z, [.D{, #}] + theEmitter->emitIns_R_R_R_I(INS_sve_ldff1sw, EA_SCALABLE, REG_V2, REG_P0, REG_V4, 124, + INS_OPTS_SCALABLE_D); // LDFF1SW {.D }, 
/Z, [.D{, #}] //// IF_SVE_JI_3A_A //theEmitter->emitIns_R_R_R_I(INS_sve_st1b, EA_SCALABLE, REG_V0, REG_P0, REG_V0, 5, diff --git a/src/coreclr/jit/emitarm64.cpp b/src/coreclr/jit/emitarm64.cpp index 65a24ca8c343e3..db1600b05d5ae1 100644 --- a/src/coreclr/jit/emitarm64.cpp +++ b/src/coreclr/jit/emitarm64.cpp @@ -2213,7 +2213,23 @@ void emitter::emitInsSanityCheck(instrDesc* id) break; case IF_SVE_HX_3A_E: // ...........iiiii ...gggnnnnnttttt -- SVE 32-bit gather load (vector plus immediate) + elemsize = id->idOpSize(); + assert(insOptsScalableWords(id->idInsOpt())); + assert(isScalableVectorSize(elemsize)); + assert(isVectorRegister(id->idReg1())); + assert(isLowPredicateRegister(id->idReg2())); + assert(isVectorRegister(id->idReg3())); + break; + case IF_SVE_IV_3A: // ...........iiiii ...gggnnnnnttttt -- SVE 64-bit gather load (vector plus immediate) + elemsize = id->idOpSize(); + assert(id->idInsOpt() == INS_OPTS_SCALABLE_D); + assert(isScalableVectorSize(elemsize)); + assert(isVectorRegister(id->idReg1())); + assert(isLowPredicateRegister(id->idReg2())); + assert(isVectorRegister(id->idReg3())); + break; + case IF_SVE_JI_3A_A: // ...........iiiii ...gggnnnnnttttt -- SVE 32-bit scatter store (vector plus immediate) case IF_SVE_JL_3A: // ...........iiiii ...gggnnnnnttttt -- SVE 64-bit scatter store (vector plus immediate) elemsize = id->idOpSize(); @@ -2222,6 +2238,7 @@ void emitter::emitInsSanityCheck(instrDesc* id) assert(isVectorRegister(id->idReg1())); assert(isLowPredicateRegister(id->idReg2())); assert(isVectorRegister(id->idReg3())); + assert(isValidUimm5_MultipleOf8(emitGetInsSC(id))); break; case IF_SVE_HY_3A: // .........h.mmmmm ...gggnnnnn.oooo -- SVE 32-bit gather prefetch (scalar plus 32-bit scaled @@ -11573,24 +11590,43 @@ void emitter::emitIns_R_R_R_I(instruction ins, case INS_sve_ld1d: assert(insOptsScalable(opt)); assert(isVectorRegister(reg1)); - assert(isPredicateRegister(reg2)); - assert(isGeneralRegister(reg3)); - assert(isValidSimm4(imm)); - 
if (opt == INS_OPTS_SCALABLE_Q) + assert(isLowPredicateRegister(reg2)); + + if (isGeneralRegister(reg3)) { - fmt = IF_SVE_IH_3A_A; + assert(isValidSimm4(imm)); + if (opt == INS_OPTS_SCALABLE_Q) + { + fmt = IF_SVE_IH_3A_A; + } + else + { + assert(opt == INS_OPTS_SCALABLE_D); + fmt = IF_SVE_IH_3A; + } } else { assert(opt == INS_OPTS_SCALABLE_D); - fmt = IF_SVE_IH_3A; + assert(isVectorRegister(reg3)); + assert(isValidUimm5_MultipleOf8(imm)); + fmt = IF_SVE_IV_3A; } break; + case INS_sve_ldff1d: + assert(opt == INS_OPTS_SCALABLE_D); + assert(isVectorRegister(reg1)); + assert(isLowPredicateRegister(reg2)); + assert(isVectorRegister(reg3)); + assert(isValidUimm5_MultipleOf8(imm)); + fmt = IF_SVE_IV_3A; + break; + case INS_sve_ld1w: assert(insOptsScalableWordsOrQuadwords(opt)); assert(isVectorRegister(reg1)); - assert(isPredicateRegister(reg2)); + assert(isLowPredicateRegister(reg2)); if (isGeneralRegister(reg3)) { @@ -11608,16 +11644,34 @@ void emitter::emitIns_R_R_R_I(instruction ins, case INS_sve_ld1sw: assert(opt == INS_OPTS_SCALABLE_D); assert(isVectorRegister(reg1)); - assert(isPredicateRegister(reg2)); - assert(isGeneralRegister(reg3)); - assert(isValidSimm4(imm)); - fmt = IF_SVE_IJ_3A; + assert(isLowPredicateRegister(reg2)); + + if (isGeneralRegister(reg3)) + { + assert(isValidSimm4(imm)); + fmt = IF_SVE_IJ_3A; + } + else + { + assert(isVectorRegister(reg3)); + assert(isValidUimm5_MultipleOf4(imm)); + fmt = IF_SVE_IV_3A; + } + break; + + case INS_sve_ldff1sw: + assert(opt == INS_OPTS_SCALABLE_D); + assert(isVectorRegister(reg1)); + assert(isLowPredicateRegister(reg2)); + assert(isVectorRegister(reg3)); + assert(isValidUimm5_MultipleOf4(imm)); + fmt = IF_SVE_IV_3A; break; case INS_sve_ld1sb: assert(insOptsScalableAtLeastHalf(opt)); assert(isVectorRegister(reg1)); - assert(isPredicateRegister(reg2)); + assert(isLowPredicateRegister(reg2)); if (isGeneralRegister(reg3)) { @@ -11666,7 +11720,7 @@ void emitter::emitIns_R_R_R_I(instruction ins, case INS_sve_ld1sh: 
assert(insOptsScalableWords(opt)); assert(isVectorRegister(reg1)); - assert(isPredicateRegister(reg2)); + assert(isLowPredicateRegister(reg2)); if (isGeneralRegister(reg3)) { @@ -11684,7 +11738,7 @@ void emitter::emitIns_R_R_R_I(instruction ins, case INS_sve_ld1h: assert(insOptsScalableAtLeastHalf(opt)); assert(isVectorRegister(reg1)); - assert(isPredicateRegister(reg2)); + assert(isLowPredicateRegister(reg2)); if (isGeneralRegister(reg3)) { @@ -11703,15 +11757,16 @@ void emitter::emitIns_R_R_R_I(instruction ins, case INS_sve_ldff1sh: assert(insOptsScalableAtLeastHalf(opt)); assert(isVectorRegister(reg1)); - assert(isPredicateRegister(reg2)); + assert(isLowPredicateRegister(reg2)); assert(isVectorRegister(reg3)); assert(isValidUimm5_MultipleOf2(imm)); fmt = IF_SVE_HX_3A_E; + break; case INS_sve_ldff1w: assert(insOptsScalableAtLeastHalf(opt)); assert(isVectorRegister(reg1)); - assert(isPredicateRegister(reg2)); + assert(isLowPredicateRegister(reg2)); assert(isVectorRegister(reg3)); assert(isValidUimm5_MultipleOf4(imm)); fmt = IF_SVE_HX_3A_E; @@ -11721,7 +11776,7 @@ void emitter::emitIns_R_R_R_I(instruction ins, case INS_sve_ldnf1d: assert(opt == INS_OPTS_SCALABLE_D); assert(isVectorRegister(reg1)); - assert(isPredicateRegister(reg2)); + assert(isLowPredicateRegister(reg2)); assert(isGeneralRegister(reg3)); assert(isValidSimm4(imm)); fmt = IF_SVE_IL_3A; @@ -11731,7 +11786,7 @@ void emitter::emitIns_R_R_R_I(instruction ins, case INS_sve_ldnf1w: assert(insOptsScalableWords(opt)); assert(isVectorRegister(reg1)); - assert(isPredicateRegister(reg2)); + assert(isLowPredicateRegister(reg2)); assert(isGeneralRegister(reg3)); assert(isValidSimm4(imm)); fmt = IF_SVE_IL_3A_A; @@ -11741,7 +11796,7 @@ void emitter::emitIns_R_R_R_I(instruction ins, case INS_sve_ldnf1sb: assert(insOptsScalableAtLeastHalf(opt)); assert(isVectorRegister(reg1)); - assert(isPredicateRegister(reg2)); + assert(isLowPredicateRegister(reg2)); assert(isGeneralRegister(reg3)); assert(isValidSimm4(imm)); fmt 
= IF_SVE_IL_3A_B; @@ -11762,7 +11817,7 @@ void emitter::emitIns_R_R_R_I(instruction ins, case INS_sve_ldnt1d: assert(insOptsScalableStandard(opt)); assert(isVectorRegister(reg1)); - assert(isPredicateRegister(reg2)); + assert(isLowPredicateRegister(reg2)); assert(isGeneralRegister(reg3)); assert(isValidSimm4(imm)); @@ -11804,7 +11859,7 @@ void emitter::emitIns_R_R_R_I(instruction ins, case INS_sve_ld1rod: assert(insOptsScalableStandard(opt)); assert(isVectorRegister(reg1)); - assert(isPredicateRegister(reg2)); + assert(isLowPredicateRegister(reg2)); assert(isGeneralRegister(reg3)); #ifdef DEBUG @@ -11865,7 +11920,7 @@ void emitter::emitIns_R_R_R_I(instruction ins, case INS_sve_ld4q: assert(opt == INS_OPTS_SCALABLE_Q); assert(isVectorRegister(reg1)); - assert(isPredicateRegister(reg2)); + assert(isLowPredicateRegister(reg2)); assert(isGeneralRegister(reg3)); #ifdef DEBUG @@ -11906,7 +11961,7 @@ void emitter::emitIns_R_R_R_I(instruction ins, case INS_sve_ld4d: assert(insOptsScalableStandard(opt)); assert(isVectorRegister(reg1)); - assert(isPredicateRegister(reg2)); + assert(isLowPredicateRegister(reg2)); assert(isGeneralRegister(reg3)); #ifdef DEBUG @@ -11978,7 +12033,7 @@ void emitter::emitIns_R_R_R_I(instruction ins, case INS_sve_st4q: assert(opt == INS_OPTS_SCALABLE_Q); assert(isVectorRegister(reg1)); - assert(isPredicateRegister(reg2)); + assert(isLowPredicateRegister(reg2)); assert(isGeneralRegister(reg3)); #ifdef DEBUG @@ -12011,7 +12066,7 @@ void emitter::emitIns_R_R_R_I(instruction ins, case INS_sve_stnt1d: assert(insOptsScalableStandard(opt)); assert(isVectorRegister(reg1)); - assert(isPredicateRegister(reg2)); + assert(isLowPredicateRegister(reg2)); assert(isGeneralRegister(reg3)); assert(isValidSimm4(imm)); @@ -12046,7 +12101,7 @@ void emitter::emitIns_R_R_R_I(instruction ins, case INS_sve_st1w: case INS_sve_st1d: assert(isVectorRegister(reg1)); - assert(isPredicateRegister(reg2)); + assert(isLowPredicateRegister(reg2)); assert(isGeneralRegister(reg3)); 
assert(isValidSimm4(imm)); @@ -12091,7 +12146,7 @@ void emitter::emitIns_R_R_R_I(instruction ins, case INS_sve_st4d: assert(insOptsScalableStandard(opt)); assert(isVectorRegister(reg1)); - assert(isPredicateRegister(reg2)); + assert(isLowPredicateRegister(reg2)); assert(isGeneralRegister(reg3)); #ifdef DEBUG @@ -12161,7 +12216,7 @@ void emitter::emitIns_R_R_R_I(instruction ins, case INS_sve_st1b: case INS_sve_st1h: assert(isVectorRegister(reg1)); - assert(isPredicateRegister(reg2)); + assert(isLowPredicateRegister(reg2)); assert(isGeneralRegister(reg3)); assert(isValidSimm4(imm)); // st1h is reserved for scalable B @@ -18799,6 +18854,16 @@ void emitter::emitIns_Call(EmitCallType callType, return insEncodeUimm5_20_to_16(imm / 4); } +/***************************************************************************** + * + * // Returns the encoding for the immediate value that is a multiple of 8 as 5-bits at bit locations '20-16'. + */ + +/*static*/ emitter::code_t emitter::insEncodeUimm5_MultipleOf8_20_to_16(ssize_t imm) +{ + assert(isValidUimm5_MultipleOf8(imm)); + return insEncodeUimm5_20_to_16(imm / 8); +} /***************************************************************************** * @@ -22218,6 +22283,28 @@ BYTE* emitter::emitOutput_InstrSve(BYTE* dst, instrDesc* id) break; case IF_SVE_HX_3A_E: // ...........iiiii ...gggnnnnnttttt -- SVE 32-bit gather load (vector plus immediate) + imm = emitGetInsSC(id); + code = emitInsCodeSve(ins, fmt); + code |= insEncodeReg_V_4_to_0(id->idReg1()); // ttttt + code |= insEncodeReg_P_12_to_10(id->idReg2()); // ggg + code |= insEncodeReg_V_9_to_5(id->idReg3()); // nnnnn + code |= insEncodeSveElemsize_30(fmt, optGetSveElemsize(id->idInsOpt())); + + switch (ins) + { + case INS_sve_ld1w: + case INS_sve_ldff1w: + code |= insEncodeUimm5_MultipleOf4_20_to_16(imm); // iiiii + break; + + default: + code |= insEncodeUimm5_MultipleOf2_20_to_16(imm); // iiiii + break; + } + + dst += emitOutput_Instr(dst, code); + break; + case IF_SVE_IV_3A: 
// ...........iiiii ...gggnnnnnttttt -- SVE 64-bit gather load (vector plus immediate) case IF_SVE_JI_3A_A: // ...........iiiii ...gggnnnnnttttt -- SVE 32-bit scatter store (vector plus immediate) case IF_SVE_JL_3A: // ...........iiiii ...gggnnnnnttttt -- SVE 64-bit scatter store (vector plus immediate) @@ -22230,13 +22317,13 @@ BYTE* emitter::emitOutput_InstrSve(BYTE* dst, instrDesc* id) switch (ins) { - case INS_sve_ld1w: - case INS_sve_ldff1w: - code |= insEncodeUimm5_MultipleOf4_20_to_16(imm); // iiiii + case INS_sve_ld1d: + case INS_sve_ldff1d: + code |= insEncodeUimm5_MultipleOf8_20_to_16(imm); // iiiii break; default: - code |= insEncodeUimm5_MultipleOf2_20_to_16(imm); // iiiii + code |= insEncodeUimm5_MultipleOf4_20_to_16(imm); // iiiii break; } diff --git a/src/coreclr/jit/emitarm64.h b/src/coreclr/jit/emitarm64.h index 428b08e436ed94..c5b97b66417c79 100644 --- a/src/coreclr/jit/emitarm64.h +++ b/src/coreclr/jit/emitarm64.h @@ -608,6 +608,9 @@ static code_t insEncodeUimm5_MultipleOf2_20_to_16(ssize_t imm); // Returns the encoding for the immediate value that is a multiple of 4 as 5-bits at bit locations '20-16'. static code_t insEncodeUimm5_MultipleOf4_20_to_16(ssize_t imm); +// Returns the encoding for the immediate value that is a multiple of 8 as 5-bits at bit locations '20-16'. +static code_t insEncodeUimm5_MultipleOf8_20_to_16(ssize_t imm); + // Returns the encoding for the immediate value as 5-bits at bit locations '20-16'. static code_t insEncodeSimm5_20_to_16(ssize_t imm); @@ -715,9 +718,14 @@ static bool isValidUimm5_MultipleOf2(ssize_t value) // Returns true if 'value' is a legal signed multiple of 4 immediate 5 bit encoding (such as for LD1W). static bool isValidUimm5_MultipleOf4(ssize_t value) { - return (0 <= value) && (value <= 124) && (value % 2 == 0); + return (0 <= value) && (value <= 124) && (value % 4 == 0); }; +// Returns true if 'value' is a legal signed multiple of 8 immediate 5 bit encoding (such as for LD1D). 
+static bool isValidUimm5_MultipleOf8(ssize_t value) +{ + return (0 <= value) && (value <= 248) && (value % 8 == 0); +}; // Returns true if 'value' is a legal immediate 1 bit encoding (such as for PEXT). static bool isValidImm1(ssize_t value) From 98b3b3404976d721440a265d1470967f10fbaba1 Mon Sep 17 00:00:00 2001 From: TIHan Date: Mon, 12 Feb 2024 14:40:26 -0800 Subject: [PATCH 05/12] SVE_JI_3A_A and SVE_JL_3A working --- src/coreclr/jit/codegenarm64test.cpp | 42 +++++-- src/coreclr/jit/emitarm64.cpp | 172 ++++++++++++++++++++------- src/coreclr/jit/emitarm64.h | 4 +- 3 files changed, 165 insertions(+), 53 deletions(-) diff --git a/src/coreclr/jit/codegenarm64test.cpp b/src/coreclr/jit/codegenarm64test.cpp index 4ac16d94755683..90798899216539 100644 --- a/src/coreclr/jit/codegenarm64test.cpp +++ b/src/coreclr/jit/codegenarm64test.cpp @@ -7470,17 +7470,37 @@ void CodeGen::genArm64EmitterUnitTestsSve() theEmitter->emitIns_R_R_R_I(INS_sve_ldff1sw, EA_SCALABLE, REG_V2, REG_P0, REG_V4, 124, INS_OPTS_SCALABLE_D); // LDFF1SW {.D }, /Z, [.D{, #}] - //// IF_SVE_JI_3A_A - //theEmitter->emitIns_R_R_R_I(INS_sve_st1b, EA_SCALABLE, REG_V0, REG_P0, REG_V0, 5, - // INS_OPTS_SCALABLE_B); // ST1B {.D }, , [.D{, #}] - //theEmitter->emitIns_R_R_R_I(INS_sve_st1h, EA_SCALABLE, REG_V0, REG_P0, REG_V0, 5, - // INS_OPTS_SCALABLE_B); // ST1H {.D }, , [.D{, #}] - //theEmitter->emitIns_R_R_R_I(INS_sve_st1w, EA_SCALABLE, REG_V0, REG_P0, REG_V0, 5, - // INS_OPTS_SCALABLE_B); // ST1W {.D }, , [.D{, #}] - - //// IF_SVE_JL_3A - //theEmitter->emitIns_R_R_R_I(INS_sve_st1d, EA_SCALABLE, REG_V0, REG_P0, REG_V0, 5, - // INS_OPTS_SCALABLE_B); // ST1D {.D }, , [.D{, #}] + // IF_SVE_JI_3A_A + theEmitter->emitIns_R_R_R_I(INS_sve_st1b, EA_SCALABLE, REG_V1, REG_P2, REG_V3, 0, + INS_OPTS_SCALABLE_S); // ST1B {.S }, , [.S{, #}] + theEmitter->emitIns_R_R_R_I(INS_sve_st1b, EA_SCALABLE, REG_V1, REG_P2, REG_V3, 31, + INS_OPTS_SCALABLE_S); // ST1B {.S }, , [.S{, #}] + theEmitter->emitIns_R_R_R_I(INS_sve_st1h, 
EA_SCALABLE, REG_V5, REG_P3, REG_V2, 0, + INS_OPTS_SCALABLE_S); // ST1H {.S }, , [.S{, #}] + theEmitter->emitIns_R_R_R_I(INS_sve_st1h, EA_SCALABLE, REG_V5, REG_P3, REG_V2, 62, + INS_OPTS_SCALABLE_S); // ST1H {.S }, , [.S{, #}] + theEmitter->emitIns_R_R_R_I(INS_sve_st1w, EA_SCALABLE, REG_V5, REG_P4, REG_V1, 0, + INS_OPTS_SCALABLE_S); // ST1W {.S }, , [.S{, #}] + theEmitter->emitIns_R_R_R_I(INS_sve_st1w, EA_SCALABLE, REG_V5, REG_P4, REG_V1, 124, + INS_OPTS_SCALABLE_S); // ST1W {.S }, , [.S{, #}] + theEmitter->emitIns_R_R_R_I(INS_sve_st1b, EA_SCALABLE, REG_V1, REG_P2, REG_V3, 0, + INS_OPTS_SCALABLE_D); // ST1B {.D }, , [.D{, #}] + theEmitter->emitIns_R_R_R_I(INS_sve_st1b, EA_SCALABLE, REG_V1, REG_P2, REG_V3, 31, + INS_OPTS_SCALABLE_D); // ST1B {.D }, , [.D{, #}] + theEmitter->emitIns_R_R_R_I(INS_sve_st1h, EA_SCALABLE, REG_V5, REG_P3, REG_V2, 0, + INS_OPTS_SCALABLE_D); // ST1H {.D }, , [.D{, #}] + theEmitter->emitIns_R_R_R_I(INS_sve_st1h, EA_SCALABLE, REG_V5, REG_P3, REG_V2, 62, + INS_OPTS_SCALABLE_D); // ST1H {.D }, , [.D{, #}] + theEmitter->emitIns_R_R_R_I(INS_sve_st1w, EA_SCALABLE, REG_V5, REG_P4, REG_V1, 0, + INS_OPTS_SCALABLE_D); // ST1W {.D }, , [.D{, #}] + theEmitter->emitIns_R_R_R_I(INS_sve_st1w, EA_SCALABLE, REG_V5, REG_P4, REG_V1, 124, + INS_OPTS_SCALABLE_D); // ST1W {.D }, , [.D{, #}] + + // IF_SVE_JL_3A + theEmitter->emitIns_R_R_R_I(INS_sve_st1d, EA_SCALABLE, REG_V3, REG_P7, REG_V4, 0, + INS_OPTS_SCALABLE_D); // ST1D {.D }, , [.D{, #}] + theEmitter->emitIns_R_R_R_I(INS_sve_st1d, EA_SCALABLE, REG_V3, REG_P7, REG_V4, 248, + INS_OPTS_SCALABLE_D); // ST1D {.D }, , [.D{, #}] //// IF_SVE_HY_3A //theEmitter->emitIns_R_R_R(INS_sve_prfb, EA_SCALABLE, REG_V0, REG_P0, REG_R0, diff --git a/src/coreclr/jit/emitarm64.cpp b/src/coreclr/jit/emitarm64.cpp index db1600b05d5ae1..d2e41c91c42b6a 100644 --- a/src/coreclr/jit/emitarm64.cpp +++ b/src/coreclr/jit/emitarm64.cpp @@ -2231,13 +2231,21 @@ void emitter::emitInsSanityCheck(instrDesc* id) break; case IF_SVE_JI_3A_A: // 
...........iiiii ...gggnnnnnttttt -- SVE 32-bit scatter store (vector plus immediate) - case IF_SVE_JL_3A: // ...........iiiii ...gggnnnnnttttt -- SVE 64-bit scatter store (vector plus immediate) elemsize = id->idOpSize(); assert(insOptsScalableWords(id->idInsOpt())); assert(isScalableVectorSize(elemsize)); assert(isVectorRegister(id->idReg1())); assert(isLowPredicateRegister(id->idReg2())); assert(isVectorRegister(id->idReg3())); + break; + + case IF_SVE_JL_3A: // ...........iiiii ...gggnnnnnttttt -- SVE 64-bit scatter store (vector plus immediate) + elemsize = id->idOpSize(); + assert(id->idInsOpt() == INS_OPTS_SCALABLE_D); + assert(isScalableVectorSize(elemsize)); + assert(isVectorRegister(id->idReg1())); + assert(isLowPredicateRegister(id->idReg2())); + assert(isVectorRegister(id->idReg3())); assert(isValidUimm5_MultipleOf8(emitGetInsSC(id))); break; @@ -11635,6 +11643,7 @@ void emitter::emitIns_R_R_R_I(instruction ins, } else { + assert(insOptsScalableWords(opt)); assert(isVectorRegister(reg3)); assert(isValidUimm5_MultipleOf4(imm)); fmt = IF_SVE_HX_3A_E; @@ -11755,7 +11764,7 @@ void emitter::emitIns_R_R_R_I(instruction ins, case INS_sve_ldff1h: case INS_sve_ldff1sh: - assert(insOptsScalableAtLeastHalf(opt)); + assert(insOptsScalableWords(opt)); assert(isVectorRegister(reg1)); assert(isLowPredicateRegister(reg2)); assert(isVectorRegister(reg3)); @@ -11764,7 +11773,7 @@ void emitter::emitIns_R_R_R_I(instruction ins, break; case INS_sve_ldff1w: - assert(insOptsScalableAtLeastHalf(opt)); + assert(insOptsScalableWords(opt)); assert(isVectorRegister(reg1)); assert(isLowPredicateRegister(reg2)); assert(isVectorRegister(reg3)); @@ -12102,32 +12111,50 @@ void emitter::emitIns_R_R_R_I(instruction ins, case INS_sve_st1d: assert(isVectorRegister(reg1)); assert(isLowPredicateRegister(reg2)); - assert(isGeneralRegister(reg3)); - assert(isValidSimm4(imm)); - if (opt == INS_OPTS_SCALABLE_Q && (ins == INS_sve_st1d)) - { - fmt = IF_SVE_JN_3C_D; - } - else + if 
(isGeneralRegister(reg3)) { - if ((ins == INS_sve_st1w) && insOptsScalableWords(opt)) + assert(isValidSimm4(imm)); + + if (opt == INS_OPTS_SCALABLE_Q && (ins == INS_sve_st1d)) { - fmt = IF_SVE_JN_3B; + fmt = IF_SVE_JN_3C_D; } else { -#if DEBUG - if (ins == INS_sve_st1w) + if ((ins == INS_sve_st1w) && insOptsScalableWords(opt)) { - assert(opt == INS_OPTS_SCALABLE_Q); + fmt = IF_SVE_JN_3B; } else { - assert(opt == INS_OPTS_SCALABLE_D); - } +#if DEBUG + if (ins == INS_sve_st1w) + { + assert(opt == INS_OPTS_SCALABLE_Q); + } + else + { + assert(opt == INS_OPTS_SCALABLE_D); + } #endif // DEBUG - fmt = IF_SVE_JN_3C; + fmt = IF_SVE_JN_3C; + } + } + } + else + { + assert(isVectorRegister(reg3)); + if ((ins == INS_sve_st1w) && insOptsScalableWords(opt)) + { + assert(isValidUimm5_MultipleOf4(imm)); + fmt = IF_SVE_JI_3A_A; + } + else + { + assert(ins == INS_sve_st1d); + assert(isValidUimm5_MultipleOf8(imm)); + fmt = IF_SVE_JL_3A; } } break; @@ -12217,11 +12244,38 @@ void emitter::emitIns_R_R_R_I(instruction ins, case INS_sve_st1h: assert(isVectorRegister(reg1)); assert(isLowPredicateRegister(reg2)); - assert(isGeneralRegister(reg3)); - assert(isValidSimm4(imm)); - // st1h is reserved for scalable B - assert((ins == INS_sve_st1h) ? insOptsScalableAtLeastHalf(opt) : insOptsScalableStandard(opt)); - fmt = IF_SVE_JN_3A; + + if (isGeneralRegister(reg3)) + { + assert(isValidSimm4(imm)); + // st1h is reserved for scalable B + assert((ins == INS_sve_st1h) ? 
insOptsScalableAtLeastHalf(opt) : insOptsScalableStandard(opt)); + fmt = IF_SVE_JN_3A; + } + else + { + assert(insOptsScalableWords(opt)); + assert(isVectorRegister(reg3)); + +#ifdef DEBUG + switch (ins) + { + case INS_sve_st1b: + assert(isValidUimm5(imm)); + break; + + case INS_sve_st1h: + assert(isValidUimm5_MultipleOf2(imm)); + break; + + default: + assert(!"Invalid instruction"); + break; + } +#endif // DEBUG + + fmt = IF_SVE_JI_3A_A; + } break; case INS_sve_fmla: @@ -17131,15 +17185,12 @@ void emitter::emitIns_Call(EmitCallType callType, * This only works on select formats. */ -/*static*/ emitter::code_t emitter::insEncodeSveElemsize_30(insFormat fmt, emitAttr size) +/*static*/ emitter::code_t emitter::insEncodeSveElemsize_30_or_21(insFormat fmt, emitAttr size) { switch (fmt) { case IF_SVE_HX_3A_B: case IF_SVE_HX_3A_E: - case IF_SVE_IV_3A: - case IF_SVE_JI_3A_A: - case IF_SVE_JL_3A: switch (size) { case EA_4BYTE: @@ -17155,6 +17206,26 @@ void emitter::emitIns_Call(EmitCallType callType, assert(!"Invalid size for vector register"); return 0; + case IF_SVE_IV_3A: + assert(size == EA_8BYTE); + return 0; + + case IF_SVE_JI_3A_A: + switch (size) + { + case EA_4BYTE: + return (1 << 21); + + case EA_8BYTE: + return 0; + + default: + break; + } + + assert(!"Invalid size for vector register"); + return 0; + default: break; } @@ -22278,22 +22349,30 @@ BYTE* emitter::emitOutput_InstrSve(BYTE* dst, instrDesc* id) code |= insEncodeReg_P_12_to_10(id->idReg2()); // ggg code |= insEncodeReg_V_9_to_5(id->idReg3()); // nnnnn code |= insEncodeUimm5_20_to_16(imm); // iiiii - code |= insEncodeSveElemsize_30(fmt, optGetSveElemsize(id->idInsOpt())); + code |= insEncodeSveElemsize_30_or_21(fmt, optGetSveElemsize(id->idInsOpt())); dst += emitOutput_Instr(dst, code); break; case IF_SVE_HX_3A_E: // ...........iiiii ...gggnnnnnttttt -- SVE 32-bit gather load (vector plus immediate) + case IF_SVE_IV_3A: // ...........iiiii ...gggnnnnnttttt -- SVE 64-bit gather load (vector plus immediate) 
imm = emitGetInsSC(id); code = emitInsCodeSve(ins, fmt); code |= insEncodeReg_V_4_to_0(id->idReg1()); // ttttt code |= insEncodeReg_P_12_to_10(id->idReg2()); // ggg code |= insEncodeReg_V_9_to_5(id->idReg3()); // nnnnn - code |= insEncodeSveElemsize_30(fmt, optGetSveElemsize(id->idInsOpt())); + code |= insEncodeSveElemsize_30_or_21(fmt, optGetSveElemsize(id->idInsOpt())); switch (ins) { + case INS_sve_ld1d: + case INS_sve_ldff1d: + code |= insEncodeUimm5_MultipleOf8_20_to_16(imm); // iiiii + break; + case INS_sve_ld1w: + case INS_sve_ld1sw: case INS_sve_ldff1w: + case INS_sve_ldff1sw: code |= insEncodeUimm5_MultipleOf4_20_to_16(imm); // iiiii break; @@ -22305,26 +22384,41 @@ BYTE* emitter::emitOutput_InstrSve(BYTE* dst, instrDesc* id) dst += emitOutput_Instr(dst, code); break; - case IF_SVE_IV_3A: // ...........iiiii ...gggnnnnnttttt -- SVE 64-bit gather load (vector plus immediate) - case IF_SVE_JI_3A_A: // ...........iiiii ...gggnnnnnttttt -- SVE 32-bit scatter store (vector plus immediate) - case IF_SVE_JL_3A: // ...........iiiii ...gggnnnnnttttt -- SVE 64-bit scatter store (vector plus immediate) + case IF_SVE_JL_3A: // ...........iiiii ...gggnnnnnttttt -- SVE 64-bit scatter store (vector plus immediate) imm = emitGetInsSC(id); code = emitInsCodeSve(ins, fmt); code |= insEncodeReg_V_4_to_0(id->idReg1()); // ttttt code |= insEncodeReg_P_12_to_10(id->idReg2()); // ggg code |= insEncodeReg_V_9_to_5(id->idReg3()); // nnnnn - code |= insEncodeSveElemsize_30(fmt, optGetSveElemsize(id->idInsOpt())); + code |= insEncodeUimm5_MultipleOf8_20_to_16(imm); // iiiii + dst += emitOutput_Instr(dst, code); + break; + + case IF_SVE_JI_3A_A: // ...........iiiii ...gggnnnnnttttt -- SVE 32-bit scatter store (vector plus immediate) + imm = emitGetInsSC(id); + code = emitInsCodeSve(ins, fmt); + code |= insEncodeReg_V_4_to_0(id->idReg1()); // ttttt + code |= insEncodeReg_P_12_to_10(id->idReg2()); // ggg + code |= insEncodeReg_V_9_to_5(id->idReg3()); // nnnnn + code |= 
insEncodeSveElemsize_30_or_21(fmt, optGetSveElemsize(id->idInsOpt())); switch (ins) { - case INS_sve_ld1d: - case INS_sve_ldff1d: - code |= insEncodeUimm5_MultipleOf8_20_to_16(imm); // iiiii + case INS_sve_st1b: + code |= insEncodeUimm5_20_to_16(imm); // iiiii break; - default: + case INS_sve_st1h: + code |= insEncodeUimm5_MultipleOf2_20_to_16(imm); // iiiii + break; + + case INS_sve_st1w: code |= insEncodeUimm5_MultipleOf4_20_to_16(imm); // iiiii break; + + default: + code |= insEncodeUimm5_20_to_16(imm); // iiiii + break; } dst += emitOutput_Instr(dst, code); @@ -22790,7 +22884,7 @@ void emitter::emitDispSveImmIndex(regNumber reg1, insOpts opt, ssize_t imm) // This does not have to be printed as hex. // We only do it because the capstone disassembly displays this immediate as hex. // We could not modify capstone without affecting other cases. - emitDispImm(imm, false, /* alwaysHex */ (imm > 8)); + emitDispImm(imm, false, /* alwaysHex */ (imm > 31)); } printf("]"); } @@ -25784,13 +25878,11 @@ void emitter::emitDispInsHelp( // {.S }, /Z, [.S{, #}] // {.D }, /Z, [.D{, #}] case IF_SVE_HX_3A_E: // ...........iiiii ...gggnnnnnttttt -- SVE 32-bit gather load (vector plus immediate) - // {.S }, /Z, [.S{, #}] // {.D }, /Z, [.D{, #}] case IF_SVE_IV_3A: // ...........iiiii ...gggnnnnnttttt -- SVE 64-bit gather load (vector plus immediate) // {.S }, , [.S{, #}] // {.D }, , [.D{, #}] case IF_SVE_JI_3A_A: // ...........iiiii ...gggnnnnnttttt -- SVE 32-bit scatter store (vector plus immediate) - // {.S }, , [.S{, #}] // {.D }, , [.D{, #}] case IF_SVE_JL_3A: // ...........iiiii ...gggnnnnnttttt -- SVE 64-bit scatter store (vector plus immediate) imm = emitGetInsSC(id); diff --git a/src/coreclr/jit/emitarm64.h b/src/coreclr/jit/emitarm64.h index c5b97b66417c79..9009c548e357ee 100644 --- a/src/coreclr/jit/emitarm64.h +++ b/src/coreclr/jit/emitarm64.h @@ -534,9 +534,9 @@ static code_t insEncodeSveElemsize_sz_21(emitAttr size); // This specifically encodes the field 'tszh:tszl' at 
bit locations '22:20-19'. static code_t insEncodeSveElemsize_tszh_22_tszl_20_to_19(emitAttr size); -// Returns the encoding to select the 4/8 byte elemsize for an Arm64 Sve vector instruction at bit location '30'. +// Returns the encoding to select the 4/8 byte elemsize for an Arm64 Sve vector instruction at bit location '30' or '21'. // This only works on select formats. -static code_t insEncodeSveElemsize_30(insFormat fmt, emitAttr size); +static code_t insEncodeSveElemsize_30_or_21(insFormat fmt, emitAttr size); // Returns the encoding to select the constant values 90 or 270 for an Arm64 SVE vector instruction // This specifically encode the field 'rot' at bit location '16'. From 4bc1483a27172dbfa63235f648b1a42928973f14 Mon Sep 17 00:00:00 2001 From: TIHan Date: Mon, 12 Feb 2024 15:24:29 -0800 Subject: [PATCH 06/12] SVE_IC_3A working --- src/coreclr/jit/codegenarm64test.cpp | 32 ++++- src/coreclr/jit/emitarm64.cpp | 170 +++++++++++++++++++++------ src/coreclr/jit/emitarm64.h | 36 ++++++ 3 files changed, 196 insertions(+), 42 deletions(-) diff --git a/src/coreclr/jit/codegenarm64test.cpp b/src/coreclr/jit/codegenarm64test.cpp index 90798899216539..2e6293f8c664bb 100644 --- a/src/coreclr/jit/codegenarm64test.cpp +++ b/src/coreclr/jit/codegenarm64test.cpp @@ -7502,15 +7502,37 @@ void CodeGen::genArm64EmitterUnitTestsSve() theEmitter->emitIns_R_R_R_I(INS_sve_st1d, EA_SCALABLE, REG_V3, REG_P7, REG_V4, 248, INS_OPTS_SCALABLE_D); // ST1D {.D }, , [.D{, #}] - //// IF_SVE_HY_3A + // IF_SVE_IC_3A + theEmitter->emitIns_R_R_R_I(INS_sve_ld1rd, EA_SCALABLE, REG_V1, REG_P2, REG_R3, 504, + INS_OPTS_SCALABLE_D); // LD1RD {.D }, /Z, [{, #}] + theEmitter->emitIns_R_R_R_I(INS_sve_ld1rsw, EA_SCALABLE, REG_V4, REG_P5, REG_R6, 252, + INS_OPTS_SCALABLE_D); // LD1RSW {.D }, /Z, [{, #}] + + //// IF_SVE_IC_3A_A + //theEmitter->emitIns_R_R_R_I(INS_sve_ld1rsh, EA_SCALABLE, REG_P0, REG_V0, REG_R0, 5, + // INS_OPTS_SCALABLE_B); // LD1RSH {.D }, /Z, [{, #}] + 
//theEmitter->emitIns_R_R_R_I(INS_sve_ld1rw, EA_SCALABLE, REG_P0, REG_V0, REG_R0, 5, + // INS_OPTS_SCALABLE_B); // LD1RW {.D }, /Z, [{, #}] + + //// IF_SVE_IC_3A_B + //theEmitter->emitIns_R_R_R_I(INS_sve_ld1rh, EA_SCALABLE, REG_P0, REG_V0, REG_R0, 5, + // INS_OPTS_SCALABLE_B); // LD1RH {.D }, /Z, [{, #}] + //theEmitter->emitIns_R_R_R_I(INS_sve_ld1rsb, EA_SCALABLE, REG_P0, REG_V0, REG_R0, 5, + // INS_OPTS_SCALABLE_B); // LD1RSB {.D }, /Z, [{, #}] + + //// IF_SVE_IC_3A_C + //theEmitter->emitIns_R_R_R_I(INS_sve_ld1rb, EA_SCALABLE, REG_P0, REG_V0, REG_R0, 5, + // INS_OPTS_SCALABLE_B); // LD1RB {.D }, /Z, [{, #}] + + // IF_SVE_HY_3A //theEmitter->emitIns_R_R_R(INS_sve_prfb, EA_SCALABLE, REG_V0, REG_P0, REG_R0, - // INS_OPTS_SCALABLE_B); // PRFB , , [, .S, ] + // INS_OPTS_SCALABLE_S); // PRFB , , [, .S, ] //theEmitter->emitIns_R_R_R(INS_sve_prfd, EA_SCALABLE, REG_V0, REG_P0, REG_R0, - // INS_OPTS_SCALABLE_B); // PRFD , , [, .S, #3] + // INS_OPTS_SCALABLE_S); // PRFD , , [, .S, #3] //theEmitter->emitIns_R_R_R(INS_sve_prfh, EA_SCALABLE, REG_V0, REG_P0, REG_R0, - // INS_OPTS_SCALABLE_B); // PRFH , , [, .S, #1] + // INS_OPTS_SCALABLE_S); // PRFH , , [, .S, #1] //theEmitter->emitIns_R_R_R(INS_sve_prfw, EA_SCALABLE, REG_V0, REG_P0, REG_R0, - // INS_OPTS_SCALABLE_B); // PRFW , , [, .S, #2] + // INS_OPTS_SCALABLE_S); // PRFW , , [, .S, #2] //// IF_SVE_HY_3A_A //theEmitter->emitIns_R_R_R(INS_sve_prfb, EA_SCALABLE, REG_V0, REG_P0, REG_R0, diff --git a/src/coreclr/jit/emitarm64.cpp b/src/coreclr/jit/emitarm64.cpp index d2e41c91c42b6a..8e15598eb19076 100644 --- a/src/coreclr/jit/emitarm64.cpp +++ b/src/coreclr/jit/emitarm64.cpp @@ -2249,6 +2249,20 @@ void emitter::emitInsSanityCheck(instrDesc* id) assert(isValidUimm5_MultipleOf8(emitGetInsSC(id))); break; + case IF_SVE_IC_3A: // ..........iiiiii ...gggnnnnnttttt -- SVE load and broadcast element + elemsize = id->idOpSize(); + assert(id->idInsOpt() == INS_OPTS_SCALABLE_D); + assert(isScalableVectorSize(elemsize)); + 
assert(isVectorRegister(id->idReg1())); + assert(isLowPredicateRegister(id->idReg2())); + assert(isGeneralRegister(id->idReg3())); + break; + + case IF_SVE_IC_3A_A: // ..........iiiiii ...gggnnnnnttttt -- SVE load and broadcast element + case IF_SVE_IC_3A_B: // ..........iiiiii ...gggnnnnnttttt -- SVE load and broadcast element + case IF_SVE_IC_3A_C: // ..........iiiiii ...gggnnnnnttttt -- SVE load and broadcast element + break; + case IF_SVE_HY_3A: // .........h.mmmmm ...gggnnnnn.oooo -- SVE 32-bit gather prefetch (scalar plus 32-bit scaled // offsets) case IF_SVE_HY_3A_A: // .........h.mmmmm ...gggnnnnn.oooo -- SVE 32-bit gather prefetch (scalar plus 32-bit @@ -2266,12 +2280,6 @@ void emitter::emitInsSanityCheck(instrDesc* id) case IF_SVE_IA_2A: // ..........iiiiii ...gggnnnnn.oooo -- SVE contiguous prefetch (scalar plus immediate) break; - case IF_SVE_IC_3A: // ..........iiiiii ...gggnnnnnttttt -- SVE load and broadcast element - case IF_SVE_IC_3A_A: // ..........iiiiii ...gggnnnnnttttt -- SVE load and broadcast element - case IF_SVE_IC_3A_B: // ..........iiiiii ...gggnnnnnttttt -- SVE load and broadcast element - case IF_SVE_IC_3A_C: // ..........iiiiii ...gggnnnnnttttt -- SVE load and broadcast element - break; - default: printf("unexpected format %s\n", emitIfName(id->idInsFmt())); assert(!"Unexpected format"); @@ -11012,6 +11020,9 @@ void emitter::emitIns_R_R_R(instruction ins, fmt = IF_SVE_HP_3A; break; + case INS_sve_prfb: + break; + default: unreached(); break; @@ -12527,6 +12538,24 @@ void emitter::emitIns_R_R_R_I(instruction ins, fmt = IF_SVE_GP_3A; break; + case INS_sve_ld1rd: + assert(opt == INS_OPTS_SCALABLE_D); + assert(isVectorRegister(reg1)); + assert(isLowPredicateRegister(reg2)); + assert(isGeneralRegister(reg3)); + assert(isValidUimm6_MultipleOf8(imm)); + fmt = IF_SVE_IC_3A; + break; + + case INS_sve_ld1rsw: + assert(opt == INS_OPTS_SCALABLE_D); + assert(isVectorRegister(reg1)); + assert(isLowPredicateRegister(reg2)); + 
assert(isGeneralRegister(reg3)); + assert(isValidUimm6_MultipleOf4(imm)); + fmt = IF_SVE_IC_3A; + break; + default: unreached(); break; @@ -18936,6 +18965,39 @@ void emitter::emitIns_Call(EmitCallType callType, return insEncodeUimm5_20_to_16(imm / 8); } +/***************************************************************************** + * + * Returns the encoding for the immediate value that is a multiple of 2 as 6-bits at bit locations '21-16'. + */ + +/*static*/ emitter::code_t emitter::insEncodeUimm6_MultipleOf2_21_to_16(ssize_t imm) +{ + assert(isValidUimm6_MultipleOf2(imm)); + return insEncodeUimm6_21_to_16(imm / 2); +} + +/***************************************************************************** + * + * Returns the encoding for the immediate value that is a multiple of 4 as 6-bits at bit locations '21-16'. + */ + +/*static*/ emitter::code_t emitter::insEncodeUimm6_MultipleOf4_21_to_16(ssize_t imm) +{ + assert(isValidUimm6_MultipleOf4(imm)); + return insEncodeUimm6_21_to_16(imm / 4); +} + +/***************************************************************************** + * + * Returns the encoding for the immediate value that is a multiple of 8 as 6-bits at bit locations '21-16'. + */ + +/*static*/ emitter::code_t emitter::insEncodeUimm6_MultipleOf8_21_to_16(ssize_t imm) +{ + assert(isValidUimm6_MultipleOf8(imm)); + return insEncodeUimm6_21_to_16(imm / 8); +} + /***************************************************************************** * * Returns the encoding for the immediate value as 5-bits at bit locations '20-16'. @@ -19039,6 +19101,17 @@ void emitter::emitIns_Call(EmitCallType callType, return (code_t)imm << 16; } +/***************************************************************************** + * + * Returns the encoding for the immediate value as 6-bits at bit locations '21-16'. 
+ */ + +/*static*/ emitter::code_t emitter::insEncodeUimm6_21_to_16(ssize_t imm) +{ + assert(isValidUimm6(imm)); + return (code_t)imm << 16; +} + /***************************************************************************** * * Returns the encoding for the immediate value as 8-bits at bit locations '12-5'. @@ -22404,10 +22477,6 @@ BYTE* emitter::emitOutput_InstrSve(BYTE* dst, instrDesc* id) switch (ins) { - case INS_sve_st1b: - code |= insEncodeUimm5_20_to_16(imm); // iiiii - break; - case INS_sve_st1h: code |= insEncodeUimm5_MultipleOf2_20_to_16(imm); // iiiii break; @@ -22417,6 +22486,7 @@ BYTE* emitter::emitOutput_InstrSve(BYTE* dst, instrDesc* id) break; default: + assert(ins == INS_sve_st1b); code |= insEncodeUimm5_20_to_16(imm); // iiiii break; } @@ -22424,6 +22494,35 @@ BYTE* emitter::emitOutput_InstrSve(BYTE* dst, instrDesc* id) dst += emitOutput_Instr(dst, code); break; + case IF_SVE_IC_3A: // ..........iiiiii ...gggnnnnnttttt -- SVE load and broadcast element + imm = emitGetInsSC(id); + code = emitInsCodeSve(ins, fmt); + code |= insEncodeReg_V_4_to_0(id->idReg1()); // ttttt + code |= insEncodeReg_P_12_to_10(id->idReg2()); // ggg + code |= insEncodeReg_R_9_to_5(id->idReg3()); // nnnnn + + switch (ins) + { + case INS_sve_ld1rd: + code |= insEncodeUimm6_MultipleOf8_21_to_16(imm); // iiiiii + break; + + default: + assert(ins == INS_sve_ld1rsw); + code |= insEncodeUimm6_MultipleOf4_21_to_16(imm); // iiiiii + break; + } + + dst += emitOutput_Instr(dst, code); + break; + + case IF_SVE_IC_3A_A: // ..........iiiiii ...gggnnnnnttttt -- SVE load and broadcast element + case IF_SVE_IC_3A_B: // ..........iiiiii ...gggnnnnnttttt -- SVE load and broadcast element + case IF_SVE_IC_3A_C: // ..........iiiiii ...gggnnnnnttttt -- SVE load and broadcast element + code = emitInsCodeSve(ins, fmt); + dst += emitOutput_Instr(dst, code); + break; + case IF_SVE_HY_3A: // .........h.mmmmm ...gggnnnnn.oooo -- SVE 32-bit gather prefetch (scalar plus 32-bit scaled // offsets) case 
IF_SVE_HY_3A_A: // .........h.mmmmm ...gggnnnnn.oooo -- SVE 32-bit gather prefetch (scalar plus 32-bit @@ -22449,14 +22548,6 @@ BYTE* emitter::emitOutput_InstrSve(BYTE* dst, instrDesc* id) dst += emitOutput_Instr(dst, code); break; - case IF_SVE_IC_3A: // ..........iiiiii ...gggnnnnnttttt -- SVE load and broadcast element - case IF_SVE_IC_3A_A: // ..........iiiiii ...gggnnnnnttttt -- SVE load and broadcast element - case IF_SVE_IC_3A_B: // ..........iiiiii ...gggnnnnnttttt -- SVE load and broadcast element - case IF_SVE_IC_3A_C: // ..........iiiiii ...gggnnnnnttttt -- SVE load and broadcast element - code = emitInsCodeSve(ins, fmt); - dst += emitOutput_Instr(dst, code); - break; - default: assert(!"Unexpected format"); break; @@ -22878,7 +22969,14 @@ void emitter::emitDispSveImmMulVl(regNumber reg1, ssize_t imm) void emitter::emitDispSveImmIndex(regNumber reg1, insOpts opt, ssize_t imm) { printf("["); - emitDispSveReg(reg1, opt, imm != 0); + if (isVectorRegister(reg1)) + { + emitDispSveReg(reg1, opt, imm != 0); + } + else + { + emitDispReg(reg1, EA_8BYTE, imm != 0); + } if (imm != 0) { // This does not have to be printed as hex. 
@@ -25885,6 +25983,14 @@ void emitter::emitDispInsHelp( case IF_SVE_JI_3A_A: // ...........iiiii ...gggnnnnnttttt -- SVE 32-bit scatter store (vector plus immediate) // {.D }, , [.D{, #}] case IF_SVE_JL_3A: // ...........iiiii ...gggnnnnnttttt -- SVE 64-bit scatter store (vector plus immediate) + // {.D }, /Z, [{, #}] + case IF_SVE_IC_3A: // ..........iiiiii ...gggnnnnnttttt -- SVE load and broadcast element + // {.D }, /Z, [{, #}] + case IF_SVE_IC_3A_A: // ..........iiiiii ...gggnnnnnttttt -- SVE load and broadcast element + // {.D }, /Z, [{, #}] + case IF_SVE_IC_3A_B: // ..........iiiiii ...gggnnnnnttttt -- SVE load and broadcast element + // {.D }, /Z, [{, #}] + case IF_SVE_IC_3A_C: // ..........iiiiii ...gggnnnnnttttt -- SVE load and broadcast element imm = emitGetInsSC(id); emitDispSveConsecutiveRegList(id->idReg1(), insGetSveReg1ListSize(id->idIns()), id->idInsOpt(), true); emitDispPredicateReg(id->idReg2(), insGetPredicateType(fmt), id->idInsOpt(), true); @@ -25926,16 +26032,6 @@ void emitter::emitDispInsHelp( case IF_SVE_IA_2A: // ..........iiiiii ...gggnnnnn.oooo -- SVE contiguous prefetch (scalar plus immediate) break; - // {.D }, /Z, [{, #}] - case IF_SVE_IC_3A: // ..........iiiiii ...gggnnnnnttttt -- SVE load and broadcast element - // {.D }, /Z, [{, #}] - case IF_SVE_IC_3A_A: // ..........iiiiii ...gggnnnnnttttt -- SVE load and broadcast element - // {.D }, /Z, [{, #}] - case IF_SVE_IC_3A_B: // ..........iiiiii ...gggnnnnnttttt -- SVE load and broadcast element - // {.D }, /Z, [{, #}] - case IF_SVE_IC_3A_C: // ..........iiiiii ...gggnnnnnttttt -- SVE load and broadcast element - break; - default: printf("unexpected format %s", emitIfName(id->idInsFmt())); assert(!"unexpectedFormat"); @@ -29504,6 +29600,14 @@ emitter::insExecutionCharacteristics emitter::getInsExecutionCharacteristics(ins result.insLatency = PERFSCORE_LATENCY_2C; break; + case IF_SVE_IC_3A: // ..........iiiiii ...gggnnnnnttttt -- SVE load and broadcast element + case IF_SVE_IC_3A_A: // 
..........iiiiii ...gggnnnnnttttt -- SVE load and broadcast element + case IF_SVE_IC_3A_B: // ..........iiiiii ...gggnnnnnttttt -- SVE load and broadcast element + case IF_SVE_IC_3A_C: // ..........iiiiii ...gggnnnnnttttt -- SVE load and broadcast element + result.insThroughput = PERFSCORE_THROUGHPUT_3C; + result.insLatency = PERFSCORE_LATENCY_6C; + break; + case IF_SVE_HY_3A: // .........h.mmmmm ...gggnnnnn.oooo -- SVE 32-bit gather prefetch (scalar plus 32-bit scaled // offsets) switch (ins) @@ -29661,14 +29765,6 @@ emitter::insExecutionCharacteristics emitter::getInsExecutionCharacteristics(ins } break; - case IF_SVE_IC_3A: // ..........iiiiii ...gggnnnnnttttt -- SVE load and broadcast element - case IF_SVE_IC_3A_A: // ..........iiiiii ...gggnnnnnttttt -- SVE load and broadcast element - case IF_SVE_IC_3A_B: // ..........iiiiii ...gggnnnnnttttt -- SVE load and broadcast element - case IF_SVE_IC_3A_C: // ..........iiiiii ...gggnnnnnttttt -- SVE load and broadcast element - result.insThroughput = PERFSCORE_THROUGHPUT_3C; - result.insLatency = PERFSCORE_LATENCY_6C; - break; - default: // all other instructions perfScoreUnhandledInstruction(id, &result); diff --git a/src/coreclr/jit/emitarm64.h b/src/coreclr/jit/emitarm64.h index 9009c548e357ee..931c0af5be9501 100644 --- a/src/coreclr/jit/emitarm64.h +++ b/src/coreclr/jit/emitarm64.h @@ -611,6 +611,15 @@ static code_t insEncodeUimm5_MultipleOf4_20_to_16(ssize_t imm); // Returns the encoding for the immediate value that is a multiple of 8 as 5-bits at bit locations '20-16'. static code_t insEncodeUimm5_MultipleOf8_20_to_16(ssize_t imm); +// Returns the encoding for the immediate value that is a multiple of 2 as 6-bits at bit locations '21-16'. +static code_t insEncodeUimm6_MultipleOf2_21_to_16(ssize_t imm); + +// Returns the encoding for the immediate value that is a multiple of 4 as 6-bits at bit locations '21-16'. 
+static code_t insEncodeUimm6_MultipleOf4_21_to_16(ssize_t imm); + +// Returns the encoding for the immediate value that is a multiple of 8 as 6-bits at bit locations '21-16'. +static code_t insEncodeUimm6_MultipleOf8_21_to_16(ssize_t imm); + // Returns the encoding for the immediate value as 5-bits at bit locations '20-16'. static code_t insEncodeSimm5_20_to_16(ssize_t imm); @@ -638,6 +647,9 @@ static code_t insEncodeUimm4From1_19_to_16(ssize_t imm); // Returns the encoding for the immediate value as 5-bits at bit locations '20-16'. static code_t insEncodeUimm5_20_to_16(ssize_t imm); +// Returns the encoding for the immediate value as 6-bits at bit locations '21-16'. +static code_t insEncodeUimm6_21_to_16(ssize_t imm); + // Returns the encoding for the immediate value as 8-bits at bit locations '12-5'. static code_t insEncodeImm8_12_to_5(ssize_t imm); @@ -727,6 +739,24 @@ static bool isValidUimm5_MultipleOf8(ssize_t value) return (0 <= value) && (value <= 248) && (value % 8 == 0); }; +// Returns true if 'value' is a legal unsigned multiple of 2 immediate 6 bit encoding (such as for LD1RH). +static bool isValidUimm6_MultipleOf2(ssize_t value) +{ + return (0 <= value) && (value <= 126) && (value % 2 == 0); +}; + +// Returns true if 'value' is a legal unsigned multiple of 4 immediate 6 bit encoding (such as for LD1RSW). +static bool isValidUimm6_MultipleOf4(ssize_t value) +{ + return (0 <= value) && (value <= 252) && (value % 4 == 0); +}; + +// Returns true if 'value' is a legal unsigned multiple of 8 immediate 6 bit encoding (such as for LD1RD). +static bool isValidUimm6_MultipleOf8(ssize_t value) +{ + return (0 <= value) && (value <= 504) && (value % 8 == 0); +}; + // Returns true if 'value' is a legal immediate 1 bit encoding (such as for PEXT). 
static bool isValidImm1(ssize_t value) { @@ -763,6 +793,12 @@ static bool isValidUimm5(ssize_t value) return (0 <= value) && (value <= 0x1FLL); }; +// Returns true if 'value' is a legal unsigned immediate 6 bit encoding (such as for LD1RD). +static bool isValidUimm6(ssize_t value) +{ + return (0 <= value) && (value <= 63); +}; + // Returns true if 'value' is a legal unsigned immediate 7 bit encoding (such as for CMPLT, CMPNE). static bool isValidUimm7(ssize_t value) { From 2863c7f4bc02af27d93cb8d10a833a41320464de Mon Sep 17 00:00:00 2001 From: TIHan Date: Mon, 12 Feb 2024 15:24:59 -0800 Subject: [PATCH 07/12] cleanup --- src/coreclr/jit/codegenarm64test.cpp | 22 ---------------------- 1 file changed, 22 deletions(-) diff --git a/src/coreclr/jit/codegenarm64test.cpp b/src/coreclr/jit/codegenarm64test.cpp index 2e6293f8c664bb..67afbce8339829 100644 --- a/src/coreclr/jit/codegenarm64test.cpp +++ b/src/coreclr/jit/codegenarm64test.cpp @@ -7583,28 +7583,6 @@ void CodeGen::genArm64EmitterUnitTestsSve() // INS_OPTS_SCALABLE_B); // PRFH , , [{, #, MUL VL}] //theEmitter->emitIns_R_R_I(INS_sve_prfw, EA_SCALABLE, REG_P0, REG_R0, 5, // INS_OPTS_SCALABLE_B); // PRFW , , [{, #, MUL VL}] - - //// IF_SVE_IC_3A - //theEmitter->emitIns_R_R_R_I(INS_sve_ld1rd, EA_SCALABLE, REG_P0, REG_V0, REG_R0, 5, - // INS_OPTS_SCALABLE_B); // LD1RD {.D }, /Z, [{, #}] - //theEmitter->emitIns_R_R_R_I(INS_sve_ld1rsw, EA_SCALABLE, REG_P0, REG_V0, REG_R0, 5, - // INS_OPTS_SCALABLE_B); // LD1RSW {.D }, /Z, [{, #}] - - //// IF_SVE_IC_3A_A - //theEmitter->emitIns_R_R_R_I(INS_sve_ld1rsh, EA_SCALABLE, REG_P0, REG_V0, REG_R0, 5, - // INS_OPTS_SCALABLE_B); // LD1RSH {.D }, /Z, [{, #}] - //theEmitter->emitIns_R_R_R_I(INS_sve_ld1rw, EA_SCALABLE, REG_P0, REG_V0, REG_R0, 5, - // INS_OPTS_SCALABLE_B); // LD1RW {.D }, /Z, [{, #}] - - //// IF_SVE_IC_3A_B - //theEmitter->emitIns_R_R_R_I(INS_sve_ld1rh, EA_SCALABLE, REG_P0, REG_V0, REG_R0, 5, - // INS_OPTS_SCALABLE_B); // LD1RH {.D }, /Z, [{, #}] - 
//theEmitter->emitIns_R_R_R_I(INS_sve_ld1rsb, EA_SCALABLE, REG_P0, REG_V0, REG_R0, 5, - // INS_OPTS_SCALABLE_B); // LD1RSB {.D }, /Z, [{, #}] - - //// IF_SVE_IC_3A_C - //theEmitter->emitIns_R_R_R_I(INS_sve_ld1rb, EA_SCALABLE, REG_P0, REG_V0, REG_R0, 5, - // INS_OPTS_SCALABLE_B); // LD1RB {.D }, /Z, [{, #}] } #endif // defined(TARGET_ARM64) && defined(DEBUG) From 3b180b2f733fbf3240c12131af876b0e56802c20 Mon Sep 17 00:00:00 2001 From: TIHan Date: Mon, 12 Feb 2024 15:53:47 -0800 Subject: [PATCH 08/12] SVE_IC_3A_A working --- src/coreclr/jit/codegenarm64test.cpp | 38 ++++++-- src/coreclr/jit/emitarm64.cpp | 139 +++++++++++++++++++++++++++ src/coreclr/jit/emitarm64.h | 4 + 3 files changed, 171 insertions(+), 10 deletions(-) diff --git a/src/coreclr/jit/codegenarm64test.cpp b/src/coreclr/jit/codegenarm64test.cpp index 67afbce8339829..4f056da011eed7 100644 --- a/src/coreclr/jit/codegenarm64test.cpp +++ b/src/coreclr/jit/codegenarm64test.cpp @@ -7508,20 +7508,38 @@ void CodeGen::genArm64EmitterUnitTestsSve() theEmitter->emitIns_R_R_R_I(INS_sve_ld1rsw, EA_SCALABLE, REG_V4, REG_P5, REG_R6, 252, INS_OPTS_SCALABLE_D); // LD1RSW {.D }, /Z, [{, #}] - //// IF_SVE_IC_3A_A - //theEmitter->emitIns_R_R_R_I(INS_sve_ld1rsh, EA_SCALABLE, REG_P0, REG_V0, REG_R0, 5, - // INS_OPTS_SCALABLE_B); // LD1RSH {.D }, /Z, [{, #}] - //theEmitter->emitIns_R_R_R_I(INS_sve_ld1rw, EA_SCALABLE, REG_P0, REG_V0, REG_R0, 5, - // INS_OPTS_SCALABLE_B); // LD1RW {.D }, /Z, [{, #}] + // IF_SVE_IC_3A_A + theEmitter->emitIns_R_R_R_I(INS_sve_ld1rsh, EA_SCALABLE, REG_V0, REG_P1, REG_R2, 0, + INS_OPTS_SCALABLE_S); // LD1RSH {.S }, /Z, [{, #}] + theEmitter->emitIns_R_R_R_I(INS_sve_ld1rw, EA_SCALABLE, REG_V5, REG_P4, REG_R3, 0, + INS_OPTS_SCALABLE_S); // LD1RW {.S }, /Z, [{, #}] + theEmitter->emitIns_R_R_R_I(INS_sve_ld1rsh, EA_SCALABLE, REG_V0, REG_P1, REG_R2, 126, + INS_OPTS_SCALABLE_D); // LD1RSH {.D }, /Z, [{, #}] + theEmitter->emitIns_R_R_R_I(INS_sve_ld1rw, EA_SCALABLE, REG_V5, REG_P4, REG_R3, 252, + 
INS_OPTS_SCALABLE_D); // LD1RW {.D }, /Z, [{, #}] //// IF_SVE_IC_3A_B - //theEmitter->emitIns_R_R_R_I(INS_sve_ld1rh, EA_SCALABLE, REG_P0, REG_V0, REG_R0, 5, - // INS_OPTS_SCALABLE_B); // LD1RH {.D }, /Z, [{, #}] - //theEmitter->emitIns_R_R_R_I(INS_sve_ld1rsb, EA_SCALABLE, REG_P0, REG_V0, REG_R0, 5, - // INS_OPTS_SCALABLE_B); // LD1RSB {.D }, /Z, [{, #}] + //theEmitter->emitIns_R_R_R_I(INS_sve_ld1rh, EA_SCALABLE, REG_V0, REG_P0, REG_R0, 0, + // INS_OPTS_SCALABLE_H); // LD1RH {.H }, /Z, [{, #}] + //theEmitter->emitIns_R_R_R_I(INS_sve_ld1rsb, EA_SCALABLE, REG_V0, REG_P0, REG_R0, 0, + // INS_OPTS_SCALABLE_H); // LD1RSB {.H }, /Z, [{, #}] + //theEmitter->emitIns_R_R_R_I(INS_sve_ld1rh, EA_SCALABLE, REG_V0, REG_P0, REG_R0, 0, + // INS_OPTS_SCALABLE_S); // LD1RH {.S }, /Z, [{, #}] + //theEmitter->emitIns_R_R_R_I(INS_sve_ld1rsb, EA_SCALABLE, REG_V0, REG_P0, REG_R0, 0, + // INS_OPTS_SCALABLE_S); // LD1RSB {.S }, /Z, [{, #}] + //theEmitter->emitIns_R_R_R_I(INS_sve_ld1rh, EA_SCALABLE, REG_V0, REG_P0, REG_R0, 0, + // INS_OPTS_SCALABLE_D); // LD1RH {.D }, /Z, [{, #}] + //theEmitter->emitIns_R_R_R_I(INS_sve_ld1rsb, EA_SCALABLE, REG_V0, REG_P0, REG_R0, 0, + // INS_OPTS_SCALABLE_D); // LD1RSB {.D }, /Z, [{, #}] //// IF_SVE_IC_3A_C - //theEmitter->emitIns_R_R_R_I(INS_sve_ld1rb, EA_SCALABLE, REG_P0, REG_V0, REG_R0, 5, + //theEmitter->emitIns_R_R_R_I(INS_sve_ld1rb, EA_SCALABLE, REG_V0, REG_P0, REG_R0, 0, + // INS_OPTS_SCALABLE_B); // LD1RB {.B }, /Z, [{, #}] + //theEmitter->emitIns_R_R_R_I(INS_sve_ld1rb, EA_SCALABLE, REG_V0, REG_P0, REG_R0, 0, + // INS_OPTS_SCALABLE_H); // LD1RB {.H }, /Z, [{, #}] + //theEmitter->emitIns_R_R_R_I(INS_sve_ld1rb, EA_SCALABLE, REG_V0, REG_P0, REG_R0, 0, + // INS_OPTS_SCALABLE_S); // LD1RB {.S }, /Z, [{, #}] + //theEmitter->emitIns_R_R_R_I(INS_sve_ld1rb, EA_SCALABLE, REG_V0, REG_P0, REG_R0, 0, // INS_OPTS_SCALABLE_B); // LD1RB {.D }, /Z, [{, #}] // IF_SVE_HY_3A diff --git a/src/coreclr/jit/emitarm64.cpp b/src/coreclr/jit/emitarm64.cpp index 
8e15598eb19076..33755acb2a4800 100644 --- a/src/coreclr/jit/emitarm64.cpp +++ b/src/coreclr/jit/emitarm64.cpp @@ -2259,6 +2259,14 @@ void emitter::emitInsSanityCheck(instrDesc* id) break; case IF_SVE_IC_3A_A: // ..........iiiiii ...gggnnnnnttttt -- SVE load and broadcast element + elemsize = id->idOpSize(); + assert(insOptsScalableWords(id->idInsOpt())); + assert(isScalableVectorSize(elemsize)); + assert(isVectorRegister(id->idReg1())); + assert(isLowPredicateRegister(id->idReg2())); + assert(isGeneralRegister(id->idReg3())); + break; + case IF_SVE_IC_3A_B: // ..........iiiiii ...gggnnnnnttttt -- SVE load and broadcast element case IF_SVE_IC_3A_C: // ..........iiiiii ...gggnnnnnttttt -- SVE load and broadcast element break; @@ -12556,6 +12564,24 @@ void emitter::emitIns_R_R_R_I(instruction ins, fmt = IF_SVE_IC_3A; break; + case INS_sve_ld1rsh: + assert(insOptsScalableWords(opt)); + assert(isVectorRegister(reg1)); + assert(isLowPredicateRegister(reg2)); + assert(isGeneralRegister(reg3)); + assert(isValidUimm6_MultipleOf2(imm)); + fmt = IF_SVE_IC_3A_A; + break; + + case INS_sve_ld1rw: + assert(insOptsScalableWords(opt)); + assert(isVectorRegister(reg1)); + assert(isLowPredicateRegister(reg2)); + assert(isGeneralRegister(reg3)); + assert(isValidUimm6_MultipleOf4(imm)); + fmt = IF_SVE_IC_3A_A; + break; + default: unreached(); break; @@ -18842,6 +18868,97 @@ void emitter::emitIns_Call(EmitCallType callType, return code; } +/***************************************************************************** + * + * Returns the encoding to select the 1/2/4/8 byte elemsize for an Arm64 Sve vector instruction + * for the 'dtypeh' and 'dtypel' fields. 
+ */ + +/*static*/ emitter::code_t emitter::insEncodeSveElemsize_dtypeh_dtypel(instruction ins, insFormat fmt, emitAttr size, code_t code) +{ + switch (fmt) + { + case IF_SVE_IC_3A_A: + switch (size) + { + case EA_4BYTE: + switch (ins) + { + case INS_sve_ld1rsh: + return code | (1 << 13); // set bit '13' + + case INS_sve_ld1rw: + return code | (1 << 14); // set bit '14' + + default: + break; + } + break; + + case EA_8BYTE: + switch (ins) + { + case INS_sve_ld1rsh: + return code; + + case INS_sve_ld1rw: + return code | (1 << 14) | (1 << 13); // set bits '14' and '13' + + default: + break; + } + break; + + default: + break; + } + break; + + case IF_SVE_IC_3A_B: + switch (size) + { + case EA_2BYTE: + break; + + case EA_4BYTE: + break; + + case EA_8BYTE: + break; + + default: + break; + } + break; + + case IF_SVE_IC_3A_C: + switch (size) + { + case EA_1BYTE: + break; + + case EA_2BYTE: + break; + + case EA_4BYTE: + break; + + case EA_8BYTE: + break; + + default: + break; + } + break; + + default: + break; + } + + assert(!"Unexpected instruction format"); + return code; +} + /***************************************************************************** * * Returns the encoding for the immediate value as 4-bits at bit locations '19-16'. 
@@ -22519,7 +22636,29 @@ BYTE* emitter::emitOutput_InstrSve(BYTE* dst, instrDesc* id) case IF_SVE_IC_3A_A: // ..........iiiiii ...gggnnnnnttttt -- SVE load and broadcast element case IF_SVE_IC_3A_B: // ..........iiiiii ...gggnnnnnttttt -- SVE load and broadcast element case IF_SVE_IC_3A_C: // ..........iiiiii ...gggnnnnnttttt -- SVE load and broadcast element + imm = emitGetInsSC(id); code = emitInsCodeSve(ins, fmt); + code |= insEncodeReg_V_4_to_0(id->idReg1()); // ttttt + code |= insEncodeReg_P_12_to_10(id->idReg2()); // ggg + code |= insEncodeReg_R_9_to_5(id->idReg3()); // nnnnn + code = insEncodeSveElemsize_dtypeh_dtypel(ins, fmt, optGetSveElemsize(id->idInsOpt()), code); + + switch (ins) + { + case INS_sve_ld1rw: + code |= insEncodeUimm6_MultipleOf4_21_to_16(imm); // iiiiii + break; + + case INS_sve_ld1rh: + case INS_sve_ld1rsh: + code |= insEncodeUimm6_MultipleOf2_21_to_16(imm); // iiiiii + break; + + default: + code |= insEncodeUimm6_21_to_16(imm); // iiiiii + break; + } + dst += emitOutput_Instr(dst, code); break; diff --git a/src/coreclr/jit/emitarm64.h b/src/coreclr/jit/emitarm64.h index 931c0af5be9501..5352872ef84def 100644 --- a/src/coreclr/jit/emitarm64.h +++ b/src/coreclr/jit/emitarm64.h @@ -581,6 +581,10 @@ static code_t insEncodeSveElemsize_dtype(instruction ins, emitAttr size, code_t // for the 'dtype' field. static code_t insEncodeSveElemsize_dtype_ld1w(instruction ins, insFormat fmt, emitAttr size, code_t code); +// Returns the encoding to select the 1/2/4/8 byte elemsize for an Arm64 Sve vector instruction +// for the 'dtypeh' and 'dtypel' fields. +static code_t insEncodeSveElemsize_dtypeh_dtypel(instruction ins, insFormat fmt, emitAttr size, code_t code); + // Returns the encoding for the immediate value as 4-bits at bit locations '19-16'. 
static code_t insEncodeSimm4_19_to_16(ssize_t imm); From 38d51a1feece3435fb763ef8fdf1ac65de7b461f Mon Sep 17 00:00:00 2001 From: TIHan Date: Mon, 12 Feb 2024 16:21:36 -0800 Subject: [PATCH 09/12] Finishing up --- src/coreclr/jit/codegenarm64test.cpp | 106 ++------- src/coreclr/jit/emitarm64.cpp | 316 +++++++-------------------- 2 files changed, 102 insertions(+), 320 deletions(-) diff --git a/src/coreclr/jit/codegenarm64test.cpp b/src/coreclr/jit/codegenarm64test.cpp index 4f056da011eed7..755fe58503f5a0 100644 --- a/src/coreclr/jit/codegenarm64test.cpp +++ b/src/coreclr/jit/codegenarm64test.cpp @@ -7518,89 +7518,29 @@ void CodeGen::genArm64EmitterUnitTestsSve() theEmitter->emitIns_R_R_R_I(INS_sve_ld1rw, EA_SCALABLE, REG_V5, REG_P4, REG_R3, 252, INS_OPTS_SCALABLE_D); // LD1RW {.D }, /Z, [{, #}] - //// IF_SVE_IC_3A_B - //theEmitter->emitIns_R_R_R_I(INS_sve_ld1rh, EA_SCALABLE, REG_V0, REG_P0, REG_R0, 0, - // INS_OPTS_SCALABLE_H); // LD1RH {.H }, /Z, [{, #}] - //theEmitter->emitIns_R_R_R_I(INS_sve_ld1rsb, EA_SCALABLE, REG_V0, REG_P0, REG_R0, 0, - // INS_OPTS_SCALABLE_H); // LD1RSB {.H }, /Z, [{, #}] - //theEmitter->emitIns_R_R_R_I(INS_sve_ld1rh, EA_SCALABLE, REG_V0, REG_P0, REG_R0, 0, - // INS_OPTS_SCALABLE_S); // LD1RH {.S }, /Z, [{, #}] - //theEmitter->emitIns_R_R_R_I(INS_sve_ld1rsb, EA_SCALABLE, REG_V0, REG_P0, REG_R0, 0, - // INS_OPTS_SCALABLE_S); // LD1RSB {.S }, /Z, [{, #}] - //theEmitter->emitIns_R_R_R_I(INS_sve_ld1rh, EA_SCALABLE, REG_V0, REG_P0, REG_R0, 0, - // INS_OPTS_SCALABLE_D); // LD1RH {.D }, /Z, [{, #}] - //theEmitter->emitIns_R_R_R_I(INS_sve_ld1rsb, EA_SCALABLE, REG_V0, REG_P0, REG_R0, 0, - // INS_OPTS_SCALABLE_D); // LD1RSB {.D }, /Z, [{, #}] - - //// IF_SVE_IC_3A_C - //theEmitter->emitIns_R_R_R_I(INS_sve_ld1rb, EA_SCALABLE, REG_V0, REG_P0, REG_R0, 0, - // INS_OPTS_SCALABLE_B); // LD1RB {.B }, /Z, [{, #}] - //theEmitter->emitIns_R_R_R_I(INS_sve_ld1rb, EA_SCALABLE, REG_V0, REG_P0, REG_R0, 0, - // INS_OPTS_SCALABLE_H); // LD1RB {.H }, /Z, [{, #}] - 
//theEmitter->emitIns_R_R_R_I(INS_sve_ld1rb, EA_SCALABLE, REG_V0, REG_P0, REG_R0, 0, - // INS_OPTS_SCALABLE_S); // LD1RB {.S }, /Z, [{, #}] - //theEmitter->emitIns_R_R_R_I(INS_sve_ld1rb, EA_SCALABLE, REG_V0, REG_P0, REG_R0, 0, - // INS_OPTS_SCALABLE_B); // LD1RB {.D }, /Z, [{, #}] - - // IF_SVE_HY_3A - //theEmitter->emitIns_R_R_R(INS_sve_prfb, EA_SCALABLE, REG_V0, REG_P0, REG_R0, - // INS_OPTS_SCALABLE_S); // PRFB , , [, .S, ] - //theEmitter->emitIns_R_R_R(INS_sve_prfd, EA_SCALABLE, REG_V0, REG_P0, REG_R0, - // INS_OPTS_SCALABLE_S); // PRFD , , [, .S, #3] - //theEmitter->emitIns_R_R_R(INS_sve_prfh, EA_SCALABLE, REG_V0, REG_P0, REG_R0, - // INS_OPTS_SCALABLE_S); // PRFH , , [, .S, #1] - //theEmitter->emitIns_R_R_R(INS_sve_prfw, EA_SCALABLE, REG_V0, REG_P0, REG_R0, - // INS_OPTS_SCALABLE_S); // PRFW , , [, .S, #2] - - //// IF_SVE_HY_3A_A - //theEmitter->emitIns_R_R_R(INS_sve_prfb, EA_SCALABLE, REG_V0, REG_P0, REG_R0, - // INS_OPTS_SCALABLE_B); // PRFB , , [, .D, ] - //theEmitter->emitIns_R_R_R(INS_sve_prfd, EA_SCALABLE, REG_V0, REG_P0, REG_R0, - // INS_OPTS_SCALABLE_B); // PRFD , , [, .D, #3] - //theEmitter->emitIns_R_R_R(INS_sve_prfh, EA_SCALABLE, REG_V0, REG_P0, REG_R0, - // INS_OPTS_SCALABLE_B); // PRFH , , [, .D, #1] - //theEmitter->emitIns_R_R_R(INS_sve_prfw, EA_SCALABLE, REG_V0, REG_P0, REG_R0, - // INS_OPTS_SCALABLE_B); // PRFW , , [, .D, #2] - - //// IF_SVE_HY_3B - //theEmitter->emitIns_R_R_R(INS_sve_prfb, EA_SCALABLE, REG_V0, REG_P0, REG_R0, - // INS_OPTS_SCALABLE_B); // PRFB , , [, .D] - //theEmitter->emitIns_R_R_R(INS_sve_prfd, EA_SCALABLE, REG_V0, REG_P0, REG_R0, - // INS_OPTS_SCALABLE_B); // PRFD , , [, .D, LSL #3] - //theEmitter->emitIns_R_R_R(INS_sve_prfh, EA_SCALABLE, REG_V0, REG_P0, REG_R0, - // INS_OPTS_SCALABLE_B); // PRFH , , [, .D, LSL #1] - //theEmitter->emitIns_R_R_R(INS_sve_prfw, EA_SCALABLE, REG_V0, REG_P0, REG_R0, - // INS_OPTS_SCALABLE_B); // PRFW , , [, .D, LSL #2] - - //// IF_SVE_IB_3A - //theEmitter->emitIns_R_R_R(INS_sve_prfb, 
EA_SCALABLE, REG_V0, REG_P0, REG_R0, - // INS_OPTS_SCALABLE_B); // PRFB , , [, ] - //theEmitter->emitIns_R_R_R(INS_sve_prfd, EA_SCALABLE, REG_V0, REG_P0, REG_R0, - // INS_OPTS_SCALABLE_B); // PRFD , , [, , LSL #3] - //theEmitter->emitIns_R_R_R(INS_sve_prfh, EA_SCALABLE, REG_V0, REG_P0, REG_R0, - // INS_OPTS_SCALABLE_B); // PRFH , , [, , LSL #1] - //theEmitter->emitIns_R_R_R(INS_sve_prfw, EA_SCALABLE, REG_V0, REG_P0, REG_R0, - // INS_OPTS_SCALABLE_B); // PRFW , , [, , LSL #2] - - //// IF_SVE_HZ_2A_B - //theEmitter->emitIns_R_R_I(INS_sve_prfb, EA_SCALABLE, REG_V0, REG_P0, 5, - // INS_OPTS_SCALABLE_B); // PRFB , , [.D{, #}] - //theEmitter->emitIns_R_R_I(INS_sve_prfd, EA_SCALABLE, REG_V0, REG_P0, 5, - // INS_OPTS_SCALABLE_B); // PRFD , , [.D{, #}] - //theEmitter->emitIns_R_R_I(INS_sve_prfh, EA_SCALABLE, REG_V0, REG_P0, 5, - // INS_OPTS_SCALABLE_B); // PRFH , , [.D{, #}] - //theEmitter->emitIns_R_R_I(INS_sve_prfw, EA_SCALABLE, REG_V0, REG_P0, 5, - // INS_OPTS_SCALABLE_B); // PRFW , , [.D{, #}] - - //// IF_SVE_IA_2A - //theEmitter->emitIns_R_R_I(INS_sve_prfb, EA_SCALABLE, REG_P0, REG_R0, 5, - // INS_OPTS_SCALABLE_B); // PRFB , , [{, #, MUL VL}] - //theEmitter->emitIns_R_R_I(INS_sve_prfd, EA_SCALABLE, REG_P0, REG_R0, 5, - // INS_OPTS_SCALABLE_B); // PRFD , , [{, #, MUL VL}] - //theEmitter->emitIns_R_R_I(INS_sve_prfh, EA_SCALABLE, REG_P0, REG_R0, 5, - // INS_OPTS_SCALABLE_B); // PRFH , , [{, #, MUL VL}] - //theEmitter->emitIns_R_R_I(INS_sve_prfw, EA_SCALABLE, REG_P0, REG_R0, 5, - // INS_OPTS_SCALABLE_B); // PRFW , , [{, #, MUL VL}] + // IF_SVE_IC_3A_B + theEmitter->emitIns_R_R_R_I(INS_sve_ld1rh, EA_SCALABLE, REG_V0, REG_P2, REG_R3, 0, + INS_OPTS_SCALABLE_H); // LD1RH {.H }, /Z, [{, #}] + theEmitter->emitIns_R_R_R_I(INS_sve_ld1rsb, EA_SCALABLE, REG_V6, REG_P5, REG_R4, 0, + INS_OPTS_SCALABLE_H); // LD1RSB {.H }, /Z, [{, #}] + theEmitter->emitIns_R_R_R_I(INS_sve_ld1rh, EA_SCALABLE, REG_V5, REG_P4, REG_R3, 126, + INS_OPTS_SCALABLE_S); // LD1RH {.S }, /Z, [{, #}] + 
theEmitter->emitIns_R_R_R_I(INS_sve_ld1rsb, EA_SCALABLE, REG_V2, REG_P1, REG_R0, 63, + INS_OPTS_SCALABLE_S); // LD1RSB {.S }, /Z, [{, #}] + theEmitter->emitIns_R_R_R_I(INS_sve_ld1rh, EA_SCALABLE, REG_V3, REG_P2, REG_R1, 126, + INS_OPTS_SCALABLE_D); // LD1RH {.D }, /Z, [{, #}] + theEmitter->emitIns_R_R_R_I(INS_sve_ld1rsb, EA_SCALABLE, REG_V4, REG_P5, REG_R6, 63, + INS_OPTS_SCALABLE_D); // LD1RSB {.D }, /Z, [{, #}] + + // IF_SVE_IC_3A_C + theEmitter->emitIns_R_R_R_I(INS_sve_ld1rb, EA_SCALABLE, REG_V1, REG_P2, REG_R3, 0, + INS_OPTS_SCALABLE_B); // LD1RB {.B }, /Z, [{, #}] + theEmitter->emitIns_R_R_R_I(INS_sve_ld1rb, EA_SCALABLE, REG_V5, REG_P4, REG_R3, 63, + INS_OPTS_SCALABLE_H); // LD1RB {.H }, /Z, [{, #}] + theEmitter->emitIns_R_R_R_I(INS_sve_ld1rb, EA_SCALABLE, REG_V6, REG_P7, REG_R8, 0, + INS_OPTS_SCALABLE_S); // LD1RB {.S }, /Z, [{, #}] + theEmitter->emitIns_R_R_R_I(INS_sve_ld1rb, EA_SCALABLE, REG_V1, REG_P0, REG_R9, 63, + INS_OPTS_SCALABLE_B); // LD1RB {.D }, /Z, [{, #}] } #endif // defined(TARGET_ARM64) && defined(DEBUG) diff --git a/src/coreclr/jit/emitarm64.cpp b/src/coreclr/jit/emitarm64.cpp index 33755acb2a4800..381978c5e1df45 100644 --- a/src/coreclr/jit/emitarm64.cpp +++ b/src/coreclr/jit/emitarm64.cpp @@ -2268,24 +2268,21 @@ void emitter::emitInsSanityCheck(instrDesc* id) break; case IF_SVE_IC_3A_B: // ..........iiiiii ...gggnnnnnttttt -- SVE load and broadcast element - case IF_SVE_IC_3A_C: // ..........iiiiii ...gggnnnnnttttt -- SVE load and broadcast element - break; - - case IF_SVE_HY_3A: // .........h.mmmmm ...gggnnnnn.oooo -- SVE 32-bit gather prefetch (scalar plus 32-bit scaled - // offsets) - case IF_SVE_HY_3A_A: // .........h.mmmmm ...gggnnnnn.oooo -- SVE 32-bit gather prefetch (scalar plus 32-bit - // scaled offsets) - break; - - case IF_SVE_HY_3B: // ...........mmmmm ...gggnnnnn.oooo -- SVE 32-bit gather prefetch (scalar plus 32-bit scaled - // offsets) - case IF_SVE_IB_3A: // ...........mmmmm ...gggnnnnn.oooo -- SVE contiguous prefetch 
(scalar plus scalar) - break; - - case IF_SVE_HZ_2A_B: // ...........iiiii ...gggnnnnn.oooo -- SVE 32-bit gather prefetch (vector plus immediate) + elemsize = id->idOpSize(); + assert(insOptsScalableAtLeastHalf(id->idInsOpt())); + assert(isScalableVectorSize(elemsize)); + assert(isVectorRegister(id->idReg1())); + assert(isLowPredicateRegister(id->idReg2())); + assert(isGeneralRegister(id->idReg3())); break; - case IF_SVE_IA_2A: // ..........iiiiii ...gggnnnnn.oooo -- SVE contiguous prefetch (scalar plus immediate) + case IF_SVE_IC_3A_C: // ..........iiiiii ...gggnnnnnttttt -- SVE load and broadcast element + elemsize = id->idOpSize(); + assert(insOptsScalableStandard(id->idInsOpt())); + assert(isScalableVectorSize(elemsize)); + assert(isVectorRegister(id->idReg1())); + assert(isLowPredicateRegister(id->idReg2())); + assert(isGeneralRegister(id->idReg3())); break; default: @@ -12582,6 +12579,33 @@ void emitter::emitIns_R_R_R_I(instruction ins, fmt = IF_SVE_IC_3A_A; break; + case INS_sve_ld1rh: + assert(insOptsScalableAtLeastHalf(opt)); + assert(isVectorRegister(reg1)); + assert(isLowPredicateRegister(reg2)); + assert(isGeneralRegister(reg3)); + assert(isValidUimm6_MultipleOf2(imm)); + fmt = IF_SVE_IC_3A_B; + break; + + case INS_sve_ld1rsb: + assert(insOptsScalableAtLeastHalf(opt)); + assert(isVectorRegister(reg1)); + assert(isLowPredicateRegister(reg2)); + assert(isGeneralRegister(reg3)); + assert(isValidUimm6(imm)); + fmt = IF_SVE_IC_3A_B; + break; + + case INS_sve_ld1rb: + assert(insOptsScalableStandard(opt)); + assert(isVectorRegister(reg1)); + assert(isLowPredicateRegister(reg2)); + assert(isGeneralRegister(reg3)); + assert(isValidUimm6(imm)); + fmt = IF_SVE_IC_3A_C; + break; + default: unreached(); break; @@ -17424,6 +17448,7 @@ void emitter::emitIns_Call(EmitCallType callType, case INS_sve_ld1q: case INS_sve_ldnt1sw: case INS_sve_st1q: + case INS_sve_ld1rb: return 1; case INS_sve_ld2b: @@ -18918,12 +18943,45 @@ void emitter::emitIns_Call(EmitCallType callType, 
switch (size) { case EA_2BYTE: + switch (ins) + { + case INS_sve_ld1rh: + return code | (1 << 13); // set bit '13' + + case INS_sve_ld1rsb: + return code | (1 << 24) | (1 << 14); // set bit '24' and '14' + + default: + break; + } break; case EA_4BYTE: + switch (ins) + { + case INS_sve_ld1rh: + return code | (1 << 14); // set bit '14' + + case INS_sve_ld1rsb: + return code | (1 << 24) | (1 << 13); // set bit '24' and '13' + + default: + break; + } break; case EA_8BYTE: + switch (ins) + { + case INS_sve_ld1rh: + return code | (1 << 14) | (1 << 13); // set bits '14' and '13' + + case INS_sve_ld1rsb: + return code | (1 << 24); // set bit '24' + + default: + break; + } break; default: @@ -18932,19 +18990,20 @@ void emitter::emitIns_Call(EmitCallType callType, break; case IF_SVE_IC_3A_C: + assert(ins == INS_sve_ld1rb); switch (size) { case EA_1BYTE: - break; + return code; case EA_2BYTE: - break; + return code | (1 << 13); // set bit '13' case EA_4BYTE: - break; + return code | (1 << 14); // set bit '14' case EA_8BYTE: - break; + return code | (1 << 14) | (1 << 13); // set bits '14' and '13' default: break; @@ -22662,31 +22721,6 @@ BYTE* emitter::emitOutput_InstrSve(BYTE* dst, instrDesc* id) dst += emitOutput_Instr(dst, code); break; - case IF_SVE_HY_3A: // .........h.mmmmm ...gggnnnnn.oooo -- SVE 32-bit gather prefetch (scalar plus 32-bit scaled - // offsets) - case IF_SVE_HY_3A_A: // .........h.mmmmm ...gggnnnnn.oooo -- SVE 32-bit gather prefetch (scalar plus 32-bit - // scaled offsets) - code = emitInsCodeSve(ins, fmt); - dst += emitOutput_Instr(dst, code); - break; - - case IF_SVE_HY_3B: // ...........mmmmm ...gggnnnnn.oooo -- SVE 32-bit gather prefetch (scalar plus 32-bit scaled - // offsets) - case IF_SVE_IB_3A: // ...........mmmmm ...gggnnnnn.oooo -- SVE contiguous prefetch (scalar plus scalar) - code = emitInsCodeSve(ins, fmt); - dst += emitOutput_Instr(dst, code); - break; - - case IF_SVE_HZ_2A_B: // ...........iiiii ...gggnnnnn.oooo -- SVE 32-bit gather 
prefetch (vector plus immediate) - code = emitInsCodeSve(ins, fmt); - dst += emitOutput_Instr(dst, code); - break; - - case IF_SVE_IA_2A: // ..........iiiiii ...gggnnnnn.oooo -- SVE contiguous prefetch (scalar plus immediate) - code = emitInsCodeSve(ins, fmt); - dst += emitOutput_Instr(dst, code); - break; - default: assert(!"Unexpected format"); break; @@ -26136,41 +26170,6 @@ void emitter::emitDispInsHelp( emitDispSveImmIndex(id->idReg3(), id->idInsOpt(), imm); break; - // , , [, .S, ] - // , , [, .S, #1] - // , , [, .S, #2] - // , , [, .S, #3] - case IF_SVE_HY_3A: // .........h.mmmmm ...gggnnnnn.oooo -- SVE 32-bit gather prefetch (scalar plus 32-bit scaled - // offsets) - // , , [, .D, ] - // , , [, .D, #1] - // , , [, .D, #2] - // , , [, .D, #3] - case IF_SVE_HY_3A_A: // .........h.mmmmm ...gggnnnnn.oooo -- SVE 32-bit gather prefetch (scalar plus 32-bit - // scaled offsets) - break; - - // , , [, .D] - // , , [, .D, LSL #1] - // , , [, .D, LSL #2] - // , , [, .D, LSL #3] - case IF_SVE_HY_3B: // ...........mmmmm ...gggnnnnn.oooo -- SVE 32-bit gather prefetch (scalar plus 32-bit scaled - // offsets) - // , , [, ] - // , , [, , LSL #1] - // , , [, , LSL #2] - // , , [, , LSL #3] - case IF_SVE_IB_3A: // ...........mmmmm ...gggnnnnn.oooo -- SVE contiguous prefetch (scalar plus scalar) - break; - - // , , [.D{, #}] - case IF_SVE_HZ_2A_B: // ...........iiiii ...gggnnnnn.oooo -- SVE 32-bit gather prefetch (vector plus immediate) - break; - - // , , [{, #, MUL VL}] - case IF_SVE_IA_2A: // ..........iiiiii ...gggnnnnn.oooo -- SVE contiguous prefetch (scalar plus immediate) - break; - default: printf("unexpected format %s", emitIfName(id->idInsFmt())); assert(!"unexpectedFormat"); @@ -29747,163 +29746,6 @@ emitter::insExecutionCharacteristics emitter::getInsExecutionCharacteristics(ins result.insLatency = PERFSCORE_LATENCY_6C; break; - case IF_SVE_HY_3A: // .........h.mmmmm ...gggnnnnn.oooo -- SVE 32-bit gather prefetch (scalar plus 32-bit scaled - // offsets) - switch 
(ins) - { - case INS_sve_prfb: - result.insThroughput = PERFSCORE_THROUGHPUT_1C; // need to fix - result.insLatency = PERFSCORE_LATENCY_1C; // need to fix - break; - case INS_sve_prfh: - result.insThroughput = PERFSCORE_THROUGHPUT_1C; // need to fix - result.insLatency = PERFSCORE_LATENCY_1C; // need to fix - break; - case INS_sve_prfw: - result.insThroughput = PERFSCORE_THROUGHPUT_1C; // need to fix - result.insLatency = PERFSCORE_LATENCY_1C; // need to fix - break; - case INS_sve_prfd: - result.insThroughput = PERFSCORE_THROUGHPUT_1C; // need to fix - result.insLatency = PERFSCORE_LATENCY_1C; // need to fix - break; - default: - // all other instructions - perfScoreUnhandledInstruction(id, &result); - break; - } - - case IF_SVE_HY_3A_A: // .........h.mmmmm ...gggnnnnn.oooo -- SVE 32-bit gather prefetch (scalar plus 32-bit - // scaled offsets) - switch (ins) - { - case INS_sve_prfb: - result.insThroughput = PERFSCORE_THROUGHPUT_1C; // need to fix - result.insLatency = PERFSCORE_LATENCY_1C; // need to fix - break; - case INS_sve_prfh: - result.insThroughput = PERFSCORE_THROUGHPUT_1C; // need to fix - result.insLatency = PERFSCORE_LATENCY_1C; // need to fix - break; - case INS_sve_prfw: - result.insThroughput = PERFSCORE_THROUGHPUT_1C; // need to fix - result.insLatency = PERFSCORE_LATENCY_1C; // need to fix - break; - case INS_sve_prfd: - result.insThroughput = PERFSCORE_THROUGHPUT_1C; // need to fix - result.insLatency = PERFSCORE_LATENCY_1C; // need to fix - break; - default: - // all other instructions - perfScoreUnhandledInstruction(id, &result); - break; - } - break; - - case IF_SVE_HY_3B: // ...........mmmmm ...gggnnnnn.oooo -- SVE 32-bit gather prefetch (scalar plus 32-bit scaled - // offsets) - switch (ins) - { - case INS_sve_prfb: - result.insThroughput = PERFSCORE_THROUGHPUT_1C; // need to fix - result.insLatency = PERFSCORE_LATENCY_1C; // need to fix - break; - case INS_sve_prfh: - result.insThroughput = PERFSCORE_THROUGHPUT_1C; // need to fix - 
result.insLatency = PERFSCORE_LATENCY_1C; // need to fix - break; - case INS_sve_prfw: - result.insThroughput = PERFSCORE_THROUGHPUT_1C; // need to fix - result.insLatency = PERFSCORE_LATENCY_1C; // need to fix - break; - case INS_sve_prfd: - result.insThroughput = PERFSCORE_THROUGHPUT_1C; // need to fix - result.insLatency = PERFSCORE_LATENCY_1C; // need to fix - break; - default: - // all other instructions - perfScoreUnhandledInstruction(id, &result); - break; - } - - case IF_SVE_IB_3A: // ...........mmmmm ...gggnnnnn.oooo -- SVE contiguous prefetch (scalar plus scalar) - switch (ins) - { - case INS_sve_prfb: - result.insThroughput = PERFSCORE_THROUGHPUT_1C; // need to fix - result.insLatency = PERFSCORE_LATENCY_1C; // need to fix - break; - case INS_sve_prfh: - result.insThroughput = PERFSCORE_THROUGHPUT_1C; // need to fix - result.insLatency = PERFSCORE_LATENCY_1C; // need to fix - break; - case INS_sve_prfw: - result.insThroughput = PERFSCORE_THROUGHPUT_1C; // need to fix - result.insLatency = PERFSCORE_LATENCY_1C; // need to fix - break; - case INS_sve_prfd: - result.insThroughput = PERFSCORE_THROUGHPUT_1C; // need to fix - result.insLatency = PERFSCORE_LATENCY_1C; // need to fix - break; - default: - // all other instructions - perfScoreUnhandledInstruction(id, &result); - break; - } - break; - - case IF_SVE_HZ_2A_B: // ...........iiiii ...gggnnnnn.oooo -- SVE 32-bit gather prefetch (vector plus immediate) - switch (ins) - { - case INS_sve_prfb: - result.insThroughput = PERFSCORE_THROUGHPUT_1C; // need to fix - result.insLatency = PERFSCORE_LATENCY_1C; // need to fix - break; - case INS_sve_prfh: - result.insThroughput = PERFSCORE_THROUGHPUT_1C; // need to fix - result.insLatency = PERFSCORE_LATENCY_1C; // need to fix - break; - case INS_sve_prfw: - result.insThroughput = PERFSCORE_THROUGHPUT_1C; // need to fix - result.insLatency = PERFSCORE_LATENCY_1C; // need to fix - break; - case INS_sve_prfd: - result.insThroughput = PERFSCORE_THROUGHPUT_1C; // need 
to fix - result.insLatency = PERFSCORE_LATENCY_1C; // need to fix - break; - default: - // all other instructions - perfScoreUnhandledInstruction(id, &result); - break; - } - break; - - case IF_SVE_IA_2A: // ..........iiiiii ...gggnnnnn.oooo -- SVE contiguous prefetch (scalar plus immediate) - switch (ins) - { - case INS_sve_prfb: - result.insThroughput = PERFSCORE_THROUGHPUT_1C; // need to fix - result.insLatency = PERFSCORE_LATENCY_1C; // need to fix - break; - case INS_sve_prfh: - result.insThroughput = PERFSCORE_THROUGHPUT_1C; // need to fix - result.insLatency = PERFSCORE_LATENCY_1C; // need to fix - break; - case INS_sve_prfw: - result.insThroughput = PERFSCORE_THROUGHPUT_1C; // need to fix - result.insLatency = PERFSCORE_LATENCY_1C; // need to fix - break; - case INS_sve_prfd: - result.insThroughput = PERFSCORE_THROUGHPUT_1C; // need to fix - result.insLatency = PERFSCORE_LATENCY_1C; // need to fix - break; - default: - // all other instructions - perfScoreUnhandledInstruction(id, &result); - break; - } - break; - default: // all other instructions perfScoreUnhandledInstruction(id, &result); From bda779e74420ea0fe5ec66c9a7115582144d1ba1 Mon Sep 17 00:00:00 2001 From: TIHan Date: Tue, 13 Feb 2024 11:10:24 -0800 Subject: [PATCH 10/12] remove case --- src/coreclr/jit/emitarm64.cpp | 3 --- 1 file changed, 3 deletions(-) diff --git a/src/coreclr/jit/emitarm64.cpp b/src/coreclr/jit/emitarm64.cpp index 381978c5e1df45..dc5889b040a73d 100644 --- a/src/coreclr/jit/emitarm64.cpp +++ b/src/coreclr/jit/emitarm64.cpp @@ -11025,9 +11025,6 @@ void emitter::emitIns_R_R_R(instruction ins, fmt = IF_SVE_HP_3A; break; - case INS_sve_prfb: - break; - default: unreached(); break; From 147605e5f3d58ce4e293a1363e722fe792bcca57 Mon Sep 17 00:00:00 2001 From: TIHan Date: Thu, 15 Feb 2024 15:09:07 -0800 Subject: [PATCH 11/12] Fix build. 
Added emitInsSve_R_R_R_I --- src/coreclr/jit/emitarm64.cpp | 523 ++++++++++++++++++---------------- src/coreclr/jit/emitarm64.h | 12 +- 2 files changed, 286 insertions(+), 249 deletions(-) diff --git a/src/coreclr/jit/emitarm64.cpp b/src/coreclr/jit/emitarm64.cpp index 0714d666801afb..b3f47cc0f7cbb5 100644 --- a/src/coreclr/jit/emitarm64.cpp +++ b/src/coreclr/jit/emitarm64.cpp @@ -11581,101 +11581,6 @@ void emitter::emitIns_R_R_R_I(instruction ins, } break; - case INS_sve_cmpeq: - case INS_sve_cmpgt: - case INS_sve_cmpge: - case INS_sve_cmpne: - case INS_sve_cmple: - case INS_sve_cmplt: - assert(insOptsScalableStandard(opt)); - assert(isPredicateRegister(reg1)); // DDDD - assert(isLowPredicateRegister(reg2)); // ggg - assert(isVectorRegister(reg3)); // nnnnn - assert(isValidSimm5(imm)); // iiiii - fmt = IF_SVE_CY_3A; - break; - - case INS_sve_cmphi: - case INS_sve_cmphs: - case INS_sve_cmplo: - case INS_sve_cmpls: - assert(insOptsScalableStandard(opt)); - assert(isPredicateRegister(reg1)); // DDDD - assert(isLowPredicateRegister(reg2)); // ggg - assert(isVectorRegister(reg3)); // nnnnn - assert(isValidUimm7(imm)); // iiiii - fmt = IF_SVE_CY_3B; - break; - - case INS_sve_sdot: - case INS_sve_udot: - assert(isVectorRegister(reg1)); // ddddd - assert(isVectorRegister(reg2)); // nnnnn - assert(isLowVectorRegister(reg3)); // mmmm - - if (opt == INS_OPTS_SCALABLE_B) - { - assert((REG_V0 <= reg3) && (reg3 <= REG_V7)); // mmm - assert(isValidUimm2(imm)); // ii - fmt = IF_SVE_EY_3A; - } - else if (opt == INS_OPTS_SCALABLE_H) - { - assert((REG_V0 <= reg3) && (reg3 <= REG_V7)); // mmm - assert(isValidUimm2(imm)); // ii - fmt = IF_SVE_EG_3A; - } - else - { - assert(insOptsNone(opt)); - assert(isValidImm1(imm)); // i - opt = INS_OPTS_SCALABLE_H; - fmt = IF_SVE_EY_3B; - } - break; - - case INS_sve_usdot: - case INS_sve_sudot: - assert(opt == INS_OPTS_SCALABLE_B); - assert(isVectorRegister(reg1)); // ddddd - assert(isVectorRegister(reg2)); // nnnnn - 
assert(isVectorRegister(reg3)); // mmm - assert((REG_V0 <= reg3) && (reg3 <= REG_V7)); - assert(isValidUimm2(imm)); // ii - fmt = IF_SVE_EZ_3A; - break; - - case INS_sve_mul: - assert(insOptsScalableAtLeastHalf(opt)); - assert(isVectorRegister(reg1)); // ddddd - assert(isVectorRegister(reg2)); // nnnnn - assert(isLowVectorRegister(reg3)); // mmmm - - switch (opt) - { - case INS_OPTS_SCALABLE_H: - assert(isValidUimm3(imm)); // iii - assert((REG_V0 <= reg3) && (reg3 <= REG_V7)); // mmm - fmt = IF_SVE_FD_3A; - break; - - case INS_OPTS_SCALABLE_S: - assert(isValidUimm2(imm)); // ii - assert((REG_V0 <= reg3) && (reg3 <= REG_V7)); // mmm - fmt = IF_SVE_FD_3B; - break; - - case INS_OPTS_SCALABLE_D: - assert(isValidImm1(imm)); // i - fmt = IF_SVE_FD_3C; - break; - - default: - unreached(); - break; - } - break; - case INS_fmul: // by element, imm[0..3] selects the element of reg3 case INS_fmla: case INS_fmls: @@ -11915,6 +11820,281 @@ void emitter::emitIns_R_R_R_I(instruction ins, fmt = IF_DV_3AI; break; + default: + // fallback to emit SVE instructions. 
+ return emitInsSve_R_R_R_I(ins, attr, reg1, reg2, reg3, imm, opt, attrReg2); + + } // end switch (ins) + + if (isLdSt) + { + assert(!isAddSub); + assert(isGeneralRegisterOrSP(reg3)); + assert(insOptsNone(opt) || insOptsIndexed(opt)); + + if (isSIMD) + { + assert(isValidVectorLSPDatasize(size)); + assert(isVectorRegister(reg1)); + assert(isVectorRegister(reg2)); + assert((scale >= 2) && (scale <= 4)); + } + else + { + assert(isValidGeneralDatasize(size)); + assert(isGeneralRegisterOrZR(reg1)); + assert(isGeneralRegisterOrZR(reg2)); + assert((scale == 2) || (scale == 3)); + } + + // Load/Store Pair reserved encodings: + if (emitInsIsLoad(ins)) + { + assert(reg1 != reg2); + } + if (insOptsIndexed(opt)) + { + assert(reg1 != reg3); + assert(reg2 != reg3); + } + + reg3 = encodingSPtoZR(reg3); + + ssize_t mask = (1 << scale) - 1; // the mask of low bits that must be zero to encode the immediate + if (imm == 0) + { + assert(insOptsNone(opt)); // PRE/POST Index doesn't make sense with an immediate of zero + + fmt = IF_LS_3B; + } + else + { + if ((imm & mask) == 0) + { + imm >>= scale; // The immediate is scaled by the size of the ld/st + + if ((imm >= -64) && (imm <= 63)) + { + fmt = IF_LS_3C; + } + } +#ifdef DEBUG + if (fmt != IF_LS_3C) + { + assert(!"Instruction cannot be encoded: IF_LS_3C"); + } +#endif + } + } + else if (isAddSub) + { + bool reg2IsSP = (reg2 == REG_SP); + assert(!isLdSt); + assert(isValidGeneralDatasize(size)); + assert(isGeneralRegister(reg3)); + + if (setFlags || insOptsAluShift(opt)) // Can't encode SP in reg1 with setFlags or AluShift option + { + assert(isGeneralRegisterOrZR(reg1)); + } + else + { + assert(isGeneralRegisterOrSP(reg1)); + reg1 = encodingSPtoZR(reg1); + } + + if (insOptsAluShift(opt)) // Can't encode SP in reg2 with AluShift option + { + assert(isGeneralRegister(reg2)); + } + else + { + assert(isGeneralRegisterOrSP(reg2)); + reg2 = encodingSPtoZR(reg2); + } + + if (insOptsAnyExtend(opt)) + { + assert((imm >= 0) && (imm <= 4)); + + 
fmt = IF_DR_3C; + } + else if (insOptsAluShift(opt)) + { + // imm should be non-zero and in [1..63] + assert(isValidImmShift(imm, size) && (imm != 0)); + fmt = IF_DR_3B; + } + else if (imm == 0) + { + assert(insOptsNone(opt)); + + if (reg2IsSP) + { + // To encode the SP register as reg2 we must use the IF_DR_3C encoding + // and also specify a LSL of zero (imm == 0) + opt = INS_OPTS_LSL; + fmt = IF_DR_3C; + } + else + { + fmt = IF_DR_3A; + } + } + else + { + assert(!"Instruction cannot be encoded: Add/Sub IF_DR_3A"); + } + } + + assert(fmt != IF_NONE); + + instrDesc* id = emitNewInstrCns(attr, imm); + + id->idIns(ins); + id->idInsFmt(fmt); + id->idInsOpt(opt); + + id->idReg1(reg1); + id->idReg2(reg2); + id->idReg3(reg3); + + // Record the attribute for the second register in the pair + id->idGCrefReg2(GCT_NONE); + if (attrReg2 != EA_UNKNOWN) + { + // Record the attribute for the second register in the pair + assert((fmt == IF_LS_3B) || (fmt == IF_LS_3C)); + if (EA_IS_GCREF(attrReg2)) + { + id->idGCrefReg2(GCT_GCREF); + } + else if (EA_IS_BYREF(attrReg2)) + { + id->idGCrefReg2(GCT_BYREF); + } + } + + dispIns(id); + appendToCurIG(id); +} + +/***************************************************************************** + * + * Add a SVE instruction referencing three registers and a constant. 
+ */ + +void emitter::emitInsSve_R_R_R_I(instruction ins, + emitAttr attr, + regNumber reg1, + regNumber reg2, + regNumber reg3, + ssize_t imm, + insOpts opt /* = INS_OPTS_NONE */, + emitAttr attrReg2 /* = EA_UNKNOWN */) +{ + emitAttr size = EA_SIZE(attr); + emitAttr elemsize = EA_UNKNOWN; + insFormat fmt = IF_NONE; + + /* Figure out the encoding format of the instruction */ + switch (ins) + { + case INS_sve_cmpeq: + case INS_sve_cmpgt: + case INS_sve_cmpge: + case INS_sve_cmpne: + case INS_sve_cmple: + case INS_sve_cmplt: + assert(insOptsScalableStandard(opt)); + assert(isPredicateRegister(reg1)); // DDDD + assert(isLowPredicateRegister(reg2)); // ggg + assert(isVectorRegister(reg3)); // nnnnn + assert(isValidSimm5(imm)); // iiiii + fmt = IF_SVE_CY_3A; + break; + + case INS_sve_cmphi: + case INS_sve_cmphs: + case INS_sve_cmplo: + case INS_sve_cmpls: + assert(insOptsScalableStandard(opt)); + assert(isPredicateRegister(reg1)); // DDDD + assert(isLowPredicateRegister(reg2)); // ggg + assert(isVectorRegister(reg3)); // nnnnn + assert(isValidUimm7(imm)); // iiiii + fmt = IF_SVE_CY_3B; + break; + + case INS_sve_sdot: + case INS_sve_udot: + assert(isVectorRegister(reg1)); // ddddd + assert(isVectorRegister(reg2)); // nnnnn + assert(isLowVectorRegister(reg3)); // mmmm + + if (opt == INS_OPTS_SCALABLE_B) + { + assert((REG_V0 <= reg3) && (reg3 <= REG_V7)); // mmm + assert(isValidUimm2(imm)); // ii + fmt = IF_SVE_EY_3A; + } + else if (opt == INS_OPTS_SCALABLE_H) + { + assert((REG_V0 <= reg3) && (reg3 <= REG_V7)); // mmm + assert(isValidUimm2(imm)); // ii + fmt = IF_SVE_EG_3A; + } + else + { + assert(insOptsNone(opt)); + assert(isValidImm1(imm)); // i + opt = INS_OPTS_SCALABLE_H; + fmt = IF_SVE_EY_3B; + } + break; + + case INS_sve_usdot: + case INS_sve_sudot: + assert(opt == INS_OPTS_SCALABLE_B); + assert(isVectorRegister(reg1)); // ddddd + assert(isVectorRegister(reg2)); // nnnnn + assert(isVectorRegister(reg3)); // mmm + assert((REG_V0 <= reg3) && (reg3 <= REG_V7)); + 
assert(isValidUimm2(imm)); // ii + fmt = IF_SVE_EZ_3A; + break; + + case INS_sve_mul: + assert(insOptsScalableAtLeastHalf(opt)); + assert(isVectorRegister(reg1)); // ddddd + assert(isVectorRegister(reg2)); // nnnnn + assert(isLowVectorRegister(reg3)); // mmmm + + switch (opt) + { + case INS_OPTS_SCALABLE_H: + assert(isValidUimm3(imm)); // iii + assert((REG_V0 <= reg3) && (reg3 <= REG_V7)); // mmm + fmt = IF_SVE_FD_3A; + break; + + case INS_OPTS_SCALABLE_S: + assert(isValidUimm2(imm)); // ii + assert((REG_V0 <= reg3) && (reg3 <= REG_V7)); // mmm + fmt = IF_SVE_FD_3B; + break; + + case INS_OPTS_SCALABLE_D: + assert(isValidImm1(imm)); // i + fmt = IF_SVE_FD_3C; + break; + + default: + unreached(); + break; + } + break; + case INS_sve_cdot: assert(insOptsScalableWords(opt)); assert(isVectorRegister(reg1)); // ddddd @@ -12964,8 +13144,8 @@ void emitter::emitIns_R_R_R_I(instruction ins, assert(isGeneralRegister(reg3)); assert(isValidUimm6(imm)); fmt = IF_SVE_IC_3A_C; - break - + break; + case INS_sve_fmlalb: case INS_sve_fmlalt: case INS_sve_fmlslb: @@ -12988,128 +13168,6 @@ void emitter::emitIns_R_R_R_I(instruction ins, break; } // end switch (ins) - - if (isLdSt) - { - assert(!isAddSub); - assert(isGeneralRegisterOrSP(reg3)); - assert(insOptsNone(opt) || insOptsIndexed(opt)); - - if (isSIMD) - { - assert(isValidVectorLSPDatasize(size)); - assert(isVectorRegister(reg1)); - assert(isVectorRegister(reg2)); - assert((scale >= 2) && (scale <= 4)); - } - else - { - assert(isValidGeneralDatasize(size)); - assert(isGeneralRegisterOrZR(reg1)); - assert(isGeneralRegisterOrZR(reg2)); - assert((scale == 2) || (scale == 3)); - } - - // Load/Store Pair reserved encodings: - if (emitInsIsLoad(ins)) - { - assert(reg1 != reg2); - } - if (insOptsIndexed(opt)) - { - assert(reg1 != reg3); - assert(reg2 != reg3); - } - - reg3 = encodingSPtoZR(reg3); - - ssize_t mask = (1 << scale) - 1; // the mask of low bits that must be zero to encode the immediate - if (imm == 0) - { - 
assert(insOptsNone(opt)); // PRE/POST Index doesn't make sense with an immediate of zero - - fmt = IF_LS_3B; - } - else - { - if ((imm & mask) == 0) - { - imm >>= scale; // The immediate is scaled by the size of the ld/st - - if ((imm >= -64) && (imm <= 63)) - { - fmt = IF_LS_3C; - } - } -#ifdef DEBUG - if (fmt != IF_LS_3C) - { - assert(!"Instruction cannot be encoded: IF_LS_3C"); - } -#endif - } - } - else if (isAddSub) - { - bool reg2IsSP = (reg2 == REG_SP); - assert(!isLdSt); - assert(isValidGeneralDatasize(size)); - assert(isGeneralRegister(reg3)); - - if (setFlags || insOptsAluShift(opt)) // Can't encode SP in reg1 with setFlags or AluShift option - { - assert(isGeneralRegisterOrZR(reg1)); - } - else - { - assert(isGeneralRegisterOrSP(reg1)); - reg1 = encodingSPtoZR(reg1); - } - - if (insOptsAluShift(opt)) // Can't encode SP in reg2 with AluShift option - { - assert(isGeneralRegister(reg2)); - } - else - { - assert(isGeneralRegisterOrSP(reg2)); - reg2 = encodingSPtoZR(reg2); - } - - if (insOptsAnyExtend(opt)) - { - assert((imm >= 0) && (imm <= 4)); - - fmt = IF_DR_3C; - } - else if (insOptsAluShift(opt)) - { - // imm should be non-zero and in [1..63] - assert(isValidImmShift(imm, size) && (imm != 0)); - fmt = IF_DR_3B; - } - else if (imm == 0) - { - assert(insOptsNone(opt)); - - if (reg2IsSP) - { - // To encode the SP register as reg2 we must use the IF_DR_3C encoding - // and also specify a LSL of zero (imm == 0) - opt = INS_OPTS_LSL; - fmt = IF_DR_3C; - } - else - { - fmt = IF_DR_3A; - } - } - else - { - assert(!"Instruction cannot be encoded: Add/Sub IF_DR_3A"); - } - } - assert(fmt != IF_NONE); instrDesc* id = emitNewInstrCns(attr, imm); @@ -13122,22 +13180,6 @@ void emitter::emitIns_R_R_R_I(instruction ins, id->idReg2(reg2); id->idReg3(reg3); - // Record the attribute for the second register in the pair - id->idGCrefReg2(GCT_NONE); - if (attrReg2 != EA_UNKNOWN) - { - // Record the attribute for the second register in the pair - assert((fmt == IF_LS_3B) || 
(fmt == IF_LS_3C)); - if (EA_IS_GCREF(attrReg2)) - { - id->idGCrefReg2(GCT_GCREF); - } - else if (EA_IS_BYREF(attrReg2)) - { - id->idGCrefReg2(GCT_BYREF); - } - } - dispIns(id); appendToCurIG(id); } @@ -19732,17 +19774,6 @@ void emitter::emitIns_Call(EmitCallType callType, return (code_t)imm << 16; } -/***************************************************************************** - * - * Returns the encoding for the unsigned immediate value as 5-bits at bit locations '20-16'. - */ - -/*static*/ emitter::code_t emitter::insEncodeUimm5_20_to_16(ssize_t imm) -{ - assert(isValidUimm5(imm)); - return (code_t)imm << 16; -} - /***************************************************************************** * * Returns the encoding for the immediate value as 2-bits at bit locations '9-8'. diff --git a/src/coreclr/jit/emitarm64.h b/src/coreclr/jit/emitarm64.h index 6e49caa3378276..4ae51c80fb2096 100644 --- a/src/coreclr/jit/emitarm64.h +++ b/src/coreclr/jit/emitarm64.h @@ -630,9 +630,6 @@ static code_t insEncodeUimm6_MultipleOf8_21_to_16(ssize_t imm); // Returns the encoding for the immediate value as 5-bits at bit locations '20-16'. static code_t insEncodeSimm5_20_to_16(ssize_t imm); -// Returns the encoding for the unsigned immediate value as 5-bits at bit locations '20-16'. -static code_t insEncodeUimm5_20_to_16(ssize_t imm); - // Returns the encoding for the immediate value as 2-bits at bit locations '9-8'. 
static code_t insEncodeUimm2_9_to_8(ssize_t imm); @@ -1438,6 +1435,15 @@ void emitIns_R_R_R_I(instruction ins, insOpts opt = INS_OPTS_NONE, emitAttr attrReg2 = EA_UNKNOWN); +void emitInsSve_R_R_R_I(instruction ins, + emitAttr attr, + regNumber reg1, + regNumber reg2, + regNumber reg3, + ssize_t imm, + insOpts opt = INS_OPTS_NONE, + emitAttr attrReg2 = EA_UNKNOWN); + void emitIns_R_R_R_I_I(instruction ins, emitAttr attr, regNumber reg1, From d04f8c6d473427eb3add770325c6985d847d5ca9 Mon Sep 17 00:00:00 2001 From: TIHan Date: Thu, 15 Feb 2024 16:12:50 -0800 Subject: [PATCH 12/12] Formatting --- src/coreclr/jit/emitarm64.cpp | 15 +++++++++------ src/coreclr/jit/emitarm64.h | 3 ++- 2 files changed, 11 insertions(+), 7 deletions(-) diff --git a/src/coreclr/jit/emitarm64.cpp b/src/coreclr/jit/emitarm64.cpp index b3f47cc0f7cbb5..dea80c05e6b8be 100644 --- a/src/coreclr/jit/emitarm64.cpp +++ b/src/coreclr/jit/emitarm64.cpp @@ -2374,7 +2374,7 @@ void emitter::emitInsSanityCheck(instrDesc* id) assert(isVectorRegister(id->idReg3())); break; - case IF_SVE_IV_3A: // ...........iiiii ...gggnnnnnttttt -- SVE 64-bit gather load (vector plus immediate) + case IF_SVE_IV_3A: // ...........iiiii ...gggnnnnnttttt -- SVE 64-bit gather load (vector plus immediate) elemsize = id->idOpSize(); assert(id->idInsOpt() == INS_OPTS_SCALABLE_D); assert(isScalableVectorSize(elemsize)); @@ -2402,7 +2402,7 @@ void emitter::emitInsSanityCheck(instrDesc* id) assert(isValidUimm5_MultipleOf8(emitGetInsSC(id))); break; - case IF_SVE_IC_3A: // ..........iiiiii ...gggnnnnnttttt -- SVE load and broadcast element + case IF_SVE_IC_3A: // ..........iiiiii ...gggnnnnnttttt -- SVE load and broadcast element elemsize = id->idOpSize(); assert(id->idInsOpt() == INS_OPTS_SCALABLE_D); assert(isScalableVectorSize(elemsize)); @@ -19484,7 +19484,10 @@ void emitter::emitIns_Call(EmitCallType callType, * for the 'dtypeh' and 'dtypel' fields. 
*/ -/*static*/ emitter::code_t emitter::insEncodeSveElemsize_dtypeh_dtypel(instruction ins, insFormat fmt, emitAttr size, code_t code) +/*static*/ emitter::code_t emitter::insEncodeSveElemsize_dtypeh_dtypel(instruction ins, + insFormat fmt, + emitAttr size, + code_t code) { switch (fmt) { @@ -23392,7 +23395,7 @@ BYTE* emitter::emitOutput_InstrSve(BYTE* dst, instrDesc* id) dst += emitOutput_Instr(dst, code); break; - case IF_SVE_IC_3A: // ..........iiiiii ...gggnnnnnttttt -- SVE load and broadcast element + case IF_SVE_IC_3A: // ..........iiiiii ...gggnnnnnttttt -- SVE load and broadcast element imm = emitGetInsSC(id); code = emitInsCodeSve(ins, fmt); code |= insEncodeReg_V_4_to_0(id->idReg1()); // ttttt @@ -26985,12 +26988,12 @@ void emitter::emitDispInsHelp( // {.D }, /Z, [.D{, #}] case IF_SVE_HX_3A_E: // ...........iiiii ...gggnnnnnttttt -- SVE 32-bit gather load (vector plus immediate) // {.D }, /Z, [.D{, #}] - case IF_SVE_IV_3A: // ...........iiiii ...gggnnnnnttttt -- SVE 64-bit gather load (vector plus immediate) + case IF_SVE_IV_3A: // ...........iiiii ...gggnnnnnttttt -- SVE 64-bit gather load (vector plus immediate) // {.S }, , [.S{, #}] // {.D }, , [.D{, #}] case IF_SVE_JI_3A_A: // ...........iiiii ...gggnnnnnttttt -- SVE 32-bit scatter store (vector plus immediate) // {.D }, , [.D{, #}] - case IF_SVE_JL_3A: // ...........iiiii ...gggnnnnnttttt -- SVE 64-bit scatter store (vector plus immediate) + case IF_SVE_JL_3A: // ...........iiiii ...gggnnnnnttttt -- SVE 64-bit scatter store (vector plus immediate) // {.D }, /Z, [{, #}] case IF_SVE_IC_3A: // ..........iiiiii ...gggnnnnnttttt -- SVE load and broadcast element // {.D }, /Z, [{, #}] diff --git a/src/coreclr/jit/emitarm64.h b/src/coreclr/jit/emitarm64.h index 4ae51c80fb2096..cd4b3bc9738198 100644 --- a/src/coreclr/jit/emitarm64.h +++ b/src/coreclr/jit/emitarm64.h @@ -534,7 +534,8 @@ static code_t insEncodeSveElemsize_sz_21(emitAttr size); // This specifically encodes the field 'tszh:tszl' at bit locations 
'22:20-19'. static code_t insEncodeSveElemsize_tszh_22_tszl_20_to_19(emitAttr size); -// Returns the encoding to select the 4/8 byte elemsize for an Arm64 Sve vector instruction at bit location '30' or '21'. +// Returns the encoding to select the 4/8 byte elemsize for an Arm64 Sve vector instruction at bit location '30' or +// '21'. // This only works on select formats. static code_t insEncodeSveElemsize_30_or_21(insFormat fmt, emitAttr size);