diff --git a/src/coreclr/src/jit/emit.cpp b/src/coreclr/src/jit/emit.cpp index 65e688621afe38..0509ff28c2d98a 100644 --- a/src/coreclr/src/jit/emit.cpp +++ b/src/coreclr/src/jit/emit.cpp @@ -1146,18 +1146,17 @@ float emitter::insEvaluateExecutionCost(instrDesc* id) // if we return these are updated with default values // // Notes: -// When validating that the PerfScore handles every instruction. -// the #if 0 block is changed into a #ifdef DEBUG -// We will print the instruction and instruction group +// We print the instruction and instruction group // and instead of returning we will assert // -// Otherwise we will return default latencies of 1 cycle. +// This method asserts with a debug/checked build +// and returns default latencies of 1 cycle otherwise. // void emitter::perfScoreUnhandledInstruction(instrDesc* id, insExecutionCharacteristics* pResult) { -// Change this to #ifdef DEBUG to assert on any unhandled instructions -#if 0 - printf("PerfScore: unhandled instruction: %s, format %s", codeGen->genInsName(id->idIns()), emitIfName(id->idInsFmt())); +#ifdef DEBUG + printf("PerfScore: unhandled instruction: %s, format %s", codeGen->genInsName(id->idIns()), + emitIfName(id->idInsFmt())); assert(!"PerfScore: unhandled instruction"); #endif pResult->insThroughput = PERFSCORE_THROUGHPUT_1C; diff --git a/src/coreclr/src/jit/emitarm64.cpp b/src/coreclr/src/jit/emitarm64.cpp index ec49e099bd99d3..35bb5dc0ccea69 100644 --- a/src/coreclr/src/jit/emitarm64.cpp +++ b/src/coreclr/src/jit/emitarm64.cpp @@ -12850,7 +12850,19 @@ emitter::insExecutionCharacteristics emitter::getInsExecutionCharacteristics(ins result.insLatency = PERFSCORE_LATENCY_3C; break; + case INS_fcmeq: + case INS_fcmge: + case INS_fcmgt: + case INS_fcmle: + case INS_fcmlt: + result.insThroughput = PERFSCORE_THROUGHPUT_2X; + result.insLatency = PERFSCORE_LATENCY_2C; + break; + case INS_fcvtl: + case INS_fcvtl2: + case INS_fcvtn: + case INS_fcvtn2: result.insThroughput = PERFSCORE_THROUGHPUT_1C; result.insLatency = PERFSCORE_LATENCY_4C; break; @@ -12880,11 +12892,15 @@ emitter::insExecutionCharacteristics emitter::getInsExecutionCharacteristics(ins case IF_DV_2Q: // faddp, fmaxnmp, fmaxp, fminnmp, fminp (scalar) case IF_DV_2R: // fmaxnmv, fmaxv, fminnmv, fminv - case IF_DV_2S: // addp (scalar) result.insThroughput = PERFSCORE_THROUGHPUT_2X; result.insLatency = PERFSCORE_LATENCY_4C; break; + case IF_DV_2S: // addp (scalar) + result.insThroughput = PERFSCORE_THROUGHPUT_2X; + result.insLatency = PERFSCORE_LATENCY_3C; + break; + case IF_DV_3B: // fadd, fsub, fdiv, fmul, fmulx, fmla, fmls, fmin, fminnm, fmax, fmaxnm, fabd, fcmXX // faddp, fmaxnmp, fmaxp, fminnmp, fminp, addp (vector) switch (ins) @@ -12898,6 +12914,8 @@ emitter::insExecutionCharacteristics emitter::getInsExecutionCharacteristics(ins case INS_fsub: case INS_fmul: case INS_fmulx: + case INS_fmla: + case INS_fmls: result.insThroughput = PERFSCORE_THROUGHPUT_2X; result.insLatency = PERFSCORE_LATENCY_4C; break; @@ -12907,7 +12925,6 @@ emitter::insExecutionCharacteristics emitter::getInsExecutionCharacteristics(ins case INS_fmaxp: case INS_fminnmp: case INS_fminp: - case INS_addp: if (id->idOpSize() == EA_16BYTE) { // Q-form @@ -12975,15 +12992,16 @@ emitter::insExecutionCharacteristics emitter::getInsExecutionCharacteristics(ins result.insLatency = PERFSCORE_LATENCY_4C; break; - case IF_DV_3D: // fadd, fsub, fdiv, fmul, fmulx, fmla, fmls, fmin, fminnm, fmax, fmaxnm, fabd, fcmXX (scalar) + case IF_DV_3D: // fadd, fsub, fdiv, fmul, fmulx, fmin, fminnm, fmax, fmaxnm, fabd, fcmXX (scalar) switch (ins) { case INS_fadd: case INS_fsub: - case INS_fmin: - case INS_fminnm: + case INS_fabd: case INS_fmax: case INS_fmaxnm: + case INS_fmin: + case INS_fminnm: case INS_fmul: case INS_fmulx: case INS_fnmul: @@ -13132,6 +13150,7 @@ emitter::insExecutionCharacteristics emitter::getInsExecutionCharacteristics(ins result.insLatency = PERFSCORE_LATENCY_2C; break; + case INS_addp: case INS_cmtst: case INS_pmul: case INS_sabd: @@ -13160,7 +13179,7 @@ emitter::insExecutionCharacteristics emitter::getInsExecutionCharacteristics(ins } break; - case IF_DV_3DI: // mul, mla, mls (scalar by elem) + case IF_DV_3DI: // fmul, fmulx, fmla, fmls (scalar by elem) result.insThroughput = PERFSCORE_THROUGHPUT_1C; result.insLatency = PERFSCORE_LATENCY_4C; break; @@ -13320,6 +13339,33 @@ emitter::insExecutionCharacteristics emitter::getInsExecutionCharacteristics(ins result.insLatency = PERFSCORE_LATENCY_1C; break; + case IF_DV_2T: // addv, saddlv, smaxv, sminv, uaddlv, umaxv, uminv + switch (ins) + { + case INS_addv: + case INS_saddlv: + case INS_uaddlv: + result.insThroughput = PERFSCORE_THROUGHPUT_1C; + result.insLatency = PERFSCORE_LATENCY_3C; + break; + + case INS_smaxv: + case INS_sminv: + case INS_umaxv: + case INS_uminv: + case INS_sha256h2: + case INS_sha256su1: + result.insThroughput = PERFSCORE_THROUGHPUT_1C; + result.insLatency = PERFSCORE_LATENCY_4C; + break; + + default: + // all other instructions + perfScoreUnhandledInstruction(id, &result); + break; + } + break; + default: // all other instructions perfScoreUnhandledInstruction(id, &result);