Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 6 additions & 7 deletions src/coreclr/src/jit/emit.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1146,18 +1146,17 @@ float emitter::insEvaluateExecutionCost(instrDesc* id)
// if we return these are updated with default values
//
// Notes:
// When validating that the PerfScore handles every instruction.
// the #if 0 block is changed into a #ifdef DEBUG
// We will print the instruction and instruction group
// We print the instruction and instruction group
// and instead of returning we will assert
//
// Otherwise we will return default latencies of 1 cycle.
// This method asserts in a debug/checked build
// and returns default latencies of 1 cycle otherwise.
//
void emitter::perfScoreUnhandledInstruction(instrDesc* id, insExecutionCharacteristics* pResult)
{
// Change this to #ifdef DEBUG to assert on any unhandled instructions
#if 0
printf("PerfScore: unhandled instruction: %s, format %s", codeGen->genInsName(id->idIns()), emitIfName(id->idInsFmt()));
#ifdef DEBUG
printf("PerfScore: unhandled instruction: %s, format %s", codeGen->genInsName(id->idIns()),
emitIfName(id->idInsFmt()));
assert(!"PerfScore: unhandled instruction");
#endif
pResult->insThroughput = PERFSCORE_THROUGHPUT_1C;
Expand Down
58 changes: 52 additions & 6 deletions src/coreclr/src/jit/emitarm64.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -12850,7 +12850,19 @@ emitter::insExecutionCharacteristics emitter::getInsExecutionCharacteristics(ins
result.insLatency = PERFSCORE_LATENCY_3C;
break;

case INS_fcmeq:
case INS_fcmge:
case INS_fcmgt:
case INS_fcmle:
case INS_fcmlt:
result.insThroughput = PERFSCORE_THROUGHPUT_2X;
result.insLatency = PERFSCORE_LATENCY_2C;
break;

case INS_fcvtl:
case INS_fcvtl2:
case INS_fcvtn:
case INS_fcvtn2:
result.insThroughput = PERFSCORE_THROUGHPUT_1C;
result.insLatency = PERFSCORE_LATENCY_4C;
break;
Expand Down Expand Up @@ -12880,11 +12892,15 @@ emitter::insExecutionCharacteristics emitter::getInsExecutionCharacteristics(ins

case IF_DV_2Q: // faddp, fmaxnmp, fmaxp, fminnmp, fminp (scalar)
case IF_DV_2R: // fmaxnmv, fmaxv, fminnmv, fminv
case IF_DV_2S: // addp (scalar)
result.insThroughput = PERFSCORE_THROUGHPUT_2X;
result.insLatency = PERFSCORE_LATENCY_4C;
break;

case IF_DV_2S: // addp (scalar)
result.insThroughput = PERFSCORE_THROUGHPUT_2X;
result.insLatency = PERFSCORE_LATENCY_3C;
break;

case IF_DV_3B: // fadd, fsub, fdiv, fmul, fmulx, fmla, fmls, fmin, fminnm, fmax, fmaxnm, fabd, fcmXX
// faddp, fmaxnmp, fmaxp, fminnmp, fminp, addp (vector)
switch (ins)
Expand All @@ -12898,6 +12914,8 @@ emitter::insExecutionCharacteristics emitter::getInsExecutionCharacteristics(ins
case INS_fsub:
case INS_fmul:
case INS_fmulx:
case INS_fmla:
case INS_fmls:
result.insThroughput = PERFSCORE_THROUGHPUT_2X;
result.insLatency = PERFSCORE_LATENCY_4C;
break;
Expand All @@ -12907,7 +12925,6 @@ emitter::insExecutionCharacteristics emitter::getInsExecutionCharacteristics(ins
case INS_fmaxp:
case INS_fminnmp:
case INS_fminp:
case INS_addp:
if (id->idOpSize() == EA_16BYTE)
{
// Q-form
Expand Down Expand Up @@ -12975,15 +12992,16 @@ emitter::insExecutionCharacteristics emitter::getInsExecutionCharacteristics(ins
result.insLatency = PERFSCORE_LATENCY_4C;
break;

case IF_DV_3D: // fadd, fsub, fdiv, fmul, fmulx, fmla, fmls, fmin, fminnm, fmax, fmaxnm, fabd, fcmXX (scalar)
case IF_DV_3D: // fadd, fsub, fdiv, fmul, fmulx, fmin, fminnm, fmax, fmaxnm, fabd, fcmXX (scalar)
switch (ins)
{
case INS_fadd:
case INS_fsub:
case INS_fmin:
case INS_fminnm:
case INS_fabd:
case INS_fmax:
case INS_fmaxnm:
case INS_fmin:
case INS_fminnm:
case INS_fmul:
case INS_fmulx:
case INS_fnmul:
Expand Down Expand Up @@ -13132,6 +13150,7 @@ emitter::insExecutionCharacteristics emitter::getInsExecutionCharacteristics(ins
result.insLatency = PERFSCORE_LATENCY_2C;
break;

case INS_addp:
case INS_cmtst:
case INS_pmul:
case INS_sabd:
Expand Down Expand Up @@ -13160,7 +13179,7 @@ emitter::insExecutionCharacteristics emitter::getInsExecutionCharacteristics(ins
}
break;

case IF_DV_3DI: // mul, mla, mls (scalar by elem)
case IF_DV_3DI: // fmul, fmulx, fmla, fmls (scalar by elem)
result.insThroughput = PERFSCORE_THROUGHPUT_1C;
result.insLatency = PERFSCORE_LATENCY_4C;
break;
Expand Down Expand Up @@ -13320,6 +13339,33 @@ emitter::insExecutionCharacteristics emitter::getInsExecutionCharacteristics(ins
result.insLatency = PERFSCORE_LATENCY_1C;
break;

case IF_DV_2T: // addv, saddlv, smaxv, sminv, uaddlv, umaxv, uminv
switch (ins)
{
case INS_addv:
case INS_saddlv:
case INS_uaddlv:
result.insThroughput = PERFSCORE_THROUGHPUT_1C;
result.insLatency = PERFSCORE_LATENCY_3C;
break;

case INS_smaxv:
case INS_sminv:
case INS_umaxv:
case INS_uminv:
case INS_sha256h2:
case INS_sha256su1:
result.insThroughput = PERFSCORE_THROUGHPUT_1C;
result.insLatency = PERFSCORE_LATENCY_4C;
break;

default:
// all other instructions
perfScoreUnhandledInstruction(id, &result);
break;
}
break;

default:
// all other instructions
perfScoreUnhandledInstruction(id, &result);
Expand Down