Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 29 additions & 1 deletion src/coreclr/src/jit/emit.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1107,7 +1107,7 @@ float emitter::insEvaluateExecutionCost(instrDesc* id)

if (memAccessKind == PERFSCORE_MEMORY_WRITE)
{
// We assume that we won't read back from memory for the next WR_GENERAL (3) cycles
// We assume that we won't read back from memory for the next WR_GENERAL cycles
// Thus we normally won't pay latency costs for writes.
latency = max(0.0f, latency - PERFSCORE_LATENCY_WR_GENERAL);
}
Expand All @@ -1121,6 +1121,34 @@ float emitter::insEvaluateExecutionCost(instrDesc* id)
return max(throughput, latency);
}

//------------------------------------------------------------------------------------
// perfScoreUnhandledInstruction:
//    Helper method used to report an instruction that the PerfScore logic
//    does not handle.
//
// Arguments:
//    id      - The current instruction descriptor to be evaluated
//    pResult - pointer to the struct holding the instruction characteristics;
//              on return its insThroughput and insLatency fields have been
//              set to default values
//
// Notes:
//    When validating that the PerfScore handles every instruction,
//    change the '#if 0' below into '#ifdef DEBUG'. The method will then
//    print the instruction name and the instruction format, and assert
//    instead of silently returning.
//
//    Otherwise it stores PERFSCORE_THROUGHPUT_1C and PERFSCORE_LATENCY_1C
//    (a default of 1 cycle) into pResult. Only those two fields are written;
//    pResult->insMemoryAccessKind is left as the caller set it.
//
void emitter::perfScoreUnhandledInstruction(instrDesc* id, insExecutionCharacteristics* pResult)
{
// Change this to #ifdef DEBUG to assert on any unhandled instructions
#if 0
    // End the message with a newline so it is not fused with the assert output that follows.
    printf("PerfScore: unhandled instruction: %s, format %s\n", codeGen->genInsName(id->idIns()), emitIfName(id->idInsFmt()));
    assert(!"PerfScore: unhandled instruction");
#endif
    pResult->insThroughput = PERFSCORE_THROUGHPUT_1C;
    pResult->insLatency    = PERFSCORE_LATENCY_1C;
}

#endif // defined(DEBUG) || defined(LATE_DISASM)

//----------------------------------------------------------------------------------------
Expand Down
60 changes: 44 additions & 16 deletions src/coreclr/src/jit/emit.h
Original file line number Diff line number Diff line change
Expand Up @@ -1233,19 +1233,17 @@ class emitter
#define PERFSCORE_THROUGHPUT_4C 4.0f // slower - 4 cycles
#define PERFSCORE_THROUGHPUT_5C 5.0f // slower - 5 cycles
#define PERFSCORE_THROUGHPUT_6C 6.0f // slower - 6 cycles
#define PERFSCORE_THROUGHPUT_9C 9.0f // slower - 9 cycles
#define PERFSCORE_THROUGHPUT_10C 10.0f // slower - 10 cycles
#define PERFSCORE_THROUGHPUT_13C 13.0f // slower - 13 cycles
#define PERFSCORE_THROUGHPUT_19C 19.0f // slower - 19 cycles
#define PERFSCORE_THROUGHPUT_25C 25.0f // slower - 25 cycles
#define PERFSCORE_THROUGHPUT_33C 33.0f // slower - 33 cycles
#define PERFSCORE_THROUGHPUT_52C 52.0f // slower - 52 cycles
#define PERFSCORE_THROUGHPUT_57C 57.0f // slower - 57 cycles

#define PERFSCORE_THROUGHPUT_DEFAULT PERFSCORE_THROUGHPUT_1C

#define PERFSCORE_LATENCY_ILLEGAL -1024.0f

#define PERFSCORE_LATENCY_DEFAULT 1.0f

#define PERFSCORE_LATENCY_ZERO 0.0f
#define PERFSCORE_LATENCY_1C 1.0f
#define PERFSCORE_LATENCY_2C 2.0f
Expand All @@ -1260,7 +1258,11 @@ class emitter
#define PERFSCORE_LATENCY_11C 11.0f
#define PERFSCORE_LATENCY_12C 12.0f
#define PERFSCORE_LATENCY_13C 13.0f
#define PERFSCORE_LATENCY_15C 15.0f
#define PERFSCORE_LATENCY_16C 16.0f
#define PERFSCORE_LATENCY_18C 18.0f
#define PERFSCORE_LATENCY_20C 20.0f
#define PERFSCORE_LATENCY_22C 22.0f
#define PERFSCORE_LATENCY_23C 23.0f
#define PERFSCORE_LATENCY_26C 26.0f
#define PERFSCORE_LATENCY_62C 62.0f
Expand All @@ -1271,22 +1273,46 @@ class emitter
#define PERFSCORE_LATENCY_BRANCH_COND 2.0f // includes cost of a possible misprediction
#define PERFSCORE_LATENCY_BRANCH_INDIRECT 2.0f // includes cost of a possible misprediction

#if defined(_TARGET_XARCH_)

// a read, write or modify from stack location, possible def to use latency from L0 cache
#define PERFSCORE_LATENCY_RD_STACK PERFSCORE_LATENCY_2C
#define PERFSCORE_LATENCY_WR_STACK PERFSCORE_LATENCY_2C
#define PERFSCORE_LATENCY_RD_WR_STACK PERFSCORE_LATENCY_5C

// a read, write or modify from constant location, possible def to use latency from L0 cache
#define PERFSCORE_LATENCY_RD_CONST_ADDR PERFSCORE_LATENCY_2C
#define PERFSCORE_LATENCY_WR_CONST_ADDR PERFSCORE_LATENCY_2C
#define PERFSCORE_LATENCY_RD_WR_CONST_ADDR PERFSCORE_LATENCY_5C

// a read, write or modify from memory location, possible def to use latency from L0 or L1 cache
// plus an extra cost (of 1.0) for an increased chance of a cache miss
#define PERFSCORE_LATENCY_RD_GENERAL PERFSCORE_LATENCY_3C
#define PERFSCORE_LATENCY_WR_GENERAL PERFSCORE_LATENCY_3C
#define PERFSCORE_LATENCY_RD_WR_GENERAL PERFSCORE_LATENCY_6C

#elif defined(_TARGET_ARM64_) || defined(_TARGET_ARM_)

// a read, write or modify from stack location, possible def to use latency from L0 cache
#define PERFSCORE_LATENCY_RD_STACK 2.0f
#define PERFSCORE_LATENCY_WR_STACK 2.0f
#define PERFSCORE_LATENCY_RD_WR_STACK 5.0f
#define PERFSCORE_LATENCY_RD_STACK PERFSCORE_LATENCY_3C
#define PERFSCORE_LATENCY_WR_STACK PERFSCORE_LATENCY_1C
#define PERFSCORE_LATENCY_RD_WR_STACK PERFSCORE_LATENCY_3C

// a read, write or modify from constant location, possible def to use latency from L0 cache
#define PERFSCORE_LATENCY_RD_CONST_ADDR 2.0f
#define PERFSCORE_LATENCY_WR_CONST_ADDR 2.0f
#define PERFSCORE_LATENCY_RD_WR_CONST_ADDR 5.0f
#define PERFSCORE_LATENCY_RD_CONST_ADDR PERFSCORE_LATENCY_3C
#define PERFSCORE_LATENCY_WR_CONST_ADDR PERFSCORE_LATENCY_1C
#define PERFSCORE_LATENCY_RD_WR_CONST_ADDR PERFSCORE_LATENCY_3C

// a read, write or modify from memory location, possible def to use latency from L0 or L1 cache
// plus an extra cost (of 1.0) for an increased chance of a cache miss
#define PERFSCORE_LATENCY_RD_GENERAL 3.0f
#define PERFSCORE_LATENCY_WR_GENERAL 3.0f
#define PERFSCORE_LATENCY_RD_WR_GENERAL 6.0f
#define PERFSCORE_LATENCY_RD_GENERAL PERFSCORE_LATENCY_4C
#define PERFSCORE_LATENCY_WR_GENERAL PERFSCORE_LATENCY_1C
#define PERFSCORE_LATENCY_RD_WR_GENERAL PERFSCORE_LATENCY_4C

#endif // _TARGET_XXX

// Make this an enum:
//
#define PERFSCORE_MEMORY_NONE 0
#define PERFSCORE_MEMORY_READ 1
#define PERFSCORE_MEMORY_WRITE 2
Expand All @@ -1295,8 +1321,7 @@ class emitter
#define PERFSCORE_CODESIZE_COST_HOT 0.10f
#define PERFSCORE_CODESIZE_COST_COLD 0.01f

#define PERFSCORE_CALLEE_SPILL_COST \
0.75f // heuristicly derived - actual cost is one push and one pop, in the prolog/epilog
#define PERFSCORE_CALLEE_SPILL_COST 0.75f

struct insExecutionCharacteristics
{
Expand All @@ -1305,9 +1330,12 @@ class emitter
unsigned insMemoryAccessKind;
};

insExecutionCharacteristics getInsExecutionCharacteristics(instrDesc* id);
float insEvaluateExecutionCost(instrDesc* id);

insExecutionCharacteristics getInsExecutionCharacteristics(instrDesc* id);

void emitter::perfScoreUnhandledInstruction(instrDesc* id, insExecutionCharacteristics* result);

#endif // defined(DEBUG) || defined(LATE_DISASM)

BasicBlock::weight_t getCurrentBlockWeight();
Expand Down
4 changes: 2 additions & 2 deletions src/coreclr/src/jit/emitarm.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7991,8 +7991,8 @@ emitter::insExecutionCharacteristics emitter::getInsExecutionCharacteristics(ins

// ToDo: Calculate actual throughput and latency values
//
result.insThroughput = PERFSCORE_THROUGHPUT_DEFAULT;
result.insLatency = PERFSCORE_LATENCY_DEFAULT;
result.insThroughput = PERFSCORE_THROUGHPUT_1C;
result.insLatency = PERFSCORE_LATENCY_1C;

return result;
}
Expand Down
Loading