diff --git a/src/coreclr/jit/emit.h b/src/coreclr/jit/emit.h index 3327e4bb2cdf41..05246d737bd6ea 100644 --- a/src/coreclr/jit/emit.h +++ b/src/coreclr/jit/emit.h @@ -2088,6 +2088,7 @@ class emitter #define PERFSCORE_LATENCY_14C 14.0f #define PERFSCORE_LATENCY_15C 15.0f #define PERFSCORE_LATENCY_16C 16.0f +#define PERFSCORE_LATENCY_17C 17.0f #define PERFSCORE_LATENCY_18C 18.0f #define PERFSCORE_LATENCY_20C 20.0f #define PERFSCORE_LATENCY_22C 22.0f diff --git a/src/coreclr/jit/emitxarch.cpp b/src/coreclr/jit/emitxarch.cpp index ff65e2d883a25f..c7380c61793dc8 100644 --- a/src/coreclr/jit/emitxarch.cpp +++ b/src/coreclr/jit/emitxarch.cpp @@ -20913,7 +20913,7 @@ emitter::insExecutionCharacteristics emitter::getInsExecutionCharacteristics(ins { // ins reg, mem result.insThroughput = PERFSCORE_THROUGHPUT_2X; - result.insLatency += opSize == EA_32BYTE ? PERFSCORE_LATENCY_3C : PERFSCORE_LATENCY_2C; + result.insLatency += opSize == EA_16BYTE ? PERFSCORE_LATENCY_2C : PERFSCORE_LATENCY_3C; } else { @@ -20943,11 +20943,12 @@ emitter::insExecutionCharacteristics emitter::getInsExecutionCharacteristics(ins break; } + case INS_lddqu: case INS_movntdqa: { assert(memAccessKind == PERFSCORE_MEMORY_READ); result.insThroughput = PERFSCORE_THROUGHPUT_2X; - result.insLatency += opSize == EA_32BYTE ? PERFSCORE_LATENCY_3C : PERFSCORE_LATENCY_2C; + result.insLatency += opSize == EA_16BYTE ? PERFSCORE_LATENCY_2C : PERFSCORE_LATENCY_3C; break; } @@ -20963,7 +20964,7 @@ emitter::insExecutionCharacteristics emitter::getInsExecutionCharacteristics(ins else if (memAccessKind == PERFSCORE_MEMORY_READ) { result.insThroughput = PERFSCORE_THROUGHPUT_2X; - result.insLatency += opSize == EA_32BYTE ? PERFSCORE_LATENCY_3C : PERFSCORE_LATENCY_2C; + result.insLatency += (EA_SIZE_IN_BYTES(opSize) <= 16) ? PERFSCORE_LATENCY_2C : PERFSCORE_LATENCY_3C; } else { @@ -20973,13 +20974,6 @@ emitter::insExecutionCharacteristics emitter::getInsExecutionCharacteristics(ins break; } - case INS_lddqu: - { - result.insThroughput = PERFSCORE_THROUGHPUT_2X; - result.insLatency += opSize == EA_32BYTE ? PERFSCORE_LATENCY_3C : PERFSCORE_LATENCY_2C; - break; - } - case INS_vpmovdb: case INS_vpmovdw: case INS_vpmovqb: @@ -21147,7 +21141,7 @@ emitter::insExecutionCharacteristics emitter::getInsExecutionCharacteristics(ins } else { - result.insLatency += opSize == EA_32BYTE ? PERFSCORE_LATENCY_4C : PERFSCORE_LATENCY_2C; + result.insLatency += opSize == EA_16BYTE ? PERFSCORE_LATENCY_2C : PERFSCORE_LATENCY_4C; result.insThroughput = PERFSCORE_THROUGHPUT_1C; } break; @@ -21231,7 +21225,7 @@ emitter::insExecutionCharacteristics emitter::getInsExecutionCharacteristics(ins case INS_pmovzxdq: { result.insThroughput = PERFSCORE_THROUGHPUT_1C; - result.insLatency += opSize == EA_32BYTE ? PERFSCORE_LATENCY_3C : PERFSCORE_LATENCY_1C; + result.insLatency += opSize == EA_16BYTE ? PERFSCORE_LATENCY_1C : PERFSCORE_LATENCY_3C; break; } @@ -21259,7 +21253,7 @@ emitter::insExecutionCharacteristics emitter::getInsExecutionCharacteristics(ins case INS_vcvtuqq2ps: { result.insThroughput = PERFSCORE_THROUGHPUT_1C; - result.insLatency += opSize == EA_32BYTE ? PERFSCORE_LATENCY_7C : PERFSCORE_LATENCY_5C; + result.insLatency += opSize == EA_16BYTE ? PERFSCORE_LATENCY_5C : PERFSCORE_LATENCY_7C; break; } @@ -21349,8 +21343,22 @@ emitter::insExecutionCharacteristics emitter::getInsExecutionCharacteristics(ins case INS_vpgatherdd: case INS_vgatherdps: { - result.insThroughput = PERFSCORE_THROUGHPUT_4C; - result.insLatency += opSize == EA_32BYTE ? PERFSCORE_LATENCY_13C : PERFSCORE_LATENCY_11C; + if (opSize == EA_16BYTE) + { + result.insThroughput = PERFSCORE_THROUGHPUT_2C; + result.insLatency += PERFSCORE_LATENCY_11C; + } + else if (opSize == EA_32BYTE) + { + result.insThroughput = PERFSCORE_THROUGHPUT_4C; + result.insLatency += PERFSCORE_LATENCY_13C; + } + else + { + assert(opSize == EA_64BYTE); + result.insThroughput = PERFSCORE_THROUGHPUT_8C; + result.insLatency += PERFSCORE_LATENCY_17C; + } break; } @@ -21361,8 +21369,22 @@ emitter::insExecutionCharacteristics emitter::getInsExecutionCharacteristics(ins case INS_vgatherqps: case INS_vgatherqpd: { - result.insThroughput = PERFSCORE_THROUGHPUT_4C; - result.insLatency += opSize == EA_32BYTE ? PERFSCORE_LATENCY_11C : PERFSCORE_LATENCY_9C; + if (opSize == EA_16BYTE) + { + result.insThroughput = PERFSCORE_THROUGHPUT_1C; + result.insLatency += PERFSCORE_LATENCY_9C; + } + else if (opSize == EA_32BYTE) + { + result.insThroughput = PERFSCORE_THROUGHPUT_2C; + result.insLatency += PERFSCORE_LATENCY_11C; + } + else + { + assert(opSize == EA_64BYTE); + result.insThroughput = PERFSCORE_THROUGHPUT_4C; + result.insLatency += PERFSCORE_LATENCY_13C; + } break; }