From 37a3e2b6bed0ce0205ed01e31d3ee710e4a51340 Mon Sep 17 00:00:00 2001 From: Finn Wilkinson Date: Fri, 28 Jul 2023 17:13:34 +0100 Subject: [PATCH 1/9] Updated a64fx_sme config. --- configs/a64fx_SME.yaml | 45 +++++------------------------------------- 1 file changed, 5 insertions(+), 40 deletions(-) diff --git a/configs/a64fx_SME.yaml b/configs/a64fx_SME.yaml index f304234ee8..886ec8ad97 100644 --- a/configs/a64fx_SME.yaml +++ b/configs/a64fx_SME.yaml @@ -81,29 +81,17 @@ Ports: - INT_DIV_OR_SQRT 5: Portname: EAGA - Instruction-Support: - - LOAD_INT - - LOAD_SCALAR - - LOAD_VECTOR - - LOAD_SVE - - STORE_INT - - STORE_SCALAR - - STORE_VECTOR - - STORE_SVE + Instruction-Support: + - LOAD + - STORE_ADDRESS - INT_SIMPLE_ARTH_NOSHIFT - INT_SIMPLE_LOGICAL_NOSHIFT - INT_SIMPLE_CMP 6: Portname: EAGB Instruction-Support: - - LOAD_INT - - LOAD_SCALAR - - LOAD_VECTOR - - LOAD_SVE - - STORE_INT - - STORE_SCALAR - - STORE_VECTOR - - STORE_SVE + - LOAD + - STORE_ADDRESS - INT_SIMPLE_ARTH_NOSHIFT - INT_SIMPLE_LOGICAL_NOSHIFT - INT_SIMPLE_CMP @@ -115,11 +103,6 @@ Ports: Portname: SME Instruction-Support: - SME - 9: - Portname: SME_LD_STR - Instruction-Support: - - LOAD_SME - - STORE_SME Reservation-Stations: 0: Size: 20 @@ -154,11 +137,6 @@ Reservation-Stations: Dispatch-Rate: 2 Ports: - SME - 6: - Size: 10 - Dispatch-Rate: 2 - Ports: - - SME_LD_STR Execution-Units: 0: Pipelined: True @@ -214,12 +192,6 @@ Execution-Units: - INT_DIV_OR_SQRT - FP_DIV_OR_SQRT - SVE_DIV_OR_SQRT - 9: - Pipelined: True - Blocking-Groups: - - INT_DIV_OR_SQRT - - FP_DIV_OR_SQRT - - SVE_DIV_OR_SQRT Latencies: 0: Instruction-Groups: @@ -310,13 +282,6 @@ Latencies: # Kept to catch edge cases. Execution-Latency: 9 Execution-Throughput: 1 - 13: - Instruction-Groups: - - LOAD_SME - - STORE_ADDRESS_SME - # Same as SVE LD/STR - Execution-Latency: 6 - Execution-Throughput: 1 # CPU-Info mainly used to generate a replica of the special (or system) file directory # structure CPU-Info: From 7882953398ac3e4f281d5076f6d22f622da66389 Mon Sep 17 00:00:00 2001 From: Jack Jones Date: Wed, 19 Jul 2023 19:07:57 +0100 Subject: [PATCH 2/9] Reversed LSQ restrictions to apply requests per cycle to targets not instructions --- src/lib/pipeline/LoadStoreQueue.cc | 94 +++++++++++++++--------------- 1 file changed, 48 insertions(+), 46 deletions(-) diff --git a/src/lib/pipeline/LoadStoreQueue.cc b/src/lib/pipeline/LoadStoreQueue.cc index 7844e465cd..5afa187b23 100644 --- a/src/lib/pipeline/LoadStoreQueue.cc +++ b/src/lib/pipeline/LoadStoreQueue.cc @@ -433,55 +433,57 @@ void LoadStoreQueue::tick() { // Iterate over requests ready this cycle while (itInsn != itReq->second.end()) { - // Speculatively increment count of this request type - reqCounts[isStore]++; - - // Ensure the limit on the number of permitted operations is adhered - // to - if (reqCounts[isStore] + reqCounts[!isStore] > totalLimit_) { - // No more requests can be scheduled this cycle - exceededLimits = {true, true}; - break; - } else if (reqCounts[isStore] > reqLimits_[isStore]) { - // No more requests of this type can be scheduled this cycle - exceededLimits[isStore] = true; - // Remove speculative increment to ensure it doesn't count for - // comparisons aginast the totalLimit_ - reqCounts[isStore]--; - break; - } else { - // Schedule requests from the queue of addresses in - // request[Load|Store]Queue_ entry - auto& addressQueue = itInsn->reqAddresses; - while (addressQueue.size()) { - const simeng::MemoryAccessTarget req = addressQueue.front(); - - // Ensure the limit on the data transfered per cycle is adhered to - assert(req.size <= bandwidth && - "Individual memory request from LoadStoreQueue exceeds L1 " - "bandwidth set and thus will never be submitted"); - dataTransfered[isStore] += req.size; - if (dataTransfered[isStore] > bandwidth) { - // No more requests can be scheduled this cycle - exceededLimits[isStore] = true; - itInsn = itReq->second.end(); - break; - } - - // Request a read from the memory interface if the requestQueue_ - // entry represents a read - if (!isStore) { - memory_.requestRead(req, itInsn->insn->getSequenceId()); - } + // Schedule requests from the queue of addresses in + // request[Load|Store]Queue_ entry + auto& addressQueue = itInsn->reqAddresses; + while (addressQueue.size()) { + const simeng::MemoryAccessTarget req = + addressQueue.front(); // Speculatively increment count of this + // request type + reqCounts[isStore]++; + + // Ensure the limit on the number of permitted operations is adhered + // to + if (reqCounts[isStore] + reqCounts[!isStore] > totalLimit_) { + // No more requests can be scheduled this cycle + exceededLimits = {true, true}; + itInsn = itReq->second.end(); + break; + } else if (reqCounts[isStore] > reqLimits_[isStore]) { + // No more requests of this type can be scheduled this cycle + exceededLimits[isStore] = true; + // Remove speculative increment to ensure it doesn't count for + // comparisons aginast the totalLimit_ + reqCounts[isStore]--; + itInsn = itReq->second.end(); + break; + } - // Remove processed address from queue - addressQueue.pop(); + // Ensure the limit on the data transfered per cycle is adhered to + assert(req.size <= bandwidth && + "Individual memory request from LoadStoreQueue exceeds L1 " + "bandwidth set and thus will never be submitted"); + dataTransfered[isStore] += req.size; + if (dataTransfered[isStore] > bandwidth) { + // No more requests can be scheduled this cycle + exceededLimits[isStore] = true; + itInsn = itReq->second.end(); + break; } - // Remove entry from vector iff all of its requests have been - // scheduled - if (addressQueue.size() == 0) { - itInsn = itReq->second.erase(itInsn); + + // Request a read from the memory interface if the requestQueue_ + // entry represents a read + if (!isStore) { + memory_.requestRead(req, itInsn->insn->getSequenceId()); } + + // Remove processed address from queue + addressQueue.pop(); + } + // Remove entry from vector iff all of its requests have been + // scheduled + if (addressQueue.size() == 0) { + itInsn = itReq->second.erase(itInsn); } } From 84ca576ad1920e84c68ffa1603ea5e6a802973c9 Mon Sep 17 00:00:00 2001 From: Jack Jones Date: Mon, 31 Jul 2023 10:45:53 +0100 Subject: [PATCH 3/9] Fixed some latency values in a64fx config file --- configs/a64fx.yaml | 125 ++++++++++++++++++++++++++++++++++++++++- src/lib/ModelConfig.cc | 5 +- 2 files changed, 126 insertions(+), 4 deletions(-) diff --git a/configs/a64fx.yaml b/configs/a64fx.yaml index 1ace725d60..f8d45f0bba 100644 --- a/configs/a64fx.yaml +++ b/configs/a64fx.yaml @@ -10,7 +10,7 @@ Core: # Timer-Frequency is in MHz. Timer-Frequency: 100 Micro-Operations: True - Vector-Length: 512 + Vector-Length: 128 Fetch: Fetch-Block-Size: 32 Loop-Buffer-Size: 48 @@ -215,7 +215,9 @@ Latencies: 6: Instruction-Groups: - VECTOR_SIMPLE + - FP_MUL - SVE_SIMPLE + - SVE_MUL - SCALAR_SIMPLE_CVT - FP_MUL - SVE_MUL @@ -245,6 +247,123 @@ Latencies: - STORE_ADDRESS_SVE Execution-Latency: 6 Execution-Throughput: 1 + 11: + Instruction-Groups: + - ~1922, + - ~1924, + - ~1926, + - ~2359, + - ~2360, + - ~2361, + - ~2364, + - ~2365, + - ~2368, + - ~2369, + - ~2371, + - ~2390, + - ~2391, + - ~2392, + - ~2395, + - ~2396, + - ~2399, + - ~2400, + - ~2402, + - ~2445, + - ~2446, + - ~2447, + - ~2450, + - ~2451, + - ~2454, + - ~2455, + - ~2457, + - ~2470, + - ~2471, + - ~2472, + - ~2475, + - ~2476, + - ~2479, + - ~2480, + - ~2482, + - ~3627, + - ~3629, + - ~3631, + - ~3633, + - ~3644, + - ~3646, + - ~3648, + - ~3650, + - ~3709, + - ~3711, + - ~3713, + - ~3715, + - ~4306, + - ~4308, + - ~4310, + - ~4312, + - ~4326, + - ~4328, + - ~4330, + - ~4332, + - ~4372, + - ~4374, + - ~4376, + - ~4378, + - ~4468, + - ~4469, + - ~4470, + - ~4472, + - ~4474, + - ~4476, + - ~4493, + - ~4494, + - ~4495, + - ~4497, + - ~4499, + - ~4501, + - ~4511, + - ~4513, + - ~4515, + - ~4517, + - ~4519, + - ~4521, + - ~4534, + - ~4535, + - ~4536, + - ~4538, + - ~4540, + - ~4542, + - ~4594, + - ~4595, + - ~4599, + - ~4601, + - ~4603, + - ~4605, + - ~4613, + - ~4614, + - ~4618, + - ~4620, + - ~4622, + - ~4624, + - ~4633, + - ~4635, + - ~4637, + - ~4639, + - ~4641, + - ~4643, + - ~5760, + - ~5762, + - ~5764, + - ~5766, + - ~5780, + - ~5782, + - ~5784, + - ~5786, + - ~5824, + - ~5826, + - ~5828, + - ~5830 + Execution-Latency: 15 + Execution-Throughput: 1 # CPU-Info mainly used to generate a replica of the special (or system) file directory # structure CPU-Info: @@ -259,8 +378,8 @@ CPU-Info: SMT: 1 # Below are the values needed to generate /proc/cpuinfo BogoMIPS: 200.00 - Features: fp asimd evtstrm sha1 sha2 crc32 atomics fphp asimdhp cpuid asimdrdm fcma dcpop sve - CPU-Implementer: "0x46" + Features: fp asimd evtstrm sha1 sha2 crc32 atomics fphp asimdhp cpuid asimdrdm fcma dcpop + CPU-Implementer: "0x43" CPU-Architecture: 8 CPU-Variant: "0x1" CPU-Part: "0x001" diff --git a/src/lib/ModelConfig.cc b/src/lib/ModelConfig.cc index 84e71ced5b..19c07d0213 100644 --- a/src/lib/ModelConfig.cc +++ b/src/lib/ModelConfig.cc @@ -513,7 +513,10 @@ void ModelConfig::validate() { // AArch64_INSTRUCTION_LIST_END boundChecker(configFile_[root][i]["Instruction-Opcode"][opcodeIndex], (std::string(latNum) + std::string(grpNum)), - std::make_pair(0, 4516), ExpectedValue::UInteger); + std::make_pair( + 0, static_cast( + AARCH64Opcode::AArch64_INSTRUCTION_LIST_END)), + ExpectedValue::UInteger); opcodeIndex++; } else if (nodeChecker( grpNode[j], (std::string(latNum) + std::string(grpNum)), From c39f3b815d5be0635eb16da012e9d3486ae07429 Mon Sep 17 00:00:00 2001 From: Jack Jones Date: Mon, 31 Jul 2023 10:48:06 +0100 Subject: [PATCH 4/9] Reverted accidental changes --- configs/a64fx.yaml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/configs/a64fx.yaml b/configs/a64fx.yaml index f8d45f0bba..5c1dba88c8 100644 --- a/configs/a64fx.yaml +++ b/configs/a64fx.yaml @@ -10,7 +10,7 @@ Core: # Timer-Frequency is in MHz. Timer-Frequency: 100 Micro-Operations: True - Vector-Length: 128 + Vector-Length: 512 Fetch: Fetch-Block-Size: 32 Loop-Buffer-Size: 48 @@ -378,8 +378,8 @@ CPU-Info: SMT: 1 # Below are the values needed to generate /proc/cpuinfo BogoMIPS: 200.00 - Features: fp asimd evtstrm sha1 sha2 crc32 atomics fphp asimdhp cpuid asimdrdm fcma dcpop - CPU-Implementer: "0x43" + Features: fp asimd evtstrm sha1 sha2 crc32 atomics fphp asimdhp cpuid asimdrdm fcma dcpop sve + CPU-Implementer: "0x46" CPU-Architecture: 8 CPU-Variant: "0x1" CPU-Part: "0x001" From 815b8f6ff5e20218902a6c71f80940700fdb3688 Mon Sep 17 00:00:00 2001 From: jj16791 Date: Mon, 14 Aug 2023 23:59:47 +0100 Subject: [PATCH 5/9] Copy of updated a64fx latency values into sme counterpart --- configs/a64fx_SME.yaml | 119 ++++++++++++++++++++++++++++++++++++++++- 1 file changed, 118 insertions(+), 1 deletion(-) diff --git a/configs/a64fx_SME.yaml b/configs/a64fx_SME.yaml index 886ec8ad97..a12c0aef7c 100644 --- a/configs/a64fx_SME.yaml +++ b/configs/a64fx_SME.yaml @@ -266,13 +266,130 @@ Latencies: Execution-Latency: 6 Execution-Throughput: 1 11: + Instruction-Groups: + - ~1922, + - ~1924, + - ~1926, + - ~2359, + - ~2360, + - ~2361, + - ~2364, + - ~2365, + - ~2368, + - ~2369, + - ~2371, + - ~2390, + - ~2391, + - ~2392, + - ~2395, + - ~2396, + - ~2399, + - ~2400, + - ~2402, + - ~2445, + - ~2446, + - ~2447, + - ~2450, + - ~2451, + - ~2454, + - ~2455, + - ~2457, + - ~2470, + - ~2471, + - ~2472, + - ~2475, + - ~2476, + - ~2479, + - ~2480, + - ~2482, + - ~3627, + - ~3629, + - ~3631, + - ~3633, + - ~3644, + - ~3646, + - ~3648, + - ~3650, + - ~3709, + - ~3711, + - ~3713, + - ~3715, + - ~4306, + - ~4308, + - ~4310, + - ~4312, + - ~4326, + - ~4328, + - ~4330, + - ~4332, + - ~4372, + - ~4374, + - ~4376, + - ~4378, + - ~4468, + - ~4469, + - ~4470, + - ~4472, + - ~4474, + - ~4476, + - ~4493, + - ~4494, + - ~4495, + - ~4497, + - ~4499, + - ~4501, + - ~4511, + - ~4513, + - ~4515, + - ~4517, + - ~4519, + - ~4521, + - ~4534, + - ~4535, + - ~4536, + - ~4538, + - ~4540, + - ~4542, + - ~4594, + - ~4595, + - ~4599, + - ~4601, + - ~4603, + - ~4605, + - ~4613, + - ~4614, + - ~4618, + - ~4620, + - ~4622, + - ~4624, + - ~4633, + - ~4635, + - ~4637, + - ~4639, + - ~4641, + - ~4643, + - ~5760, + - ~5762, + - ~5764, + - ~5766, + - ~5780, + - ~5782, + - ~5784, + - ~5786, + - ~5824, + - ~5826, + - ~5828, + - ~5830 + Execution-Latency: 15 + Execution-Throughput: 1 + 12: Instruction-Groups: - SME_SIMPLE_LOGICAL - SME_SIMPLE_CMP # Same as SVE Execution-Latency: 4 Execution-Throughput: 1 - 12: + 13: Instruction-Groups: - SME_SIMPLE - SME_DIV_OR_SQRT From e584cd6430172656d84432bf3680c25367b92804 Mon Sep 17 00:00:00 2001 From: jj16791 Date: Mon, 9 Oct 2023 12:35:17 +0100 Subject: [PATCH 6/9] Changes to relfect PR comments --- configs/a64fx.yaml | 9 ++++----- configs/a64fx_SME.yaml | 9 +++++---- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/configs/a64fx.yaml b/configs/a64fx.yaml index 5c1dba88c8..4cb3bb5c7f 100644 --- a/configs/a64fx.yaml +++ b/configs/a64fx.yaml @@ -113,17 +113,17 @@ Reservation-Stations: - EXB 2: Size: 10 - Dispatch-Rate: 2 + Dispatch-Rate: 1 Ports: - EAGA 3: Size: 10 - Dispatch-Rate: 2 + Dispatch-Rate: 1 Ports: - EAGB 4: Size: 19 - Dispatch-Rate: 2 + Dispatch-Rate: 1 Ports: - BR Execution-Units: @@ -215,9 +215,7 @@ Latencies: 6: Instruction-Groups: - VECTOR_SIMPLE - - FP_MUL - SVE_SIMPLE - - SVE_MUL - SCALAR_SIMPLE_CVT - FP_MUL - SVE_MUL @@ -247,6 +245,7 @@ Latencies: - STORE_ADDRESS_SVE Execution-Latency: 6 Execution-Throughput: 1 +# Indexed FMLA instructions split into 2 dependent µops. Latency increased to 15 to mimic such behaviour. 11: Instruction-Groups: - ~1922, diff --git a/configs/a64fx_SME.yaml b/configs/a64fx_SME.yaml index a12c0aef7c..fe0515ea59 100644 --- a/configs/a64fx_SME.yaml +++ b/configs/a64fx_SME.yaml @@ -119,22 +119,22 @@ Reservation-Stations: - EXB 2: Size: 10 - Dispatch-Rate: 2 + Dispatch-Rate: 1 Ports: - EAGA 3: Size: 10 - Dispatch-Rate: 2 + Dispatch-Rate: 1 Ports: - EAGB 4: Size: 19 - Dispatch-Rate: 2 + Dispatch-Rate: 1 Ports: - BR 5: Size: 20 - Dispatch-Rate: 2 + Dispatch-Rate: 1 Ports: - SME Execution-Units: @@ -265,6 +265,7 @@ Latencies: - STORE_ADDRESS_SME Execution-Latency: 6 Execution-Throughput: 1 +# Indexed FMLA instructions split into 2 dependent µops. Latency increased to 15 to mimic such behaviour. 11: Instruction-Groups: - ~1922, From 35155bd190c9a27b88cffa540f805a07643b3e59 Mon Sep 17 00:00:00 2001 From: jj16791 Date: Mon, 9 Oct 2023 12:48:46 +0100 Subject: [PATCH 7/9] Typo fix --- src/lib/pipeline/LoadStoreQueue.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/lib/pipeline/LoadStoreQueue.cc b/src/lib/pipeline/LoadStoreQueue.cc index 5afa187b23..7a93f39f65 100644 --- a/src/lib/pipeline/LoadStoreQueue.cc +++ b/src/lib/pipeline/LoadStoreQueue.cc @@ -453,7 +453,7 @@ void LoadStoreQueue::tick() { // No more requests of this type can be scheduled this cycle exceededLimits[isStore] = true; // Remove speculative increment to ensure it doesn't count for - // comparisons aginast the totalLimit_ + // comparisons against the totalLimit_ reqCounts[isStore]--; itInsn = itReq->second.end(); break; From 868f963bc8eb9d9a75d2617949bce4fa830c1dd6 Mon Sep 17 00:00:00 2001 From: jj16791 Date: Wed, 11 Oct 2023 12:37:01 +0100 Subject: [PATCH 8/9] Changes to a64fx config files to match requests from PR --- configs/a64fx.yaml | 227 ++++++++++++++++++------------------ configs/a64fx_SME.yaml | 256 ++++++++++++++++++++--------------------- 2 files changed, 236 insertions(+), 247 deletions(-) diff --git a/configs/a64fx.yaml b/configs/a64fx.yaml index 4cb3bb5c7f..0c3ac7e0df 100644 --- a/configs/a64fx.yaml +++ b/configs/a64fx.yaml @@ -245,121 +245,122 @@ Latencies: - STORE_ADDRESS_SVE Execution-Latency: 6 Execution-Throughput: 1 -# Indexed FMLA instructions split into 2 dependent µops. Latency increased to 15 to mimic such behaviour. +# Indexed FMLA instructions split into 2 dependent µops. Latency increased to 15 to mimic such behaviour +# NOTE: Any changes to the capstone opcode list could invalidate the mapping between ARM instructions and the values below 11: Instruction-Groups: - - ~1922, - - ~1924, - - ~1926, - - ~2359, - - ~2360, - - ~2361, - - ~2364, - - ~2365, - - ~2368, - - ~2369, - - ~2371, - - ~2390, - - ~2391, - - ~2392, - - ~2395, - - ~2396, - - ~2399, - - ~2400, - - ~2402, - - ~2445, - - ~2446, - - ~2447, - - ~2450, - - ~2451, - - ~2454, - - ~2455, - - ~2457, - - ~2470, - - ~2471, - - ~2472, - - ~2475, - - ~2476, - - ~2479, - - ~2480, - - ~2482, - - ~3627, - - ~3629, - - ~3631, - - ~3633, - - ~3644, - - ~3646, - - ~3648, - - ~3650, - - ~3709, - - ~3711, - - ~3713, - - ~3715, - - ~4306, - - ~4308, - - ~4310, - - ~4312, - - ~4326, - - ~4328, - - ~4330, - - ~4332, - - ~4372, - - ~4374, - - ~4376, - - ~4378, - - ~4468, - - ~4469, - - ~4470, - - ~4472, - - ~4474, - - ~4476, - - ~4493, - - ~4494, - - ~4495, - - ~4497, - - ~4499, - - ~4501, - - ~4511, - - ~4513, - - ~4515, - - ~4517, - - ~4519, - - ~4521, - - ~4534, - - ~4535, - - ~4536, - - ~4538, - - ~4540, - - ~4542, - - ~4594, - - ~4595, - - ~4599, - - ~4601, - - ~4603, - - ~4605, - - ~4613, - - ~4614, - - ~4618, - - ~4620, - - ~4622, - - ~4624, - - ~4633, - - ~4635, - - ~4637, - - ~4639, - - ~4641, - - ~4643, - - ~5760, - - ~5762, - - ~5764, - - ~5766, - - ~5780, - - ~5782, - - ~5784, - - ~5786, - - ~5824, - - ~5826, - - ~5828, + - ~1922 + - ~1924 + - ~1926 + - ~2359 + - ~2360 + - ~2361 + - ~2364 + - ~2365 + - ~2368 + - ~2369 + - ~2371 + - ~2390 + - ~2391 + - ~2392 + - ~2395 + - ~2396 + - ~2399 + - ~2400 + - ~2402 + - ~2445 + - ~2446 + - ~2447 + - ~2450 + - ~2451 + - ~2454 + - ~2455 + - ~2457 + - ~2470 + - ~2471 + - ~2472 + - ~2475 + - ~2476 + - ~2479 + - ~2480 + - ~2482 + - ~3627 + - ~3629 + - ~3631 + - ~3633 + - ~3644 + - ~3646 + - ~3648 + - ~3650 + - ~3709 + - ~3711 + - ~3713 + - ~3715 + - ~4306 + - ~4308 + - ~4310 + - ~4312 + - ~4326 + - ~4328 + - ~4330 + - ~4332 + - ~4372 + - ~4374 + - ~4376 + - ~4378 + - ~4468 + - ~4469 + - ~4470 + - ~4472 + - ~4474 + - ~4476 + - ~4493 + - ~4494 + - ~4495 + - ~4497 + - ~4499 + - ~4501 + - ~4511 + - ~4513 + - ~4515 + - ~4517 + - ~4519 + - ~4521 + - ~4534 + - ~4535 + - ~4536 + - ~4538 + - ~4540 + - ~4542 + - ~4594 + - ~4595 + - ~4599 + - ~4601 + - ~4603 + - ~4605 + - ~4613 + - ~4614 + - ~4618 + - ~4620 + - ~4622 + - ~4624 + - ~4633 + - ~4635 + - ~4637 + - ~4639 + - ~4641 + - ~4643 + - ~5760 + - ~5762 + - ~5764 + - ~5766 + - ~5780 + - ~5782 + - ~5784 + - ~5786 + - ~5824 + - ~5826 + - ~5828 - ~5830 Execution-Latency: 15 Execution-Throughput: 1 diff --git a/configs/a64fx_SME.yaml b/configs/a64fx_SME.yaml index fe0515ea59..9a7c29f94c 100644 --- a/configs/a64fx_SME.yaml +++ b/configs/a64fx_SME.yaml @@ -220,8 +220,10 @@ Latencies: - SCALAR_SIMPLE - VECTOR_SIMPLE_LOGICAL - SVE_SIMPLE_LOGICAL + - SME_SIMPLE_LOGICAL - VECTOR_SIMPLE_CMP - SVE_SIMPLE_CMP + - SME_SIMPLE_CMP Execution-Latency: 4 Execution-Throughput: 1 5: @@ -230,18 +232,20 @@ Latencies: Execution-Latency: 29 Execution-Throughput: 29 6: - Instruction-Groups: + Instruction-Groups: + - SCALAR_SIMPLE_CVT - VECTOR_SIMPLE - SVE_SIMPLE - - SCALAR_SIMPLE_CVT + - SME_SIMPLE - FP_MUL - SVE_MUL - - SME + - SME_MUL Execution-Latency: 9 Execution-Throughput: 1 7: Instruction-Groups: - SVE_DIV_OR_SQRT + - SME_DIV_OR_SQRT Execution-Latency: 98 Execution-Throughput: 98 8: @@ -260,146 +264,130 @@ Latencies: 10: Instruction-Groups: - LOAD_SVE - - STORE_ADDRESS_SVE - LOAD_SME + - STORE_ADDRESS_SVE - STORE_ADDRESS_SME Execution-Latency: 6 Execution-Throughput: 1 -# Indexed FMLA instructions split into 2 dependent µops. Latency increased to 15 to mimic such behaviour. +# Indexed FMLA instructions split into 2 dependent µops. Latency increased to 15 to mimic such behaviour +# NOTE: Any changes to the capstone opcode list could invalidate the mapping between ARM instructions and the values below 11: Instruction-Groups: - - ~1922, - - ~1924, - - ~1926, - - ~2359, - - ~2360, - - ~2361, - - ~2364, - - ~2365, - - ~2368, - - ~2369, - - ~2371, - - ~2390, - - ~2391, - - ~2392, - - ~2395, - - ~2396, - - ~2399, - - ~2400, - - ~2402, - - ~2445, - - ~2446, - - ~2447, - - ~2450, - - ~2451, - - ~2454, - - ~2455, - - ~2457, - - ~2470, - - ~2471, - - ~2472, - - ~2475, - - ~2476, - - ~2479, - - ~2480, - - ~2482, - - ~3627, - - ~3629, - - ~3631, - - ~3633, - - ~3644, - - ~3646, - - ~3648, - - ~3650, - - ~3709, - - ~3711, - - ~3713, - - ~3715, - - ~4306, - - ~4308, - - ~4310, - - ~4312, - - ~4326, - - ~4328, - - ~4330, - - ~4332, - - ~4372, - - ~4374, - - ~4376, - - ~4378, - - ~4468, - - ~4469, - - ~4470, - - ~4472, - - ~4474, - - ~4476, - - ~4493, - - ~4494, - - ~4495, - - ~4497, - - ~4499, - - ~4501, - - ~4511, - - ~4513, - - ~4515, - - ~4517, - - ~4519, - - ~4521, - - ~4534, - - ~4535, - - ~4536, - - ~4538, - - ~4540, - - ~4542, - - ~4594, - - ~4595, - - ~4599, - - ~4601, - - ~4603, - - ~4605, - - ~4613, - - ~4614, - - ~4618, - - ~4620, - - ~4622, - - ~4624, - - ~4633, - - ~4635, - - ~4637, - - ~4639, - - ~4641, - - ~4643, - - ~5760, - - ~5762, - - ~5764, - - ~5766, - - ~5780, - - ~5782, - - ~5784, - - ~5786, - - ~5824, - - ~5826, - - ~5828, + - ~1922 + - ~1924 + - ~1926 + - ~2359 + - ~2360 + - ~2361 + - ~2364 + - ~2365 + - ~2368 + - ~2369 + - ~2371 + - ~2390 + - ~2391 + - ~2392 + - ~2395 + - ~2396 + - ~2399 + - ~2400 + - ~2402 + - ~2445 + - ~2446 + - ~2447 + - ~2450 + - ~2451 + - ~2454 + - ~2455 + - ~2457 + - ~2470 + - ~2471 + - ~2472 + - ~2475 + - ~2476 + - ~2479 + - ~2480 + - ~2482 + - ~3627 + - ~3629 + - ~3631 + - ~3633 + - ~3644 + - ~3646 + - ~3648 + - ~3650 + - ~3709 + - ~3711 + - ~3713 + - ~3715 + - ~4306 + - ~4308 + - ~4310 + - ~4312 + - ~4326 + - ~4328 + - ~4330 + - ~4332 + - ~4372 + - ~4374 + - ~4376 + - ~4378 + - ~4468 + - ~4469 + - ~4470 + - ~4472 + - ~4474 + - ~4476 + - ~4493 + - ~4494 + - ~4495 + - ~4497 + - ~4499 + - ~4501 + - ~4511 + - ~4513 + - ~4515 + - ~4517 + - ~4519 + - ~4521 + - ~4534 + - ~4535 + - ~4536 + - ~4538 + - ~4540 + - ~4542 + - ~4594 + - ~4595 + - ~4599 + - ~4601 + - ~4603 + - ~4605 + - ~4613 + - ~4614 + - ~4618 + - ~4620 + - ~4622 + - ~4624 + - ~4633 + - ~4635 + - ~4637 + - ~4639 + - ~4641 + - ~4643 + - ~5760 + - ~5762 + - ~5764 + - ~5766 + - ~5780 + - ~5782 + - ~5784 + - ~5786 + - ~5824 + - ~5826 + - ~5828 - ~5830 Execution-Latency: 15 Execution-Throughput: 1 - 12: - Instruction-Groups: - - SME_SIMPLE_LOGICAL - - SME_SIMPLE_CMP - # Same as SVE - Execution-Latency: 4 - Execution-Throughput: 1 - 13: - Instruction-Groups: - - SME_SIMPLE - - SME_DIV_OR_SQRT - - SME_MUL - # SME_MUL Used only by outer-product instructions - # Same as SVE. No SME DIV or SQRT so classification to this group should be impossible. - # Kept to catch edge cases. - Execution-Latency: 9 - Execution-Throughput: 1 # CPU-Info mainly used to generate a replica of the special (or system) file directory # structure CPU-Info: From 9dce6e91e2e5b822afc322ba3012689743e6ff3b Mon Sep 17 00:00:00 2001 From: jj16791 Date: Wed, 11 Oct 2023 14:39:26 +0100 Subject: [PATCH 9/9] comment fix --- src/lib/pipeline/LoadStoreQueue.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/lib/pipeline/LoadStoreQueue.cc b/src/lib/pipeline/LoadStoreQueue.cc index 7a93f39f65..c7a93ba633 100644 --- a/src/lib/pipeline/LoadStoreQueue.cc +++ b/src/lib/pipeline/LoadStoreQueue.cc @@ -480,7 +480,7 @@ void LoadStoreQueue::tick() { // Remove processed address from queue addressQueue.pop(); } - // Remove entry from vector iff all of its requests have been + // Remove entry from vector if all of its requests have been // scheduled if (addressQueue.size() == 0) { itInsn = itReq->second.erase(itInsn);