diff --git a/src/include/simeng/arch/Architecture.hh b/src/include/simeng/arch/Architecture.hh index 9a2d4c69bc..f7f163815c 100644 --- a/src/include/simeng/arch/Architecture.hh +++ b/src/include/simeng/arch/Architecture.hh @@ -65,7 +65,7 @@ class Architecture { * Writes into the supplied macro-op vector, and returns the number of bytes * consumed to produce it; a value of 0 indicates too few bytes were present * for a valid decoding. */ - virtual uint8_t predecode(const void* ptr, uint8_t bytesAvailable, + virtual uint8_t predecode(const void* ptr, uint16_t bytesAvailable, uint64_t instructionAddress, MacroOp& output) const = 0; diff --git a/src/include/simeng/arch/aarch64/Architecture.hh b/src/include/simeng/arch/aarch64/Architecture.hh index 1204b1f915..181124eb56 100644 --- a/src/include/simeng/arch/aarch64/Architecture.hh +++ b/src/include/simeng/arch/aarch64/Architecture.hh @@ -24,7 +24,7 @@ class Architecture : public arch::Architecture { /** Pre-decode instruction memory into a macro-op of `Instruction` * instances. Returns the number of bytes consumed to produce it (always 4), * and writes into the supplied macro-op vector. */ - uint8_t predecode(const void* ptr, uint8_t bytesAvailable, + uint8_t predecode(const void* ptr, uint16_t bytesAvailable, uint64_t instructionAddress, MacroOp& output) const override; diff --git a/src/include/simeng/arch/riscv/Architecture.hh b/src/include/simeng/arch/riscv/Architecture.hh index 5d5d17c185..16489fc643 100644 --- a/src/include/simeng/arch/riscv/Architecture.hh +++ b/src/include/simeng/arch/riscv/Architecture.hh @@ -23,7 +23,7 @@ class Architecture : public arch::Architecture { /** Pre-decode instruction memory into a macro-op of `Instruction` * instances. Returns the number of bytes consumed to produce it (always 4), * and writes into the supplied macro-op vector. 
*/ - uint8_t predecode(const void* ptr, uint8_t bytesAvailable, + uint8_t predecode(const void* ptr, uint16_t bytesAvailable, uint64_t instructionAddress, MacroOp& output) const override; diff --git a/src/include/simeng/pipeline/FetchUnit.hh b/src/include/simeng/pipeline/FetchUnit.hh index c09f830654..aac4e47f62 100644 --- a/src/include/simeng/pipeline/FetchUnit.hh +++ b/src/include/simeng/pipeline/FetchUnit.hh @@ -115,7 +115,7 @@ class FetchUnit { uint8_t* fetchBuffer_; /** The amount of data currently in the fetch buffer. */ - uint8_t bufferedBytes_ = 0; + uint16_t bufferedBytes_ = 0; }; } // namespace pipeline diff --git a/src/lib/arch/aarch64/Architecture.cc b/src/lib/arch/aarch64/Architecture.cc index 5fa77159b4..b72d6654b5 100644 --- a/src/lib/arch/aarch64/Architecture.cc +++ b/src/lib/arch/aarch64/Architecture.cc @@ -144,7 +144,7 @@ Architecture::~Architecture() { SVCRval_ = 0; } -uint8_t Architecture::predecode(const void* ptr, uint8_t bytesAvailable, +uint8_t Architecture::predecode(const void* ptr, uint16_t bytesAvailable, uint64_t instructionAddress, MacroOp& output) const { // Check that instruction address is 4-byte aligned as required by Armv9.2-a diff --git a/src/lib/arch/riscv/Architecture.cc b/src/lib/arch/riscv/Architecture.cc index b641f8fbaa..8332228685 100644 --- a/src/lib/arch/riscv/Architecture.cc +++ b/src/lib/arch/riscv/Architecture.cc @@ -144,7 +144,7 @@ Architecture::~Architecture() { groupExecutionInfo_.clear(); } -uint8_t Architecture::predecode(const void* ptr, uint8_t bytesAvailable, +uint8_t Architecture::predecode(const void* ptr, uint16_t bytesAvailable, uint64_t instructionAddress, MacroOp& output) const { // Check that instruction address is 4-byte aligned as required by RISC-V diff --git a/src/lib/config/ModelConfig.cc b/src/lib/config/ModelConfig.cc index 697c14c7c4..e94cc52895 100644 --- a/src/lib/config/ModelConfig.cc +++ b/src/lib/config/ModelConfig.cc @@ -318,7 +318,7 @@ void ModelConfig::setExpectations(bool isDefault) { 
expectations_["Fetch"].addChild( ExpectationNode::createExpectation(32, "Fetch-Block-Size")); expectations_["Fetch"]["Fetch-Block-Size"].setValueSet(std::vector{ - 4, 8, 16, 32, 64, 128, 256, 512, 1024, 4096, 8192, 16384, 32768}); + 4, 8, 16, 32, 64, 128, 256, 512, 1024, 2048, 4096, 8192, 16384, 32768}); expectations_["Fetch"].addChild( ExpectationNode::createExpectation(32, "Loop-Buffer-Size")); @@ -346,17 +346,19 @@ void ModelConfig::setExpectations(bool isDefault) { // Register-Set expectations_.addChild(ExpectationNode::createExpectation("Register-Set")); if (isa_ == ISA::AArch64) { + // TODO: Reduce to 32 once renaming issue has been sorted expectations_["Register-Set"].addChild( - ExpectationNode::createExpectation(32, + ExpectationNode::createExpectation(38, "GeneralPurpose-Count")); expectations_["Register-Set"]["GeneralPurpose-Count"] - .setValueBounds(32, UINT16_MAX); + .setValueBounds(38, UINT16_MAX); + // TODO: Reduce to 32 once renaming issue has been sorted expectations_["Register-Set"].addChild( ExpectationNode::createExpectation( - 32, "FloatingPoint/SVE-Count")); + 38, "FloatingPoint/SVE-Count")); expectations_["Register-Set"]["FloatingPoint/SVE-Count"] - .setValueBounds(32, UINT16_MAX); + .setValueBounds(38, UINT16_MAX); expectations_["Register-Set"].addChild( ExpectationNode::createExpectation(17, "Predicate-Count", @@ -377,14 +379,16 @@ void ModelConfig::setExpectations(bool isDefault) { expectations_["Register-Set"].addChild( ExpectationNode::createExpectation(32, "GeneralPurpose-Count")); + // TODO: Reduce to 32 once renaming issue has been sorted expectations_["Register-Set"]["GeneralPurpose-Count"] - .setValueBounds(32, UINT16_MAX); + .setValueBounds(38, UINT16_MAX); expectations_["Register-Set"].addChild( ExpectationNode::createExpectation(32, "FloatingPoint-Count")); + // TODO: Reduce to 32 once renaming issue has been sorted expectations_["Register-Set"]["FloatingPoint-Count"] - .setValueBounds(32, UINT16_MAX); + .setValueBounds(38, 
UINT16_MAX); } // Pipeline-Widths @@ -485,13 +489,24 @@ void ModelConfig::setExpectations(bool isDefault) { expectations_["LSQ-L1-Interface"].addChild( ExpectationNode::createExpectation(32, "Load-Bandwidth")); - expectations_["LSQ-L1-Interface"]["Load-Bandwidth"].setValueBounds( - 1, UINT16_MAX); expectations_["LSQ-L1-Interface"].addChild( ExpectationNode::createExpectation(32, "Store-Bandwidth")); - expectations_["LSQ-L1-Interface"]["Store-Bandwidth"].setValueBounds( - 1, UINT16_MAX); + + // AArch64 requires a vector length of at least 128, requiring a minimum of 16 + // byte load/store bandwidths + // For RV64, the minimum required load/store bandwidth is 8 bytes + if (isa_ == ISA::AArch64) { + expectations_["LSQ-L1-Interface"]["Load-Bandwidth"] + .setValueBounds(16, UINT16_MAX); + expectations_["LSQ-L1-Interface"]["Store-Bandwidth"] + .setValueBounds(16, UINT16_MAX); + } else if (isa_ == ISA::RV64) { + expectations_["LSQ-L1-Interface"]["Store-Bandwidth"] + .setValueBounds(8, UINT16_MAX); + expectations_["LSQ-L1-Interface"]["Load-Bandwidth"] + .setValueBounds(8, UINT16_MAX); + } expectations_["LSQ-L1-Interface"].addChild( ExpectationNode::createExpectation( @@ -926,6 +941,56 @@ void ModelConfig::postValidation() { invalid_ << "\t- Only a 'Flat' L1-Instruction-Memory Interface-Type is " "supported. Interface-Type used is " << l1iType << "\n"; + + if (isa_ == ISA::AArch64) { + // Ensure LSQ-L1-Interface Load/Store Bandwidth is large enough to + // accommodate a full vector load of the specified Vector-Length parameter + if (configTree_["Core"]["Vector-Length"].as() / 8 > + configTree_["LSQ-L1-Interface"]["Load-Bandwidth"].as()) { + invalid_ + << "\t- Load-Bandwidth (bytes) must be greater than Vector-Length " + "(bits). 
" + "The current Load-Bandwidth is set to " + << configTree_["LSQ-L1-Interface"]["Load-Bandwidth"].as() + << " bytes, when it must be at least " + << configTree_["Core"]["Vector-Length"].as() / 8 << "\n"; + } + if (configTree_["Core"]["Vector-Length"].as() / 8 > + configTree_["LSQ-L1-Interface"]["Store-Bandwidth"].as()) { + invalid_ + << "\t- Store-Bandwidth (bytes) must be greater than Vector-Length " + "(bits). " + "The current Store-Bandwidth is set to " + << configTree_["LSQ-L1-Interface"]["Store-Bandwidth"].as() + << " bytes, when it must be at least " + << configTree_["Core"]["Vector-Length"].as() / 8 << "\n"; + } + // Ensure LSQ-L1-Interface Load/Store Bandwidth is also large enough to + // accommodate a full vector load of the specified Streaming-Vector-Length + // parameter when streaming mode is enabled + if (configTree_["Core"]["Streaming-Vector-Length"].as() / 8 > + configTree_["LSQ-L1-Interface"]["Load-Bandwidth"].as()) { + invalid_ + << "\t- Load-Bandwidth (bytes) must be greater than " + "Streaming-Vector-Length (bits). " + "The current Load-Bandwidth is set to " + << configTree_["LSQ-L1-Interface"]["Load-Bandwidth"].as() + << " bytes, when it must be at least " + << configTree_["Core"]["Streaming-Vector-Length"].as() / 8 + << "\n"; + } + if (configTree_["Core"]["Streaming-Vector-Length"].as() / 8 > + configTree_["LSQ-L1-Interface"]["Store-Bandwidth"].as()) { + invalid_ + << "\t- Store-Bandwidth (bytes) must be greater than " + "Streaming-Vector-Length (bits). 
" + "The current Store-Bandwidth is set to " + << configTree_["LSQ-L1-Interface"]["Store-Bandwidth"].as() + << " bytes, when it must be at least " + << configTree_["Core"]["Streaming-Vector-Length"].as() / 8 + << "\n"; + } + } } ryml::Tree ModelConfig::getConfig() { return configTree_; } diff --git a/src/lib/models/emulation/Core.cc b/src/lib/models/emulation/Core.cc index bfd0c6c6f6..16b79eb375 100644 --- a/src/lib/models/emulation/Core.cc +++ b/src/lib/models/emulation/Core.cc @@ -8,7 +8,7 @@ namespace emulation { // TODO: Expose as config option /** The number of bytes fetched each cycle. */ -const uint8_t FETCH_SIZE = 4; +const uint16_t FETCH_SIZE = 4; const unsigned int clockFrequency = 2.5 * 1e9; Core::Core(MemoryInterface& instructionMemory, MemoryInterface& dataMemory, diff --git a/src/lib/pipeline/FetchUnit.cc b/src/lib/pipeline/FetchUnit.cc index fc0bbedc12..2da1ada097 100644 --- a/src/lib/pipeline/FetchUnit.cc +++ b/src/lib/pipeline/FetchUnit.cc @@ -41,7 +41,6 @@ void FetchUnit::tick() { auto bytesRead = isa_.predecode(&(loopBuffer_.front().encoding), loopBuffer_.front().instructionSize, loopBuffer_.front().address, macroOp); - assert(bytesRead != 0 && "predecode failure for loop buffer entry"); // Set prediction to recorded value during loop buffer filling @@ -58,7 +57,7 @@ void FetchUnit::tick() { // Pointer to the instruction data to decode from const uint8_t* buffer; - uint8_t bufferOffset; + uint16_t bufferOffset; // Check if more instruction data is required if (bufferedBytes_ < isa_.getMaxInstructionSize()) { diff --git a/test/regression/aarch64/Exception.cc b/test/regression/aarch64/Exception.cc index 9b27ec4aae..5f8aedccb5 100644 --- a/test/regression/aarch64/Exception.cc +++ b/test/regression/aarch64/Exception.cc @@ -237,13 +237,16 @@ INSTANTIATE_TEST_SUITE_P( ::testing::Values( std::make_tuple( EMULATION, - "{Core: {Vector-Length: 512, Streaming-Vector-Length: 1024}}"), + "{Core: {Vector-Length: 512, Streaming-Vector-Length: 1024}, " + 
"LSQ-L1-Interface: {Load-Bandwidth: 256, Store-Bandwidth: 256}}"), std::make_tuple( INORDER, - "{Core: {Vector-Length: 512, Streaming-Vector-Length: 1024}}"), + "{Core: {Vector-Length: 512, Streaming-Vector-Length: 1024}, " + "LSQ-L1-Interface: {Load-Bandwidth: 256, Store-Bandwidth: 256}}"), std::make_tuple( OUTOFORDER, - "{Core: {Vector-Length: 512, Streaming-Vector-Length: 1024}}")), + "{Core: {Vector-Length: 512, Streaming-Vector-Length: 1024}, " + "LSQ-L1-Interface: {Load-Bandwidth: 256, Store-Bandwidth: 256}}")), paramToString); } // namespace diff --git a/test/unit/MockArchitecture.hh b/test/unit/MockArchitecture.hh index 0df643ee9e..7795571f2f 100644 --- a/test/unit/MockArchitecture.hh +++ b/test/unit/MockArchitecture.hh @@ -9,7 +9,7 @@ namespace simeng { class MockArchitecture : public arch::Architecture { public: MOCK_CONST_METHOD4(predecode, - uint8_t(const void* ptr, uint8_t bytesAvailable, + uint8_t(const void* ptr, uint16_t bytesAvailable, uint64_t instructionAddress, MacroOp& output)); MOCK_CONST_METHOD1(canRename, bool(Register reg)); MOCK_CONST_METHOD1(getSystemRegisterTag, int32_t(uint16_t reg)); diff --git a/test/unit/aarch64/ArchitectureTest.cc b/test/unit/aarch64/ArchitectureTest.cc index 63b2805ce3..29238dd4dc 100644 --- a/test/unit/aarch64/ArchitectureTest.cc +++ b/test/unit/aarch64/ArchitectureTest.cc @@ -32,6 +32,10 @@ class AArch64ArchitectureTest : public testing::Test { Vector-Length: 512, Streaming-Vector-Length: 128 }, + LSQ-L1-Interface: { + Load-Bandwidth: 64, + Store-Bandwidth: 64 + }, Ports: { '0': {Portname: Port 0, Instruction-Group-Support: [FP, SVE]}, '1': {Portname: Port 1, Instruction-Group-Support: [PREDICATE]},