diff --git a/.jenkins/build_test_run.sh b/.jenkins/build_test_run.sh index 4b4f237b3c..52b3a7bdab 100644 --- a/.jenkins/build_test_run.sh +++ b/.jenkins/build_test_run.sh @@ -62,8 +62,8 @@ run () { echo "Simulation without configuration file argument:" cat run echo "" - compare_outputs "$(grep "retired:" run | rev | cut -d ' ' -f1 | rev)" "6708" "retired instructions" - compare_outputs "$(grep "cycles:" run | rev | cut -d ' ' -f1 | rev)" "7955" "simulated cycles" + compare_outputs "$(grep "retired:" run | rev | cut -d ' ' -f1 | rev)" "6721" "retired instructions" + compare_outputs "$(grep "cycles:" run | rev | cut -d ' ' -f1 | rev)" "6721" "simulated cycles" echo "" ./bin/simeng "$SIMENG_TOP"/configs/tx2.yaml > run diff --git a/src/include/simeng/models/emulation/Core.hh b/src/include/simeng/models/emulation/Core.hh index 9f1d4c2c1d..f1e38d7022 100644 --- a/src/include/simeng/models/emulation/Core.hh +++ b/src/include/simeng/models/emulation/Core.hh @@ -59,9 +59,6 @@ class Core : public simeng::Core { /** A reusable macro-op vector to fill with uops. */ MacroOp macroOp_; - /** An internal buffer for storing one or more uops. */ - std::queue> microOps_; - /** The previously generated addresses. */ std::vector previousAddresses_; @@ -71,9 +68,6 @@ class Core : public simeng::Core { /** The length of the available instruction memory. */ uint64_t programByteLength_ = 0; - /** Is the core waiting on a data read? */ - uint64_t pendingReads_ = 0; - /** The number of instructions executed. */ uint64_t instructionsExecuted_ = 0; diff --git a/src/lib/arch/aarch64/Instruction_decode.cc b/src/lib/arch/aarch64/Instruction_decode.cc index bceb3b1488..165c51b9ac 100644 --- a/src/lib/arch/aarch64/Instruction_decode.cc +++ b/src/lib/arch/aarch64/Instruction_decode.cc @@ -487,12 +487,14 @@ void Instruction::decode() { // LDADD* are considered to be both a load and a store if (metadata_.id >= ARM64_INS_LDADD && metadata_.id <= ARM64_INS_LDADDLH) { setInstructionType(InsnType::isLoad); + setInstructionType(InsnType::isStoreData); } // CASAL* are considered to be both a load and a store if (metadata_.opcode == Opcode::AArch64_CASALW || metadata_.opcode == Opcode::AArch64_CASALX) { setInstructionType(InsnType::isLoad); + setInstructionType(InsnType::isStoreData); } if (isInstruction(InsnType::isStoreData)) { diff --git a/src/lib/models/emulation/Core.cc b/src/lib/models/emulation/Core.cc index 3998fa91b5..bf0129b5ee 100644 --- a/src/lib/models/emulation/Core.cc +++ b/src/lib/models/emulation/Core.cc @@ -17,6 +17,17 @@ Core::Core(memory::MemoryInterface& instructionMemory, architecturalRegisterFileSet_(registerFileSet_), pc_(entryPoint), programByteLength_(programByteLength) { + // Ensure both interface types are flat + assert( + (config::SimInfo::getConfig()["L1-Data-Memory"]["Interface-Type"] + .as() == "Flat") && + "Emulation core is only compatable with a Flat Data Memory Interface."); + assert( + (config::SimInfo::getConfig()["L1-Instruction-Memory"]["Interface-Type"] + .as() == "Flat") && + "Emulation core is only compatable with a Flat Instruction Memory " + "Interface."); + // Pre-load the first instruction instructionMemory_.requestRead({pc_, FETCH_SIZE}); @@ -26,8 +37,6 @@ Core::Core(memory::MemoryInterface& instructionMemory, } void Core::tick() { - ticks_++; - if (hasHalted_) return; if (pc_ >= programByteLength_) { @@ -35,127 +44,95 @@ void Core::tick() { return; } - if (exceptionHandler_ != nullptr) { - processExceptionHandler(); - return; - } - - if (pendingReads_ > 0) { - // Handle pending reads to a uop - auto& uop = microOps_.front(); - - const auto& completedReads = dataMemory_.getCompletedReads(); - for (const auto& response : completedReads) { - assert(pendingReads_ > 0); - uop->supplyData(response.target.address, response.data); - pendingReads_--; - } - dataMemory_.clearCompletedReads(); - - if (pendingReads_ == 0) { - // Load complete: resume execution - execute(uop); - } - - // More data pending, end cycle early - return; - } - - // Fetch - - // Determine if new uops are needed to be fetched - if (!microOps_.size()) { - // Find fetched memory that matches the current PC - const auto& fetched = instructionMemory_.getCompletedReads(); - size_t fetchIndex; - for (fetchIndex = 0; fetchIndex < fetched.size(); fetchIndex++) { - if (fetched[fetchIndex].target.address == pc_) { - break; - } - } - if (fetchIndex == fetched.size()) { - // Need to wait for fetched instructions - return; - } + ticks_++; + isa_.updateSystemTimerRegisters(®isterFileSet_, ticks_); - const auto& instructionBytes = fetched[fetchIndex].data; - auto bytesRead = isa_.predecode(instructionBytes.getAsVector(), - FETCH_SIZE, pc_, macroOp_); + // Fetch & Decode + assert(macroOp_.empty() && + "Cannot begin emulation tick with un-executed micro-ops."); + // We only fetch one instruction at a time, so only ever one result in + // complete reads + const auto& instructionBytes = instructionMemory_.getCompletedReads()[0].data; + // Predecode fetched data + auto bytesRead = isa_.predecode(instructionBytes.getAsVector(), + FETCH_SIZE, pc_, macroOp_); + // Clear the fetched data + instructionMemory_.clearCompletedReads(); - // Clear the fetched data - instructionMemory_.clearCompletedReads(); + pc_ += bytesRead; - pc_ += bytesRead; + // Loop over all micro-ops and execute one by one + while (!macroOp_.empty()) { + auto& uop = macroOp_.front(); - // Decode - for (size_t index = 0; index < macroOp_.size(); index++) { - microOps_.push(std::move(macroOp_[index])); + if (uop->exceptionEncountered()) { + handleException(uop); + // If fatal, return + if (hasHalted_) return; } - } - - auto& uop = microOps_.front(); - if (uop->exceptionEncountered()) { - handleException(uop); - return; - } - - // Issue - auto registers = uop->getSourceRegisters(); - for (size_t i = 0; i < registers.size(); i++) { - auto reg = registers[i]; - if (!uop->isOperandReady(i)) { - uop->supplyOperand(i, registerFileSet_.get(reg)); + // Issue + auto registers = uop->getSourceRegisters(); + for (size_t i = 0; i < registers.size(); i++) { + auto reg = registers[i]; + if (!uop->isOperandReady(i)) { + uop->supplyOperand(i, registerFileSet_.get(reg)); + } } - } - // Execute - if (uop->isLoad()) { - auto addresses = uop->generateAddresses(); - previousAddresses_.clear(); - if (uop->exceptionEncountered()) { - handleException(uop); - return; - } - if (addresses.size() > 0) { - // Memory reads are required; request them, set `pendingReads_` - // accordingly, and end the cycle early + // Execute & Write-back + if (uop->isLoad()) { + auto addresses = uop->generateAddresses(); + previousAddresses_.clear(); + if (uop->exceptionEncountered()) { + handleException(uop); + // If fatal, return + if (hasHalted_) return; + } + if (addresses.size() > 0) { + // Memory reads required; request them + for (auto const& target : addresses) { + dataMemory_.requestRead(target); + // Save addresses for use by instructions that perform a LD and STR + // (i.e. single instruction atomics) + previousAddresses_.push_back(target); + } + // Emulation core can only be used with a Flat memory interface, so data + // is ready immediately + const auto& completedReads = dataMemory_.getCompletedReads(); + assert( + completedReads.size() == addresses.size() && + "Number of completed reads does not match the number of requested " + "reads."); + for (const auto& response : completedReads) { + uop->supplyData(response.target.address, response.data); + } + dataMemory_.clearCompletedReads(); + } + } else if (uop->isStoreAddress()) { + auto addresses = uop->generateAddresses(); + previousAddresses_.clear(); + if (uop->exceptionEncountered()) { + handleException(uop); + // If fatal, return + if (hasHalted_) return; + } + // Store addresses for use by next store data operation in `execute()` for (auto const& target : addresses) { - dataMemory_.requestRead(target); - // Store addresses for use by next store data operation previousAddresses_.push_back(target); } - pendingReads_ = addresses.size(); - return; - } else { - // Early execution due to lacking addresses - execute(uop); - return; - } - } else if (uop->isStoreAddress()) { - auto addresses = uop->generateAddresses(); - previousAddresses_.clear(); - if (uop->exceptionEncountered()) { - handleException(uop); - return; - } - // Store addresses for use by next store data operation - for (auto const& target : addresses) { - previousAddresses_.push_back(target); - } - if (uop->isStoreData()) { - execute(uop); - } else { - // Fetch memory for next cycle - instructionMemory_.requestRead({pc_, FETCH_SIZE}); - microOps_.pop(); + if (!uop->isStoreData()) { + // No further action needed, move onto next micro-op + macroOp_.erase(macroOp_.begin()); + continue; + } } - - return; + execute(uop); + macroOp_.erase(macroOp_.begin()); } - - execute(uop); - isa_.updateSystemTimerRegisters(®isterFileSet_, ticks_); + instructionsExecuted_++; + // Fetch memory for next cycle + instructionMemory_.requestRead({pc_, FETCH_SIZE}); } bool Core::hasHalted() const { return hasHalted_; } @@ -194,25 +171,12 @@ void Core::execute(std::shared_ptr& uop) { } // Writeback - auto results = uop->getResults(); - auto destinations = uop->getDestinationRegisters(); - if (uop->isStoreData()) { - for (size_t i = 0; i < results.size(); i++) { - auto reg = destinations[i]; - registerFileSet_.set(reg, results[i]); - } - } else { - for (size_t i = 0; i < results.size(); i++) { - auto reg = destinations[i]; - registerFileSet_.set(reg, results[i]); - } + const auto& results = uop->getResults(); + const auto& destinations = uop->getDestinationRegisters(); + for (size_t i = 0; i < results.size(); i++) { + auto reg = destinations[i]; + registerFileSet_.set(reg, results[i]); } - - if (uop->isLastMicroOp()) instructionsExecuted_++; - - // Fetch memory for next cycle - instructionMemory_.requestRead({pc_, FETCH_SIZE}); - microOps_.pop(); } void Core::handleException(const std::shared_ptr& instruction) { @@ -223,16 +187,9 @@ void Core::handleException(const std::shared_ptr& instruction) { void Core::processExceptionHandler() { assert(exceptionHandler_ != nullptr && "Attempted to process an exception handler that wasn't present"); - if (dataMemory_.hasPendingRequests()) { - // Must wait for all memory requests to complete before processing the - // exception - return; - } - bool success = exceptionHandler_->tick(); - if (!success) { - // Handler needs further ticks to complete - return; + // Tick until true is returned, signifying completion + while (exceptionHandler_->tick() == false) { } const auto& result = exceptionHandler_->getResult(); @@ -248,10 +205,6 @@ void Core::processExceptionHandler() { // Clear the handler exceptionHandler_ = nullptr; - - // Fetch memory for next cycle - instructionMemory_.requestRead({pc_, FETCH_SIZE}); - microOps_.pop(); } } // namespace emulation diff --git a/src/lib/models/inorder/Core.cc b/src/lib/models/inorder/Core.cc index 9d36707b70..b196d2cf8c 100644 --- a/src/lib/models/inorder/Core.cc +++ b/src/lib/models/inorder/Core.cc @@ -39,10 +39,11 @@ Core::Core(memory::MemoryInterface& instructionMemory, } void Core::tick() { - ticks_++; - if (hasHalted_) return; + ticks_++; + isa_.updateSystemTimerRegisters(®isterFileSet_, ticks_); + if (exceptionHandler_ != nullptr) { processExceptionHandler(); return; @@ -104,7 +105,6 @@ void Core::tick() { } fetchUnit_.requestFromPC(); - isa_.updateSystemTimerRegisters(®isterFileSet_, ticks_); } bool Core::hasHalted() const { diff --git a/src/lib/models/outoforder/Core.cc b/src/lib/models/outoforder/Core.cc index 5c7793f133..4f7cf0f42d 100644 --- a/src/lib/models/outoforder/Core.cc +++ b/src/lib/models/outoforder/Core.cc @@ -101,10 +101,11 @@ Core::Core(memory::MemoryInterface& instructionMemory, } void Core::tick() { - ticks_++; - if (hasHalted_) return; + ticks_++; + isa_.updateSystemTimerRegisters(®isterFileSet_, ticks_); + if (exceptionHandler_ != nullptr) { processExceptionHandler(); return; @@ -156,7 +157,6 @@ void Core::tick() { flushIfNeeded(); fetchUnit_.requestFromPC(); - isa_.updateSystemTimerRegisters(®isterFileSet_, ticks_); } bool Core::hasHalted() const {