diff --git a/configs/DEMO_RISCV.yaml b/configs/DEMO_RISCV.yaml index 5f3387ef1f..1cd86d8646 100644 --- a/configs/DEMO_RISCV.yaml +++ b/configs/DEMO_RISCV.yaml @@ -26,11 +26,10 @@ Queue-Sizes: Load: 64 Store: 36 Branch-Predictor: + Type: "Perceptron" BTB-Tag-Bits: 11 - Saturating-Count-Bits: 2 - Global-History-Length: 10 - RAS-entries: 5 - Fallback-Static-Predictor: "Always-Taken" + Global-History-Length: 19 + RAS-entries: 1 L1-Data-Memory: Interface-Type: Fixed L1-Instruction-Memory: diff --git a/configs/a64fx.yaml b/configs/a64fx.yaml index 2170e22b21..36d09a42c9 100644 --- a/configs/a64fx.yaml +++ b/configs/a64fx.yaml @@ -29,11 +29,10 @@ Queue-Sizes: Load: 40 Store: 24 Branch-Predictor: + Type: "Perceptron" BTB-Tag-Bits: 11 - Saturating-Count-Bits: 2 - Global-History-Length: 11 + Global-History-Length: 19 RAS-entries: 8 - Fallback-Static-Predictor: "Always-Taken" L1-Data-Memory: Interface-Type: Fixed L1-Instruction-Memory: diff --git a/configs/a64fx_SME.yaml b/configs/a64fx_SME.yaml index 6be2b171cd..7b1442cc32 100644 --- a/configs/a64fx_SME.yaml +++ b/configs/a64fx_SME.yaml @@ -31,11 +31,10 @@ Queue-Sizes: Load: 40 Store: 24 Branch-Predictor: + Type: "Perceptron" BTB-Tag-Bits: 11 - Saturating-Count-Bits: 2 - Global-History-Length: 11 + Global-History-Length: 19 RAS-entries: 8 - Fallback-Static-Predictor: "Always-Taken" L1-Data-Memory: Interface-Type: Fixed L1-Instruction-Memory: diff --git a/configs/m1_firestorm.yaml b/configs/m1_firestorm.yaml index 901ed5ad5a..a593500685 100644 --- a/configs/m1_firestorm.yaml +++ b/configs/m1_firestorm.yaml @@ -25,11 +25,10 @@ Queue-Sizes: Load: 130 Store: 60 Branch-Predictor: - BTB-Tag-Bits: 11 - Saturating-Count-Bits: 2 - Global-History-Length: 11 - RAS-entries: 8 - Fallback-Static-Predictor: "Always-Taken" + Type: "Perceptron" + BTB-Tag-Bits: 11 + Global-History-Length: 19 + RAS-entries: 8 L1-Data-Memory: Interface-Type: Fixed L1-Instruction-Memory: diff --git a/configs/sst-cores/a64fx-sst.yaml 
b/configs/sst-cores/a64fx-sst.yaml index 3bb2d3f499..fd503c668d 100644 --- a/configs/sst-cores/a64fx-sst.yaml +++ b/configs/sst-cores/a64fx-sst.yaml @@ -29,11 +29,10 @@ Queue-Sizes: Load: 40 Store: 24 Branch-Predictor: + Type: "Perceptron" BTB-Tag-Bits: 11 - Saturating-Count-Bits: 2 - Global-History-Length: 11 + Global-History-Length: 19 RAS-entries: 8 - Fallback-Static-Predictor: "Always-Taken" L1-Data-Memory: Interface-Type: External L1-Instruction-Memory: diff --git a/configs/sst-cores/m1_firestorm-sst.yaml b/configs/sst-cores/m1_firestorm-sst.yaml index 9ca033bf71..e7bc241b8f 100644 --- a/configs/sst-cores/m1_firestorm-sst.yaml +++ b/configs/sst-cores/m1_firestorm-sst.yaml @@ -25,11 +25,10 @@ Queue-Sizes: Load: 130 Store: 60 Branch-Predictor: + Type: "Perceptron" BTB-Tag-Bits: 11 - Saturating-Count-Bits: 2 - Global-History-Length: 11 + Global-History-Length: 11 RAS-entries: 8 - Fallback-Static-Predictor: "Always-Taken" L1-Data-Memory: Interface-Type: External L1-Instruction-Memory: diff --git a/configs/sst-cores/tx2-sst.yaml b/configs/sst-cores/tx2-sst.yaml index b5d4fbf034..e3d1e3231c 100644 --- a/configs/sst-cores/tx2-sst.yaml +++ b/configs/sst-cores/tx2-sst.yaml @@ -27,11 +27,10 @@ Queue-Sizes: Load: 64 Store: 36 Branch-Predictor: + Type: "Perceptron" BTB-Tag-Bits: 11 - Saturating-Count-Bits: 2 Global-History-Length: 10 RAS-entries: 5 - Fallback-Static-Predictor: "Always-Taken" L1-Data-Memory: Interface-Type: External L1-Instruction-Memory: diff --git a/configs/tx2.yaml b/configs/tx2.yaml index 5c57440111..a5e28807f9 100644 --- a/configs/tx2.yaml +++ b/configs/tx2.yaml @@ -27,11 +27,10 @@ Queue-Sizes: Load: 64 Store: 36 Branch-Predictor: + Type: "Perceptron" BTB-Tag-Bits: 11 - Saturating-Count-Bits: 2 - Global-History-Length: 10 + Global-History-Length: 19 RAS-entries: 5 - Fallback-Static-Predictor: "Always-Taken" L1-Data-Memory: Interface-Type: Fixed L1-Instruction-Memory: diff --git a/docs/sphinx/developer/components/branchPred.rst 
b/docs/sphinx/developer/components/branchPred.rst index b49f58c1c1..ba91fe98d9 100644 --- a/docs/sphinx/developer/components/branchPred.rst +++ b/docs/sphinx/developer/components/branchPred.rst @@ -31,7 +31,24 @@ Branch Target Buffer (BTB) If the supplied branch type is ``Unconditional``, then the predicted direction is overridden to be taken. If the supplied branch type is ``Conditional`` and the predicted direction is not taken, then the predicted target is overridden to be the next sequential instruction. Return Address Stack (RAS) - Identified through the supplied branch type, Return instructions pop values off of the RAS to get their branch target whilst Branch-and-Link instructions push values onto the RAS, for use by a proceeding Return instruction. + Identified through the supplied branch type, Return instructions pop values off of the RAS to get their branch target whilst Branch-and-Link instructions push values onto the RAS, for later use by the Branch-and-Link instruction's corresponding Return instruction. Static Prediction Based on the chosen static prediction method of "always taken" or "always not taken", the n-bit saturating counter value in the initial entries of the BTB structure are filled with the weakest variant of taken or not-taken respectively. + +Perceptron Predictor +-------------------- +The ``PerceptronPredictor`` has the same overall structure as the ``GenericPredictor`` but replaces the saturating counter as a means for direction prediction with a perceptron. The ``PerceptronPredictor`` contains the following logic. + +Global History + For indexing relevant prediction structures and for retrieving a direction from the perceptrons, a global history can be utilised. The global history value uses n-bits to store the n most recent branch direction outcomes, with the left-most bit being the oldest. + +Branch Target Buffer (BTB) + For each entry, the BTB stores the most recent target along with a perceptron for an associated direction. 
The indexing of this structure uses the lower, non-zero bits of an instruction address XOR'ed with the current global branch history value. + + The direction prediction is obtained from the perceptron by taking its dot-product with the global history. The prediction is not taken if this is negative, or taken otherwise. The perceptron is updated when its prediction is wrong or when the magnitude of the dot-product is below a pre-determined threshold (i.e., the confidence of the prediction is low). To update, each ith weight of the perceptron is incremented if the actual outcome of the branch is the same as the ith bit of ``globalHistory_``, and decremented otherwise. + + If the supplied branch type is ``Unconditional``, then the predicted direction is overridden to be taken. If the supplied branch type is ``Conditional`` and the predicted direction is not taken, then the predicted target is overridden to be the next sequential instruction. + +Return Address Stack (RAS) + Identified through the supplied branch type, Return instructions pop values off of the RAS to get their branch target whilst Branch-and-Link instructions push values onto the RAS, for later use by the Branch-and-Link instruction's corresponding Return instruction. \ No newline at end of file diff --git a/docs/sphinx/user/configuring_simeng.rst b/docs/sphinx/user/configuring_simeng.rst index 344585f77b..e60a0a7372 100644 --- a/docs/sphinx/user/configuring_simeng.rst +++ b/docs/sphinx/user/configuring_simeng.rst @@ -145,20 +145,23 @@ The Branch-Prediction section contains those options to parameterise the branch The current options include: +Type + The type of branch predictor that is used, the options are ``Generic``, and ``Perceptron``. Both types of predictor use a branch target buffer with each entry containing a direction prediction mechanism and a target address. The direction predictor used in ``Generic`` is a saturating counter, and in ``Perceptron`` it is a perceptron. 
+ BTB-Tag-Bits - The number of bits used to denote an entry in the Branch Target Buffer (BTB). For example, a ``bits`` value of 12 could denote 4096 entries with the calculation 1 << ``bits``. + The number of bits used to index the entries in the Branch Target Buffer (BTB). The number of entries in the BTB is obtained from the calculation: 1 << ``bits``. For example, a ``bits`` value of 12 would result in a BTB with 4096 entries. Saturating-Count-Bits - The number of bits used in the saturating counter value. + Only needed for a ``Generic`` predictor. The number of bits used in the saturating counter value. Global-History-Length - The number of bits used to record the global history of branch directions. Each bit represents one branch direction. + The number of bits used to record the global history of branch directions. Each bit represents one branch direction. For ``PerceptronPredictor``, this dictates the size of the perceptrons (with each perceptron having Global-History-Length + 1 weights). RAS-entries The number of entries in the Return Address Stack (RAS). Fallback-Static-Predictor - The static predictor used when no dynamic prediction is available. The options are either ``"Always-Taken"`` or ``"Always-Not-Taken"``. + Only needed for a ``Generic`` predictor. The static predictor used when no dynamic prediction is available. The options are either ``"Always-Taken"`` or ``"Always-Not-Taken"``. .. 
_l1dcnf: diff --git a/src/include/simeng/CoreInstance.hh b/src/include/simeng/CoreInstance.hh index 2e7b923a65..8b687cab7e 100644 --- a/src/include/simeng/CoreInstance.hh +++ b/src/include/simeng/CoreInstance.hh @@ -8,6 +8,7 @@ #include "simeng/FixedLatencyMemoryInterface.hh" #include "simeng/FlatMemoryInterface.hh" #include "simeng/GenericPredictor.hh" +#include "simeng/PerceptronPredictor.hh" #include "simeng/SpecialFileDirGen.hh" #include "simeng/arch/Architecture.hh" #include "simeng/arch/aarch64/Architecture.hh" diff --git a/src/include/simeng/PerceptronPredictor.hh b/src/include/simeng/PerceptronPredictor.hh new file mode 100644 index 0000000000..b76e4dd7e4 --- /dev/null +++ b/src/include/simeng/PerceptronPredictor.hh @@ -0,0 +1,88 @@ +#pragma once + +#include +#include +#include + +#include "simeng/BranchPredictor.hh" +#include "simeng/config/SimInfo.hh" + +namespace simeng { + +/** A Perceptron branch predictor implementing the branch predictor described in + * Jimenez and Lin ("Dynamic branch prediction with perceptrons", IEEE High- + * Performance Computer Architecture Symposium Proceedings (2001), 197-206 -- + * https://www.cs.utexas.edu/~lin/papers/hpca01.pdf). + * The following predictors have been included: + * + * - Static predictor based on pre-allocated branch type. + * + * - A Branch Target Buffer (BTB) with a local and global indexing scheme and a + * perceptron. + * + * - A Return Address Stack (RAS) is also in use. + */ + +class PerceptronPredictor : public BranchPredictor { + public: + /** Initialise predictor models. */ + PerceptronPredictor(ryml::ConstNodeRef config = config::SimInfo::getConfig()); + ~PerceptronPredictor(); + + /** Generate a branch prediction for the supplied instruction address, a + * branch type, and a known branch offset; defaults to 0 meaning offset is not + * known. Returns a branch direction and branch target address. 
*/ + BranchPrediction predict(uint64_t address, BranchType type, + int64_t knownOffset = 0) override; + + /** Updates appropriate predictor model objects based on the address and + * outcome of the branch instruction. */ + void update(uint64_t address, bool taken, uint64_t targetAddress, + BranchType type) override; + + /** Provides RAS rewinding behaviour. */ + void flush(uint64_t address) override; + + private: + /** Returns the dot product of a perceptron and a history vector. Used to + * determine a direction prediction */ + int64_t getDotProduct(const std::vector& perceptron, + uint64_t history); + + /** The length in bits of the BTB index; BTB will have 2^bits entries. */ + uint64_t btbBits_; + + /** A 2^bits length vector of pairs containing a perceptron with + * globalHistoryLength_ + 1 inputs, and a branch target. + * The perceptrons are used to provide a branch direction prediction by + * taking a dot product with the global history, as described + * in Jimenez and Lin */ + std::vector, uint64_t>> btb_; + + /** The global history used when an address was last predicted. */ + std::map btbHistory_; + + /** An n-bit history of previous branch directions where n is equal to + * globalHistoryLength_. */ + uint64_t globalHistory_ = 0; + + /** The number of previous branch directions recorded globally. */ + uint64_t globalHistoryLength_; + + /** The magnitude of the dot product of the perceptron and the global history, + * below which the perceptron's weight must be updated */ + uint64_t trainingThreshold_; + + /** A return address stack. */ + std::deque ras_; + + /** RAS history with instruction address as the keys. A non-zero value + * represents the target prediction for a return instruction and a 0 entry for + * a branch-and-link instruction. */ + std::map rasHistory_; + + /** The size of the RAS. 
*/ + uint64_t rasSize_; +}; + +} // namespace simeng diff --git a/src/lib/CMakeLists.txt b/src/lib/CMakeLists.txt index 4703ed5e88..d5e20a6421 100644 --- a/src/lib/CMakeLists.txt +++ b/src/lib/CMakeLists.txt @@ -42,6 +42,7 @@ set(SIMENG_SOURCES FlatMemoryInterface.cc GenericPredictor.cc Instruction.cc + PerceptronPredictor.cc RegisterFileSet.cc RegisterValue.cc SpecialFileDirGen.cc diff --git a/src/lib/CoreInstance.cc b/src/lib/CoreInstance.cc index af4ee73b9d..2e9af897ff 100644 --- a/src/lib/CoreInstance.cc +++ b/src/lib/CoreInstance.cc @@ -219,8 +219,13 @@ void CoreInstance::createCore() { arch_ = std::make_unique(kernel_); } - // Construct branch predictor object - predictor_ = std::make_unique(); + std::string predictorType = + config_["Branch-Predictor"]["Type"].as(); + if (predictorType == "Generic") { + predictor_ = std::make_unique(); + } else if (predictorType == "Perceptron") { + predictor_ = std::make_unique(); + } // Extract the port arrangement from the config file auto config_ports = config_["Ports"]; diff --git a/src/lib/PerceptronPredictor.cc b/src/lib/PerceptronPredictor.cc new file mode 100644 index 0000000000..18ae064d32 --- /dev/null +++ b/src/lib/PerceptronPredictor.cc @@ -0,0 +1,160 @@ +#include "simeng/PerceptronPredictor.hh" + +namespace simeng { + +PerceptronPredictor::PerceptronPredictor(ryml::ConstNodeRef config) + : btbBits_(config["Branch-Predictor"]["BTB-Tag-Bits"].as()), + globalHistoryLength_( + config["Branch-Predictor"]["Global-History-Length"].as()), + rasSize_(config["Branch-Predictor"]["RAS-entries"].as()) { + // Build BTB based on config options + uint32_t btbSize = (1 << btbBits_); + btb_.resize(btbSize); + // Initialise perceptron values with 0 for the global history weights, and 1 + // for the bias weight; and initialise the target with 0 (i.e., unknown) + for (int i = 0; i < btbSize; i++) { + btb_[i].first.assign(globalHistoryLength_, 0); + btb_[i].first.push_back(1); + btb_[i].second = 0; + } + + // Set up training 
threshold according to empirically determined formula + trainingThreshold_ = (uint64_t)((1.93 * globalHistoryLength_) + 14); +} + +PerceptronPredictor::~PerceptronPredictor() { + ras_.clear(); + rasHistory_.clear(); +} + +BranchPrediction PerceptronPredictor::predict(uint64_t address, BranchType type, + int64_t knownOffset) { + // Get the hashed index for the prediction table. XOR the global history with + // the non-zero bits of the address, and then keep only the btbBits_ bits of + // the output to keep it in bounds of the prediction table. + uint64_t hashedIndex = + ((address >> 2) ^ globalHistory_) & ((1 << btbBits_) - 1); + + // Store the global history for correct hashing in update() -- + // needs to be global history and not the hashed index as hashing loses + // information at longer global history lengths + btbHistory_[address] = globalHistory_; + + // Retrieve the perceptron from the BTB + std::vector perceptron = btb_[hashedIndex].first; + + // Get dot product of perceptron and history + int64_t Pout = getDotProduct(perceptron, globalHistory_); + // Determine direction prediction based on its sign + bool direction = (Pout >= 0); + + // Retrieve target prediction + uint64_t target = + (knownOffset != 0) ? 
address + knownOffset : btb_[hashedIndex].second; + + BranchPrediction prediction = {direction, target}; + + // Amend prediction based on branch type + if (type == BranchType::Unconditional) { + prediction.taken = true; + } else if (type == BranchType::Return) { + prediction.taken = true; + // Return branches can use the RAS if an entry is available + if (ras_.size() > 0) { + prediction.target = ras_.back(); + // Record top of RAS used for target prediction + rasHistory_[address] = ras_.back(); + ras_.pop_back(); + } + } else if (type == BranchType::SubroutineCall) { + prediction.taken = true; + // Subroutine call branches must push their associated return address to RAS + if (ras_.size() >= rasSize_) { + ras_.pop_front(); + } + ras_.push_back(address + 4); + // Record that this address is a branch-and-link instruction + rasHistory_[address] = 0; + } else if (type == BranchType::Conditional) { + if (!prediction.taken) prediction.target = address + 4; + } + return prediction; +} + +void PerceptronPredictor::update(uint64_t address, bool taken, + uint64_t targetAddress, BranchType type) { + // Work out hash index + uint64_t prevGlobalHistory = btbHistory_[address]; + uint64_t hashedIndex = + ((address >> 2) ^ prevGlobalHistory) & ((1 << btbBits_) - 1); + + std::vector perceptron = btb_[hashedIndex].first; + + // Work out the most recent prediction + int64_t Pout = getDotProduct(perceptron, prevGlobalHistory); + bool directionPrediction = (Pout >= 0); + + // Update the perceptron if the prediction was wrong, or the dot product's + // magnitude was not greater than the training threshold + if ((directionPrediction != taken) || (abs(Pout) < trainingThreshold_)) { + int8_t t = (taken) ? 1 : -1; + + for (int i = 0; i < globalHistoryLength_; i++) { + int8_t xi = + ((prevGlobalHistory & (1 << ((globalHistoryLength_ - 1) - i))) == 0) + ? 
-1 + : 1; + int8_t product_xi_t = xi * t; + // Make sure no overflow (+-127) + if (!(perceptron[i] == 127 && product_xi_t == 1) && + !(perceptron[i] == -127 && product_xi_t == -1)) { + perceptron[i] += product_xi_t; + } + } + perceptron[globalHistoryLength_] += t; + } + + btb_[hashedIndex].first = perceptron; + btb_[hashedIndex].second = targetAddress; + + globalHistory_ = + ((globalHistory_ << 1) | taken) & ((1 << globalHistoryLength_) - 1); + return; +} + +void PerceptronPredictor::flush(uint64_t address) { + // If address interacted with RAS, rewind entry + auto it = rasHistory_.find(address); + if (it != rasHistory_.end()) { + uint64_t target = it->second; + if (target != 0) { + // If history entry belongs to a return instruction, push target back onto + // stack + if (ras_.size() >= rasSize_) { + ras_.pop_front(); + } + ras_.push_back(target); + } else { + // If history entry belongs to a branch-and-link instruction, pop target + // off of stack + if (ras_.size()) { + ras_.pop_back(); + } + } + rasHistory_.erase(it); + } +} + +int64_t PerceptronPredictor::getDotProduct( + const std::vector& perceptron, uint64_t history) { + int64_t Pout = perceptron[globalHistoryLength_]; + for (int i = 0; i < globalHistoryLength_; i++) { + // Get branch direction for ith entry in the history + bool historyTaken = + ((history & (1 << ((globalHistoryLength_ - 1) - i))) != 0); + Pout += historyTaken ? 
perceptron[i] : (0 - perceptron[i]); + } + return Pout; +} + +} // namespace simeng diff --git a/src/lib/config/ModelConfig.cc b/src/lib/config/ModelConfig.cc index 697c14c7c4..d1313dc5a3 100644 --- a/src/lib/config/ModelConfig.cc +++ b/src/lib/config/ModelConfig.cc @@ -424,16 +424,16 @@ void ModelConfig::setExpectations(bool isDefault) { expectations_.addChild( ExpectationNode::createExpectation("Branch-Predictor")); + expectations_["Branch-Predictor"].addChild( + ExpectationNode::createExpectation("Perceptron", "Type")); + expectations_["Branch-Predictor"]["Type"].setValueSet( + std::vector{"Generic", "Perceptron"}); + expectations_["Branch-Predictor"].addChild( ExpectationNode::createExpectation(8, "BTB-Tag-Bits")); expectations_["Branch-Predictor"]["BTB-Tag-Bits"].setValueBounds(1, 64); - expectations_["Branch-Predictor"].addChild( - ExpectationNode::createExpectation(2, "Saturating-Count-Bits")); - expectations_["Branch-Predictor"]["Saturating-Count-Bits"] - .setValueBounds(1, 64); - expectations_["Branch-Predictor"].addChild( ExpectationNode::createExpectation(8, "Global-History-Length")); expectations_["Branch-Predictor"]["Global-History-Length"] @@ -444,11 +444,22 @@ void ModelConfig::setExpectations(bool isDefault) { expectations_["Branch-Predictor"]["RAS-entries"].setValueBounds( 1, UINT16_MAX); - expectations_["Branch-Predictor"].addChild( - ExpectationNode::createExpectation( - "Always-Taken", "Fallback-Static-Predictor")); - expectations_["Branch-Predictor"]["Fallback-Static-Predictor"].setValueSet( - std::vector{"Always-Taken", "Always-Not-Taken"}); + // The saturating counter bits and the fallback predictor + // are relevant to the GenericPredictor only + if (!isDefault && + configTree_["Branch-Predictor"]["Type"].as() == "Generic") { + expectations_["Branch-Predictor"].addChild( + ExpectationNode::createExpectation(2, + "Saturating-Count-Bits")); + expectations_["Branch-Predictor"]["Saturating-Count-Bits"] + .setValueBounds(1, 64); + + 
expectations_["Branch-Predictor"].addChild( + ExpectationNode::createExpectation( + "Always-Taken", "Fallback-Static-Predictor")); + expectations_["Branch-Predictor"]["Fallback-Static-Predictor"].setValueSet( + std::vector{"Always-Taken", "Always-Not-Taken"}); + } // L1-Data-Memory expectations_.addChild(ExpectationNode::createExpectation("L1-Data-Memory")); diff --git a/test/integration/ConfigTest.cc b/test/integration/ConfigTest.cc index ed0f4124de..7907f57762 100644 --- a/test/integration/ConfigTest.cc +++ b/test/integration/ConfigTest.cc @@ -49,10 +49,9 @@ TEST(ConfigTest, Default) { "'FloatingPoint/SVE-Count': 32\n 'Predicate-Count': 17\n " "'Conditional-Count': 1\n 'Matrix-Count': 1\n'Pipeline-Widths':\n " "Commit: 1\n FrontEnd: 1\n 'LSQ-Completion': 1\n'Queue-Sizes':\n ROB: " - "32\n Load: 16\n Store: 16\n'Branch-Predictor':\n 'BTB-Tag-Bits': 8\n " - " 'Saturating-Count-Bits': 2\n 'Global-History-Length': 8\n " - "'RAS-entries': 8\n 'Fallback-Static-Predictor': " - "'Always-Taken'\n'L1-Data-Memory':\n 'Interface-Type': " + "32\n Load: 16\n Store: 16\n'Branch-Predictor':\n Type: Perceptron\n " + "'BTB-Tag-Bits': 8\n 'Global-History-Length': 8\n 'RAS-entries': " + "8\n'L1-Data-Memory':\n 'Interface-Type': " "Flat\n'L1-Instruction-Memory':\n 'Interface-Type': " "Flat\n'LSQ-L1-Interface':\n 'Access-Latency': 4\n Exclusive: 0\n " "'Load-Bandwidth': 32\n 'Store-Bandwidth': 32\n " @@ -69,8 +68,8 @@ TEST(ConfigTest, Default) { " - 6343\n 'Execution-Latency': 1\n 'Execution-Throughput': 1\n " " 'Instruction-Group-Nums':\n - 87\n'CPU-Info':\n " "'Generate-Special-Dir': 1\n 'Special-File-Dir-Path': " SIMENG_BUILD_DIR - "/specialFiles/\n 'Core-Count': 1\n 'Socket-Count': 1\n " - "SMT: 1\n BogoMIPS: 0\n Features: ''\n 'CPU-Implementer': 0x0\n " + "/specialFiles/\n 'Core-Count': 1\n 'Socket-Count': 1\n SMT: 1\n " + "BogoMIPS: 0\n Features: ''\n 'CPU-Implementer': 0x0\n " "'CPU-Architecture': 0\n 'CPU-Variant': 0x0\n 'CPU-Part': 0x0\n " "'CPU-Revision': 0\n 
'Package-Count': 1\n"; EXPECT_EQ(emittedConfig, expectedValues); @@ -104,10 +103,9 @@ TEST(ConfigTest, Default) { "100000\n'Register-Set':\n 'GeneralPurpose-Count': 32\n " "'FloatingPoint-Count': 32\n'Pipeline-Widths':\n Commit: 1\n FrontEnd: " "1\n 'LSQ-Completion': 1\n'Queue-Sizes':\n ROB: 32\n Load: 16\n " - "Store: 16\n'Branch-Predictor':\n 'BTB-Tag-Bits': 8\n " - "'Saturating-Count-Bits': 2\n 'Global-History-Length': 8\n " - "'RAS-entries': 8\n 'Fallback-Static-Predictor': " - "'Always-Taken'\n'L1-Data-Memory':\n 'Interface-Type': " + "Store: 16\n'Branch-Predictor':\n Type: Perceptron\n 'BTB-Tag-Bits': " + "8\n 'Global-History-Length': 8\n 'RAS-entries': " + "8\n'L1-Data-Memory':\n 'Interface-Type': " "Flat\n'L1-Instruction-Memory':\n 'Interface-Type': " "Flat\n'LSQ-L1-Interface':\n 'Access-Latency': 4\n Exclusive: 0\n " "'Load-Bandwidth': 32\n 'Store-Bandwidth': 32\n " @@ -124,8 +122,8 @@ TEST(ConfigTest, Default) { " - 450\n 'Execution-Latency': 1\n 'Execution-Throughput': 1\n " "'Instruction-Group-Nums':\n - 24\n'CPU-Info':\n " "'Generate-Special-Dir': 1\n 'Special-File-Dir-Path': " SIMENG_BUILD_DIR - "/specialFiles/\n 'Core-Count': 1\n 'Socket-Count': 1\n " - "SMT: 1\n BogoMIPS: 0\n Features: ''\n 'CPU-Implementer': 0x0\n " + "/specialFiles/\n 'Core-Count': 1\n 'Socket-Count': 1\n SMT: 1\n " + "BogoMIPS: 0\n Features: ''\n 'CPU-Implementer': 0x0\n " "'CPU-Architecture': 0\n 'CPU-Variant': 0x0\n 'CPU-Part': 0x0\n " "'CPU-Revision': 0\n 'Package-Count': 1\n"; EXPECT_EQ(emittedConfig, expectedValues); diff --git a/test/regression/RegressionTest.cc b/test/regression/RegressionTest.cc index 5b43ac6cd0..6c5c30e90d 100644 --- a/test/regression/RegressionTest.cc +++ b/test/regression/RegressionTest.cc @@ -5,6 +5,7 @@ #include "simeng/FixedLatencyMemoryInterface.hh" #include "simeng/FlatMemoryInterface.hh" #include "simeng/GenericPredictor.hh" +#include "simeng/PerceptronPredictor.hh" #include "simeng/config/SimInfo.hh" #include "simeng/kernel/Linux.hh" #include 
"simeng/kernel/LinuxProcess.hh" @@ -94,7 +95,16 @@ void RegressionTest::run(const char* source, const char* triple, createPortAllocator(); // Create a branch predictor for a pipelined core - simeng::GenericPredictor predictor = simeng::GenericPredictor(); + std::unique_ptr predictor_ = nullptr; + std::string predictorType = + simeng::config::SimInfo::getConfig()["Branch-Predictor"]["Type"] + .as(); + if (predictorType == "Generic") { + predictor_ = std::make_unique(); + } else if (predictorType == "Perceptron") { + predictor_ = std::make_unique(); + } + // Create the core model switch (std::get<0>(GetParam())) { case EMULATION: @@ -106,13 +116,13 @@ void RegressionTest::run(const char* source, const char* triple, case INORDER: core_ = std::make_unique( instructionMemory, *flatDataMemory, processMemorySize_, entryPoint, - *architecture_, predictor); + *architecture_, *predictor_); dataMemory = std::move(flatDataMemory); break; case OUTOFORDER: core_ = std::make_unique( instructionMemory, *fixedLatencyDataMemory, processMemorySize_, - entryPoint, *architecture_, predictor, *portAllocator); + entryPoint, *architecture_, *predictor_, *portAllocator); dataMemory = std::move(fixedLatencyDataMemory); break; } diff --git a/test/unit/CMakeLists.txt b/test/unit/CMakeLists.txt index fd1e4f9882..ade9822076 100644 --- a/test/unit/CMakeLists.txt +++ b/test/unit/CMakeLists.txt @@ -32,6 +32,7 @@ set(TEST_SOURCES ProcessTest.cc RegisterFileSetTest.cc RegisterValueTest.cc + PerceptronPredictorTest.cc SpecialFileDirGenTest.cc ) diff --git a/test/unit/GenericPredictorTest.cc b/test/unit/GenericPredictorTest.cc index 898e7e93e2..c546157021 100644 --- a/test/unit/GenericPredictorTest.cc +++ b/test/unit/GenericPredictorTest.cc @@ -19,17 +19,17 @@ class GenericPredictorTest : public testing::Test { // miss TEST_F(GenericPredictorTest, Miss) { simeng::config::SimInfo::addToConfig( - "{Branch-Predictor: {BTB-Tag-Bits: 11, Saturating-Count-Bits: 2, " - "Global-History-Length: 10, 
RAS-entries: 5, Fallback-Static-Predictor: " - "Always-Taken}}"); + "{Branch-Predictor: {Type: Generic, BTB-Tag-Bits: 11, " + "Saturating-Count-Bits: 2, Global-History-Length: 10, RAS-entries: 5, " + "Fallback-Static-Predictor: Always-Taken}}"); auto predictor = simeng::GenericPredictor(); auto prediction = predictor.predict(0, BranchType::Conditional, 0); EXPECT_TRUE(prediction.taken); simeng::config::SimInfo::addToConfig( - "{Branch-Predictor: {BTB-Tag-Bits: 11, Saturating-Count-Bits: 2, " - "Global-History-Length: 10, RAS-entries: 5, Fallback-Static-Predictor: " - "Always-Not-Taken}}"); + "{Branch-Predictor: {Type: Generic, BTB-Tag-Bits: 11, " + "Saturating-Count-Bits: 2, Global-History-Length: 10, RAS-entries: 5, " + "Fallback-Static-Predictor: Always-Not-Taken}}"); predictor = simeng::GenericPredictor(); prediction = predictor.predict(0, BranchType::Conditional, 0); EXPECT_FALSE(prediction.taken); @@ -41,9 +41,9 @@ TEST_F(GenericPredictorTest, Miss) { // correctly TEST_F(GenericPredictorTest, RAS) { simeng::config::SimInfo::addToConfig( - "{Branch-Predictor: {BTB-Tag-Bits: 11, Saturating-Count-Bits: 2, " - "Global-History-Length: 10, RAS-entries: 10, Fallback-Static-Predictor: " - "Always-Taken}}"); + "{Branch-Predictor: {Type: Generic, BTB-Tag-Bits: 11, " + "Saturating-Count-Bits: 2, Global-History-Length: 10, RAS-entries: 10, " + "Fallback-Static-Predictor: Always-Taken}}"); auto predictor = simeng::GenericPredictor(); auto prediction = predictor.predict(8, BranchType::SubroutineCall, 8); EXPECT_TRUE(prediction.taken); @@ -82,9 +82,9 @@ TEST_F(GenericPredictorTest, RAS) { // correctly, when no address aliasing has occurred TEST_F(GenericPredictorTest, Hit) { simeng::config::SimInfo::addToConfig( - "{Branch-Predictor: {BTB-Tag-Bits: 11, Saturating-Count-Bits: 2, " - "Global-History-Length: 1, RAS-entries: 5, Fallback-Static-Predictor: " - "Always-Taken}}"); + "{Branch-Predictor: {Type: Generic, BTB-Tag-Bits: 11, " + "Saturating-Count-Bits: 2, 
Global-History-Length: 1, RAS-entries: 5, " + "Fallback-Static-Predictor: Always-Taken}}"); auto predictor = simeng::GenericPredictor(); predictor.update(0, true, 16, BranchType::Conditional); predictor.update(0, true, 16, BranchType::Conditional); @@ -101,9 +101,9 @@ TEST_F(GenericPredictorTest, Hit) { // behaviours of the same branch but in different states of the program TEST_F(GenericPredictorTest, GlobalIndexing) { simeng::config::SimInfo::addToConfig( - "{Branch-Predictor: {BTB-Tag-Bits: 11, Saturating-Count-Bits: 2, " - "Global-History-Length: 5, RAS-entries: 5, Fallback-Static-Predictor: " - "Always-Not-Taken}}"); + "{Branch-Predictor: {Type: Generic, BTB-Tag-Bits: 11, " + "Saturating-Count-Bits: 2, Global-History-Length: 5, RAS-entries: 5, " + "Fallback-Static-Predictor: Always-Not-Taken}}"); auto predictor = simeng::GenericPredictor(); // Spool up first global history pattern predictor.update(0, true, 4, BranchType::Unconditional); diff --git a/test/unit/PerceptronPredictorTest.cc b/test/unit/PerceptronPredictorTest.cc new file mode 100644 index 0000000000..b276b3795f --- /dev/null +++ b/test/unit/PerceptronPredictorTest.cc @@ -0,0 +1,185 @@ +#include "MockInstruction.hh" +#include "gtest/gtest.h" +#include "simeng/PerceptronPredictor.hh" + +namespace simeng { + +class PerceptronPredictorTest : public testing::Test { + public: + PerceptronPredictorTest() : uop(new MockInstruction), uopPtr(uop) { + uop->setInstructionAddress(0); + } + + protected: + MockInstruction* uop; + std::shared_ptr uopPtr; +}; + +// Tests that the PerceptronPredictor will predict the correct direction on a +// miss +TEST_F(PerceptronPredictorTest, Miss) { + simeng::config::SimInfo::addToConfig( + "{Branch-Predictor: {Type: Perceptron, BTB-Tag-Bits: 11, " + "Global-History-Length: 10, RAS-entries: 5}}"); + auto predictor = simeng::PerceptronPredictor(); + auto prediction = predictor.predict(0, BranchType::Conditional, 0); + EXPECT_TRUE(prediction.taken); + prediction = 
predictor.predict(8, BranchType::Unconditional, 0); + EXPECT_TRUE(prediction.taken); +} + +// Tests that the PerceptronPredictor will predict branch-and-link return pairs +// correctly +TEST_F(PerceptronPredictorTest, RAS) { + simeng::config::SimInfo::addToConfig( + "{Branch-Predictor: {Type: Perceptron, BTB-Tag-Bits: 11, " + "Global-History-Length: 10, RAS-entries: 10}}"); + auto predictor = simeng::PerceptronPredictor(); + auto prediction = predictor.predict(8, BranchType::SubroutineCall, 8); + EXPECT_TRUE(prediction.taken); + EXPECT_EQ(prediction.target, 16); + prediction = predictor.predict(24, BranchType::SubroutineCall, 8); + EXPECT_TRUE(prediction.taken); + EXPECT_EQ(prediction.target, 32); + prediction = predictor.predict(40, BranchType::SubroutineCall, 8); + EXPECT_TRUE(prediction.taken); + EXPECT_EQ(prediction.target, 48); + prediction = predictor.predict(56, BranchType::SubroutineCall, 8); + EXPECT_TRUE(prediction.taken); + EXPECT_EQ(prediction.target, 64); + prediction = predictor.predict(72, BranchType::SubroutineCall, 8); + EXPECT_TRUE(prediction.taken); + EXPECT_EQ(prediction.target, 80); + + prediction = predictor.predict(84, BranchType::Return, 0); + EXPECT_TRUE(prediction.taken); + EXPECT_EQ(prediction.target, 76); + prediction = predictor.predict(68, BranchType::Return, 0); + EXPECT_TRUE(prediction.taken); + EXPECT_EQ(prediction.target, 60); + prediction = predictor.predict(52, BranchType::Return, 0); + EXPECT_TRUE(prediction.taken); + EXPECT_EQ(prediction.target, 44); + prediction = predictor.predict(36, BranchType::Return, 0); + EXPECT_TRUE(prediction.taken); + EXPECT_EQ(prediction.target, 28); + prediction = predictor.predict(20, BranchType::Return, 0); + EXPECT_TRUE(prediction.taken); + EXPECT_EQ(prediction.target, 12); +} + +// Tests that the PerceptronPredictor will predict a previously encountered +// branch correctly, when no address aliasing has occurred +TEST_F(PerceptronPredictorTest, Hit) { + simeng::config::SimInfo::addToConfig( + 
"{Branch-Predictor: {Type: Perceptron, BTB-Tag-Bits: 11, " + "Global-History-Length: 1, RAS-entries: 5}}"); + auto predictor = simeng::PerceptronPredictor(); + predictor.update(0, true, 16, BranchType::Conditional); + predictor.update(0, true, 16, BranchType::Conditional); + predictor.update(0, true, 16, BranchType::Conditional); + predictor.update(0, true, 16, BranchType::Conditional); + predictor.update(0, false, 16, BranchType::Conditional); + + auto prediction = predictor.predict(0, BranchType::Conditional, 0); + EXPECT_TRUE(prediction.taken); + EXPECT_EQ(prediction.target, 16); +} + +// Tests that the PerceptronPredictor will predict correctly for two different +// behaviours of the same branch but in different states of the program +TEST_F(PerceptronPredictorTest, GlobalIndexing) { + simeng::config::SimInfo::addToConfig( + "{Branch-Predictor: {Type: Perceptron, BTB-Tag-Bits: 11, " + "Global-History-Length: 5, RAS-entries: 5}}"); + auto predictor = simeng::PerceptronPredictor(); + // Spool up first global history pattern + predictor.update(0, true, 4, BranchType::Unconditional); + predictor.update(0, false, 4, BranchType::Unconditional); + predictor.update(0, false, 4, BranchType::Unconditional); + predictor.update(0, false, 4, BranchType::Unconditional); + predictor.update(0, true, 4, BranchType::Unconditional); + // Ensure default behaviour for first encounter + auto prediction = predictor.predict(0x1F, BranchType::Conditional, 0); + EXPECT_TRUE(prediction.taken); + EXPECT_EQ(prediction.target, 0); + // Set entry in BTB + predictor.update(0x1F, false, 0xAB, BranchType::Conditional); + + // Spool up second global history pattern + predictor.update(0, false, 4, BranchType::Unconditional); + predictor.update(0, true, 4, BranchType::Unconditional); + predictor.update(0, true, 4, BranchType::Unconditional); + predictor.update(0, true, 4, BranchType::Unconditional); + predictor.update(0, false, 4, BranchType::Unconditional); + // Ensure default behaviour for 
re-encounter but with different global history + prediction = predictor.predict(0x1F, BranchType::Conditional, 0); + EXPECT_TRUE(prediction.taken); + EXPECT_EQ(prediction.target, 0); + // Set entry in BTB + predictor.update(0x1F, true, 0xBA, BranchType::Conditional); + + // Recreate first global history pattern + predictor.update(0, true, 4, BranchType::Unconditional); + predictor.update(0, false, 4, BranchType::Unconditional); + predictor.update(0, false, 4, BranchType::Unconditional); + predictor.update(0, false, 4, BranchType::Unconditional); + predictor.update(0, true, 4, BranchType::Unconditional); + // Get prediction + prediction = predictor.predict(0x1F, BranchType::Conditional, 0); + EXPECT_FALSE(prediction.taken); + EXPECT_EQ(prediction.target, 0x23); + // Set entry in BTB + predictor.update(0x1F, true, 0xAB, BranchType::Conditional); + + // Recreate second global history pattern + predictor.update(0, false, 4, BranchType::Unconditional); + predictor.update(0, true, 4, BranchType::Unconditional); + predictor.update(0, true, 4, BranchType::Unconditional); + predictor.update(0, true, 4, BranchType::Unconditional); + predictor.update(0, false, 4, BranchType::Unconditional); + // Get prediction + prediction = predictor.predict(0x1F, BranchType::Conditional, 0); + EXPECT_TRUE(prediction.taken); + EXPECT_EQ(prediction.target, 0xBA); + predictor.update(0x1F, true, 0xBA, BranchType::Conditional); +} + +// Test Flush of RAS functionality +TEST_F(PerceptronPredictorTest, flush) { + simeng::config::SimInfo::addToConfig( + "{Branch-Predictor: {Type: Perceptron, BTB-Tag-Bits: 11, " + "Global-History-Length: 10, RAS-entries: 10}}"); + auto predictor = simeng::PerceptronPredictor(); + // Add some entries to the RAS + auto prediction = predictor.predict(8, BranchType::SubroutineCall, 8); + EXPECT_TRUE(prediction.taken); + EXPECT_EQ(prediction.target, 16); + prediction = predictor.predict(24, BranchType::SubroutineCall, 8); + EXPECT_TRUE(prediction.taken); + 
EXPECT_EQ(prediction.target, 32); + prediction = predictor.predict(40, BranchType::SubroutineCall, 8); + EXPECT_TRUE(prediction.taken); + EXPECT_EQ(prediction.target, 48); + + // Start getting entries from RAS + prediction = predictor.predict(52, BranchType::Return, 0); + EXPECT_TRUE(prediction.taken); + EXPECT_EQ(prediction.target, 44); + prediction = predictor.predict(36, BranchType::Return, 0); + EXPECT_TRUE(prediction.taken); + EXPECT_EQ(prediction.target, 28); + + // Flush address + predictor.flush(36); + + // Continue getting entries from RAS + prediction = predictor.predict(20, BranchType::Return, 0); + EXPECT_TRUE(prediction.taken); + EXPECT_EQ(prediction.target, 28); + prediction = predictor.predict(16, BranchType::Return, 0); + EXPECT_TRUE(prediction.taken); + EXPECT_EQ(prediction.target, 12); +} + +} // namespace simeng