diff --git a/CMakeLists.txt b/CMakeLists.txt index cae649dddd..9bd70577f3 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -215,7 +215,7 @@ endif() # saves us from having to build all targets before running the tests add_custom_target(test-all COMMAND ${CMAKE_CTEST_COMMAND} - DEPENDS unittests regression-aarch64 regression-riscv + DEPENDS unittests regression-aarch64 regression-riscv integrationtests ) endif() diff --git a/configs/DEMO_RISCV.yaml b/configs/DEMO_RISCV.yaml index 42e7418b55..5f3387ef1f 100644 --- a/configs/DEMO_RISCV.yaml +++ b/configs/DEMO_RISCV.yaml @@ -139,7 +139,7 @@ Latencies: CPU-Info: # Set Generate-Special-Dir to 'T' to generate the special files directory, or to 'F' to not. # (Not generating the special files directory may require the user to copy over files manually) - Generate-Special-Dir: true + Generate-Special-Dir: True # Core-Count MUST be 1 as multi-core is not supported at this time. (TX2 true value is 32) Core-Count: 1 # Socket-Count MUST be 1 as multi-socket simulations are not supported at this time. (TX2 true value is 2) diff --git a/docs/sphinx/user/configuring_simeng.rst b/docs/sphinx/user/configuring_simeng.rst index cb5b31b215..344585f77b 100644 --- a/docs/sphinx/user/configuring_simeng.rst +++ b/docs/sphinx/user/configuring_simeng.rst @@ -349,9 +349,20 @@ CPU Info These fields are currently only used to generate a replica of the required Special Files directory structure. Generate-Special-Dir - Values are either "True" or "False". - Dictates whether or not SimEng should generate the SpecialFiles directory tree at runtime. - The alternative to this would be to copy in the required SpecialFiles by hand. + Values are either `True` or `False`. + Dictates whether or not SimEng should generate the Special-Files directory tree at runtime. + If your code requires Special-Files but you wish to use your own / existing files from a real system, you will need to set this option to `False`. 
+ The files which are currently generated / supported in SimEng are: + + - `/proc/cpuinfo` + - `/proc/stat` + - `/sys/devices/system/cpu/online` + - `/sys/devices/system/cpu/cpu{0..CoreCount}/topology/core_id` + - `/sys/devices/system/cpu/cpu{0..CoreCount}/topology/physical_package_id` + +Special-File-Dir-Path + Represented as a String; is the **absolute path** to the root directory where the Special-Files will be generated *OR* where existing Special-Files are located. + This is optional, and defaults to `SIMENG_BUILD_DIRECTORY/specialFiles`. The root directory must already exist. Core-Count Defines the total number of Physical cores (Not including threads). diff --git a/src/include/simeng/CoreInstance.hh b/src/include/simeng/CoreInstance.hh index e08dc809ed..2e7b923a65 100644 --- a/src/include/simeng/CoreInstance.hh +++ b/src/include/simeng/CoreInstance.hh @@ -22,7 +22,7 @@ // Program used when no executable is provided; counts down from // 1024*1024, with an independent `orr` at the start of each branch. -uint32_t hex_[] = { +static uint32_t hex_[] = { 0x320C03E0, // orr w0, wzr, #1048576 0x320003E1, // orr w0, wzr, #1 0x71000400, // subs w0, w0, #1 @@ -102,8 +102,11 @@ class CoreInstance { /** Construct the special file directory. */ void createSpecialFileDirectory(); - /** Whether or not the source has been assembled by LLVM. */ - bool assembledSource_ = false; + /** The config file describing the modelled core to be created. */ + ryml::ConstNodeRef config_; + + /** The SimEng Linux kernel object. */ + simeng::kernel::Linux kernel_; /** Reference to source assembled by LLVM. */ char* source_ = nullptr; @@ -111,8 +114,8 @@ class CoreInstance { /** Size of the source code assembled by LLVM. */ size_t sourceSize_ = 0; - /** The config file describing the modelled core to be created. */ - ryml::ConstNodeRef config_; + /** Whether or not the source has been assembled by LLVM. */ + bool assembledSource_ = false; /** Reference to the SimEng linux process object. 
*/ std::unique_ptr process_ = nullptr; @@ -123,9 +126,6 @@ class CoreInstance { /** The process memory space. */ std::shared_ptr processMemory_; - /** The SimEng Linux kernel object. */ - simeng::kernel::Linux kernel_; - /** Whether or not the dataMemory_ must be set manually. */ bool setDataMemory_ = false; diff --git a/src/include/simeng/Instruction.hh b/src/include/simeng/Instruction.hh index 81cf9d7e54..0b326f0ee5 100644 --- a/src/include/simeng/Instruction.hh +++ b/src/include/simeng/Instruction.hh @@ -256,7 +256,7 @@ class Instruction { /** An arbitrary index value for the micro-operation. Its use is based on the * implementation of specific micro-operations. */ - int microOpIndex_; + int microOpIndex_ = 0; }; } // namespace simeng \ No newline at end of file diff --git a/src/include/simeng/RegisterValue.hh b/src/include/simeng/RegisterValue.hh index 1a0c37dd98..96614b93d9 100644 --- a/src/include/simeng/RegisterValue.hh +++ b/src/include/simeng/RegisterValue.hh @@ -131,4 +131,16 @@ class RegisterValue { alignas(8) char value[MAX_LOCAL_BYTES]; }; +inline bool operator==(const RegisterValue& lhs, const RegisterValue& rhs) { + if (lhs.size() == rhs.size()) { + auto lhV = lhs.getAsVector(); + auto rhV = rhs.getAsVector(); + for (int i = 0; i < lhs.size(); i++) { + if (lhV[i] != rhV[i]) return false; + } + return true; + } + return false; +} + } // namespace simeng \ No newline at end of file diff --git a/src/include/simeng/SpecialFileDirGen.hh b/src/include/simeng/SpecialFileDirGen.hh index a907727f54..a60c0d54ca 100644 --- a/src/include/simeng/SpecialFileDirGen.hh +++ b/src/include/simeng/SpecialFileDirGen.hh @@ -4,7 +4,6 @@ #include #include "simeng/config/SimInfo.hh" -#include "simeng/version.hh" namespace simeng { class SpecialFileDirGen { @@ -22,7 +21,7 @@ class SpecialFileDirGen { private: /** Path to the root of the SimEng special files directory. 
*/ - const std::string specialFilesDir_ = SIMENG_BUILD_DIR "/specialFiles"; + const std::string specialFilesDir_; /** Values declared in YAML config file needed to create the Special Files * Directory tree. */ diff --git a/src/include/simeng/arch/aarch64/ExceptionHandler.hh b/src/include/simeng/arch/aarch64/ExceptionHandler.hh index 3e59bc58eb..0f2a7c546c 100644 --- a/src/include/simeng/arch/aarch64/ExceptionHandler.hh +++ b/src/include/simeng/arch/aarch64/ExceptionHandler.hh @@ -96,6 +96,16 @@ class ExceptionHandler : public simeng::arch::ExceptionHandler { static constexpr Register R3 = {RegisterType::GENERAL, 3}; static constexpr Register R4 = {RegisterType::GENERAL, 4}; static constexpr Register R5 = {RegisterType::GENERAL, 5}; + + /** Let the following ExceptionHandlerTest derived classes be a friend of this + * class to allow proper testing of `readStringThen()`, `readBufferThen()` and + * `printException()` functions. */ + friend class AArch64ExceptionHandlerTest_readStringThen_Test; + friend class AArch64ExceptionHandlerTest_readStringThen_maxLen0_Test; + friend class AArch64ExceptionHandlerTest_readStringThen_maxLenReached_Test; + friend class AArch64ExceptionHandlerTest_readBufferThen_Test; + friend class AArch64ExceptionHandlerTest_readBufferThen_length0_Test; + friend class AArch64ExceptionHandlerTest_printException_Test; }; } // namespace aarch64 diff --git a/src/include/simeng/arch/aarch64/Instruction.hh b/src/include/simeng/arch/aarch64/Instruction.hh index d61bb43582..92ac0bc96d 100644 --- a/src/include/simeng/arch/aarch64/Instruction.hh +++ b/src/include/simeng/arch/aarch64/Instruction.hh @@ -13,38 +13,6 @@ namespace simeng { namespace arch { namespace aarch64 { -/** Apply the shift specified by `shiftType` to the unsigned integer `value`, - * shifting by `amount`. 
*/ -template -std::enable_if_t && std::is_unsigned_v, T> shiftValue( - T value, uint8_t shiftType, uint8_t amount) { - switch (shiftType) { - case ARM64_SFT_LSL: - return value << amount; - case ARM64_SFT_LSR: - return value >> amount; - case ARM64_SFT_ASR: - return static_cast>(value) >> amount; - case ARM64_SFT_ROR: { - // Assuming sizeof(T) is a power of 2. - const auto mask = sizeof(T) * 8 - 1; - assert((amount <= mask) && "Rotate amount exceeds type width"); - amount &= mask; - return (value >> amount) | (value << ((-amount) & mask)); - } - case ARM64_SFT_MSL: { - // pad in with ones instead of zeros - const auto mask = (1 << amount) - 1; - return (value << amount) | mask; - } - case ARM64_SFT_INVALID: - return value; - default: - assert(false && "Unknown shift type"); - return 0; - } -} - /** Get the size of the data to be accessed from/to memory. */ inline uint8_t getDataSize(cs_arm64_op op) { // Check from top of the range downwards @@ -203,6 +171,9 @@ const uint8_t NZCV = 3; const uint8_t SYSTEM = 4; /** The [256-byte x (SVL / 8)] SME matrix register za. */ const uint8_t MATRIX = 5; + +/** A special register value representing the zero register. */ +const Register ZERO_REGISTER = {GENERAL, (uint16_t)-1}; } // namespace RegisterType /** A struct holding user-defined execution information for a aarch64 @@ -222,7 +193,6 @@ struct ExecutionInfo { enum class InstructionException { None = 0, EncodingUnallocated, - EncodingNotYetImplemented, ExecutionNotYetImplemented, AliasNotYetImplemented, MisalignedPC, @@ -366,11 +336,6 @@ class Instruction : public simeng::Instruction { /** Retrieve the instruction's associated architecture. */ const Architecture& getArchitecture() const; - /** A special register value representing the zero register. If passed to - * `setSourceRegisters`/`setDestinationRegisters`, the value will be - * automatically supplied as zero. 
*/ - static const Register ZERO_REGISTER; - private: /** A reference to the ISA instance this instruction belongs to. */ const Architecture& architecture_; @@ -380,11 +345,10 @@ class Instruction : public simeng::Instruction { /** A vector of source registers. */ std::vector sourceRegisters; - /** The number of source registers this instruction reads from. */ - uint16_t sourceRegisterCount = 0; /** A vector of destination registers. */ std::vector destinationRegisters; + /** The number of destination registers this instruction writes to. */ uint16_t destinationRegisterCount = 0; @@ -404,15 +368,6 @@ class Instruction : public simeng::Instruction { * registers. */ void decode(); - /** Set the source registers of the instruction, and create a corresponding - * operands vector. Zero register references will be pre-supplied with a value - * of 0. */ - void setSourceRegisters(const std::vector& registers); - - /** Set the destination registers for the instruction, and create a - * corresponding results vector. */ - void setDestinationRegisters(const std::vector& registers); - // Scheduling /** The number of operands that have not yet had values supplied. Used to * determine execution readiness. */ @@ -499,14 +454,6 @@ class Instruction : public simeng::Instruction { * for sending to memory (according to instruction type). Each entry * corresponds to a `memoryAddresses` entry. */ std::vector memoryData; - - // Execution helpers - /** Extend `value` according to `extendType`, and left-shift the result by - * `shift` */ - uint64_t extendValue(uint64_t value, uint8_t extendType, uint8_t shift) const; - - /** Extend `value` using extension/shifting rules defined in `op`. 
*/ - uint64_t extendOffset(uint64_t value, const cs_arm64_op& op) const; }; } // namespace aarch64 diff --git a/src/include/simeng/arch/aarch64/MicroDecoder.hh b/src/include/simeng/arch/aarch64/MicroDecoder.hh index f91f4041d7..f13fb02077 100644 --- a/src/include/simeng/arch/aarch64/MicroDecoder.hh +++ b/src/include/simeng/arch/aarch64/MicroDecoder.hh @@ -30,6 +30,7 @@ class MicroDecoder { const Instruction& macroOp, MacroOp& output, csh capstoneHandle); + private: /** Detect if there's an overlap between the underlying hardware registers * (e.g. z5, v5, q5, d5, s5, h5, and b5). */ bool detectOverlap(arm64_reg registerA, arm64_reg registerB); @@ -67,7 +68,6 @@ class MicroDecoder { csh capstoneHandle, bool lastMicroOp = false, int microOpIndex = 0, uint8_t dataSize = 0); - private: /** Flag to determine whether instruction splitting is enabled. */ const bool instructionSplit_; diff --git a/src/include/simeng/arch/aarch64/helpers/arithmetic.hh b/src/include/simeng/arch/aarch64/helpers/arithmetic.hh index 13485c16fa..cdf8a19ed7 100644 --- a/src/include/simeng/arch/aarch64/helpers/arithmetic.hh +++ b/src/include/simeng/arch/aarch64/helpers/arithmetic.hh @@ -5,204 +5,190 @@ namespace simeng { namespace arch { namespace aarch64 { -class arithmeticHelp { - public: - /** Helper function for instructions with the format `add rd, rn, rm`. - * T represents the type of operands (e.g. for xn, T = uint64_t). - * Returns single value of type T. */ - template - static T add_3ops(std::vector& operands) { - const T n = operands[0].get(); - const T m = operands[1].get(); - return (n + m); - } - /** Helper function for instructions with the format `adc rd, rn, rm`. - * T represents the type of operands (e.g. for xn, T = uint64_t). - * Returns tuple of [resulting value, nzcv]. 
*/ - template - static std::tuple addCarry_3ops( - std::vector& operands) { - const uint8_t carry = operands[0].get() & 0b0010; - const T n = operands[1].get(); - const T m = operands[2].get(); - return AuxFunc::addWithCarry(n, m, carry); - } +/** Helper function for instructions with the format `add rd, rn, rm`. + * T represents the type of operands (e.g. for xn, T = uint64_t). + * Returns single value of type T. */ +template +T add_3ops(std::vector& operands) { + const T n = operands[0].get(); + const T m = operands[1].get(); + return (n + m); +} - /** Helper function for instructions with the format `add rd, rn, rm{, extend - * {#amount}}`. - * T represents the type of operands (e.g. for xn, T = uint64_t). - * Returns tuple of [resulting value, nzcv]. */ - template - static std::tuple addExtend_3ops( - std::vector& operands, - const simeng::arch::aarch64::InstructionMetadata& metadata, - bool calcNZCV) { - const T n = operands[0].get(); - const T m = - AuxFunc::extendValue(operands[1].get(), metadata.operands[2].ext, - metadata.operands[2].shift.value); - if (calcNZCV) return AuxFunc::addWithCarry(n, m, 0); - return {(n + m), 0}; - } +/** Helper function for instructions with the format `adc rd, rn, rm`. + * T represents the type of operands (e.g. for xn, T = uint64_t). + * Returns tuple of [resulting value, nzcv]. */ +template +std::tuple addCarry_3ops(std::vector& operands) { + const uint8_t carry = operands[0].get() & 0b0010; + const T n = operands[1].get(); + const T m = operands[2].get(); + return addWithCarry(n, m, carry); +} - /** Helper function for instructions with the format `add rd, rn, rm{, shift - * #amount}`. - * T represents the type of operands (e.g. for xn, T = uint64_t). - * Returns tuple of [resulting value, nzcv]. 
*/ - template - static std::tuple addShift_3ops( - std::vector& operands, - const simeng::arch::aarch64::InstructionMetadata& metadata, - bool calcNZCV) { - const T n = operands[0].get(); - const T m = - shiftValue(operands[1].get(), metadata.operands[2].shift.type, - metadata.operands[2].shift.value); - if (calcNZCV) return AuxFunc::addWithCarry(n, m, 0); - return {(n + m), 0}; - } +/** Helper function for instructions with the format `add rd, rn, rm{, extend + * {#amount}}`. + * T represents the type of operands (e.g. for xn, T = uint64_t). + * Returns tuple of [resulting value, nzcv]. */ +template +std::tuple addExtend_3ops( + std::vector& operands, + const simeng::arch::aarch64::InstructionMetadata& metadata, bool calcNZCV) { + const T n = operands[0].get(); + const T m = extendValue(operands[1].get(), metadata.operands[2].ext, + metadata.operands[2].shift.value); + if (calcNZCV) return addWithCarry(n, m, 0); + return {(n + m), 0}; +} - /** Helper function for instructions with the format `add rd, rn, #imm{, shift - * #amount}`. - * T represents the type of operands (e.g. for xn, T = uint64_t). - * Returns tuple of [resulting value, nzcv]. */ - template - static std::tuple addShift_imm( - std::vector& operands, - const simeng::arch::aarch64::InstructionMetadata& metadata, - bool calcNZCV) { - const T n = operands[0].get(); - const T m = shiftValue(static_cast(metadata.operands[2].imm), - metadata.operands[2].shift.type, - metadata.operands[2].shift.value); - if (calcNZCV) return AuxFunc::addWithCarry(n, m, 0); - return {(n + m), 0}; - } +/** Helper function for instructions with the format `add rd, rn, rm{, shift + * #amount}`. + * T represents the type of operands (e.g. for xn, T = uint64_t). + * Returns tuple of [resulting value, nzcv]. 
*/ +template +std::tuple addShift_3ops( + std::vector& operands, + const simeng::arch::aarch64::InstructionMetadata& metadata, bool calcNZCV) { + const T n = operands[0].get(); + const T m = shiftValue(operands[1].get(), metadata.operands[2].shift.type, + metadata.operands[2].shift.value); + if (calcNZCV) return addWithCarry(n, m, 0); + return {(n + m), 0}; +} + +/** Helper function for instructions with the format `add rd, rn, #imm{, shift + * #amount}`. + * T represents the type of operands (e.g. for xn, T = uint64_t). + * Returns tuple of [resulting value, nzcv]. */ +template +std::tuple addShift_imm( + std::vector& operands, + const simeng::arch::aarch64::InstructionMetadata& metadata, bool calcNZCV) { + const T n = operands[0].get(); + const T m = shiftValue(static_cast(metadata.operands[2].imm), + metadata.operands[2].shift.type, + metadata.operands[2].shift.value); + if (calcNZCV) return addWithCarry(n, m, 0); + return {(n + m), 0}; +} - /** Helper function for instructions with the format `clz rd, rn`. - * T represents the type of operands (e.g. for xn, T = int64_t). - * Returns single value of type T. */ - template - static T clz_reg(std::vector& operands) { - T x = operands[0].get(); - uint8_t i; - for (i = 0; i < (sizeof(T) * 8); i++) { - // Left-shift x until it's negative or we run out of bits - if (x < 0) { - break; - } - x <<= 1; +/** Helper function for instructions with the format `clz rd, rn`. + * T represents the type of operands (e.g. for xn, T = int64_t). + * Returns single value of type T. */ +template +T clz_reg(std::vector& operands) { + T x = operands[0].get(); + uint8_t i; + for (i = 0; i < (sizeof(T) * 8); i++) { + // Left-shift x until it's negative or we run out of bits + if (x < 0) { + break; } - return i; + x <<= 1; } + return i; +} - /** Helper function for instructions with the format `movk d, #imm`. - * T represents the type of operands (e.g. for xd, T = uint64_t). - * Returns single value of type T. 
*/ - template - static T movkShift_imm( - std::vector& operands, - const simeng::arch::aarch64::InstructionMetadata& metadata) { - // Clear 16-bit region offset by `shift` and replace with immediate - uint8_t shift = metadata.operands[1].shift.value; - T mask = ~(static_cast(0xFFFF) << shift); - T value = - (operands[0].get() & mask) | (metadata.operands[1].imm << shift); - return value; - } +/** Helper function for instructions with the format `movk d, #imm`. + * T represents the type of operands (e.g. for xd, T = uint64_t). + * Returns single value of type T. */ +template +T movkShift_imm(std::vector& operands, + const simeng::arch::aarch64::InstructionMetadata& metadata) { + // Clear 16-bit region offset by `shift` and replace with immediate + uint8_t shift = metadata.operands[1].shift.value; + T mask = ~(static_cast(0xFFFF) << shift); + T value = (operands[0].get() & mask) | (metadata.operands[1].imm << shift); + return value; +} - /** Helper function for instructions with the format `mov d, #imm{, - * lsl #shift}`. - * T represents the type of operands (e.g. for xd, T = uint64_t). - * Returns single value og type uint64_t. */ - template - static uint64_t movnShift_imm( - const simeng::arch::aarch64::InstructionMetadata& metadata, - std::function func) { - uint8_t shift = metadata.operands[1].shift.value; - T value = func(static_cast(metadata.operands[1].imm) << shift); - return static_cast(value); - } +/** Helper function for instructions with the format `mov d, #imm{, + * lsl #shift}`. + * T represents the type of operands (e.g. for xd, T = uint64_t). + * Returns single value of type uint64_t. */ +template +uint64_t movnShift_imm( + const simeng::arch::aarch64::InstructionMetadata& metadata, + std::function func) { + uint8_t shift = metadata.operands[1].shift.value; + T value = func(static_cast(metadata.operands[1].imm) << shift); + return static_cast(value); +} - /** Helper function for instructions with the format `msubl xd, wn, wm, xa`. 
- * D represents the type of the destination register (either int64_t or - * uint64_t). - * N represents the type of the first source register (either - * int32_t or uint32_t). - * Returns single value of type D. */ - template - static D msubl_4ops(std::vector& operands) { - const N n = operands[0].get(); - const N m = operands[1].get(); - const D a = operands[2].get(); - return (a - (n * m)); - } +/** Helper function for instructions with the format `msubl xd, wn, wm, xa`. + * D represents the type of the destination register (either int64_t or + * uint64_t). + * N represents the type of the first source register (either + * int32_t or uint32_t). + * Returns single value of type D. */ +template +D msubl_4ops(std::vector& operands) { + const N n = operands[0].get(); + const N m = operands[1].get(); + const D a = operands[2].get(); + return (a - (n * m)); +} - /** Helper function for instructions with the format `sbc rd, rn, rm`. - * T represents the type of operands (e.g. for xn, T = uint64_t). - * Returns single value of type T. */ - template - static T sbc(std::vector& operands) { - auto nzcv = operands[0].get(); - const T x = operands[1].get(); - const T y = operands[2].get(); - T result; - std::tie(result, std::ignore) = - AuxFunc::addWithCarry(x, ~y, (nzcv >> 1) & 1); - return result; - } +/** Helper function for instructions with the format `sbc rd, rn, rm`. + * T represents the type of operands (e.g. for xn, T = uint64_t). + * Returns single value of type T. */ +template +T sbc(std::vector& operands) { + auto nzcv = operands[0].get(); + const T x = operands[1].get(); + const T y = operands[2].get(); + T result; + std::tie(result, std::ignore) = addWithCarry(x, ~y, (nzcv >> 1) & 1); + return result; +} - /** Helper function for instructions with the format `sub{s} rd, rn, rm{, - * extend #amount}`. - * T represents the type of operands (e.g. for xn, T = uint64_t). - * Returns tuple of [resulting value, nzcv]. 
*/ - template - static std::tuple subExtend_3ops( - std::vector& operands, - const simeng::arch::aarch64::InstructionMetadata& metadata, - bool calcNZCV) { - const T n = operands[0].get(); - const T m = static_cast( - AuxFunc::extendValue(operands[1].get(), metadata.operands[2].ext, - metadata.operands[2].shift.value)); - if (calcNZCV) return AuxFunc::addWithCarry(n, ~m, true); - return {(n - m), 0}; - } +/** Helper function for instructions with the format `sub{s} rd, rn, rm{, + * extend #amount}`. + * T represents the type of operands (e.g. for xn, T = uint64_t). + * Returns tuple of [resulting value, nzcv]. */ +template +std::tuple subExtend_3ops( + std::vector& operands, + const simeng::arch::aarch64::InstructionMetadata& metadata, bool calcNZCV) { + const T n = operands[0].get(); + const T m = + static_cast(extendValue(operands[1].get(), metadata.operands[2].ext, + metadata.operands[2].shift.value)); + if (calcNZCV) return addWithCarry(n, ~m, true); + return {(n - m), 0}; +} - /** Helper function for instructions with the format `sub{s} rd, rn, #imm`. - * T represents the type of operands (e.g. for xn, T = uint64_t). - * Returns single value of type T. */ - template - static std::tuple subShift_imm( - std::vector& operands, - const simeng::arch::aarch64::InstructionMetadata& metadata, - bool calcNZCV) { - const T n = operands[0].get(); - const T m = shiftValue(static_cast(metadata.operands[2].imm), - metadata.operands[2].shift.type, - metadata.operands[2].shift.value); - if (calcNZCV) return AuxFunc::addWithCarry(n, ~m, true); - return {(n - m), 0}; - } +/** Helper function for instructions with the format `sub{s} rd, rn, #imm`. + * T represents the type of operands (e.g. for xn, T = uint64_t). + * Returns single value of type T. 
*/ +template +std::tuple subShift_imm( + std::vector& operands, + const simeng::arch::aarch64::InstructionMetadata& metadata, bool calcNZCV) { + const T n = operands[0].get(); + const T m = shiftValue(static_cast(metadata.operands[2].imm), + metadata.operands[2].shift.type, + metadata.operands[2].shift.value); + if (calcNZCV) return addWithCarry(n, ~m, true); + return {(n - m), 0}; +} + +/** Helper function for instructions with the format `sub{s} rd, rn, rm{, + * shift #amount}`. + * T represents the type of operands (e.g. for xn, T = uint64_t). + * Returns tuple of [resulting value, nzcv]. */ +template +std::tuple subShift_3ops( + std::vector& operands, + const simeng::arch::aarch64::InstructionMetadata& metadata, bool calcNZCV) { + const T n = operands[0].get(); + const T m = shiftValue(operands[1].get(), metadata.operands[2].shift.type, + metadata.operands[2].shift.value); + if (calcNZCV) return addWithCarry(n, ~m, true); + return {(n - m), 0}; +} - /** Helper function for instructions with the format `sub{s} rd, rn, rm{, - * shift #amount}`. - * T represents the type of operands (e.g. for xn, T = uint64_t). - * Returns tuple of [resulting value, nzcv]. 
*/ - template - static std::tuple subShift_3ops( - std::vector& operands, - const simeng::arch::aarch64::InstructionMetadata& metadata, - bool calcNZCV) { - const T n = operands[0].get(); - const T m = - shiftValue(operands[1].get(), metadata.operands[2].shift.type, - metadata.operands[2].shift.value); - if (calcNZCV) return AuxFunc::addWithCarry(n, ~m, true); - return {(n - m), 0}; - } -}; } // namespace aarch64 } // namespace arch } // namespace simeng \ No newline at end of file diff --git a/src/include/simeng/arch/aarch64/helpers/auxiliaryFunctions.hh b/src/include/simeng/arch/aarch64/helpers/auxiliaryFunctions.hh index 036df3f061..2a612cea8f 100644 --- a/src/include/simeng/arch/aarch64/helpers/auxiliaryFunctions.hh +++ b/src/include/simeng/arch/aarch64/helpers/auxiliaryFunctions.hh @@ -11,315 +11,326 @@ namespace simeng { namespace arch { namespace aarch64 { -class AuxFunc { - public: - /** Performs a type agnostic add with carry. */ - template - static std::tuple addWithCarry(T x, T y, bool carryIn) { - T result = x + y + carryIn; - - bool n = (result >> (sizeof(T) * 8 - 1)); - bool z = (result == 0); - - // Trying to calculate whether `result` overflows (`x + y + carryIn > max`). 
- bool c; - if (carryIn && x + 1 == 0) { - // Implies `x` is max; with a carry set, it will definitely overflow - c = true; - } else { - // We know x + carryIn <= max, so can safely subtract and compare against - // y max > x + y + c == max - x > y + c - c = ((std::numeric_limits::max() - x - carryIn) < y); - } - - // Calculate whether signed result overflows - bool v = false; - typedef std::make_signed_t ST; - auto sx = static_cast(x); - auto sy = static_cast(y); - if (sx >= 0) { - // Check if (x + y + c) > MAX - // y > (MAX - x - c) - v = sy > (std::numeric_limits::max() - sx - carryIn); - } else { - // Check if (x + y + c) < MIN - // y < (MIN - x - c) - v = sy < (std::numeric_limits::min() - sx - carryIn); - } - return {result, nzcv(n, z, c, v)}; +/** Returns a correctly formatted nzcv value. */ +inline uint8_t nzcv(bool n, bool z, bool c, bool v) { + return (n << 3) | (z << 2) | (c << 1) | v; +} + +/** Performs a type agnostic unsigned add with carry. */ +template +inline std::enable_if_t && std::is_unsigned_v, + std::tuple> +addWithCarry(T x, T y, bool carryIn) { + T result = x + y + carryIn; + + bool n = (result >> (sizeof(T) * 8 - 1)); + bool z = (result == 0); + + // Trying to calculate whether `result` overflows (`x + y + carryIn > max`). + bool c; + if (carryIn && x + 1 == 0) { + // Implies `x` is max; with a carry set, it will definitely overflow + c = true; + } else { + // We know x + carryIn <= max, so can safely subtract and compare against + // y max > x + y + c == max - x > y + c + c = ((std::numeric_limits::max() - x - carryIn) < y); } - /** Manipulate the bitfield `value` according to the logic of the (U|S)BFM - * Armv9.2-a instructions. 
*/ - template - static std::enable_if_t && std::is_unsigned_v, T> - bitfieldManipulate(T value, T dest, uint8_t rotateBy, uint8_t sourceBits, - bool signExtend = false) { - size_t bits = sizeof(T) * 8; - - T source; - T destMask; - uint8_t highestBit = sourceBits; - if (sourceBits >= rotateBy) { - // Mask of values [rotateBy:source+1] - destMask = (static_cast(-1) << (sourceBits - rotateBy + 1)); - source = value >> rotateBy; - highestBit -= rotateBy; - } else { - T upper = (static_cast(-1) << (bits - rotateBy)); - T lower = (static_cast(-1) >> (rotateBy - sourceBits - 1)); - destMask = upper ^ lower; - source = value << (bits - rotateBy); - highestBit += (bits - rotateBy); - } - - T result = (dest & destMask) | (source & ~destMask); + // Calculate whether signed result overflows + bool v = false; + typedef std::make_signed_t ST; + auto sx = static_cast(x); + auto sy = static_cast(y); + if (sx >= 0) { + // Check if (x + y + c) > MAX + // y > (MAX - x - c) + v = sy > (std::numeric_limits::max() - sx - carryIn); + } else { + // Check if (x + y + c) < MIN + // y < (MIN - x - c) + v = sy < (std::numeric_limits::min() - sx - carryIn); + } - if (!signExtend) { - return result; - } + return {result, nzcv(n, z, c, v)}; +} + +/** Manipulate the bitfield `value` according to the logic of the (U|S)BFM + * Armv9.2-a instructions. 
*/ +template +inline std::enable_if_t && std::is_unsigned_v, T> +bitfieldManipulate(T value, T dest, uint8_t rotateBy, uint8_t sourceBits, + bool signExtend = false) { + size_t bits = sizeof(T) * 8; + + T source; + T destMask; + uint8_t highestBit = sourceBits; + if (sourceBits >= rotateBy) { + // Mask of values [rotateBy:source+1] + destMask = (static_cast(-1) << (sourceBits - rotateBy + 1)); + source = value >> rotateBy; + highestBit -= rotateBy; + } else { + T upper = (static_cast(-1) << (bits - rotateBy)); + T lower = (static_cast(-1) >> (rotateBy - sourceBits - 1)); + destMask = upper ^ lower; + source = value << (bits - rotateBy); + highestBit += (bits - rotateBy); + } - if (highestBit > bits) { - // Nothing to do; implicitly sign-extended - return result; - } + T result = (dest & destMask) | (source & ~destMask); - // Let the compiler do sign-extension for us. - uint8_t shiftAmount = bits - highestBit - 1; - // Shift the bitfield up, and cast to a signed type, so the highest bit is - // now the sign bit - auto shifted = static_cast>(result << shiftAmount); - // Shift the bitfield back to where it was; as it's a signed type, the - // compiler will sign-extend the highest bit - return shifted >> shiftAmount; + if (!signExtend) { + return result; } - /** Function to check if NZCV conditions hold. 
*/ - static bool conditionHolds(uint8_t cond, uint8_t nzcv) { - if (cond == 0b1111) { - return true; - } - - bool inverse = cond & 1; - uint8_t upper = cond >> 1; - bool n = (nzcv >> 3) & 1; - bool z = (nzcv >> 2) & 1; - bool c = (nzcv >> 1) & 1; - bool v = nzcv & 1; - bool result; - switch (upper) { - case 0b000: - result = z; - break; // EQ/NE - case 0b001: - result = c; - break; // CS/CC - case 0b010: - result = n; - break; // MI/PL - case 0b011: - result = v; - break; // VS/VC - case 0b100: - result = (c && !z); - break; // HI/LS - case 0b101: - result = (n == v); - break; // GE/LT - case 0b110: - result = (n == v && !z); - break; // GT/LE - default: // 0b111, AL - result = true; - } + if (highestBit > bits) { + // Nothing to do; implicitly sign-extended + return result; + } - return (inverse ? !result : result); + // Let the compiler do sign-extension for us. + uint8_t shiftAmount = bits - highestBit - 1; + // Shift the bitfield up, and cast to a signed type, so the highest bit is + // now the sign bit + auto shifted = static_cast>(result << shiftAmount); + // Shift the bitfield back to where it was; as it's a signed type, the + // compiler will sign-extend the highest bit + return shifted >> shiftAmount; +} + +/** Function to check if NZCV conditions hold. 
*/ +inline bool conditionHolds(uint8_t cond, uint8_t nzcv) { + bool inverse = cond & 1; + uint8_t upper = cond >> 1; + bool n = (nzcv >> 3) & 1; + bool z = (nzcv >> 2) & 1; + bool c = (nzcv >> 1) & 1; + bool v = nzcv & 1; + bool result; + switch (upper) { + case 0b000: + result = z; + break; // EQ/NE + case 0b001: + result = c; + break; // CS/CC + case 0b010: + result = n; + break; // MI/PL + case 0b011: + result = v; + break; // VS/VC + case 0b100: + result = (c && !z); + break; // HI/LS + case 0b101: + result = (n == v); + break; // GE/LT + case 0b110: + result = (n == v && !z); + break; // GT/LE + default: // 0b111, AL + // AL returns true regardless of inverse value + result = (true ^ inverse); + } + return (result ^ inverse); +} + +/** Extend `value` according to `extendType`, and left-shift the result by + * `shift`. Replicated from Instruction.cc */ +inline uint64_t extendValue(uint64_t value, uint8_t extendType, uint8_t shift) { + if (extendType == ARM64_EXT_INVALID && shift == 0) { + // Special case: an invalid shift type with a shift amount of 0 implies an + // identity operation + return value; } - // Rounding function that rounds a double to nearest integer (64-bit). In - // event of a tie (i.e. 7.5) it will be rounded to the nearest even number. - template - static OUT roundToNearestTiesToEven(IN input) { - IN half = static_cast(0.5); - if (std::fabs(input - std::trunc(input)) == half) { - OUT truncd = static_cast(std::trunc(input)); - // if value is negative, then may need to -1 from truncd, else may need to - // +1. - OUT addand = (truncd > 0) ? 1 : -1; - return ((truncd % 2 == 0) ? 
truncd : (truncd + addand)); - } - // Otherwise round to nearest - return static_cast(std::round(input)); + uint64_t extended; + switch (extendType) { + case ARM64_EXT_UXTB: + extended = static_cast(value); + break; + case ARM64_EXT_UXTH: + extended = static_cast(value); + break; + case ARM64_EXT_UXTW: + extended = static_cast(value); + break; + case ARM64_EXT_UXTX: + extended = value; + break; + case ARM64_EXT_SXTB: + extended = static_cast(value); + break; + case ARM64_EXT_SXTH: + extended = static_cast(value); + break; + case ARM64_EXT_SXTW: + extended = static_cast(value); + break; + case ARM64_EXT_SXTX: + extended = value; + break; + default: + assert(false && "Invalid extension type"); + return 0; } - /** Extend `value` according to `extendType`, and left-shift the result by - * `shift`. Replicated from Instruction.cc */ - static uint64_t extendValue(uint64_t value, uint8_t extendType, - uint8_t shift) { - if (extendType == ARM64_EXT_INVALID && shift == 0) { - // Special case: an invalid shift type with a shift amount of 0 implies an - // identity operation + return extended << shift; +} + +/** Extend `value` using extension/shifting rules defined in `op`. 
*/ +inline uint64_t extendOffset(uint64_t value, const cs_arm64_op& op) { + if (op.ext == 0) { + if (op.shift.value == 0) { return value; } - - uint64_t extended; - switch (extendType) { - case ARM64_EXT_UXTB: - extended = static_cast(value); - break; - case ARM64_EXT_UXTH: - extended = static_cast(value); - break; - case ARM64_EXT_UXTW: - extended = static_cast(value); - break; - case ARM64_EXT_UXTX: - extended = value; - break; - case ARM64_EXT_SXTB: - extended = static_cast(value); - break; - case ARM64_EXT_SXTH: - extended = static_cast(value); - break; - case ARM64_EXT_SXTW: - extended = static_cast(value); - break; - case ARM64_EXT_SXTX: - extended = value; - break; - default: - assert(false && "Invalid extension type"); - return 0; + if (op.shift.type == 1) { + return extendValue(value, ARM64_EXT_UXTX, op.shift.value); } - - return extended << shift; } - - // Rounding function that rounds a float to nearest integer (32-bit). In event - // of a tie (i.e. 7.5) it will be rounded to the nearest even number. - static int32_t floatRoundToNearestTiesToEven(float input) { - if (std::fabs(input - std::trunc(input)) == 0.5f) { - if (static_cast(input - 0.5f) % 2 == 0) { - return static_cast(input - 0.5f); - } else { - return static_cast(input + 0.5f); - } + return extendValue(value, op.ext, op.shift.value); +} + +/** Calculate the corresponding NZCV values from select SVE instructions that + * set the First(N), None(Z), !Last(C) condition flags based on the predicate + * result, and the V flag to 0. */ +inline uint8_t getNZCVfromPred(std::array predResult, + uint64_t VL_bits, int byteCount) { + uint8_t N = (predResult[0] & 1); + uint8_t Z = 1; + // (int)(VL_bits - 1)/512 derives which block of 64-bits within the + // predicate register we're working in. 1ull << (VL_bits / 8) - byteCount) + // derives a 1 in the last position of the current predicate. Both + // dictated by vector length. 
+ uint8_t C = !(predResult[(int)((VL_bits - 1) / 512)] & + 1ull << (((VL_bits / 8) - byteCount) % 64)); + for (int i = 0; i < (int)((VL_bits - 1) / 512) + 1; i++) { + if (predResult[i]) { + Z = 0; + break; } - // Otherwise round to nearest - return static_cast(std::round(input)); } - - /** Calculate the corresponding NZCV values from select SVE instructions that - * set the First(N), None(Z), !Last(C) condition flags based on the predicate - * result, and the V flag to 0. */ - static uint8_t getNZCVfromPred(std::array predResult, - uint64_t VL_bits, int byteCount) { - uint8_t N = (predResult[0] & 1); - uint8_t Z = 1; - // (int)(VL_bits - 1)/512 derives which block of 64-bits within the - // predicate register we're working in. 1ull << (VL_bits / 8) - byteCount) - // derives a 1 in the last position of the current predicate. Both - // dictated by vector length. - uint8_t C = !(predResult[(int)((VL_bits - 1) / 512)] & - 1ull << (((VL_bits / 8) - byteCount) % 64)); - for (int i = 0; i < (int)((VL_bits - 1) / 512) + 1; i++) { - if (predResult[i]) { - Z = 0; - break; - } + return nzcv(N, Z, C, 0); +} + +/** Multiply `a` and `b`, and return the high 64 bits of the result. + * https://stackoverflow.com/a/28904636 */ +inline uint64_t mulhi(uint64_t a, uint64_t b) { + uint64_t a_lo = (uint32_t)a; + uint64_t a_hi = a >> 32; + uint64_t b_lo = (uint32_t)b; + uint64_t b_hi = b >> 32; + + uint64_t a_x_b_hi = a_hi * b_hi; + uint64_t a_x_b_mid = a_hi * b_lo; + uint64_t b_x_a_mid = b_hi * a_lo; + uint64_t a_x_b_lo = a_lo * b_lo; + + uint64_t carry_bit = ((uint64_t)(uint32_t)a_x_b_mid + + (uint64_t)(uint32_t)b_x_a_mid + (a_x_b_lo >> 32)) >> + 32; + + uint64_t multhi = + a_x_b_hi + (a_x_b_mid >> 32) + (b_x_a_mid >> 32) + carry_bit; + + return multhi; +} + +/** Decode the instruction pattern from OperandStr. 
*/ +inline uint16_t sveGetPattern(const std::string operandStr, const uint8_t esize, + const uint16_t VL_) { + const uint16_t elements = VL_ / esize; + const std::vector patterns = { + "pow2", "vl1", "vl2", "vl3", "vl4", "vl5", "vl6", "vl7", "vl8", + "vl16", "vl32", "vl64", "vl128", "vl256", "mul3", "mul4", "all"}; + + // If no pattern present in operandStr then same behaviour as ALL + std::string pattern = "all"; + for (uint8_t i = 0; i < patterns.size(); i++) { + if (operandStr.find(patterns[i]) != std::string::npos) { + pattern = patterns[i]; + // Don't break when pattern found as vl1 will be found in vl128 etc } - return nzcv(N, Z, C, 0); - } - - /** Multiply `a` and `b`, and return the high 64 bits of the result. - * https://stackoverflow.com/a/28904636 */ - static uint64_t mulhi(uint64_t a, uint64_t b) { - uint64_t a_lo = (uint32_t)a; - uint64_t a_hi = a >> 32; - uint64_t b_lo = (uint32_t)b; - uint64_t b_hi = b >> 32; - - uint64_t a_x_b_hi = a_hi * b_hi; - uint64_t a_x_b_mid = a_hi * b_lo; - uint64_t b_x_a_mid = b_hi * a_lo; - uint64_t a_x_b_lo = a_lo * b_lo; - - uint64_t carry_bit = ((uint64_t)(uint32_t)a_x_b_mid + - (uint64_t)(uint32_t)b_x_a_mid + (a_x_b_lo >> 32)) >> - 32; - - uint64_t multhi = - a_x_b_hi + (a_x_b_mid >> 32) + (b_x_a_mid >> 32) + carry_bit; - - return multhi; - } - - /** Returns a correctly formatted nzcv value. */ - static uint8_t nzcv(bool n, bool z, bool c, bool v) { - return (n << 3) | (z << 2) | (c << 1) | v; } - /** Decode the instruction pattern from OperandStr. 
*/ - static uint16_t sveGetPattern(const std::string operandStr, - const uint8_t esize, const uint16_t VL_) { - const uint16_t elements = VL_ / esize; - const std::vector patterns = { - "pow2", "vl1", "vl2", "vl3", "vl4", "vl5", "vl6", "vl7", "vl8", - "vl16", "vl32", "vl64", "vl128", "vl256", "mul3", "mul4", "all"}; - - // If no pattern present in operandStr then same behaviour as ALL - std::string pattern = "all"; - for (uint8_t i = 0; i < patterns.size(); i++) { - if (operandStr.find(patterns[i]) != std::string::npos) { - pattern = patterns[i]; - // Don't break when pattern found as vl1 will be found in vl128 etc - } + if (pattern == "all") + return elements; + else if (pattern == "pow2") { + int n = 1; + while (elements >= std::pow(2, n)) { + n = n + 1; } - - if (pattern == "all") - return elements; - else if (pattern == "pow2") { - int n = 1; - while (elements >= std::pow(2, n)) { - n = n + 1; - } - return std::pow(2, n - 1); - } else if (pattern == "vl1") - return (elements >= 1) ? 1 : 0; - else if (pattern == "vl2") - return (elements >= 2) ? 2 : 0; - else if (pattern == "vl3") - return (elements >= 3) ? 3 : 0; - else if (pattern == "vl4") - return (elements >= 4) ? 4 : 0; - else if (pattern == "vl5") - return (elements >= 5) ? 5 : 0; - else if (pattern == "vl6") - return (elements >= 6) ? 6 : 0; - else if (pattern == "vl7") - return (elements >= 7) ? 7 : 0; - else if (pattern == "vl8") - return (elements >= 8) ? 8 : 0; - else if (pattern == "vl16") - return (elements >= 16) ? 16 : 0; - else if (pattern == "vl32") - return (elements >= 32) ? 32 : 0; - else if (pattern == "vl64") - return (elements >= 64) ? 64 : 0; - else if (pattern == "vl128") - return (elements >= 128) ? 128 : 0; - else if (pattern == "vl256") - return (elements >= 256) ? 
256 : 0; - else if (pattern == "mul4") - return elements - (elements % 4); - else if (pattern == "mul3") - return elements - (elements % 3); - - return 0; + return std::pow(2, n - 1); + } else if (pattern == "vl1") + return (elements >= 1) ? 1 : 0; + else if (pattern == "vl2") + return (elements >= 2) ? 2 : 0; + else if (pattern == "vl3") + return (elements >= 3) ? 3 : 0; + else if (pattern == "vl4") + return (elements >= 4) ? 4 : 0; + else if (pattern == "vl5") + return (elements >= 5) ? 5 : 0; + else if (pattern == "vl6") + return (elements >= 6) ? 6 : 0; + else if (pattern == "vl7") + return (elements >= 7) ? 7 : 0; + else if (pattern == "vl8") + return (elements >= 8) ? 8 : 0; + else if (pattern == "vl16") + return (elements >= 16) ? 16 : 0; + else if (pattern == "vl32") + return (elements >= 32) ? 32 : 0; + else if (pattern == "vl64") + return (elements >= 64) ? 64 : 0; + else if (pattern == "vl128") + return (elements >= 128) ? 128 : 0; + else if (pattern == "vl256") + return (elements >= 256) ? 256 : 0; + else if (pattern == "mul4") + return elements - (elements % 4); + else if (pattern == "mul3") + return elements - (elements % 3); + + return 0; +} + +/** Apply the shift specified by `shiftType` to the unsigned integer `value`, + * shifting by `amount`. */ +template +inline std::enable_if_t && std::is_unsigned_v, T> +shiftValue(T value, uint8_t shiftType, uint8_t amount) { + switch (shiftType) { + case ARM64_SFT_LSL: + return value << amount; + case ARM64_SFT_LSR: + return value >> amount; + case ARM64_SFT_ASR: + return static_cast>(value) >> amount; + case ARM64_SFT_ROR: { + // Assuming sizeof(T) is a power of 2. 
+ const T mask = sizeof(T) * 8 - 1; + assert((amount <= mask) && "Rotate amount exceeds type width"); + amount &= mask; + return (value >> amount) | (value << ((-amount) & mask)); + } + case ARM64_SFT_MSL: { + // pad in with ones instead of zeros + const T mask = (static_cast(1) << static_cast(amount)) - 1; + return (value << amount) | mask; + } + case ARM64_SFT_INVALID: + return value; + default: + assert(false && "Unknown shift type"); + return 0; } -}; +} + } // namespace aarch64 } // namespace arch } // namespace simeng \ No newline at end of file diff --git a/src/include/simeng/arch/aarch64/helpers/bitmanip.hh b/src/include/simeng/arch/aarch64/helpers/bitmanip.hh index 1316e5ab1f..07adc9c73c 100644 --- a/src/include/simeng/arch/aarch64/helpers/bitmanip.hh +++ b/src/include/simeng/arch/aarch64/helpers/bitmanip.hh @@ -5,79 +5,76 @@ namespace simeng { namespace arch { namespace aarch64 { -class bitmanipHelp { - public: - /** Helper function for instructions with the format `bfm rd, rn, #immr, - * #imms`. - * T represents the type of operands (e.g. for xn, T = uint64_t). - * Returns single value of type T. */ - template - static T bfm_2imms(std::vector& operands, - const simeng::arch::aarch64::InstructionMetadata& metadata, - bool signExtend, bool zeroDestReg) { - uint8_t r = metadata.operands[2].imm; - uint8_t s = metadata.operands[3].imm; - T dest, source; - if (!zeroDestReg) { - dest = operands[0].get(); - source = operands[1].get(); - } else { - dest = 0; - source = operands[0].get(); - } - return AuxFunc::bitfieldManipulate(source, dest, r, s, signExtend); - } - /** Helper function for instructions with the format `extr rd, rn, rm, #lsb`. - * T represents the type of operands (e.g. for xn, T = uint64_t). - * Returns single value of type T. 
*/ - template - static T extrLSB_registers( - std::vector& operands, - const simeng::arch::aarch64::InstructionMetadata& metadata) { - T n = operands[0].get(); - T m = operands[1].get(); - int64_t lsb = metadata.operands[3].imm; - if (lsb == 0) return m; - return (m >> lsb) | (n << ((sizeof(T) * 8) - lsb)); +/** Helper function for instructions with the format `bfm rd, rn, #immr, + * #imms`. + * T represents the type of operands (e.g. for xn, T = uint64_t). + * Returns single value of type T. */ +template +T bfm_2imms(std::vector& operands, + const simeng::arch::aarch64::InstructionMetadata& metadata, + bool signExtend, bool zeroDestReg) { + uint8_t r = metadata.operands[2].imm; + uint8_t s = metadata.operands[3].imm; + T dest, source; + if (!zeroDestReg) { + dest = operands[0].get(); + source = operands[1].get(); + } else { + dest = 0; + source = operands[0].get(); } + return bitfieldManipulate(source, dest, r, s, signExtend); +} - /** Helper function for instructions with the format `rbit rd, rn`. - * T represents the type of operands (e.g. for xn, T = uint64_t). - * Returns single value of type uint64_t. */ - template - static uint64_t rbit( - std::vector& operands, - const simeng::arch::aarch64::InstructionMetadata& metadata) { - int width = sizeof(T) * 8; +/** Helper function for instructions with the format `extr rd, rn, rm, #lsb`. + * T represents the type of operands (e.g. for xn, T = uint64_t). + * Returns single value of type T. 
*/ +template +T extrLSB_registers( + std::vector& operands, + const simeng::arch::aarch64::InstructionMetadata& metadata) { + T n = operands[0].get(); + T m = operands[1].get(); + int64_t lsb = metadata.operands[3].imm; + if (lsb == 0) return m; + return (m >> lsb) | (n << ((sizeof(T) * 8) - lsb)); +} - static uint8_t reversedNibble[16] = { - 0b0000, 0b1000, 0b0100, 0b1100, 0b0010, 0b1010, 0b0110, 0b1110, - 0b0001, 0b1001, 0b0101, 0b1101, 0b0011, 0b1011, 0b0111, 0b1111}; +/** Helper function for instructions with the format `rbit rd, rn`. + * T represents the type of operands (e.g. for xn, T = uint64_t). + * Returns single value of type uint64_t. */ +template +uint64_t rbit(std::vector& operands, + const simeng::arch::aarch64::InstructionMetadata& metadata) { + int width = sizeof(T) * 8; - uint64_t n = operands[0].get(); - uint64_t result = 0; - for (int i = 0; i < width; i += 4) { - result <<= 4; - result |= reversedNibble[n & 0b1111]; - n >>= 4; - } - return result; - } + uint8_t reversedNibble[16] = {0b0000, 0b1000, 0b0100, 0b1100, 0b0010, 0b1010, + 0b0110, 0b1110, 0b0001, 0b1001, 0b0101, 0b1101, + 0b0011, 0b1011, 0b0111, 0b1111}; - /** Helper function for instructions with the format `rev rd, rn`. - * T represents the type of operands (e.g. for xn, T = uint64_t). - * Returns array of uint8_t with number of elements = bytes in T. */ - template - static std::array rev( - std::vector& operands) { - auto bytes = operands[0].getAsVector(); - std::array reversed; - // Copy `bytes` backwards onto `reversed` - std::copy(bytes, bytes + sizeof(T), std::rbegin(reversed)); - return reversed; + uint64_t n = operands[0].get(); + uint64_t result = 0; + for (int i = 0; i < width; i += 4) { + result <<= 4; + result |= reversedNibble[n & 0b1111]; + n >>= 4; } -}; + return result; +} + +/** Helper function for instructions with the format `rev rd, rn`. + * T represents the type of operands (e.g. for xn, T = uint64_t). 
+ * Returns array of uint8_t with number of elements = bytes in T. */ +template +std::array rev(std::vector& operands) { + auto bytes = operands[0].getAsVector(); + std::array reversed; + // Copy `bytes` backwards onto `reversed` + std::copy(bytes, bytes + sizeof(T), std::rbegin(reversed)); + return reversed; +} + } // namespace aarch64 } // namespace arch } // namespace simeng \ No newline at end of file diff --git a/src/include/simeng/arch/aarch64/helpers/comparison.hh b/src/include/simeng/arch/aarch64/helpers/comparison.hh index 04ea68ee1a..d2965ab027 100644 --- a/src/include/simeng/arch/aarch64/helpers/comparison.hh +++ b/src/include/simeng/arch/aarch64/helpers/comparison.hh @@ -5,23 +5,20 @@ namespace simeng { namespace arch { namespace aarch64 { -class comparisonHelp { - public: - /** Helper function for instructions with the format `orr rd, rn, rm {shift - * #amount}`. - * T represents the type of operands (e.g. for xn, T = uint64_t). - * Returns single value of type T. */ - template - static T orrShift_3ops( - std::vector& operands, - const simeng::arch::aarch64::InstructionMetadata& metadata) { - const T n = operands[0].get(); - const T m = - shiftValue(operands[1].get(), metadata.operands[2].shift.type, - metadata.operands[2].shift.value); - return (n | m); - } -}; + +/** Helper function for instructions with the format `orr rd, rn, rm {shift + * #amount}`. + * T represents the type of operands (e.g. for xn, T = uint64_t). + * Returns single value of type T. 
*/ +template +T orrShift_3ops(std::vector& operands, + const simeng::arch::aarch64::InstructionMetadata& metadata) { + const T n = operands[0].get(); + const T m = shiftValue(operands[1].get(), metadata.operands[2].shift.type, + metadata.operands[2].shift.value); + return (n | m); +} + } // namespace aarch64 } // namespace arch } // namespace simeng \ No newline at end of file diff --git a/src/include/simeng/arch/aarch64/helpers/conditional.hh b/src/include/simeng/arch/aarch64/helpers/conditional.hh index 4cdfce4061..e45a12f025 100644 --- a/src/include/simeng/arch/aarch64/helpers/conditional.hh +++ b/src/include/simeng/arch/aarch64/helpers/conditional.hh @@ -5,114 +5,109 @@ namespace simeng { namespace arch { namespace aarch64 { -class conditionalHelp { - public: - /** Helper function for instructions with the format `ccmn rn, #imm #nzcv, - * cc`. - * T represents the type of operands (e.g. for xn, T = uint64_t). - * Returns single value of type uint8_t. */ - template - static uint8_t ccmn_imm( - std::vector& operands, - const simeng::arch::aarch64::InstructionMetadata& metadata) { - if (AuxFunc::conditionHolds(metadata.cc, operands[0].get())) { - uint8_t nzcv; - std::tie(std::ignore, nzcv) = AuxFunc::addWithCarry( - operands[1].get(), static_cast(metadata.operands[1].imm), 0); - return nzcv; - } - return static_cast(metadata.operands[2].imm); + +/** Helper function for instructions with the format `ccmn rn, #imm #nzcv, + * cc`. + * T represents the type of operands (e.g. for xn, T = uint64_t). + * Returns single value of type uint8_t. 
*/ +template +uint8_t ccmn_imm(std::vector& operands, + const simeng::arch::aarch64::InstructionMetadata& metadata) { + if (conditionHolds(metadata.cc, operands[0].get())) { + uint8_t nzcv; + std::tie(std::ignore, nzcv) = addWithCarry( + operands[1].get(), static_cast(metadata.operands[1].imm), 0); + return nzcv; } + return static_cast(metadata.operands[2].imm); +} - /** Helper function for instructions with the format `ccmp rn, #imm #nzcv, - * cc`. - * T represents the type of operands (e.g. for xn, T = uint64_t). - * Returns single value of type uint8_t. */ - template - static uint8_t ccmp_imm( - std::vector& operands, - const simeng::arch::aarch64::InstructionMetadata& metadata) { - if (AuxFunc::conditionHolds(metadata.cc, operands[0].get())) { - uint8_t nzcv; - std::tie(std::ignore, nzcv) = AuxFunc::addWithCarry( - operands[1].get(), ~static_cast(metadata.operands[1].imm), 1); - return nzcv; - } - return static_cast(metadata.operands[2].imm); +/** Helper function for instructions with the format `ccmp rn, #imm #nzcv, + * cc`. + * T represents the type of operands (e.g. for xn, T = uint64_t). + * Returns single value of type uint8_t. */ +template +uint8_t ccmp_imm(std::vector& operands, + const simeng::arch::aarch64::InstructionMetadata& metadata) { + if (conditionHolds(metadata.cc, operands[0].get())) { + uint8_t nzcv; + std::tie(std::ignore, nzcv) = addWithCarry( + operands[1].get(), ~static_cast(metadata.operands[1].imm), 1); + return nzcv; } + return static_cast(metadata.operands[2].imm); +} - /** Helper function for instructions with the format `ccmp rn, rm, #nzcv, - * cc`. - * T represents the type of operands (e.g. for xn, T = uint64_t). - * Returns single value of type uint8_t. 
*/ - template - static uint8_t ccmp_reg( - std::vector& operands, - const simeng::arch::aarch64::InstructionMetadata& metadata) { - if (AuxFunc::conditionHolds(metadata.cc, operands[0].get())) { - uint8_t nzcv; - std::tie(std::ignore, nzcv) = - AuxFunc::addWithCarry(operands[1].get(), ~operands[2].get(), 1); - return nzcv; - } - return static_cast(metadata.operands[2].imm); +/** Helper function for instructions with the format `ccmp rn, rm, #nzcv, + * cc`. + * T represents the type of operands (e.g. for xn, T = uint64_t). + * Returns single value of type uint8_t. */ +template +uint8_t ccmp_reg(std::vector& operands, + const simeng::arch::aarch64::InstructionMetadata& metadata) { + if (conditionHolds(metadata.cc, operands[0].get())) { + uint8_t nzcv; + std::tie(std::ignore, nzcv) = + addWithCarry(operands[1].get(), ~operands[2].get(), 1); + return nzcv; } + return static_cast(metadata.operands[2].imm); +} - /** Helper function for instructions with the format `cb rn, #imm`. - * T represents the type of operands (e.g. for xn, T = uint64_t). - * Returns tuple of type [bool branch taken, uint64_t address]. */ - template - static std::tuple condBranch_cmpToZero( - std::vector& operands, - const simeng::arch::aarch64::InstructionMetadata& metadata, - uint64_t instructionAddress, std::function func) { - bool branchTaken; - uint64_t branchAddress; - if (func(operands[0].get())) { - branchTaken = true; - branchAddress = instructionAddress + metadata.operands[1].imm; - } else { - branchTaken = false; - branchAddress = instructionAddress + 4; - } - return {branchTaken, branchAddress}; +/** Helper function for instructions with the format `cb rn, #imm`. + * T represents the type of operands (e.g. for xn, T = uint64_t). + * Returns tuple of type [bool branch taken, uint64_t address]. 
*/ +template +std::tuple condBranch_cmpToZero( + std::vector& operands, + const simeng::arch::aarch64::InstructionMetadata& metadata, + uint64_t instructionAddress, std::function func) { + bool branchTaken; + uint64_t branchAddress; + if (func(operands[0].get())) { + branchTaken = true; + branchAddress = instructionAddress + metadata.operands[1].imm; + } else { + branchTaken = false; + branchAddress = instructionAddress + 4; } + return {branchTaken, branchAddress}; +} - /** Helper function for instructions with the format `cs - * rd, rn, rm, cc`. - * T represents the type of operands (e.g. for xd, T = uint64_t). - * Returns single value of type T. */ - template - static T cs_4ops(std::vector& operands, - const simeng::arch::aarch64::InstructionMetadata& metadata, - std::function func) { - if (AuxFunc::conditionHolds(metadata.cc, operands[0].get())) { - return operands[1].get(); - } - return func(operands[2].get()); +/** Helper function for instructions with the format `cs + * rd, rn, rm, cc`. + * T represents the type of operands (e.g. for xd, T = uint64_t). + * Returns single value of type T. */ +template +T cs_4ops(std::vector& operands, + const simeng::arch::aarch64::InstructionMetadata& metadata, + std::function func) { + if (conditionHolds(metadata.cc, operands[0].get())) { + return operands[1].get(); } + return func(operands[2].get()); +} - /** Helper function for instructions with the format `tb rn, #imm, - * label`. - * T represents the type of operands (e.g. for xn, T = uint64_t). - * Returns tuple of type [bool branch taken, uint64_t address]. */ - template - static std::tuple tbnz_tbz( - std::vector& operands, - const simeng::arch::aarch64::InstructionMetadata& metadata, - uint64_t instructionAddress, bool isNZ) { - bool branchTaken; - uint64_t branchAddress = instructionAddress; - if (operands[0].get() & - (static_cast(1) << metadata.operands[1].imm)) { - branchTaken = isNZ; - } else { - branchTaken = !isNZ; - } - branchAddress += branchTaken ? 
metadata.operands[2].imm : 4; - return {branchTaken, branchAddress}; +/** Helper function for instructions with the format `tb rn, #imm, + * label`. + * T represents the type of operands (e.g. for xn, T = uint64_t). + * Returns tuple of type [bool branch taken, uint64_t address]. */ +template +std::tuple tbnz_tbz( + std::vector& operands, + const simeng::arch::aarch64::InstructionMetadata& metadata, + uint64_t instructionAddress, bool isNZ) { + bool branchTaken; + uint64_t branchAddress = instructionAddress; + if (operands[0].get() & (static_cast(1) << metadata.operands[1].imm)) { + branchTaken = isNZ; + } else { + branchTaken = !isNZ; } -}; + branchAddress += branchTaken ? metadata.operands[2].imm : 4; + return {branchTaken, branchAddress}; +} + } // namespace aarch64 } // namespace arch } // namespace simeng \ No newline at end of file diff --git a/src/include/simeng/arch/aarch64/helpers/divide.hh b/src/include/simeng/arch/aarch64/helpers/divide.hh index f4e226a0b6..b67d08de6f 100644 --- a/src/include/simeng/arch/aarch64/helpers/divide.hh +++ b/src/include/simeng/arch/aarch64/helpers/divide.hh @@ -5,19 +5,18 @@ namespace simeng { namespace arch { namespace aarch64 { -class divideHelp { - public: - /** Helper function for instructions with the format `div rd, rn, rm`. - * T represents the type of operands (e.g. for xd, T = uint64_t). - * Returns single value of type T. */ - template - static T div_3ops(std::vector& operands) { - const T n = operands[0].get(); - const T m = operands[1].get(); - if (m == 0) return 0; - return (n / m); - } -}; + +/** Helper function for instructions with the format `div rd, rn, rm`. + * T represents the type of operands (e.g. for xd, T = uint64_t). + * Returns single value of type T. 
*/ +template +T div_3ops(std::vector& operands) { + const T n = operands[0].get(); + const T m = operands[1].get(); + if (m == 0) return 0; + return (n / m); +} + } // namespace aarch64 } // namespace arch } // namespace simeng \ No newline at end of file diff --git a/src/include/simeng/arch/aarch64/helpers/float.hh b/src/include/simeng/arch/aarch64/helpers/float.hh index 169f4d56a7..59c0912c6a 100644 --- a/src/include/simeng/arch/aarch64/helpers/float.hh +++ b/src/include/simeng/arch/aarch64/helpers/float.hh @@ -1,156 +1,182 @@ #pragma once +#include + #include "auxiliaryFunctions.hh" namespace simeng { namespace arch { namespace aarch64 { -class floatHelp { - public: - /** Helper function for instructions with the format `fabd rd, rn, rm`. - * T represents the type of operands (e.g. for sd T = float). - * Returns correctly formatted RegisterValue. */ - template - static RegisterValue fabd_3ops(std::vector& operands) { - const T n = operands[0].get(); - const T m = operands[1].get(); - return {std::fabs(n - m), 256}; - } - /** Helper function for instructions with the format `fabs rd, rn`. - * T represents the type of operands (e.g. for sd T = float). - * Returns correctly formatted RegisterValue. */ - template - static RegisterValue fabs_2ops(std::vector& operands) { - const T n = operands[0].get(); - return {std::fabs(n), 256}; - } +/** Helper function for instructions with the format `fabd rd, rn, rm`. + * T represents the type of operands (e.g. for sd T = float). + * Returns correctly formatted RegisterValue. */ +template +RegisterValue fabd_3ops(std::vector& operands) { + const T n = operands[0].get(); + const T m = operands[1].get(); + return {std::fabs(n - m), 256}; +} - /** Helper function for instructions with the format `fccmp rn, rm, #nzcv, - * cc`. - * T represents the type of operands (e.g. for sn T = float). - * Returns single value of type uint8_t. 
*/ - template - static uint8_t fccmp( - std::vector& operands, - const simeng::arch::aarch64::InstructionMetadata& metadata) { - if (AuxFunc::conditionHolds(metadata.cc, operands[0].get())) { - T a = operands[1].get(); - T b = operands[2].get(); - if (std::isnan(a) || std::isnan(b)) { - // TODO: Raise exception if NaNs are signalling or fcmpe - return AuxFunc::nzcv(false, false, true, true); - } else if (a == b) { - return AuxFunc::nzcv(false, true, true, false); - } else if (a < b) { - return AuxFunc::nzcv(true, false, false, false); - } else { - return AuxFunc::nzcv(false, false, true, false); - } - } - return static_cast(metadata.operands[2].imm); - } +/** Helper function for instructions with the format `fabs rd, rn`. + * T represents the type of operands (e.g. for sd T = float). + * Returns correctly formatted RegisterValue. */ +template +RegisterValue fabs_2ops(std::vector& operands) { + const T n = operands[0].get(); + return {std::fabs(n), 256}; +} - /** Helper function for instructions with the format `fcmp rn, `. - * T represents the type of operands (e.g. for sn T = float). - * Returns single value of type uint8_t. */ - template - static uint8_t fcmp(std::vector& operands, bool useImm) { - T a = operands[0].get(); - // Dont need to fetch imm as will always be 0.0 - T b = useImm ? 0 : operands[1].get(); +/** Helper function for instructions with the format `fccmp rn, rm, #nzcv, + * cc`. + * T represents the type of operands (e.g. for sn T = float). + * Returns single value of type uint8_t. 
*/ +template +uint8_t fccmp(std::vector& operands, + const simeng::arch::aarch64::InstructionMetadata& metadata) { + if (conditionHolds(metadata.cc, operands[0].get())) { + T a = operands[1].get(); + T b = operands[2].get(); if (std::isnan(a) || std::isnan(b)) { // TODO: Raise exception if NaNs are signalling or fcmpe - return AuxFunc::nzcv(false, false, true, true); + return nzcv(false, false, true, true); } else if (a == b) { - return AuxFunc::nzcv(false, true, true, false); + return nzcv(false, true, true, false); } else if (a < b) { - return AuxFunc::nzcv(true, false, false, false); + return nzcv(true, false, false, false); + } else { + return nzcv(false, false, true, false); } - return AuxFunc::nzcv(false, false, true, false); } + return static_cast(metadata.operands[2].imm); +} - /** Helper function for instructions with the format `fmaxnm rd, rn, rm`. - * T represents the type of operands (e.g. for sd T = float). - * Returns correctly formatted RegisterValue. */ - template - static RegisterValue fmaxnm_3ops(std::vector& operands) { - const T n = operands[0].get(); - const T m = operands[1].get(); - return {std::fmax(n, m), 256}; +/** Helper function for instructions with the format `fcmp rn, `. + * T represents the type of operands (e.g. for sn T = float). + * Returns single value of type uint8_t. */ +template +uint8_t fcmp(std::vector& operands, bool useImm) { + T a = operands[0].get(); + // Dont need to fetch imm as will always be 0.0 + T b = useImm ? 0 : operands[1].get(); + if (std::isnan(a) || std::isnan(b)) { + // TODO: Raise exception if NaNs are signalling or fcmpe + return nzcv(false, false, true, true); + } else if (a == b) { + return nzcv(false, true, true, false); + } else if (a < b) { + return nzcv(true, false, false, false); } + return nzcv(false, false, true, false); +} - /** Helper function for instructions with the format `fmaxnm rd, rn, rm`. - * T represents the type of operands (e.g. for sd T = float). 
- * Returns correctly formatted RegisterValue. */ - template - static RegisterValue fminnm_3ops(std::vector& operands) { - const T n = operands[0].get(); - const T m = operands[1].get(); - return {std::fmin(n, m), 256}; - } +/** Helper function for instructions with the format `fmaxnm rd, rn, rm`. + * T represents the type of operands (e.g. for sd T = float). + * Returns correctly formatted RegisterValue. */ +template +RegisterValue fmaxnm_3ops(std::vector& operands) { + const T n = operands[0].get(); + const T m = operands[1].get(); + return {std::fmax(n, m), 256}; +} - /** Helper function for NEON instructions with the format `fnmsub rd, rn, rm, - * ra`. - * T represents the type of operands (e.g. for sd T = float). - * Returns correctly formatted RegisterValue. */ - template - static RegisterValue fnmsub_4ops(std::vector& operands) { - T n = operands[0].get(); - T m = operands[1].get(); - T a = operands[2].get(); - return {std::fma(n, m, -a), 256}; - } +/** Helper function for instructions with the format `fminnm rd, rn, rm`. + * T represents the type of operands (e.g. for sd T = float). + * Returns correctly formatted RegisterValue. */ +template +RegisterValue fminnm_3ops(std::vector& operands) { + const T n = operands[0].get(); + const T m = operands[1].get(); + return {std::fmin(n, m), 256}; +} - /** Helper function for NEON instructions with the format `fnmadd rd, rn, rm, - * ra`. - * T represents the type of operands (e.g. for sd T = float). - * Returns correctly formatted RegisterValue. */ - template - static RegisterValue fnmadd_4ops(std::vector& operands) { - T n = operands[0].get(); - T m = operands[1].get(); - T a = operands[2].get(); - return {std::fma(-n, m, -a), 256}; - } +/** Helper function for NEON instructions with the format `fnmsub rd, rn, rm, + * ra`. + * T represents the type of operands (e.g. for sd T = float). + * Returns correctly formatted RegisterValue. 
*/ +template +RegisterValue fnmsub_4ops(std::vector& operands) { + T n = operands[0].get(); + T m = operands[1].get(); + T a = operands[2].get(); + return {std::fma(n, m, -a), 256}; +} - /** Helper function for NEON instructions with the format `frintp rd, rn`. - * T represents the type of operands (e.g. for dd T = double). - * Returns correctly formatted RegisterValue. */ - template - static RegisterValue frintpScalar_2ops(std::vector& operands) { - T n = operands[0].get(); - - // Merge always = false due to assumption that FPCR.nep bit = 0 - // (In SimEng the value of this register is not manually set) - T out = 0; - // Input of Infinity or 0 gives output of the same sign - if (n == 0.0 || n == -0.0 || n == INFINITY || n == -INFINITY) - out = n; - else - out = std::ceil(n); - - return {out, 256}; - } +/** Helper function for NEON instructions with the format `fnmadd rd, rn, rm, + * ra`. + * T represents the type of operands (e.g. for sd T = float). + * Returns correctly formatted RegisterValue. */ +template +RegisterValue fnmadd_4ops(std::vector& operands) { + T n = operands[0].get(); + T m = operands[1].get(); + T a = operands[2].get(); + return {std::fma(-n, m, -a), 256}; +} + +/** Helper function for NEON instructions with the format `frintp rd, rn`. + * T represents the type of operands (e.g. for dd T = double). + * Returns correctly formatted RegisterValue. */ +template +RegisterValue frintpScalar_2ops(std::vector& operands) { + T n = operands[0].get(); - /** Helper function for NEON instructions with the format `scvtf rd, - * n`, #fbits. - * D represents the destination vector register type (e.g. for dd, D = - * double). - * N represents the source vector register type (e.g. for wn, N = int32_t). - * Returns correctly formatted RegisterValue. 
*/ - template - static RegisterValue scvtf_FixedPoint( - std::vector& operands, - const simeng::arch::aarch64::InstructionMetadata& metadata) { - N n = operands[0].get(); - const uint8_t fbits = metadata.operands[2].imm; - - D out = static_cast(n) / std::pow(2, fbits); - - return {out, 256}; + // Merge always = false due to assumption that FPCR.nep bit = 0 + // (In SimEng the value of this register is not manually set) + T out = 0; + // Input of Infinity or 0 gives output of the same sign + if (n == 0.0 || n == -0.0 || n == INFINITY || n == -INFINITY) + out = n; + else + out = std::ceil(n); + + return {out, 256}; +} + +/** Helper function for NEON instructions with the format `scvtf rd, + * n`, #fbits. + * D represents the destination vector register type (e.g. for dd, D = + * double). + * N represents the source vector register type (e.g. for wn, N = int32_t). + * Returns correctly formatted RegisterValue. */ +template +RegisterValue scvtf_FixedPoint( + std::vector& operands, + const simeng::arch::aarch64::InstructionMetadata& metadata) { + N n = operands[0].get(); + const uint8_t fbits = metadata.operands[2].imm; + + D out = static_cast(n) / std::pow(2, fbits); + + return {out, 256}; +} + +/** Helper function for NEON instructions with the format fcvtzu rd, rn. + * D represents the destination register type (e.g. for Xd, D = uint64_t). + * N represents the source register type (e.g. for Sd, N = float). + * Returns single value of type D. 
*/ +template +D fcvtzu_integer(std::vector& operands) { + N input = operands[0].get(); + D result = static_cast(0); + + // Check for nan and less than 0 + if (!std::isnan(input) && (input > static_cast(0))) { + if (std::isinf(input)) { + // Account for Infinity + result = std::numeric_limits::max(); + } else if (input > std::numeric_limits::max()) { + // Account for the source value being larger than the + // destination register can support + result = std::numeric_limits::max(); + } else { + result = static_cast(std::trunc(input)); + } } -}; + + return result; +} + } // namespace aarch64 } // namespace arch } // namespace simeng \ No newline at end of file diff --git a/src/include/simeng/arch/aarch64/helpers/load.hh b/src/include/simeng/arch/aarch64/helpers/load.hh deleted file mode 100644 index 08f68d726a..0000000000 --- a/src/include/simeng/arch/aarch64/helpers/load.hh +++ /dev/null @@ -1,14 +0,0 @@ -#pragma once - -#include "auxiliaryFunctions.hh" - -namespace simeng { -namespace arch { -namespace aarch64 { -class loadHelp { - public: - static void tempFunc() { return; } -}; -} // namespace aarch64 -} // namespace arch -} // namespace simeng \ No newline at end of file diff --git a/src/include/simeng/arch/aarch64/helpers/logical.hh b/src/include/simeng/arch/aarch64/helpers/logical.hh index 154bf2e59a..d81c449522 100644 --- a/src/include/simeng/arch/aarch64/helpers/logical.hh +++ b/src/include/simeng/arch/aarch64/helpers/logical.hh @@ -5,105 +5,100 @@ namespace simeng { namespace arch { namespace aarch64 { -class logicalHelp { - public: - /** Helper function for instructions with the format `asrv rd, rn, rm`. - * T represents the type of operands (e.g. for xn, T = int64_t). - * Returns single value of type T. 
*/ - template - static T asrv_3gpr(std::vector& operands) { - const T n = operands[0].get(); - const T m = operands[1].get::type>(); - return n >> (m % (sizeof(T) * 8)); - } - /** Helper function for instructions with the format `bic rd, rn, rm{, shift - * #amount}`. - * T represents the type of operands (e.g. for xn, T = uint64_t). - * Returns tuple of [resulting value, nzcv]. */ - template - static std::tuple bicShift_3ops( - std::vector& operands, - const simeng::arch::aarch64::InstructionMetadata& metadata, - bool calcNZCV) { - const T x = operands[0].get(); - const T y = - ~shiftValue(operands[1].get(), metadata.operands[2].shift.type, - metadata.operands[2].shift.value); - T result = x & y; - bool n = sizeof(T) == 8 ? (static_cast(result) < 0) - : (static_cast(result) < 0); - bool z = (result == 0); - uint8_t nzcv = calcNZCV ? AuxFunc::nzcv(n, z, false, false) : 0; - return {result, nzcv}; - } +/** Helper function for instructions with the format `asrv rd, rn, rm`. + * T represents the type of operands (e.g. for xn, T = int64_t). + * Returns single value of type T. */ +template +T asrv_3gpr(std::vector& operands) { + const T n = operands[0].get(); + const T m = operands[1].get::type>(); + return n >> (m % (sizeof(T) * 8)); +} - /** Helper function for instructions with the format ` rd, rn, - * #imm`. - * T represents the type of operands (e.g. for xn, T = uint64_t). - * Returns tuple of [resulting value, nzcv]. */ - template - static std::tuple logicOp_imm( - std::vector& operands, - const simeng::arch::aarch64::InstructionMetadata& metadata, bool calcNZCV, - std::function func) { - const T n = operands[0].get(); - const T m = static_cast(metadata.operands[2].imm); - T result = func(n, m); - uint8_t nzcv = calcNZCV ? AuxFunc::nzcv(result >> ((sizeof(T) * 8) - 1), - result == 0, false, false) - : 0; - return {result, nzcv}; - } +/** Helper function for instructions with the format `bic rd, rn, rm{, shift + * #amount}`. 
+ * T represents the type of operands (e.g. for xn, T = uint64_t). + * Returns tuple of [resulting value, nzcv]. */ +template +std::tuple bicShift_3ops( + std::vector& operands, + const simeng::arch::aarch64::InstructionMetadata& metadata, bool calcNZCV) { + const T x = operands[0].get(); + const T y = ~shiftValue(operands[1].get(), metadata.operands[2].shift.type, + metadata.operands[2].shift.value); + T result = x & y; + bool n = sizeof(T) == 8 ? (static_cast(result) < 0) + : (static_cast(result) < 0); + bool z = (result == 0); + uint8_t nzcv_ = calcNZCV ? nzcv(n, z, false, false) : 0; + return {result, nzcv_}; +} - /** Helper function for instructions with the format ` rd, rn, - * rm{, shift #amount}`. - * T represents the type of operands (e.g. for xn, T = uint64_t). - * Returns tuple of [resulting value, nzcv]. */ - template - static std::tuple logicOpShift_3ops( - std::vector& operands, - const simeng::arch::aarch64::InstructionMetadata& metadata, bool calcNZCV, - std::function func) { - const T n = operands[0].get(); - const T m = - shiftValue(operands[1].get(), metadata.operands[2].shift.type, - metadata.operands[2].shift.value); - T result = func(n, m); - uint8_t nzcv = calcNZCV ? AuxFunc::nzcv(result >> ((sizeof(T) * 8) - 1), - result == 0, false, false) - : 0; - return {result, nzcv}; - } +/** Helper function for instructions with the format ` rd, rn, + * #imm`. + * T represents the type of operands (e.g. for xn, T = uint64_t). + * Returns tuple of [resulting value, nzcv]. */ +template +std::tuple logicOp_imm( + std::vector& operands, + const simeng::arch::aarch64::InstructionMetadata& metadata, bool calcNZCV, + std::function func) { + const T n = operands[0].get(); + const T m = static_cast(metadata.operands[2].imm); + T result = func(n, m); + uint8_t nzcv_ = calcNZCV ? nzcv(result >> ((sizeof(T) * 8) - 1), result == 0, + false, false) + : 0; + return {result, nzcv_}; +} - /** Helper function for instructions with the format `lsv rd, rn, rm`. 
- * T represents the type of operands (e.g. for xn, T = uint64_t). - * Returns single value of type uint64_t. */ - template - static uint64_t logicalShiftLR_3ops(std::vector& operands, - bool isLSL) { - const T n = operands[0].get(); - const T m = operands[1].get() & ((sizeof(T) * 8) - 1); - uint64_t result = static_cast(isLSL ? n << m : n >> m); - return result; - } +/** Helper function for instructions with the format ` rd, rn, + * rm{, shift #amount}`. + * T represents the type of operands (e.g. for xn, T = uint64_t). + * Returns tuple of [resulting value, nzcv]. */ +template +std::tuple logicOpShift_3ops( + std::vector& operands, + const simeng::arch::aarch64::InstructionMetadata& metadata, bool calcNZCV, + std::function func) { + const T n = operands[0].get(); + const T m = shiftValue(operands[1].get(), metadata.operands[2].shift.type, + metadata.operands[2].shift.value); + T result = func(n, m); + uint8_t nzcv_ = calcNZCV ? nzcv(result >> ((sizeof(T) * 8) - 1), result == 0, + false, false) + : 0; + return {result, nzcv_}; +} - /** Helper function for instructions with the format `rorv rd, rn, rm`. - * T represents the type of operands (e.g. for xn, T = uint64_t). - * Returns single value of type T. */ - template - static T rorv_3ops(std::vector& operands) { - const T n = operands[0].get(); - const T m = operands[1].get(); +/** Helper function for instructions with the format `lsv rd, rn, rm`. + * T represents the type of operands (e.g. for xn, T = uint64_t). + * Returns single value of type uint64_t. */ +template +uint64_t logicalShiftLR_3ops(std::vector& operands, bool isLSL) { + const T n = operands[0].get(); + const T m = operands[1].get() & ((sizeof(T) * 8) - 1); + uint64_t result = static_cast(isLSL ? n << m : n >> m); + return result; +} - const uint16_t data_size = sizeof(T) * 8; - T remainder = m % data_size; +/** Helper function for instructions with the format `rorv rd, rn, rm`. + * T represents the type of operands (e.g. for xn, T = uint64_t). 
+ * Returns single value of type T. */ +template +T rorv_3ops(std::vector& operands) { + const T n = operands[0].get(); + const T m = operands[1].get(); + + const uint16_t data_size = sizeof(T) * 8; + T remainder = m % data_size; + + // Check if any rotation done at all + if (remainder == 0) return n; + return (n >> remainder) + (n << (data_size - remainder)); +} - // Check if any rotation done at all - if (remainder == 0) return n; - return (n >> remainder) + (n << (data_size - remainder)); - } -}; } // namespace aarch64 } // namespace arch } // namespace simeng \ No newline at end of file diff --git a/src/include/simeng/arch/aarch64/helpers/multiply.hh b/src/include/simeng/arch/aarch64/helpers/multiply.hh index d5466bac04..72193ed966 100644 --- a/src/include/simeng/arch/aarch64/helpers/multiply.hh +++ b/src/include/simeng/arch/aarch64/helpers/multiply.hh @@ -5,54 +5,53 @@ namespace simeng { namespace arch { namespace aarch64 { -class multiplyHelp { - public: - /** Helper function for instructions with the format `madd rd, rn, rm, ra`. - * T represents the type of operands (e.g. for xn, T = uint64_t). - * Returns single value of type T. */ - template - static T madd_4ops(std::vector& operands) { - const T n = operands[0].get(); - const T m = operands[1].get(); - const T a = operands[2].get(); - return (a + (n * m)); - } - /** Helper function for instructions with the format `maddl xd, wn, wm, xa`. - * D represents the type of the destination register (either int64_t or - * uint64_t). - * N represents the type of the first source register (either - * int32_t or uint32_t). - * Returns single value of type D. */ - template - static D maddl_4ops(std::vector& operands) { - const D n = static_cast(operands[0].get()); - const D m = static_cast(operands[1].get()); - const D a = operands[2].get(); - return (a + (n * m)); - } +/** Helper function for instructions with the format `madd rd, rn, rm, ra`. + * T represents the type of operands (e.g. for xn, T = uint64_t). 
+ * Returns single value of type T. */ +template +T madd_4ops(std::vector& operands) { + const T n = operands[0].get(); + const T m = operands[1].get(); + const T a = operands[2].get(); + return (a + (n * m)); +} - /** Helper function for instructions with the format `mul rd, rn, rm`. - * T represents the type of operands (e.g. for xn, T = uint64_t). - * Returns single value of type T. */ - template - static T mul_3ops(std::vector& operands) { - const T n = operands[0].get(); - const T m = operands[1].get(); - return (n * m); - } +/** Helper function for instructions with the format `maddl xd, wn, wm, xa`. + * D represents the type of the destination register (either int64_t or + * uint64_t). + * N represents the type of the first source register (either + * int32_t or uint32_t). + * Returns single value of type D. */ +template +D maddl_4ops(std::vector& operands) { + const D n = static_cast(operands[0].get()); + const D m = static_cast(operands[1].get()); + const D a = operands[2].get(); + return (a + (n * m)); +} + +/** Helper function for instructions with the format `mul rd, rn, rm`. + * T represents the type of operands (e.g. for xn, T = uint64_t). + * Returns single value of type T. */ +template +T mul_3ops(std::vector& operands) { + const T n = operands[0].get(); + const T m = operands[1].get(); + return (n * m); +} + +/** Helper function for instructions with the format `msub rd, rn, rm, ra`. + * T represents the type of operands (e.g. for xn, T = uint64_t). + * Returns single value of type T. */ +template +T msub_4ops(std::vector& operands) { + const T n = operands[0].get(); + const T m = operands[1].get(); + const T a = operands[2].get(); + return (a - (n * m)); +} - /** Helper function for instructions with the format `msub rd, rn, rm, ra`. - * T represents the type of operands (e.g. for xn, T = uint64_t). - * Returns single value of type T. 
*/ - template - static T msub_4ops(std::vector& operands) { - const T n = operands[0].get(); - const T m = operands[1].get(); - const T a = operands[2].get(); - return (a - (n * m)); - } -}; } // namespace aarch64 } // namespace arch } // namespace simeng \ No newline at end of file diff --git a/src/include/simeng/arch/aarch64/helpers/neon.hh b/src/include/simeng/arch/aarch64/helpers/neon.hh index c9382ff954..c4978fbc09 100644 --- a/src/include/simeng/arch/aarch64/helpers/neon.hh +++ b/src/include/simeng/arch/aarch64/helpers/neon.hh @@ -5,956 +5,945 @@ namespace simeng { namespace arch { namespace aarch64 { -class neonHelp { - public: - /** Helper function for NEON instructions with the format `add vd, vn, vm`. - * T represents the type of operands (e.g. for vn.2d, T = uint64_t). - * I represents the number of elements in the output array to be updated (e.g. - * for vd.8b I = 8). - * Returns correctly formatted Register Value. */ - template - static RegisterValue vecAdd_3ops(std::vector& operands) { - const T* n = operands[0].getAsVector(); - const T* m = operands[1].getAsVector(); - T out[16 / sizeof(T)] = {0}; - for (int i = 0; i < I; i++) { - out[i] = static_cast(n[i] + m[i]); - } - return {out, 256}; - } - - /** Helper function for NEON instructions with the format `addp vd, vn, vm`. - * T represents the type of operands (e.g. for vn.2d, T = uint64_t). - * I represents the number of elements in the output array to be updated (e.g. - * for vd.8b I = 8). - * Returns correctly formatted Register Value. 
*/ - template - static RegisterValue vecAddp_3ops(std::vector& operands) { - const T* n = operands[0].getAsVector(); - const T* m = operands[1].getAsVector(); - T out[16 / sizeof(T)] = {0}; - uint8_t offset = I / 2; - for (int i = 0; i < I; i++) { - if (i < offset) { - out[i] = static_cast(n[i * 2] + n[(i * 2) + 1]); - } else { - out[i] = - static_cast(m[(i - offset) * 2] + m[((i - offset) * 2) + 1]); - } - } - return {out, 256}; - } - - /** Helper function for NEON instructions with the format `bic vd, vn, vm`. - * T represents the type of operands (e.g. for vn.2d, T = uint64_t). - * I represents the number of elements in the output array to be updated (e.g. - * for vd.8b I = 8). - * Returns correctly formatted Register Value. */ - template - static RegisterValue vecBic_3ops(std::vector& operands) { - const T* n = operands[0].getAsVector(); - const T* m = operands[1].getAsVector(); - T out[16 / sizeof(T)] = {0}; - for (int i = 0; i < I; i++) { - out[i] = n[i] & ~m[i]; - } - return {out, 256}; - } - - /** Helper function for NEON instructions with the format `bic vd, #imm{, lsl - * #shift}`. - * T represents the type of operands (e.g. for vn.2d, T = uint64_t). - * I represents the number of elements in the output array to be updated (e.g. - * for vd.8b I = 8). - * Returns correctly formatted Register Value. */ - template - static RegisterValue vecBicShift_imm( - std::vector& operands, - const simeng::arch::aarch64::InstructionMetadata& metadata) { - const T* d = operands[0].getAsVector(); - T imm = ~shiftValue(static_cast(metadata.operands[1].imm), - metadata.operands[1].shift.type, - metadata.operands[1].shift.value); - T out[16 / sizeof(T)] = {0}; - for (int i = 0; i < I; i++) { - out[i] = d[i] & imm; - } - return {out, 256}; - } - - /** Helper function for NEON instructions with the format `bi vd, vn, - * vm`. - * I represents the number of elements in the output array to be updated - * (e.g. for vd.8b I = 8). - * Returns correctly formatted RegisterValue. 
*/ - template - static RegisterValue vecBitwiseInsert(std::vector& operands, - bool isBif) { - const uint64_t* d = operands[0].getAsVector(); - const uint64_t* n = operands[1].getAsVector(); - const uint64_t* m = operands[2].getAsVector(); - uint64_t out[2] = {0}; - for (int i = 0; i < (I / 8); i++) { - out[i] = isBif ? (d[i] & m[i]) | (n[i] & ~m[i]) - : (d[i] & ~m[i]) | (n[i] & m[i]); - } - return {out, 256}; - } - - /** Helper function for NEON instructions with the format `bsl vd, vn, - * vm`. - * I represents the number of elements in the output array to be updated - * (e.g. for vd.8b I = 8). - * Returns correctly formatted RegisterValue. */ - template - static RegisterValue vecBsl(std::vector& operands) { - const uint64_t* d = operands[0].getAsVector(); - const uint64_t* n = operands[1].getAsVector(); - const uint64_t* m = operands[2].getAsVector(); - uint64_t out[2] = {0}; - for (int i = 0; i < (I / 8); i++) { - out[i] = (d[i] & n[i]) | (~d[i] & m[i]); - } - return {out, 256}; - } - - /** Helper function for instructions with the format `cm vd, vn, `. - * T represents the type of operands (e.g. for vn.2d, T = uint64_t). - * I represents the number of elements in the output array to be updated (e.g. - * for vd.8b I = 8). - * Returns correctly formatted RegisterValue. */ - template - static RegisterValue vecCompare(std::vector& operands, - bool cmpToZero, - std::function func) { - const T* n = operands[0].getAsVector(); - const T* m; - if (!cmpToZero) m = operands[1].getAsVector(); - T out[16 / sizeof(T)] = {0}; - for (int i = 0; i < I; i++) { - out[i] = func(n[i], cmpToZero ? static_cast(0) : m[i]) - ? static_cast(-1) - : 0; - } - return {out, 256}; - } - - /** Helper function for instructions with the format `cnt vd, vn`. - * T represents the type of operands (e.g. for vn.2d, T = uint64_t). - * I represents the number of elements in the output array to be updated (e.g. - * for vd.8b I = 8). - * Returns correctly formatted RegisterValue. 
*/ - template - static RegisterValue vecCountPerByte(std::vector& operands) { - const uint8_t* n = operands[0].getAsVector(); - T out[16 / sizeof(T)] = {0}; - for (int i = 0; i < I; i++) { - for (int j = 0; j < (sizeof(T) * 8); j++) { - // Move queried bit to LSB and extract via an AND operator - out[i] += ((n[i] >> j) & 1); - } - } - return {out, 256}; - } - - /** Helper function for NEON instructions with the format `dup , - * `. - * T represents the type of operands (e.g. for vd.2d, T = uint64_t). - * I represents the number of elements in the output array to be updated (e.g. - * for vd.8b I = 8). - * Returns correctly formatted RegisterValue. */ - template - static RegisterValue vecDup_gprOrIndex( - std::vector& operands, - const simeng::arch::aarch64::InstructionMetadata& metadata, bool useGpr) { - int index = useGpr ? 0 : metadata.operands[1].vector_index; - T element = - useGpr ? operands[0].get() : operands[0].getAsVector()[index]; - T out[16 / sizeof(T)] = {0}; - std::fill_n(std::begin(out), I, element); - return {out, 256}; - } - - /** Helper function for NEON instructions with the format `ext vd, - * vn, vm, #index`. - * T represents the type of operands (e.g. for vn.2d, T = uint64_t). - * I represents the number of elements in the output array to be updated (e.g. - * for vd.8b I = 8). - * Returns correctly formatted RegisterValue. */ - template - static RegisterValue vecExtVecs_index( - std::vector& operands, - const simeng::arch::aarch64::InstructionMetadata& metadata) { - const T* n = operands[0].getAsVector(); - const T* m = operands[1].getAsVector(); - const uint64_t index = static_cast(metadata.operands[3].imm); - T out[16 / sizeof(T)] = {0}; - - for (int i = index; i < I; i++) { - out[i - index] = n[i]; - } - for (int i = 0; i < index; i++) { - out[I - index + i] = m[i]; - } - return {out, 256}; - } - - /** Helper function for NEON instructions with the format `fabd vd.T, vn.T, - * vm.T`. - * T represents the type of operands (e.g. 
for vn.2d, T = double). - * I represents the number of elements in the output array to be updated (e.g. - * for vd.8b I = 8). - * Returns correctly formatted RegisterValue. */ - template - static RegisterValue vecFabd(std::vector& operands) { - const T* n = operands[0].getAsVector(); - const T* m = operands[1].getAsVector(); - T out[16 / sizeof(T)] = {0}; - for (int i = 0; i < I; i++) { - out[i] = std::fabs(n[i] - m[i]); - } - return {out, 256}; - } - - /** Helper function for NEON instructions with the format `fabs vd, vn`. - * T represents the type of operands (e.g. for vn.2d, T = double). - * I represents the number of elements in the output array to be updated (e.g. - * for vd.8b I = 8). - * Returns correctly formatted RegisterValue. */ - template - static RegisterValue vecFabs_2ops(std::vector& operands) { - const T* n = operands[0].getAsVector(); - T out[16 / sizeof(T)] = {0}; - for (int i = 0; i < I; i++) { - out[i] = std::fabs(n[i]); - } - return {out, 256}; - } - - /** Helper function for instructions with the format `fcm vd, vn, `. - * T represents operand type (e.g. vd.2d is double). - * C represents comparison type (e.g. for T=float, comparison type is - * uint32_t). - * I represents the number of elements in the output array to be - * updated (e.g. for vd.8b I = 8). - * Returns correctly formatted RegisterValue. */ - template - static RegisterValue vecFCompare(std::vector& operands, - bool cmpToZero, - std::function func) { - const T* n = operands[0].getAsVector(); - const T* m; - if (!cmpToZero) m = operands[1].getAsVector(); - C out[16 / sizeof(C)] = {0}; - for (int i = 0; i < I; i++) { - out[i] = func(n[i], cmpToZero ? static_cast(0) : m[i]) - ? static_cast(-1) - : 0; - } - return {out, 256}; - } - - /** Helper function for NEON instructions with the format `fcvtl{2} vd, vn`. - * D represents the dest. vector register type (e.g. vd.2d would be double). - * N represents the source vector register type (e.g. vd.4s would be float). 
- * I represents the number of elements in the output array to be updated (e.g. - * for vd.8b I = 8). - * Returns correctly formatted RegisterValue. */ - template - static RegisterValue vecFcvtl(std::vector& operands, - bool isFcvtl2) { - const N* n = operands[0].getAsVector(); - D out[16 / sizeof(D)] = {0}; - for (int i = (isFcvtl2 ? I : 0); i < (isFcvtl2 ? (I * 2) : I); i++) { - out[isFcvtl2 ? (i - I) : i] = static_cast(n[i]); - } - return {out, 256}; - } - - /** Helper function for NEON instructions with the format `fcvtn{2} vd, vn`. - * D represents the dest. vector register type (e.g. vd.2s would be float). - * N represents the source vector register type (e.g. vd.2d would be double). - * I represents the number of elements in the output array to be updated (e.g. - * for vd.8b I = 8). - * Returns correctly formatted RegisterValue. */ - template - static RegisterValue vecFcvtn(std::vector& operands, - bool isFcvtn2) { - const N* n = operands[0].getAsVector(); - D out[16 / sizeof(D)] = {0}; - for (int i = (isFcvtn2 ? (I / 2) : 0); i < I; i++) { - out[i] = static_cast(n[isFcvtn2 ? (i - (I / 2)) : i]); - } - return {out, 256}; - } - - /** Helper function for NEON instructions with the format `fcvtzs vd, vn`. - * D represents the dest. vector register type (e.g. vd.2s would be float). - * N represents the source vector register type (e.g. vd.2d would be double). - * I represents the number of elements in the output array to be updated (e.g. - * for vd.8b I = 8). - * Returns correctly formatted RegisterValue. */ - template - static RegisterValue vecFcvtzs(std::vector& operands) { - const N* n = operands[0].getAsVector(); - D out[16 / sizeof(D)] = {0}; - // TODO: Handle NaNs, denorms, and saturation - for (int i = 0; i < I; i++) { - out[i] = static_cast(std::trunc(n[i])); - } - return {out, 256}; - } - - /** Helper function for NEON instructions with the format `fmla vd, - * vn, vm`. - * T represents the type of operands (e.g. for vn.2d, T = double). 
- * I represents the number of elements in the output array to be updated (e.g. - * for vd.8b I = 8). - * Returns correctly formatted RegisterValue. */ - template - static RegisterValue vecFmla_3vecs(std::vector& operands) { - const T* d = operands[0].getAsVector(); - const T* n = operands[1].getAsVector(); - const T* m = operands[2].getAsVector(); - T out[16 / sizeof(T)] = {0}; - for (int i = 0; i < I; i++) { - out[i] = d[i] + n[i] * m[i]; - } - return {out, 256}; - } - - /** Helper function for NEON instructions with the format `fmla vd, - * vn, vm[index]`. - * T represents the type of operands (e.g. for vn.2d, T = double). - * I represents the number of elements in the output array to be updated (e.g. - * for vd.8b I = 8). - * Returns correctly formatted RegisterValue. */ - template - static RegisterValue vecFmlaIndexed_3vecs( - std::vector& operands, - const simeng::arch::aarch64::InstructionMetadata& metadata) { - const T* d = operands[0].getAsVector(); - const T* n = operands[1].getAsVector(); - int index = metadata.operands[2].vector_index; - const T m = operands[2].getAsVector()[index]; - T out[16 / sizeof(T)] = {0}; - for (int i = 0; i < I; i++) { - out[i] = d[i] + n[i] * m; - } - return {out, 256}; - } - - /** Helper function for NEON instructions with the format `fmls vd, - * vn, vm`. - * T represents the type of operands (e.g. for vn.2d, T = double). - * I represents the number of elements in the output array to be updated (e.g. - * for vd.8b I = 8). - * Returns correctly formatted RegisterValue. */ - template - static RegisterValue vecFmls_3vecs(std::vector& operands) { - const T* d = operands[0].getAsVector(); - const T* n = operands[1].getAsVector(); - const T* m = operands[2].getAsVector(); - T out[16 / sizeof(T)] = {0}; - for (int i = 0; i < I; i++) { - out[i] = d[i] - (n[i] * m[i]); - } - return {out, 256}; - } - - /** Helper function for NEON instructions with the format `fmls vd, - * vn, vm[index]`. - * T represents the type of operands (e.g. 
for vn.2d, T = double). - * I represents the number of elements in the output array to be updated (e.g. - * for vd.8b I = 8). - * Returns correctly formatted RegisterValue. */ - template - static RegisterValue vecFmlsIndexed_3vecs( - std::vector& operands, - const simeng::arch::aarch64::InstructionMetadata& metadata) { - const T* d = operands[0].getAsVector(); - const T* n = operands[1].getAsVector(); - int index = metadata.operands[2].vector_index; - const T m = operands[2].getAsVector()[index]; - T out[16 / sizeof(T)] = {0}; - for (int i = 0; i < I; i++) { - out[i] = d[i] - n[i] * m; - } - return {out, 256}; - } - - /** Helper function for NEON instructions with the format `fmul rd, - * rn, vm[index]`. - * T represents the type of operands (e.g. for vn.2d, T = double). - * I represents the number of elements in the output array to be updated (e.g. - * for vd.8b I = 8). - * Returns correctly formatted RegisterValue. */ - template - static RegisterValue vecFmulIndexed_vecs( - std::vector& operands, - const simeng::arch::aarch64::InstructionMetadata& metadata) { - int index = metadata.operands[2].vector_index; - const T* n = operands[0].getAsVector(); - const T m = operands[1].getAsVector()[index]; - T out[16 / sizeof(T)] = {0}; - for (int i = 0; i < I; i++) { - out[i] = n[i] * m; - } - return {out, 256}; - } - - /** Helper function for NEON instructions with the format `fneg vd, vn`. - * T represents the type of operands (e.g. for vn.2d, T = double). - * I represents the number of elements in the output array to be updated (e.g. - * for vd.8b I = 8). - * Returns correctly formatted RegisterValue. */ - template - static RegisterValue vecFneg_2ops(std::vector& operands) { - const T* n = operands[0].getAsVector(); - T out[16 / sizeof(T)] = {0}; - for (int i = 0; i < I; i++) { - out[i] = -n[i]; - } - return {out, 256}; - } - - /** Helper function for NEON instructions with the format `fsqrt vd, vn`. - * T represents the type of operands (e.g. for vn.2d, T = double). 
- * I represents the number of elements in the output array to be updated (e.g. - * for vd.8b I = 8). - * Returns correctly formatted RegisterValue. */ - template - static RegisterValue vecFsqrt_2ops(std::vector& operands) { - const T* n = operands[0].getAsVector(); - T out[16 / sizeof(T)] = {0}; - for (int i = 0; i < I; i++) { - out[i] = ::sqrt(n[i]); - } - return {out, 256}; - } - - /** Helper function for NEON instructions with the format `frsqrte vd, vn`. - * T represents the type of operands (e.g. for vn.2d, T = double). - * I represents the number of elements in the output array to be updated (e.g. - * for vd.8b I = 8). - * Returns correctly formatted RegisterValue. */ - template - static RegisterValue vecFrsqrte_2ops(std::vector& operands) { - const T* n = operands[0].getAsVector(); - T out[16 / sizeof(T)] = {0}; - for (int i = 0; i < I; i++) { - out[i] = 1.0f / sqrtf(n[i]); - } - return {out, 256}; - } - - /** Helper function for NEON instructions with the format `frsqrts vd, vn, - * vm`. - * T represents the type of operands (e.g. for vn.2d, T = double). - * I represents the number of elements in the output array to be updated (e.g. - * for vd.8b I = 8). - * Returns correctly formatted RegisterValue. */ - template - static RegisterValue vecFrsqrts_3ops(std::vector& operands) { - const T* n = operands[0].getAsVector(); - const T* m = operands[1].getAsVector(); - T out[16 / sizeof(T)] = {0}; - for (int i = 0; i < I; i++) { - out[i] = (3.0f - n[i] * m[i]) / 2.0f; - } - return {out, 256}; - } - - /** Helper function for NEON instructions with the format `ins vd[index], - * vn[index]`. - * T represents the type of operands (e.g. for vn.2d, T = uint64_t). - * I represents the number of elements in the output array to be updated (e.g. - * for vd.8b I = 8). - * Returns correctly formatted RegisterValue. 
*/ - template - static RegisterValue vecIns_2Index( - std::vector& operands, - const simeng::arch::aarch64::InstructionMetadata& metadata) { - const T* d = operands[0].getAsVector(); - const T* n = operands[1].getAsVector(); - - T out[16 / sizeof(T)] = {0}; - for (int i = 0; i < I; i++) { - out[i] = d[i]; - } - out[metadata.operands[0].vector_index] = - n[metadata.operands[1].vector_index]; - return {out, 256}; - } - - /** Helper function for NEON instructions with the format `ins vd[index], - * rn`. - * T represents the vector register type (e.g. vd.16b would be uint8_t). - * R represents the type of the GPR (e.g. wn would be uint32_t). - * I represents the number of elements in the output array to be updated (e.g. - * for vd.8b I = 8). - * Returns correctly formatted RegisterValue. */ - template - static RegisterValue vecInsIndex_gpr( - std::vector& operands, - const simeng::arch::aarch64::InstructionMetadata& metadata) { - const T* d = operands[0].getAsVector(); - const T n = operands[1].get(); - T out[16 / sizeof(T)] = {0}; - - for (int i = 0; i < I; i++) { + +/** Helper function for NEON instructions with the format `add vd, vn, vm`. + * T represents the type of operands (e.g. for vn.2d, T = uint64_t). + * I represents the number of elements in the output array to be updated (e.g. + * for vd.8b I = 8). + * Returns correctly formatted Register Value. */ +template +RegisterValue vecAdd_3ops(std::vector& operands) { + const T* n = operands[0].getAsVector(); + const T* m = operands[1].getAsVector(); + T out[16 / sizeof(T)] = {0}; + for (int i = 0; i < I; i++) { + out[i] = static_cast(n[i] + m[i]); + } + return {out, 256}; +} + +/** Helper function for NEON instructions with the format `addp vd, vn, vm`. + * T represents the type of operands (e.g. for vn.2d, T = uint64_t). + * I represents the number of elements in the output array to be updated (e.g. + * for vd.8b I = 8). + * Returns correctly formatted Register Value. 
*/ +template +RegisterValue vecAddp_3ops(std::vector& operands) { + const T* n = operands[0].getAsVector(); + const T* m = operands[1].getAsVector(); + T out[16 / sizeof(T)] = {0}; + uint8_t offset = I / 2; + for (int i = 0; i < I; i++) { + if (i < offset) { + out[i] = static_cast(n[i * 2] + n[(i * 2) + 1]); + } else { + out[i] = static_cast(m[(i - offset) * 2] + m[((i - offset) * 2) + 1]); + } + } + return {out, 256}; +} + +/** Helper function for NEON instructions with the format `bic vd, vn, vm`. + * T represents the type of operands (e.g. for vn.2d, T = uint64_t). + * I represents the number of elements in the output array to be updated (e.g. + * for vd.8b I = 8). + * Returns correctly formatted Register Value. */ +template +RegisterValue vecBic_3ops(std::vector& operands) { + const T* n = operands[0].getAsVector(); + const T* m = operands[1].getAsVector(); + T out[16 / sizeof(T)] = {0}; + for (int i = 0; i < I; i++) { + out[i] = n[i] & ~m[i]; + } + return {out, 256}; +} + +/** Helper function for NEON instructions with the format `bic vd, #imm{, lsl + * #shift}`. + * T represents the type of operands (e.g. for vn.2d, T = uint64_t). + * I represents the number of elements in the output array to be updated (e.g. + * for vd.8b I = 8). + * Returns correctly formatted Register Value. */ +template +RegisterValue vecBicShift_imm( + std::vector& operands, + const simeng::arch::aarch64::InstructionMetadata& metadata) { + const T* d = operands[0].getAsVector(); + T imm = ~shiftValue(static_cast(metadata.operands[1].imm), + metadata.operands[1].shift.type, + metadata.operands[1].shift.value); + T out[16 / sizeof(T)] = {0}; + for (int i = 0; i < I; i++) { + out[i] = d[i] & imm; + } + return {out, 256}; +} + +/** Helper function for NEON instructions with the format `bi vd, vn, + * vm`. + * I represents the number of elements in the output array to be updated + * (e.g. for vd.8b I = 8). + * Returns correctly formatted RegisterValue. 
*/ +template +RegisterValue vecBitwiseInsert(std::vector& operands, + bool isBif) { + const uint64_t* d = operands[0].getAsVector(); + const uint64_t* n = operands[1].getAsVector(); + const uint64_t* m = operands[2].getAsVector(); + uint64_t out[2] = {0}; + for (int i = 0; i < (I / 8); i++) { + out[i] = + isBif ? (d[i] & m[i]) | (n[i] & ~m[i]) : (d[i] & ~m[i]) | (n[i] & m[i]); + } + return {out, 256}; +} + +/** Helper function for NEON instructions with the format `bsl vd, vn, + * vm`. + * I represents the number of elements in the output array to be updated + * (e.g. for vd.8b I = 8). + * Returns correctly formatted RegisterValue. */ +template +RegisterValue vecBsl(std::vector& operands) { + const uint64_t* d = operands[0].getAsVector(); + const uint64_t* n = operands[1].getAsVector(); + const uint64_t* m = operands[2].getAsVector(); + uint64_t out[2] = {0}; + for (int i = 0; i < (I / 8); i++) { + out[i] = (d[i] & n[i]) | (~d[i] & m[i]); + } + return {out, 256}; +} + +/** Helper function for instructions with the format `cm vd, vn, `. + * T represents the type of operands (e.g. for vn.2d, T = uint64_t). + * I represents the number of elements in the output array to be updated (e.g. + * for vd.8b I = 8). + * Returns correctly formatted RegisterValue. */ +template +RegisterValue vecCompare(std::vector& operands, bool cmpToZero, + std::function func) { + const T* n = operands[0].getAsVector(); + const T* m; + if (!cmpToZero) m = operands[1].getAsVector(); + T out[16 / sizeof(T)] = {0}; + for (int i = 0; i < I; i++) { + out[i] = func(n[i], cmpToZero ? static_cast(0) : m[i]) + ? static_cast(-1) + : 0; + } + return {out, 256}; +} + +/** Helper function for instructions with the format `cnt vd, vn`. + * T represents the type of operands (e.g. for vn.2d, T = uint64_t). + * I represents the number of elements in the output array to be updated (e.g. + * for vd.8b I = 8). + * Returns correctly formatted RegisterValue. 
*/ +template +RegisterValue vecCountPerByte(std::vector& operands) { + const uint8_t* n = operands[0].getAsVector(); + T out[16 / sizeof(T)] = {0}; + for (int i = 0; i < I; i++) { + for (int j = 0; j < (sizeof(T) * 8); j++) { + // Move queried bit to LSB and extract via an AND operator + out[i] += ((n[i] >> j) & 1); + } + } + return {out, 256}; +} + +/** Helper function for NEON instructions with the format `dup , + * `. + * T represents the type of operands (e.g. for vd.2d, T = uint64_t). + * I represents the number of elements in the output array to be updated (e.g. + * for vd.8b I = 8). + * Returns correctly formatted RegisterValue. */ +template +RegisterValue vecDup_gprOrIndex( + std::vector& operands, + const simeng::arch::aarch64::InstructionMetadata& metadata, bool useGpr) { + int index = useGpr ? 0 : metadata.operands[1].vector_index; + T element = + useGpr ? operands[0].get() : operands[0].getAsVector()[index]; + T out[16 / sizeof(T)] = {0}; + std::fill_n(std::begin(out), I, element); + return {out, 256}; +} + +/** Helper function for NEON instructions with the format `ext vd, + * vn, vm, #index`. + * T represents the type of operands (e.g. for vn.2d, T = uint64_t). + * I represents the number of elements in the output array to be updated (e.g. + * for vd.8b I = 8). + * Returns correctly formatted RegisterValue. */ +template +RegisterValue vecExtVecs_index( + std::vector& operands, + const simeng::arch::aarch64::InstructionMetadata& metadata) { + const T* n = operands[0].getAsVector(); + const T* m = operands[1].getAsVector(); + const uint64_t index = static_cast(metadata.operands[3].imm); + T out[16 / sizeof(T)] = {0}; + + for (int i = index; i < I; i++) { + out[i - index] = n[i]; + } + for (int i = 0; i < index; i++) { + out[I - index + i] = m[i]; + } + return {out, 256}; +} + +/** Helper function for NEON instructions with the format `fabd vd.T, vn.T, + * vm.T`. + * T represents the type of operands (e.g. for vn.2d, T = double). 
+ * I represents the number of elements in the output array to be updated (e.g. + * for vd.8b I = 8). + * Returns correctly formatted RegisterValue. */ +template +RegisterValue vecFabd(std::vector& operands) { + const T* n = operands[0].getAsVector(); + const T* m = operands[1].getAsVector(); + T out[16 / sizeof(T)] = {0}; + for (int i = 0; i < I; i++) { + out[i] = std::fabs(n[i] - m[i]); + } + return {out, 256}; +} + +/** Helper function for NEON instructions with the format `fabs vd, vn`. + * T represents the type of operands (e.g. for vn.2d, T = double). + * I represents the number of elements in the output array to be updated (e.g. + * for vd.8b I = 8). + * Returns correctly formatted RegisterValue. */ +template +RegisterValue vecFabs_2ops(std::vector& operands) { + const T* n = operands[0].getAsVector(); + T out[16 / sizeof(T)] = {0}; + for (int i = 0; i < I; i++) { + out[i] = std::fabs(n[i]); + } + return {out, 256}; +} + +/** Helper function for instructions with the format `fcm vd, vn, `. + * T represents operand type (e.g. vd.2d is double). + * C represents comparison type (e.g. for T=float, comparison type is + * uint32_t). + * I represents the number of elements in the output array to be + * updated (e.g. for vd.8b I = 8). + * Returns correctly formatted RegisterValue. */ +template +RegisterValue vecFCompare(std::vector& operands, bool cmpToZero, + std::function func) { + const T* n = operands[0].getAsVector(); + const T* m; + if (!cmpToZero) m = operands[1].getAsVector(); + C out[16 / sizeof(C)] = {0}; + for (int i = 0; i < I; i++) { + out[i] = func(n[i], cmpToZero ? static_cast(0) : m[i]) + ? static_cast(-1) + : 0; + } + return {out, 256}; +} + +/** Helper function for NEON instructions with the format `fcvtl{2} vd, vn`. + * D represents the dest. vector register type (e.g. vd.2d would be double). + * N represents the source vector register type (e.g. vd.4s would be float). + * I represents the number of elements in the output array to be updated (e.g. 
+ * for vd.8b I = 8). + * Returns correctly formatted RegisterValue. */ +template +RegisterValue vecFcvtl(std::vector& operands, bool isFcvtl2) { + const N* n = operands[0].getAsVector(); + D out[16 / sizeof(D)] = {0}; + for (int i = (isFcvtl2 ? I : 0); i < (isFcvtl2 ? (I * 2) : I); i++) { + out[isFcvtl2 ? (i - I) : i] = static_cast(n[i]); + } + return {out, 256}; +} + +/** Helper function for NEON instructions with the format `fcvtn{2} vd, vn`. + * D represents the dest. vector register type (e.g. vd.2s would be float). + * N represents the source vector register type (e.g. vd.2d would be double). + * I represents the number of elements in the output array to be updated (e.g. + * for vd.8b I = 8). + * Returns correctly formatted RegisterValue. */ +template +RegisterValue vecFcvtn(std::vector& operands, bool isFcvtn2) { + const N* n = operands[0].getAsVector(); + D out[16 / sizeof(D)] = {0}; + for (int i = (isFcvtn2 ? (I / 2) : 0); i < I; i++) { + out[i] = static_cast(n[isFcvtn2 ? (i - (I / 2)) : i]); + } + return {out, 256}; +} + +/** Helper function for NEON instructions with the format `fcvtzs vd, vn`. + * D represents the dest. vector register type (e.g. vd.2s would be float). + * N represents the source vector register type (e.g. vd.2d would be double). + * I represents the number of elements in the output array to be updated (e.g. + * for vd.8b I = 8). + * Returns correctly formatted RegisterValue. */ +template +RegisterValue vecFcvtzs(std::vector& operands) { + const N* n = operands[0].getAsVector(); + D out[16 / sizeof(D)] = {0}; + // TODO: Handle NaNs, denorms, and saturation + for (int i = 0; i < I; i++) { + out[i] = static_cast(std::trunc(n[i])); + } + return {out, 256}; +} + +/** Helper function for NEON instructions with the format `fmla vd, + * vn, vm`. + * T represents the type of operands (e.g. for vn.2d, T = double). + * I represents the number of elements in the output array to be updated (e.g. + * for vd.8b I = 8). 
+ * Returns correctly formatted RegisterValue. */ +template +RegisterValue vecFmla_3vecs(std::vector& operands) { + const T* d = operands[0].getAsVector(); + const T* n = operands[1].getAsVector(); + const T* m = operands[2].getAsVector(); + T out[16 / sizeof(T)] = {0}; + for (int i = 0; i < I; i++) { + out[i] = d[i] + n[i] * m[i]; + } + return {out, 256}; +} + +/** Helper function for NEON instructions with the format `fmla vd, + * vn, vm[index]`. + * T represents the type of operands (e.g. for vn.2d, T = double). + * I represents the number of elements in the output array to be updated (e.g. + * for vd.8b I = 8). + * Returns correctly formatted RegisterValue. */ +template +RegisterValue vecFmlaIndexed_3vecs( + std::vector& operands, + const simeng::arch::aarch64::InstructionMetadata& metadata) { + const T* d = operands[0].getAsVector(); + const T* n = operands[1].getAsVector(); + int index = metadata.operands[2].vector_index; + const T m = operands[2].getAsVector()[index]; + T out[16 / sizeof(T)] = {0}; + for (int i = 0; i < I; i++) { + out[i] = d[i] + n[i] * m; + } + return {out, 256}; +} + +/** Helper function for NEON instructions with the format `fmls vd, + * vn, vm`. + * T represents the type of operands (e.g. for vn.2d, T = double). + * I represents the number of elements in the output array to be updated (e.g. + * for vd.8b I = 8). + * Returns correctly formatted RegisterValue. */ +template +RegisterValue vecFmls_3vecs(std::vector& operands) { + const T* d = operands[0].getAsVector(); + const T* n = operands[1].getAsVector(); + const T* m = operands[2].getAsVector(); + T out[16 / sizeof(T)] = {0}; + for (int i = 0; i < I; i++) { + out[i] = d[i] - (n[i] * m[i]); + } + return {out, 256}; +} + +/** Helper function for NEON instructions with the format `fmls vd, + * vn, vm[index]`. + * T represents the type of operands (e.g. for vn.2d, T = double). + * I represents the number of elements in the output array to be updated (e.g. + * for vd.8b I = 8). 
+ * Returns correctly formatted RegisterValue. */ +template +RegisterValue vecFmlsIndexed_3vecs( + std::vector& operands, + const simeng::arch::aarch64::InstructionMetadata& metadata) { + const T* d = operands[0].getAsVector(); + const T* n = operands[1].getAsVector(); + int index = metadata.operands[2].vector_index; + const T m = operands[2].getAsVector()[index]; + T out[16 / sizeof(T)] = {0}; + for (int i = 0; i < I; i++) { + out[i] = d[i] - n[i] * m; + } + return {out, 256}; +} + +/** Helper function for NEON instructions with the format `fmul rd, + * rn, vm[index]`. + * T represents the type of operands (e.g. for vn.2d, T = double). + * I represents the number of elements in the output array to be updated (e.g. + * for vd.8b I = 8). + * Returns correctly formatted RegisterValue. */ +template +RegisterValue vecFmulIndexed_vecs( + std::vector& operands, + const simeng::arch::aarch64::InstructionMetadata& metadata) { + int index = metadata.operands[2].vector_index; + const T* n = operands[0].getAsVector(); + const T m = operands[1].getAsVector()[index]; + T out[16 / sizeof(T)] = {0}; + for (int i = 0; i < I; i++) { + out[i] = n[i] * m; + } + return {out, 256}; +} + +/** Helper function for NEON instructions with the format `fneg vd, vn`. + * T represents the type of operands (e.g. for vn.2d, T = double). + * I represents the number of elements in the output array to be updated (e.g. + * for vd.8b I = 8). + * Returns correctly formatted RegisterValue. */ +template +RegisterValue vecFneg_2ops(std::vector& operands) { + const T* n = operands[0].getAsVector(); + T out[16 / sizeof(T)] = {0}; + for (int i = 0; i < I; i++) { + out[i] = -n[i]; + } + return {out, 256}; +} + +/** Helper function for NEON instructions with the format `fsqrt vd, vn`. + * T represents the type of operands (e.g. for vn.2d, T = double). + * I represents the number of elements in the output array to be updated (e.g. + * for vd.8b I = 8). + * Returns correctly formatted RegisterValue. 
*/ +template +RegisterValue vecFsqrt_2ops(std::vector& operands) { + const T* n = operands[0].getAsVector(); + T out[16 / sizeof(T)] = {0}; + for (int i = 0; i < I; i++) { + out[i] = ::sqrt(n[i]); + } + return {out, 256}; +} + +/** Helper function for NEON instructions with the format `frsqrte vd, vn`. + * T represents the type of operands (e.g. for vn.2d, T = double). + * I represents the number of elements in the output array to be updated (e.g. + * for vd.8b I = 8). + * Returns correctly formatted RegisterValue. */ +template +RegisterValue vecFrsqrte_2ops(std::vector& operands) { + const T* n = operands[0].getAsVector(); + T out[16 / sizeof(T)] = {0}; + for (int i = 0; i < I; i++) { + out[i] = 1.0f / sqrtf(n[i]); + } + return {out, 256}; +} + +/** Helper function for NEON instructions with the format `frsqrts vd, vn, + * vm`. + * T represents the type of operands (e.g. for vn.2d, T = double). + * I represents the number of elements in the output array to be updated (e.g. + * for vd.8b I = 8). + * Returns correctly formatted RegisterValue. */ +template +RegisterValue vecFrsqrts_3ops(std::vector& operands) { + const T* n = operands[0].getAsVector(); + const T* m = operands[1].getAsVector(); + T out[16 / sizeof(T)] = {0}; + for (int i = 0; i < I; i++) { + out[i] = (3.0f - n[i] * m[i]) / 2.0f; + } + return {out, 256}; +} + +/** Helper function for NEON instructions with the format `ins vd[index], + * vn[index]`. + * T represents the type of operands (e.g. for vn.2d, T = uint64_t). + * I represents the number of elements in the output array to be updated (e.g. + * for vd.8b I = 8). + * Returns correctly formatted RegisterValue. 
*/ +template +RegisterValue vecIns_2Index( + std::vector& operands, + const simeng::arch::aarch64::InstructionMetadata& metadata) { + const T* d = operands[0].getAsVector(); + const T* n = operands[1].getAsVector(); + + T out[16 / sizeof(T)] = {0}; + for (int i = 0; i < I; i++) { + out[i] = d[i]; + } + out[metadata.operands[0].vector_index] = n[metadata.operands[1].vector_index]; + return {out, 256}; +} + +/** Helper function for NEON instructions with the format `ins vd[index], + * rn`. + * T represents the vector register type (e.g. vd.16b would be uint8_t). + * R represents the type of the GPR (e.g. wn would be uint32_t). + * I represents the number of elements in the output array to be updated (e.g. + * for vd.8b I = 8). + * Returns correctly formatted RegisterValue. */ +template +RegisterValue vecInsIndex_gpr( + std::vector& operands, + const simeng::arch::aarch64::InstructionMetadata& metadata) { + const T* d = operands[0].getAsVector(); + const T n = operands[1].get(); + T out[16 / sizeof(T)] = {0}; + + for (int i = 0; i < I; i++) { + out[i] = d[i]; + } + out[metadata.operands[0].vector_index] = n; + return {out, 256}; +} + +/** Helper function for NEON instructions with the format ` vd, + * vn`. + * T represents the type of operands (e.g. for vn.2d, T = uint64_t). + * I represents the number of elements in the output array to be updated (e.g. + * for vd.8b I = 8). + * Returns correctly formatted RegisterValue. */ +template +RegisterValue vecLogicOp_2vecs(std::vector& operands, + std::function func) { + const T* n = operands[0].getAsVector(); + T out[16 / sizeof(T)] = {0}; + for (int i = 0; i < I; i++) { + out[i] = func(n[i]); + } + return {out, 256}; +} + +/** Helper function for NEON instructions with the format ` vd, + * vn, vm`. + * T represents the type of operands (e.g. for vn.2d, T = uint64_t). + * I represents the number of elements in the output array to be updated (e.g. + * for vd.8b I = 8). + * Returns correctly formatted RegisterValue. 
*/ +template +RegisterValue vecLogicOp_3vecs(std::vector& operands, + std::function func) { + const T* n = operands[0].getAsVector(); + const T* m = operands[1].getAsVector(); + T out[16 / sizeof(T)] = {0}; + for (int i = 0; i < I; i++) { + out[i] = func(n[i], m[i]); + } + return {out, 256}; +} + +/** Helper function for NEON instructions with the format `umaxp vd, vn, vm`. + * T represents the type of operands (e.g. for vn.2d, T = uint64_t). + * I represents the number of elements in the output array to be updated (e.g. + * for vd.8b I = 8). + * Returns correctly formatted RegisterValue. */ +template +RegisterValue vecUMaxP(std::vector& operands) { + const T* n = operands[0].getAsVector(); + const T* m = operands[1].getAsVector(); + + T out[I]; + for (int i = 0; i < I; i++) { + out[i] = std::max(n[i], m[i]); + } + return {out, 256}; +} + +/** Helper function for NEON instructions with the format `uminp vd, vn, vm`. + * T represents the type of operands (e.g. for vn.2d, T = uint64_t). + * I represents the number of elements in the output array to be updated (e.g. + * for vd.8b I = 8). + * Returns correctly formatted RegisterValue. */ +template +RegisterValue vecUMinP(std::vector& operands) { + const T* n = operands[0].getAsVector(); + const T* m = operands[1].getAsVector(); + + T out[I]; + for (int i = 0; i < I; i++) { + out[i] = std::min(n[i], m[i]); + } + return {out, 256}; +} + +/** Helper function for NEON instructions with the format `maxnmp rd, vn`. + * T represents the type of operands (e.g. for vn.2d, T = uint64_t). + * I represents the number of elements in the output array to be updated (e.g. + * for vd.8b I = 8). + * Returns correctly formatted RegisterValue. */ +template +RegisterValue vecMaxnmp_2ops(std::vector& operands) { + const T* n = operands[0].getAsVector(); + bool isFP = std::is_floating_point::value; + + T out = n[0]; + for (int i = 1; i < I; i++) { + out = isFP ? 
std::fmax(n[i], out) : std::max(n[i], out); + } + return {out, 256}; +} + +/** Helper function for NEON instructions with the format `sminv sd, vn`. + * T represents the type of operands (e.g. for vn.2d, T = uint64_t). + * I represents the number of elements in the output array to be updated (e.g. + * for vd.8b I = 8). + * Returns correctly formatted RegisterValue. */ +template +RegisterValue vecMinv_2ops(std::vector& operands) { + const T* n = operands[0].getAsVector(); + bool isFP = std::is_floating_point::value; + + T out = n[0]; + for (int i = 1; i < I; i++) { + out = isFP ? std::fmin(n[i], out) : std::min(n[i], out); + } + return {out, 256}; +} + +/** Helper function for NEON instructions with the format `movi vd, #imm`. + * T represents the type of operands (e.g. for vn.2d, T = uint64_t). + * I represents the number of elements in the output array to be updated (e.g. + * for vd.8b I = 8). + * Returns correctly formatted RegisterValue. */ +template +RegisterValue vecMovi_imm( + const simeng::arch::aarch64::InstructionMetadata& metadata) { + bool isFP = std::is_floating_point::value; + const T imm = + isFP ? metadata.operands[1].fp : static_cast(metadata.operands[1].imm); + T out[16 / sizeof(T)] = {0}; + std::fill_n(std::begin(out), I, imm); + return {out, 256}; +} + +/** Helper function for NEON instructions with the format `movi vd, #imm{, lsl + * #shift}`. + * T represents the type of operands (e.g. for vn.2d, T = uint64_t). + * I represents the number of elements in the output array to be updated (e.g. + * for vd.8b I = 8). + * Returns correctly formatted RegisterValue. */ +template +RegisterValue vecMoviShift_imm( + const simeng::arch::aarch64::InstructionMetadata& metadata, bool negate) { + const T bits = shiftValue(static_cast(metadata.operands[1].imm), + metadata.operands[1].shift.type, + metadata.operands[1].shift.value); + T out[16 / sizeof(T)] = {0}; + std::fill_n(std::begin(out), I, negate ? 
~bits : bits); + return {out, 256}; +} + +/** Helper function for NEON instructions with the format `scvtf vd, + * vn`. + * D represents the destination vector register type (e.g. for vd.2d, D = + * double). + * N represents the source vector register type (e.g. for vn.2s N = int32_t). + * I represents the number of elements in the output array to be + * updated (e.g. for vd.8b I = 8). + * Returns correctly formated RegisterValue. */ +template +RegisterValue vecScvtf_2vecs(std::vector& operands, + std::function func) { + const N* n = operands[0].getAsVector(); + D out[16 / sizeof(D)] = {0}; + for (int i = 0; i < I; i++) { + out[i] = static_cast(n[i]); + } + return {out, 256}; +} + +/** Helper function for NEON instructions with the format `shl vd, vn, #imm`. + * T represents the type of operands (e.g. for vn.2d, T = uint64_t). + * I represents the number of elements in the output array to be updated (e.g. + * for vd.8b I = 8). + * Returns correctly formatted RegisterValue. */ +template +RegisterValue vecShlShift_vecImm( + std::vector& operands, + const simeng::arch::aarch64::InstructionMetadata& metadata) { + const T* n = operands[0].getAsVector(); + int64_t shift = metadata.operands[2].imm; + T out[16 / sizeof(T)] = {0}; + for (int i = 0; i < I; i++) { + out[i] = static_cast(n[i] << shift); + } + return {out, 256}; +} + +/** Helper function for NEON instructions with the format `shll{2} vd, vn, + * #imm`. + * D represents the destination register type (e.g. for vd.2d D = int64_t). + * N represents the source register type (e.g. for vd.4s D = int32_t). + * I represents the number of elements in the output array to be + * updated (e.g. for vd.8h the I = 8). + * Returns correctly formatted RegisterValue. 
*/ +template +RegisterValue vecShllShift_vecImm( + std::vector& operands, + const simeng::arch::aarch64::InstructionMetadata& metadata, bool isShll2) { + const N* n = operands[0].getAsVector(); + uint64_t shift = metadata.operands[2].imm; + D out[16 / sizeof(D)] = {0}; + int index = isShll2 ? I : 0; + for (int i = 0; i < I; i++) { + out[i] = n[index] << shift; + index++; + } + return {out, 256}; +} + +/** Helper function for NEON instructions with the format `shrn vd, vn, #imm`. + * Ta represents the type of source operand (e.g. for vn.2d, Ta = uint64_t). + * Tb represents the type of destination operand (e.g. for vd.2s, Tb = + * uint32_t). + * I represents the number of elements in the output array to be + * updated (e.g. for vd.8b I = 8). + * Returns correctly formatted RegisterValue. + */ +template +RegisterValue vecShrnShift_imm( + std::vector& operands, + const simeng::arch::aarch64::InstructionMetadata& metadata, + bool shrn2 = false) { + const Ta* n = operands[0].getAsVector(); + + uint64_t shift = metadata.operands[2].imm; + + Tb out[16 / sizeof(Tb)] = {0}; + int index = shrn2 ? I : 0; + for (int i = 0; i < I; i++) { + out[index + i] = static_cast(std::trunc(n[i] >> shift)); + } + return {out, 256}; +} + +/** Helper function for NEON instructions with the format `sshr vd, vn, #imm`. + * T represents the type of operands (e.g. for vn.2d, T = uint64_t). + * I represents the number of elements in the output array to be updated (e.g. + * for vd.8b I = 8). + * Returns correctly formatted RegisterValue. */ +template +RegisterValue vecSshrShift_imm( + std::vector& operands, + const simeng::arch::aarch64::InstructionMetadata& metadata) { + const T* n = operands[1].getAsVector(); + uint64_t shift = metadata.operands[2].imm; + T out[16 / sizeof(T)] = {0}; + for (int i = 0; i < I; i++) { + out[i] = static_cast(std::trunc(n[i] >> shift)); + } + return {out, 256}; +} + +/** Helper function for NEON instructions with the format `addp rd, vn`. 
+ * T represents the type of operands (e.g. for vn.2d, T = uint64_t). + * I represents the number of elements in the output array to be updated (e.g. + * for vd.8b I = 8). + * Returns correctly formatted RegisterValue. */ +template +RegisterValue vecSumElems_2ops(std::vector& operands) { + const T* n = operands[0].getAsVector(); + T out = 0; + for (int i = 0; i < I; i++) { + out += n[i]; + } + return {out, 256}; +} + +/** Helper function for NEON instructions with the format `xtn{2} vd, vn`. + * D represents the type of the dest. register (e.g. for vd.s, D = uint32_t). + * N represents the type of the source register (e.g. for vn.d, N = uint64_t). + * I represents the number of elements in the output vector to be + * updated (i.e. for vd.4s I = 4). + * Returns correctly formatted RegisterValue. */ +template +RegisterValue vecXtn(std::vector& operands, bool isXtn2) { + const D* d; + if (isXtn2) d = operands[0].getAsVector(); + const N* n = operands[isXtn2 ? 1 : 0].getAsVector(); + + D out[16 / sizeof(D)] = {0}; + int index = 0; + + for (int i = 0; i < I; i++) { + if (isXtn2 & (i < (I / 2))) { out[i] = d[i]; - } - out[metadata.operands[0].vector_index] = n; - return {out, 256}; - } - - /** Helper function for NEON instructions with the format ` vd, - * vn`. - * T represents the type of operands (e.g. for vn.2d, T = uint64_t). - * I represents the number of elements in the output array to be updated (e.g. - * for vd.8b I = 8). - * Returns correctly formatted RegisterValue. */ - template - static RegisterValue vecLogicOp_2vecs(std::vector& operands, - std::function func) { - const T* n = operands[0].getAsVector(); - T out[16 / sizeof(T)] = {0}; - for (int i = 0; i < I; i++) { - out[i] = func(n[i]); - } - return {out, 256}; - } - - /** Helper function for NEON instructions with the format ` vd, - * vn, vm`. - * T represents the type of operands (e.g. for vn.2d, T = uint64_t). - * I represents the number of elements in the output array to be updated (e.g. 
- * for vd.8b I = 8). - * Returns correctly formatted RegisterValue. */ - template - static RegisterValue vecLogicOp_3vecs(std::vector& operands, - std::function func) { - const T* n = operands[0].getAsVector(); - const T* m = operands[1].getAsVector(); - T out[16 / sizeof(T)] = {0}; - for (int i = 0; i < I; i++) { - out[i] = func(n[i], m[i]); - } - return {out, 256}; - } - - /** Helper function for NEON instructions with the format `umaxp vd, vn, vm`. - * T represents the type of operands (e.g. for vn.2d, T = uint64_t). - * I represents the number of elements in the output array to be updated (e.g. - * for vd.8b I = 8). - * Returns correctly formatted RegisterValue. */ - template - static RegisterValue vecUMaxP(std::vector& operands) { - const T* n = operands[0].getAsVector(); - const T* m = operands[1].getAsVector(); - - T out[I]; - for (int i = 0; i < I; i++) { - out[i] = std::max(n[i], m[i]); - } - return {out, 256}; - } - - /** Helper function for NEON instructions with the format `uminp vd, vn, vm`. - * T represents the type of operands (e.g. for vn.2d, T = uint64_t). - * I represents the number of elements in the output array to be updated (e.g. - * for vd.8b I = 8). - * Returns correctly formatted RegisterValue. */ - template - static RegisterValue vecUMinP(std::vector& operands) { - const T* n = operands[0].getAsVector(); - const T* m = operands[1].getAsVector(); - - T out[I]; - for (int i = 0; i < I; i++) { - out[i] = std::min(n[i], m[i]); - } - return {out, 256}; - } - - /** Helper function for NEON instructions with the format `maxnmp rd, vn`. - * T represents the type of operands (e.g. for vn.2d, T = uint64_t). - * I represents the number of elements in the output array to be updated (e.g. - * for vd.8b I = 8). - * Returns correctly formatted RegisterValue. 
*/ - template - static RegisterValue vecMaxnmp_2ops(std::vector& operands) { - const T* n = operands[0].getAsVector(); - bool isFP = std::is_floating_point::value; - - T out = n[0]; - for (int i = 1; i < I; i++) { - out = isFP ? std::fmax(n[i], out) : std::max(n[i], out); - } - return {out, 256}; - } - - /** Helper function for NEON instructions with the format `sminv sd, vn`. - * T represents the type of operands (e.g. for vn.2d, T = uint64_t). - * I represents the number of elements in the output array to be updated (e.g. - * for vd.8b I = 8). - * Returns correctly formatted RegisterValue. */ - template - static RegisterValue vecMinv_2ops(std::vector& operands) { - const T* n = operands[0].getAsVector(); - bool isFP = std::is_floating_point::value; - - T out = n[0]; - for (int i = 1; i < I; i++) { - out = isFP ? std::fmin(n[i], out) : std::min(n[i], out); - } - return {out, 256}; - } - - /** Helper function for NEON instructions with the format `movi vd, #imm`. - * T represents the type of operands (e.g. for vn.2d, T = uint64_t). - * I represents the number of elements in the output array to be updated (e.g. - * for vd.8b I = 8). - * Returns correctly formatted RegisterValue. */ - template - static RegisterValue vecMovi_imm( - const simeng::arch::aarch64::InstructionMetadata& metadata) { - bool isFP = std::is_floating_point::value; - const T imm = isFP ? metadata.operands[1].fp - : static_cast(metadata.operands[1].imm); - T out[16 / sizeof(T)] = {0}; - std::fill_n(std::begin(out), I, imm); - return {out, 256}; - } - - /** Helper function for NEON instructions with the format `movi vd, #imm{, lsl - * #shift}`. - * T represents the type of operands (e.g. for vn.2d, T = uint64_t). - * I represents the number of elements in the output array to be updated (e.g. - * for vd.8b I = 8). - * Returns correctly formatted RegisterValue. 
*/ - template - static RegisterValue vecMoviShift_imm( - const simeng::arch::aarch64::InstructionMetadata& metadata, bool negate) { - const T bits = shiftValue(static_cast(metadata.operands[1].imm), - metadata.operands[1].shift.type, - metadata.operands[1].shift.value); - T out[16 / sizeof(T)] = {0}; - std::fill_n(std::begin(out), I, negate ? ~bits : bits); - return {out, 256}; - } - - /** Helper function for NEON instructions with the format `scvtf vd, - * vn`. - * D represents the destination vector register type (e.g. for vd.2d, D = - * double). - * N represents the source vector register type (e.g. for vn.2s N = int32_t). - * I represents the number of elements in the output array to be - * updated (e.g. for vd.8b I = 8). - * Returns correctly formated RegisterValue. */ - template - static RegisterValue vecScvtf_2vecs(std::vector& operands, - std::function func) { - const N* n = operands[0].getAsVector(); - D out[16 / sizeof(D)] = {0}; - for (int i = 0; i < I; i++) { - out[i] = static_cast(n[i]); - } - return {out, 256}; - } - - /** Helper function for NEON instructions with the format `shl vd, vn, #imm`. - * T represents the type of operands (e.g. for vn.2d, T = uint64_t). - * I represents the number of elements in the output array to be updated (e.g. - * for vd.8b I = 8). - * Returns correctly formatted RegisterValue. */ - template - static RegisterValue vecShlShift_vecImm( - std::vector& operands, - const simeng::arch::aarch64::InstructionMetadata& metadata) { - const T* n = operands[0].getAsVector(); - int64_t shift = metadata.operands[2].imm; - T out[16 / sizeof(T)] = {0}; - for (int i = 0; i < I; i++) { - out[i] = static_cast(n[i] << shift); - } - return {out, 256}; - } - - /** Helper function for NEON instructions with the format `shll{2} vd, vn, - * #imm`. - * D represents the destination register type (e.g. for vd.2d D = int64_t). - * N represents the source register type (e.g. for vd.4s D = int32_t). 
- * I represents the number of elements in the output array to be - * updated (e.g. for vd.8h the I = 8). - * Returns correctly formatted RegisterValue. */ - template - static RegisterValue vecShllShift_vecImm( - std::vector& operands, - const simeng::arch::aarch64::InstructionMetadata& metadata, - bool isShll2) { - const N* n = operands[0].getAsVector(); - uint64_t shift = metadata.operands[2].imm; - D out[16 / sizeof(D)] = {0}; - int index = isShll2 ? I : 0; - for (int i = 0; i < I; i++) { - out[i] = n[index] << shift; + } else { + out[i] = static_cast(n[index]); index++; } - return {out, 256}; - } - - /** Helper function for NEON instructions with the format `shrn vd, vn, #imm`. - * Ta represents the type of source operand (e.g. for vn.2d, Ta = uint64_t). - * Tb represents the type of destination operand (e.g. for vd.2s, Tb = - * uint32_t). - * I represents the number of elements in the output array to be - * updated (e.g. for vd.8b I = 8). - * Returns correctly formatted RegisterValue. - */ - template - static RegisterValue vecShrnShift_imm( - std::vector& operands, - const simeng::arch::aarch64::InstructionMetadata& metadata, - bool shrn2 = false) { - const Ta* n = operands[0].getAsVector(); - - uint64_t shift = metadata.operands[2].imm; - - Tb out[16 / sizeof(Tb)] = {0}; - int index = shrn2 ? I : 0; - for (int i = 0; i < I; i++) { - out[index + i] = static_cast(std::trunc(n[i] >> shift)); - } - return {out, 256}; - } - - /** Helper function for NEON instructions with the format `sshr vd, vn, #imm`. - * T represents the type of operands (e.g. for vn.2d, T = uint64_t). - * I represents the number of elements in the output array to be updated (e.g. - * for vd.8b I = 8). - * Returns correctly formatted RegisterValue. 
*/ - template - static RegisterValue vecSshrShift_imm( - std::vector& operands, - const simeng::arch::aarch64::InstructionMetadata& metadata) { - const T* n = operands[1].getAsVector(); - uint64_t shift = metadata.operands[2].imm; - T out[16 / sizeof(T)] = {0}; - for (int i = 0; i < I; i++) { - out[i] = static_cast(std::trunc(n[i] >> shift)); - } - return {out, 256}; - } - - /** Helper function for NEON instructions with the format `addp rd, vn`. - * T represents the type of operands (e.g. for vn.2d, T = uint64_t). - * I represents the number of elements in the output array to be updated (e.g. - * for vd.8b I = 8). - * Returns correctly formatted RegisterValue. */ - template - static RegisterValue vecSumElems_2ops(std::vector& operands) { - const T* n = operands[0].getAsVector(); - T out = 0; - for (int i = 0; i < I; i++) { - out += n[i]; - } - return {out, 256}; - } - - /** Helper function for NEON instructions with the format `xtn{2} vd, vn`. - * D represents the type of the dest. register (e.g. for vd.s, D = uint32_t). - * N represents the type of the source register (e.g. for vn.d, N = uint64_t). - * I represents the number of elements in the output vector to be - * updated (i.e. for vd.4s I = 4). - * Returns correctly formatted RegisterValue. */ - template - static RegisterValue vecXtn(std::vector& operands, - bool isXtn2) { - const D* d; - if (isXtn2) d = operands[0].getAsVector(); - const N* n = operands[isXtn2 ? 1 : 0].getAsVector(); - - D out[16 / sizeof(D)] = {0}; - int index = 0; - - for (int i = 0; i < I; i++) { - if (isXtn2 & (i < (I / 2))) { - out[i] = d[i]; - } else { - out[i] = static_cast(n[index]); - index++; - } - } - return {out, 256}; - } - - /** Helper function for NEON instructions with the format `tbl Vd.Ta, {Vn.16b, - * ... Vn+3.16b}, Vm.Ta`. - * I represents the number of elements in the output vector to be updated - * (i.e. for vd.8b I = 8, vd.16b I = 16). Only 8 or 16 is valid for TBL - * instructions. 
- * Returns correctly formatted RegisterValue. */ - template - static RegisterValue vecTbl( - std::vector& operands, - const simeng::arch::aarch64::InstructionMetadata& metadata) { - // Vd and Vm are only valid in format 8b or 16b - assert(I == 8 || I == 16); - - // Vm contains the indices to fetch from table - const int8_t* Vm = - operands[metadata.operandCount - 2] - .getAsVector(); // final operand is vecMovi_imm - - // All operands except the first and last are the vector registers to - // construct the table from - const uint8_t n_table_regs = metadata.operandCount - 2; - - // Create table from vectors. All table operands must be of 16b format. - int tableSize = 16 * n_table_regs; - uint8_t table[tableSize]; - for (int i = 0; i < n_table_regs; i++) { - const int8_t* currentVector = operands[i].getAsVector(); - for (int j = 0; j < 16; j++) { - table[16 * i + j] = currentVector[j]; - } - } - - int8_t out[16 / sizeof(int8_t)] = {0}; - for (int i = 0; i < I; i++) { - unsigned int index = Vm[i]; - - // If an index is out of range for the table, the result for that lookup - // is 0 - if (index >= tableSize) { - out[i] = 0; - continue; - } - - out[i] = table[index]; - } - return {out, 256}; - } - - /** Helper function for NEON instructions with the format `rev<16,32,64> Vd.T, - * Vn.T`. - * T represents the type of elements to be reversed (e.g. for Vn.d, T = - * uint64_t). - * V represents the variant: 16-bit, 32-bit, 64-bit. (e.g. for 64-bit each - * doubleword of the vector will be reversed). - * I represents the number of elements in the output array to be updated (e.g. - * for vd.8b I = 8). - * It is only valid for T to be a same or smaller width than V. - * Returns correctly formatted RegisterValue. 
*/ - template - static RegisterValue vecRev(std::vector& operands) { - const T* source = operands[0].getAsVector(); - int element_size = (sizeof(T) * 8); - int datasize = I * element_size; - int container_size = V; - int n_containers = datasize / container_size; - int elements_per_container = container_size / element_size; - - int element = 0; - int rev_element; - T out[16 / sizeof(T)] = {0}; - for (int c = 0; c < n_containers; c++) { - rev_element = element + elements_per_container - 1; - for (int e = 0; e < elements_per_container; e++) { - out[rev_element] = source[element]; - element++; - rev_element--; - } - } - return {out, 256}; - } - - /** Helper function for NEON instructions with the format `trn1 Vd.T, Vn.T, - * Vm.T`. - * T represents the type of operands (e.g. for vn.d, T = uint64_t). - * I represents the number of operands (e.g. for vn.8b, I = 8). - * Returns formatted Register Value. */ - template - static RegisterValue vecTrn1(std::vector& operands) { - const T* n = operands[0].getAsVector(); - const T* m = operands[1].getAsVector(); - - T out[16 / sizeof(T)] = {0}; - for (int i = 0; i < I / 2; i++) { - out[2 * i] = n[2 * i]; - out[(2 * i) + 1] = m[2 * i]; - } - - return {out, 256}; } + return {out, 256}; +} + +/** Helper function for NEON instructions with the format `tbl Vd.Ta, {Vn.16b, + * ... Vn+3.16b}, Vm.Ta`. + * I represents the number of elements in the output vector to be updated + * (i.e. for vd.8b I = 8, vd.16b I = 16). Only 8 or 16 is valid for TBL + * instructions. + * Returns correctly formatted RegisterValue. 
*/ +template +RegisterValue vecTbl( + std::vector& operands, + const simeng::arch::aarch64::InstructionMetadata& metadata) { + // Vd and Vm are only valid in format 8b or 16b + assert(I == 8 || I == 16); + + // Vm contains the indices to fetch from table + const int8_t* Vm = + operands[metadata.operandCount - 2] + .getAsVector(); // final operand is vecMovi_imm + + // All operands except the first and last are the vector registers to + // construct the table from + const uint8_t n_table_regs = metadata.operandCount - 2; + + // Create table from vectors. All table operands must be of 16b format. + int tableSize = 16 * n_table_regs; + uint8_t table[tableSize]; + for (int i = 0; i < n_table_regs; i++) { + const int8_t* currentVector = operands[i].getAsVector(); + for (int j = 0; j < 16; j++) { + table[16 * i + j] = currentVector[j]; + } + } + + int8_t out[16 / sizeof(int8_t)] = {0}; + for (int i = 0; i < I; i++) { + unsigned int index = Vm[i]; + + // If an index is out of range for the table, the result for that lookup + // is 0 + if (index >= tableSize) { + out[i] = 0; + continue; + } + + out[i] = table[index]; + } + return {out, 256}; +} + +/** Helper function for NEON instructions with the format `rev<16,32,64> Vd.T, + * Vn.T`. + * T represents the type of elements to be reversed (e.g. for Vn.d, T = + * uint64_t). + * V represents the variant: 16-bit, 32-bit, 64-bit. (e.g. for 64-bit each + * doubleword of the vector will be reversed). + * I represents the number of elements in the output array to be updated (e.g. + * for vd.8b I = 8). + * It is only valid for T to be a same or smaller width than V. + * Returns correctly formatted RegisterValue. 
*/ +template +RegisterValue vecRev(std::vector& operands) { + const T* source = operands[0].getAsVector(); + int element_size = (sizeof(T) * 8); + int datasize = I * element_size; + int container_size = V; + int n_containers = datasize / container_size; + int elements_per_container = container_size / element_size; + + int element = 0; + int rev_element; + T out[16 / sizeof(T)] = {0}; + for (int c = 0; c < n_containers; c++) { + rev_element = element + elements_per_container - 1; + for (int e = 0; e < elements_per_container; e++) { + out[rev_element] = source[element]; + element++; + rev_element--; + } + } + return {out, 256}; +} + +/** Helper function for NEON instructions with the format `trn1 Vd.T, Vn.T, + * Vm.T`. + * T represents the type of operands (e.g. for vn.d, T = uint64_t). + * I represents the number of operands (e.g. for vn.8b, I = 8). + * Returns formatted Register Value. */ +template +RegisterValue vecTrn1(std::vector& operands) { + const T* n = operands[0].getAsVector(); + const T* m = operands[1].getAsVector(); + + T out[16 / sizeof(T)] = {0}; + for (int i = 0; i < I / 2; i++) { + out[2 * i] = n[2 * i]; + out[(2 * i) + 1] = m[2 * i]; + } + + return {out, 256}; +} + +/** Helper function for NEON instructions with the format `trn2 Vd.T, Vn.T, + * Vm.T`. + * T represents the type of operands (e.g. for Vn.d, T = uint64_t). + * I represents the number of operands (e.g. for Vn.8b, I = 8). + * Returns formatted Register Value. */ +template +RegisterValue vecTrn2(std::vector& operands) { + const T* n = operands[0].getAsVector(); + const T* m = operands[1].getAsVector(); + + T out[16 / sizeof(T)] = {0}; + for (int i = 0; i < I / 2; i++) { + out[2 * i] = n[(2 * i) + 1]; + out[(2 * i) + 1] = m[(2 * i) + 1]; + } + + return {out, 256}; +} + +/** Helper function for NEON instructions with the format `uzp<1,2> Vd.T, + * Vn.T, Vm.T`. + * T represents the type of operands (e.g. for Vn.d, T = uint64_t). + * I represents the number of operands (e.g. 
for Vn.8b, I = 8). + * Returns formatted Register Value. */ +template +RegisterValue vecUzp(std::vector& operands, bool isUzp1) { + const T* n = operands[0].getAsVector(); + const T* m = operands[1].getAsVector(); + + T out[16 / sizeof(T)] = {0}; + for (int i = 0; i < I / 2; i++) { + int index = isUzp1 ? (2 * i) : (2 * i) + 1; + out[i] = n[index]; + out[(I / 2) + i] = m[index]; + } + + return {out, 256}; +} + +/** Helper function for NEON instructions with the format `zip<1,2> vd.T, + * vn.T, vm.T`. + * T represents the type of operands (e.g. for vn.d, T = uint64_t). + * I represents the number of elements in the output array to be updated (e.g. + * for vn.8b, I = 8). + * Returns formatted Register Value. */ +template +RegisterValue vecZip(std::vector& operands, bool isZip2) { + const T* n = operands[0].getAsVector(); + const T* m = operands[1].getAsVector(); + + T out[16 / sizeof(T)] = {0}; + int index = isZip2 ? (I / 2) : 0; + for (int i = 0; i < I / 2; i++) { + out[2 * i] = n[index]; + out[(2 * i) + 1] = m[index]; + index++; + } + + return {out, 256}; +} - /** Helper function for NEON instructions with the format `trn2 Vd.T, Vn.T, - * Vm.T`. - * T represents the type of operands (e.g. for Vn.d, T = uint64_t). - * I represents the number of operands (e.g. for Vn.8b, I = 8). - * Returns formatted Register Value. */ - template - static RegisterValue vecTrn2(std::vector& operands) { - const T* n = operands[0].getAsVector(); - const T* m = operands[1].getAsVector(); - - T out[16 / sizeof(T)] = {0}; - for (int i = 0; i < I / 2; i++) { - out[2 * i] = n[(2 * i) + 1]; - out[(2 * i) + 1] = m[(2 * i) + 1]; - } - - return {out, 256}; - } - - /** Helper function for NEON instructions with the format `uzp<1,2> Vd.T, - * Vn.T, Vm.T`. - * T represents the type of operands (e.g. for Vn.d, T = uint64_t). - * I represents the number of operands (e.g. for Vn.8b, I = 8). - * Returns formatted Register Value. 
*/ - template - static RegisterValue vecUzp(std::vector& operands, - bool isUzp1) { - const T* n = operands[0].getAsVector(); - const T* m = operands[1].getAsVector(); - - T out[16 / sizeof(T)] = {0}; - for (int i = 0; i < I / 2; i++) { - int index = isUzp1 ? (2 * i) : (2 * i) + 1; - out[i] = n[index]; - out[(I / 2) + i] = m[index]; - } - - return {out, 256}; - } - - /** Helper function for NEON instructions with the format `zip<1,2> vd.T, - * vn.T, vm.T`. - * T represents the type of operands (e.g. for vn.d, T = uint64_t). - * I represents the number of elements in the output array to be updated (e.g. - * for vn.8b, I = 8). - * Returns formatted Register Value. */ - template - static RegisterValue vecZip(std::vector& operands, - bool isZip2) { - const T* n = operands[0].getAsVector(); - const T* m = operands[1].getAsVector(); - - T out[16 / sizeof(T)] = {0}; - int index = isZip2 ? (I / 2) : 0; - for (int i = 0; i < I / 2; i++) { - out[2 * i] = n[index]; - out[(2 * i) + 1] = m[index]; - index++; - } - - return {out, 256}; - } -}; } // namespace aarch64 } // namespace arch } // namespace simeng \ No newline at end of file diff --git a/src/include/simeng/arch/aarch64/helpers/store.hh b/src/include/simeng/arch/aarch64/helpers/store.hh deleted file mode 100644 index 18d3d6f915..0000000000 --- a/src/include/simeng/arch/aarch64/helpers/store.hh +++ /dev/null @@ -1,14 +0,0 @@ -#pragma once - -#include "auxiliaryFunctions.hh" - -namespace simeng { -namespace arch { -namespace aarch64 { -class storeHelp { - public: - static void tempFunc() { return; } -}; -} // namespace aarch64 -} // namespace arch -} // namespace simeng \ No newline at end of file diff --git a/src/include/simeng/arch/aarch64/helpers/sve.hh b/src/include/simeng/arch/aarch64/helpers/sve.hh index 0e9a800f9f..fa8df45589 100644 --- a/src/include/simeng/arch/aarch64/helpers/sve.hh +++ b/src/include/simeng/arch/aarch64/helpers/sve.hh @@ -8,1713 +8,1714 @@ namespace simeng { namespace arch { namespace aarch64 { 
-class sveHelp { - public: - /** Helper function for SVE instructions with the format `add zd, zn, zm`. - * T represents the type of operands (e.g. for zn.d, T = uint64_t). - * Returns correctly formatted RegisterValue. */ - template - static RegisterValue sveAdd_3ops(std::vector& operands, - const uint16_t VL_bits) { - const T* n = operands[0].getAsVector(); - const T* m = operands[1].getAsVector(); - - const uint16_t partition_num = VL_bits / (sizeof(T) * 8); - T out[256 / sizeof(T)] = {0}; - for (int i = 0; i < partition_num; i++) { - out[i] = n[i] + m[i]; - } - return {out, 256}; - } - - /** Helper function for SVE instructions with the format `add zd, zn, #imm`. - * T represents the type of operands (e.g. for zn.d, T = uint64_t). - * Returns correctly formatted RegisterValue. */ - template - static RegisterValue sveAdd_imm( - std::vector& operands, - const simeng::arch::aarch64::InstructionMetadata& metadata, - const uint16_t VL_bits) { - const T* n = operands[0].getAsVector(); - const T imm = static_cast(metadata.operands[2].imm); - - const uint16_t partition_num = VL_bits / (sizeof(T) * 8); - T out[256 / sizeof(T)] = {0}; - for (int i = 0; i < partition_num; i++) { - out[i] = n[i] + imm; - } - return {out, 256}; - } - - /** Helper function for SVE instructions with the format `add zdn, pg/m, zdn, - * const`. - * T represents the type of operands (e.g. for zn.d, T = uint64_t). - * Returns correctly formatted RegisterValue. */ - template - static RegisterValue sveAddPredicated_const( - std::vector& operands, - const simeng::arch::aarch64::InstructionMetadata& metadata, - const uint16_t VL_bits) { - bool isFP = std::is_floating_point::value; - const uint64_t* p = operands[0].getAsVector(); - const T* d = operands[1].getAsVector(); - const auto con = isFP ? 
metadata.operands[3].fp : metadata.operands[3].imm; - - const uint16_t partition_num = VL_bits / (sizeof(T) * 8); - T out[256 / sizeof(T)] = {0}; - for (int i = 0; i < partition_num; i++) { - uint64_t shifted_active = 1ull << ((i % (64 / sizeof(T))) * sizeof(T)); - if (p[i / (64 / sizeof(T))] & shifted_active) - out[i] = d[i] + con; - else - out[i] = d[i]; - } - return {out, 256}; - } - - /** Helper function for SVE instructions with the format `add zdn, pg/m, zdn, - * zm`. - * T represents the type of operands (e.g. for zdn.d, T = uint64_t). - * Returns correctly formatted RegisterValue. */ - template - static RegisterValue sveAddPredicated_vecs( - std::vector& operands, const uint16_t VL_bits) { - const uint64_t* p = operands[0].getAsVector(); - const T* d = operands[1].getAsVector(); - const T* m = operands[2].getAsVector(); - - const uint16_t partition_num = VL_bits / (sizeof(T) * 8); - T out[256 / sizeof(T)] = {0}; - for (int i = 0; i < partition_num; i++) { - uint64_t shifted_active = 1ull << ((i % (64 / sizeof(T))) * sizeof(T)); - if (p[i / (64 / sizeof(T))] & shifted_active) - out[i] = d[i] + m[i]; - else - out[i] = d[i]; - } - return {out, 256}; - } - - /** Helper function for NEON instructions with the format `addv dd, pg, zn`. - * T represents the type of operands (e.g. for zn.s, T = uint32_t). - * Returns correctly formatted RegisterValue. */ - template - static RegisterValue sveAddvPredicated(std::vector& operands, - const uint16_t VL_bits) { - const uint64_t* p = operands[0].getAsVector(); - const T* n = operands[1].getAsVector(); - - const uint16_t partition_num = VL_bits / (sizeof(T) * 8); - uint64_t out = 0; - - for (int i = 0; i < partition_num; i++) { - uint64_t shifted_active = 1ull << ((i % (64 / sizeof(T))) * sizeof(T)); - if (p[i / (64 / sizeof(T))] & shifted_active) - out += static_cast(n[i]); - } - return {out, 256}; - } - - /** Helper function for SVE instructions with the format `adr zd, [zn, zm{, - * lsl #<1,2,3>}]`. 
- * T represents the type of operands (e.g. for zn.d, T = uint64_t). - * Returns correctly formatted RegisterValue. */ - template - static RegisterValue sveAdr_packedOffsets( - std::vector& operands, - const simeng::arch::aarch64::InstructionMetadata& metadata, - const uint16_t VL_bits) { - const T* n = operands[0].getAsVector(); - const T* m = operands[1].getAsVector(); - - const uint16_t partition_num = VL_bits / (sizeof(T) * 8); - T out[256 / sizeof(T)] = {0}; - const int mbytes = 1 << metadata.operands[2].shift.value; - for (int i = 0; i < partition_num; i++) { - out[i] = n[i] + (m[i] * mbytes); - } - return {out, 256}; - } - - /** Helper function for instructions with the format `cmp pd, pg/z, zn, `. - * T represents the type of operands (e.g. for zn.d, T = uint64_t). - * Returns tuple of type [pred result (array of 4 uint64_t), nzcv]. */ - template - static std::tuple, uint8_t> sveCmpPredicated_toPred( - std::vector& operands, - const simeng::arch::aarch64::InstructionMetadata& metadata, - const uint16_t VL_bits, bool cmpToImm, std::function func) { - const uint64_t* p = operands[0].getAsVector(); - const T* n = operands[1].getAsVector(); - const T* m; - T imm; - if (cmpToImm) - imm = static_cast(metadata.operands[3].imm); - else - m = operands[2].getAsVector(); - - const uint16_t partition_num = VL_bits / (sizeof(T) * 8); - std::array out = {0, 0, 0, 0}; - - for (int i = 0; i < partition_num; i++) { - uint64_t shifted_active = 1ull << ((i % (64 / sizeof(T))) * sizeof(T)); - if (p[i / (64 / sizeof(T))] & shifted_active) { - if (cmpToImm) - out[i / (64 / sizeof(T))] |= (func(n[i], imm)) ? (shifted_active) : 0; - else - out[i / (64 / sizeof(T))] |= - (func(n[i], m[i])) ? (shifted_active) : 0; - } - } - // Byte count = sizeof(T) as destination predicate is predicate of T bytes. - return {out, AuxFunc::getNZCVfromPred(out, VL_bits, sizeof(T))}; - } - - /** Helper function for SVE instructions with the format `cnt rd{, - * pattern{, #imm}}`. 
- * T represents the type of operation (e.g. for CNTD, T = uint64_t). - * Returns single value of type uint64_t. */ - template - static uint64_t sveCnt_gpr( - const simeng::arch::aarch64::InstructionMetadata& metadata, - const uint16_t VL_bits) { - const uint8_t imm = static_cast(metadata.operands[1].imm); - - const uint16_t elems = - AuxFunc::sveGetPattern(metadata.operandStr, (sizeof(T) * 8), VL_bits); - return (uint64_t)(elems * imm); - } - - /** Helper function for SVE instructions with the format `cntp xd, pg, pn`. - * T represents the type of operands (e.g. for pn.d, T = uint64_t). - * Returns single value of type uint64_t. */ - template - static uint64_t sveCntp(std::vector& operands, +/** Helper function for SVE instructions with the format `add zd, zn, zm`. + * T represents the type of operands (e.g. for zn.d, T = uint64_t). + * Returns correctly formatted RegisterValue. */ +template +RegisterValue sveAdd_3ops(std::vector& operands, const uint16_t VL_bits) { - const uint64_t* pg = operands[0].getAsVector(); - const uint64_t* pn = operands[1].getAsVector(); - - const uint16_t partition_num = VL_bits / (sizeof(T) * 8); - uint64_t count = 0; - - for (int i = 0; i < partition_num; i++) { - uint64_t shifted_active = 1ull << ((i % (64 / sizeof(T))) * sizeof(T)); - if (pg[i / (64 / sizeof(T))] & shifted_active) { - count += (pn[i / (64 / sizeof(T))] & shifted_active) ? 1 : 0; - } - } - return count; - } - - /** Helper function for SVE instructions with the format `fcm pd, - * pg/z, zn, zm`. - * T represents the type of operands (e.g. for zn.d, T = uint64_t). - * Returns an array of 4 uint64_t elements. 
*/ - template - static std::array sveComparePredicated_vecsToPred( - std::vector& operands, - const simeng::arch::aarch64::InstructionMetadata& metadata, - const uint16_t VL_bits, bool cmpToZero, std::function func) { - const uint64_t* p = operands[0].getAsVector(); - const T* n = operands[1].getAsVector(); - const T* m; - if (!cmpToZero) m = operands[2].getAsVector(); - - const uint16_t partition_num = VL_bits / (sizeof(T) * 8); - std::array out = {0}; - - for (int i = 0; i < partition_num; i++) { - uint64_t shifted_active = 1ull << ((i % (64 / sizeof(T))) * sizeof(T)); - if (p[i / (64 / sizeof(T))] & shifted_active) { - out[i / (64 / sizeof(T))] |= - (func(n[i], cmpToZero ? 0.0 : m[i])) ? shifted_active : 0; - } - } - return out; + const T* n = operands[0].getAsVector(); + const T* m = operands[1].getAsVector(); + + const uint16_t partition_num = VL_bits / (sizeof(T) * 8); + T out[256 / sizeof(T)] = {0}; + for (int i = 0; i < partition_num; i++) { + out[i] = n[i] + m[i]; + } + return {out, 256}; +} + +/** Helper function for SVE instructions with the format `add zd, zn, #imm`. + * T represents the type of operands (e.g. for zn.d, T = uint64_t). + * Returns correctly formatted RegisterValue. */ +template +RegisterValue sveAdd_imm( + std::vector& operands, + const simeng::arch::aarch64::InstructionMetadata& metadata, + const uint16_t VL_bits) { + const T* n = operands[0].getAsVector(); + const T imm = static_cast(metadata.operands[2].imm); + + const uint16_t partition_num = VL_bits / (sizeof(T) * 8); + T out[256 / sizeof(T)] = {0}; + for (int i = 0; i < partition_num; i++) { + out[i] = n[i] + imm; + } + return {out, 256}; +} + +/** Helper function for SVE instructions with the format `add zdn, pg/m, zdn, + * const`. + * T represents the type of operands (e.g. for zn.d, T = uint64_t). + * Returns correctly formatted RegisterValue. 
*/ +template +RegisterValue sveAddPredicated_const( + std::vector& operands, + const simeng::arch::aarch64::InstructionMetadata& metadata, + const uint16_t VL_bits) { + bool isFP = std::is_floating_point::value; + const uint64_t* p = operands[0].getAsVector(); + const T* d = operands[1].getAsVector(); + const auto con = isFP ? metadata.operands[3].fp : metadata.operands[3].imm; + + const uint16_t partition_num = VL_bits / (sizeof(T) * 8); + T out[256 / sizeof(T)] = {0}; + for (int i = 0; i < partition_num; i++) { + uint64_t shifted_active = 1ull << ((i % (64 / sizeof(T))) * sizeof(T)); + if (p[i / (64 / sizeof(T))] & shifted_active) + out[i] = d[i] + con; + else + out[i] = d[i]; } + return {out, 256}; +} - /** Helper function for SVE instructions with the format `cpy zd, pg/z, #imm{, - * shift}`. - * T represents the type of operands (e.g. for zd.d, T = int64_t). - * Returns correctly formatted RegisterValue. */ - template - static RegisterValue sveCpy_imm( - std::vector& operands, - const simeng::arch::aarch64::InstructionMetadata& metadata, - const uint16_t VL_bits) { - const uint64_t* p = operands[0].getAsVector(); - const int16_t imm = metadata.operands[2].imm; - - const uint16_t partition_num = VL_bits / (sizeof(T) * 8); - T out[256 / sizeof(T)] = {0}; - - for (int i = 0; i < partition_num; i++) { - uint64_t shifted_active = 1ull << ((i % (64 / sizeof(T))) * sizeof(T)); - if (p[i / (64 / sizeof(T))] & shifted_active) { - out[i] = imm; - } else { - out[i] = 0; - } - } - return {out, 256}; - } - - /** Helper function for SVE instructions with the format `dec xdn{, - * pattern{, MUL #imm}}`. - * T represents the type of operation (e.g. for DECD, T = uint64_t). - * Returns single value of type uint64_t. 
*/ - template - static int64_t sveDec_scalar( - std::vector& operands, - const simeng::arch::aarch64::InstructionMetadata& metadata, - const uint16_t VL_bits) { - const int64_t n = operands[0].get(); - const uint8_t imm = static_cast(metadata.operands[1].imm); - const uint16_t elems = - AuxFunc::sveGetPattern(metadata.operandStr, sizeof(T) * 8, VL_bits); - return (n - static_cast(elems * imm)); - } - - /** Helper function for SVE instructions with the format `dup zd, <#imm{, - * shift}, n>`. - * T represents the type of operands (e.g. for zd.d, T = uint64_t). - * Returns correctly formatted RegisterValue. */ - template - static RegisterValue sveDup_immOrScalar( - std::vector& operands, - const simeng::arch::aarch64::InstructionMetadata& metadata, - const uint16_t VL_bits, bool useImm) { - bool isFP = std::is_floating_point::value; - T imm; - if (useImm) - imm = isFP ? metadata.operands[1].fp - : static_cast(metadata.operands[1].imm); +/** Helper function for SVE instructions with the format `add zdn, pg/m, zdn, + * zm`. + * T represents the type of operands (e.g. for zdn.d, T = uint64_t). + * Returns correctly formatted RegisterValue. */ +template +RegisterValue sveAddPredicated_vecs(std::vector& operands, + const uint16_t VL_bits) { + const uint64_t* p = operands[0].getAsVector(); + const T* d = operands[1].getAsVector(); + const T* m = operands[2].getAsVector(); + + const uint16_t partition_num = VL_bits / (sizeof(T) * 8); + T out[256 / sizeof(T)] = {0}; + for (int i = 0; i < partition_num; i++) { + uint64_t shifted_active = 1ull << ((i % (64 / sizeof(T))) * sizeof(T)); + if (p[i / (64 / sizeof(T))] & shifted_active) + out[i] = d[i] + m[i]; else - imm = operands[0].get(); - const uint16_t partition_num = VL_bits / (sizeof(T) * 8); - T out[256 / sizeof(T)] = {0}; - - for (int i = 0; i < partition_num; i++) { - out[i] = imm; - } - return {out, 256}; - } - - /** Helper function for SVE instructions with the format `dup zd, zn[#imm]`. 
- * T represents the type of operands (e.g. for zn.d, T = uint64_t). - * Returns correctly formatted RegisterValue. */ - template - static RegisterValue sveDup_vecIndexed( - std::vector& operands, - const simeng::arch::aarch64::InstructionMetadata& metadata, - const uint16_t VL_bits) { - const uint16_t index = - static_cast(metadata.operands[1].vector_index); - const T* n = operands[0].getAsVector(); - - const uint16_t partition_num = VL_bits / (sizeof(T) * 8); - T out[256 / sizeof(T)] = {0}; - - if (index < (VL_bits / (sizeof(T) * 8))) { - const T element = n[index]; - for (int i = 0; i < partition_num; i++) { - out[i] = element; - } - } - return {out, 256}; + out[i] = d[i]; } + return {out, 256}; +} - /** Helper function for SVE instructions with the format `fabs zd, - * pg/z, zn`. - * T represents the type of operands (e.g. for zn.d, T = uint64_t). - * Returns correctly formatted RegisterValue. */ - template - static RegisterValue sveFabsPredicated(std::vector& operands, - const uint16_t VL_bits) { - const T* d = operands[0].getAsVector(); - const uint64_t* p = operands[1].getAsVector(); - const T* n = operands[2].getAsVector(); - - const uint16_t partition_num = VL_bits / (sizeof(T) * 8); - T out[256 / sizeof(T)] = {0}; - +/** Helper function for NEON instructions with the format `addv dd, pg, zn`. + * T represents the type of operands (e.g. for zn.s, T = uint32_t). + * Returns correctly formatted RegisterValue. 
*/ +template +RegisterValue sveAddvPredicated(std::vector& operands, + const uint16_t VL_bits) { + const uint64_t* p = operands[0].getAsVector(); + const T* n = operands[1].getAsVector(); + + const uint16_t partition_num = VL_bits / (sizeof(T) * 8); + uint64_t out = 0; + + for (int i = 0; i < partition_num; i++) { + uint64_t shifted_active = 1ull << ((i % (64 / sizeof(T))) * sizeof(T)); + if (p[i / (64 / sizeof(T))] & shifted_active) + out += static_cast(n[i]); + } + return {out, 256}; +} + +/** Helper function for SVE instructions with the format `adr zd, [zn, zm{, + * lsl #<1,2,3>}]`. + * T represents the type of operands (e.g. for zn.d, T = uint64_t). + * Returns correctly formatted RegisterValue. */ +template +RegisterValue sveAdr_packedOffsets( + std::vector& operands, + const simeng::arch::aarch64::InstructionMetadata& metadata, + const uint16_t VL_bits) { + const T* n = operands[0].getAsVector(); + const T* m = operands[1].getAsVector(); + + const uint16_t partition_num = VL_bits / (sizeof(T) * 8); + T out[256 / sizeof(T)] = {0}; + + const int mbytes = 1 << metadata.operands[2].shift.value; + for (int i = 0; i < partition_num; i++) { + out[i] = n[i] + (m[i] * mbytes); + } + return {out, 256}; +} + +/** Helper function for instructions with the format `cmp pd, pg/z, zn, `. + * T represents the type of operands (e.g. for zn.d, T = uint64_t). + * Returns tuple of type [pred result (array of 4 uint64_t), nzcv]. 
*/ +template +std::tuple, uint8_t> sveCmpPredicated_toPred( + std::vector& operands, + const simeng::arch::aarch64::InstructionMetadata& metadata, + const uint16_t VL_bits, bool cmpToImm, std::function func) { + const uint64_t* p = operands[0].getAsVector(); + const T* n = operands[1].getAsVector(); + const T* m; + T imm; + if (cmpToImm) + imm = static_cast(metadata.operands[3].imm); + else + m = operands[2].getAsVector(); + + const uint16_t partition_num = VL_bits / (sizeof(T) * 8); + std::array out = {0, 0, 0, 0}; + + for (int i = 0; i < partition_num; i++) { + uint64_t shifted_active = 1ull << ((i % (64 / sizeof(T))) * sizeof(T)); + if (p[i / (64 / sizeof(T))] & shifted_active) { + if (cmpToImm) + out[i / (64 / sizeof(T))] |= (func(n[i], imm)) ? (shifted_active) : 0; + else + out[i / (64 / sizeof(T))] |= (func(n[i], m[i])) ? (shifted_active) : 0; + } + } + // Byte count = sizeof(T) as destination predicate is predicate of T bytes. + return {out, getNZCVfromPred(out, VL_bits, sizeof(T))}; +} + +/** Helper function for SVE instructions with the format `cnt rd{, + * pattern{, #imm}}`. + * T represents the type of operation (e.g. for CNTD, T = uint64_t). + * Returns single value of type uint64_t. */ +template +uint64_t sveCnt_gpr(const simeng::arch::aarch64::InstructionMetadata& metadata, + const uint16_t VL_bits) { + const uint8_t imm = static_cast(metadata.operands[1].imm); + + const uint16_t elems = + sveGetPattern(metadata.operandStr, (sizeof(T) * 8), VL_bits); + return (uint64_t)(elems * imm); +} + +/** Helper function for SVE instructions with the format `cntp xd, pg, pn`. + * T represents the type of operands (e.g. for pn.d, T = uint64_t). + * Returns single value of type uint64_t. 
*/ +template +uint64_t sveCntp(std::vector& operands, const uint16_t VL_bits) { + const uint64_t* pg = operands[0].getAsVector(); + const uint64_t* pn = operands[1].getAsVector(); + + const uint16_t partition_num = VL_bits / (sizeof(T) * 8); + uint64_t count = 0; + + for (int i = 0; i < partition_num; i++) { + uint64_t shifted_active = 1ull << ((i % (64 / sizeof(T))) * sizeof(T)); + if (pg[i / (64 / sizeof(T))] & shifted_active) { + count += (pn[i / (64 / sizeof(T))] & shifted_active) ? 1 : 0; + } + } + return count; +} + +/** Helper function for SVE instructions with the format `fcm pd, + * pg/z, zn, zm`. + * T represents the type of operands (e.g. for zn.d, T = uint64_t). + * Returns an array of 4 uint64_t elements. */ +template +std::array sveComparePredicated_vecsToPred( + std::vector& operands, + const simeng::arch::aarch64::InstructionMetadata& metadata, + const uint16_t VL_bits, bool cmpToZero, std::function func) { + const uint64_t* p = operands[0].getAsVector(); + const T* n = operands[1].getAsVector(); + const T* m; + if (!cmpToZero) m = operands[2].getAsVector(); + + const uint16_t partition_num = VL_bits / (sizeof(T) * 8); + std::array out = {0}; + + for (int i = 0; i < partition_num; i++) { + uint64_t shifted_active = 1ull << ((i % (64 / sizeof(T))) * sizeof(T)); + if (p[i / (64 / sizeof(T))] & shifted_active) { + out[i / (64 / sizeof(T))] |= + (func(n[i], cmpToZero ? 0.0 : m[i])) ? shifted_active : 0; + } + } + return out; +} + +/** Helper function for SVE instructions with the format `cpy zd, pg/z, #imm{, + * shift}`. + * T represents the type of operands (e.g. for zd.d, T = int64_t). + * Returns correctly formatted RegisterValue. 
*/ +template +RegisterValue sveCpy_imm( + std::vector& operands, + const simeng::arch::aarch64::InstructionMetadata& metadata, + const uint16_t VL_bits) { + const uint64_t* p = operands[0].getAsVector(); + const int16_t imm = metadata.operands[2].imm; + + const uint16_t partition_num = VL_bits / (sizeof(T) * 8); + T out[256 / sizeof(T)] = {0}; + + for (int i = 0; i < partition_num; i++) { + uint64_t shifted_active = 1ull << ((i % (64 / sizeof(T))) * sizeof(T)); + if (p[i / (64 / sizeof(T))] & shifted_active) { + out[i] = imm; + } else { + out[i] = 0; + } + } + return {out, 256}; +} + +/** Helper function for SVE instructions with the format `dec xdn{, + * pattern{, MUL #imm}}`. + * T represents the type of operation (e.g. for DECD, T = uint64_t). + * Returns single value of type uint64_t. */ +template +int64_t sveDec_scalar( + std::vector& operands, + const simeng::arch::aarch64::InstructionMetadata& metadata, + const uint16_t VL_bits) { + const int64_t n = operands[0].get(); + const uint8_t imm = static_cast(metadata.operands[1].imm); + const uint16_t elems = + sveGetPattern(metadata.operandStr, sizeof(T) * 8, VL_bits); + return (n - static_cast(elems * imm)); +} + +/** Helper function for SVE instructions with the format `dup zd, <#imm{, + * shift}, n>`. + * T represents the type of operands (e.g. for zd.d, T = uint64_t). + * Returns correctly formatted RegisterValue. */ +template +RegisterValue sveDup_immOrScalar( + std::vector& operands, + const simeng::arch::aarch64::InstructionMetadata& metadata, + const uint16_t VL_bits, bool useImm) { + bool isFP = std::is_floating_point::value; + T imm; + if (useImm) + imm = isFP ? 
metadata.operands[1].fp + : static_cast(metadata.operands[1].imm); + else + imm = operands[0].get(); + const uint16_t partition_num = VL_bits / (sizeof(T) * 8); + T out[256 / sizeof(T)] = {0}; + + for (int i = 0; i < partition_num; i++) { + out[i] = imm; + } + return {out, 256}; +} + +/** Helper function for SVE instructions with the format `dup zd, zn[#imm]`. + * T represents the type of operands (e.g. for zn.d, T = uint64_t). + * Returns correctly formatted RegisterValue. */ +template +RegisterValue sveDup_vecIndexed( + std::vector& operands, + const simeng::arch::aarch64::InstructionMetadata& metadata, + const uint16_t VL_bits) { + const uint16_t index = + static_cast(metadata.operands[1].vector_index); + const T* n = operands[0].getAsVector(); + + const uint16_t partition_num = VL_bits / (sizeof(T) * 8); + T out[256 / sizeof(T)] = {0}; + + if (index < (VL_bits / (sizeof(T) * 8))) { + const T element = n[index]; for (int i = 0; i < partition_num; i++) { - uint64_t shifted_active = 1ull << ((i % (64 / sizeof(T))) * sizeof(T)); - if (p[i / (64 / sizeof(T))] & shifted_active) { - out[i] = ::fabs(n[i]); - } else { - out[i] = d[i]; - } + out[i] = element; } - return {out, 256}; } + return {out, 256}; +} - /** Helper function for SVE instructions with the format `fadda rd, - * pg, rn, zm`. - * T represents the type of operands (e.g. for zm.d, T = uint64_t). - * Returns correctly formatted RegisterValue. 
*/ - template - static RegisterValue sveFaddaPredicated(std::vector& operands, - const uint16_t VL_bits) { - const uint64_t* p = operands[0].getAsVector(); - const T n = operands[1].get(); - const T* m = operands[2].getAsVector(); - - const uint16_t partition_num = VL_bits / (sizeof(T) * 8); - T out[256 / sizeof(T)] = {0}; - out[0] = n; - - for (int i = 0; i < partition_num; i++) { - uint64_t shifted_active = 1ull << ((i % (64 / sizeof(T))) * sizeof(T)); - if (p[i / (64 / sizeof(T))] & shifted_active) { - out[0] += m[i]; +/** Helper function for SVE instructions with the format `fabs zd, + * pg/z, zn`. + * T represents the type of operands (e.g. for zn.d, T = uint64_t). + * Returns correctly formatted RegisterValue. */ +template +RegisterValue sveFabsPredicated(std::vector& operands, + const uint16_t VL_bits) { + const T* d = operands[0].getAsVector(); + const uint64_t* p = operands[1].getAsVector(); + const T* n = operands[2].getAsVector(); + + const uint16_t partition_num = VL_bits / (sizeof(T) * 8); + T out[256 / sizeof(T)] = {0}; + + for (int i = 0; i < partition_num; i++) { + uint64_t shifted_active = 1ull << ((i % (64 / sizeof(T))) * sizeof(T)); + if (p[i / (64 / sizeof(T))] & shifted_active) { + out[i] = ::fabs(n[i]); + } else { + out[i] = d[i]; + } + } + return {out, 256}; +} + +/** Helper function for SVE instructions with the format `fadda rd, + * pg, rn, zm`. + * T represents the type of operands (e.g. for zm.d, T = uint64_t). + * Returns correctly formatted RegisterValue. 
*/ +template +RegisterValue sveFaddaPredicated(std::vector& operands, + const uint16_t VL_bits) { + const uint64_t* p = operands[0].getAsVector(); + const T n = operands[1].get(); + const T* m = operands[2].getAsVector(); + + const uint16_t partition_num = VL_bits / (sizeof(T) * 8); + T out[256 / sizeof(T)] = {0}; + out[0] = n; + + for (int i = 0; i < partition_num; i++) { + uint64_t shifted_active = 1ull << ((i % (64 / sizeof(T))) * sizeof(T)); + if (p[i / (64 / sizeof(T))] & shifted_active) { + out[0] += m[i]; + } + } + return {out, 256}; +} + +/** Helper function for SVE instructions with the format `fcadd zdn, pg/m, + * zdn, zm, #imm`. + * T represents the type of operands (e.g. for zm.d, T = double). + * Returns correctly formatted RegisterValue. */ +template +RegisterValue sveFcaddPredicated( + std::vector& operands, + const simeng::arch::aarch64::InstructionMetadata& metadata, + const uint16_t VL_bits) { + const uint64_t* p = operands[0].getAsVector(); + const T* dn = operands[1].getAsVector(); + const T* m = operands[2].getAsVector(); + const uint32_t imm = metadata.operands[4].imm; + + const uint16_t partition_num = VL_bits / (sizeof(T) * 8); + T out[256 / sizeof(T)] = {0}; + + for (int i = 0; i < (partition_num / 2); i++) { + T acc_r = dn[2 * i]; + T acc_i = dn[2 * i + 1]; + T elt2_r = m[2 * i]; + T elt2_i = m[2 * i + 1]; + + uint64_t shifted_active1 = 1ull + << (((2 * i) % (64 / sizeof(T))) * sizeof(T)); + uint64_t shifted_active2 = + 1ull << (((2 * i + 1) % (64 / sizeof(T))) * sizeof(T)); + if (p[(2 * i) / (64 / sizeof(T))] & shifted_active1) { + if (imm == 90) { + elt2_i = 0.0 - elt2_i; } + acc_r = acc_r + elt2_i; } - return {out, 256}; - } - - /** Helper function for SVE instructions with the format `fcadd zdn, pg/m, - * zdn, zm, #imm`. - * T represents the type of operands (e.g. for zm.d, T = double). - * Returns correctly formatted RegisterValue. 
*/ - template - static RegisterValue sveFcaddPredicated( - std::vector& operands, - const simeng::arch::aarch64::InstructionMetadata& metadata, - const uint16_t VL_bits) { - const uint64_t* p = operands[0].getAsVector(); - const T* dn = operands[1].getAsVector(); - const T* m = operands[2].getAsVector(); - const uint32_t imm = metadata.operands[4].imm; - - const uint16_t partition_num = VL_bits / (sizeof(T) * 8); - T out[256 / sizeof(T)] = {0}; - - for (int i = 0; i < (partition_num / 2); i++) { - T acc_r = dn[2 * i]; - T acc_i = dn[2 * i + 1]; - T elt2_r = m[2 * i]; - T elt2_i = m[2 * i + 1]; - - uint64_t shifted_active1 = 1ull - << (((2 * i) % (64 / sizeof(T))) * sizeof(T)); - uint64_t shifted_active2 = - 1ull << (((2 * i + 1) % (64 / sizeof(T))) * sizeof(T)); - if (p[(2 * i) / (64 / sizeof(T))] & shifted_active1) { - if (imm == 90) { - elt2_i = 0.0 - elt2_i; - } - acc_r = acc_r + elt2_i; + if (p[(2 * i + 1) / (64 / sizeof(T))] & shifted_active2) { + if (imm == 270) { + elt2_r = 0.0 - elt2_r; } - if (p[(2 * i + 1) / (64 / sizeof(T))] & shifted_active2) { - if (imm == 270) { - elt2_r = 0.0 - elt2_r; - } - acc_i = acc_i + elt2_r; + acc_i = acc_i + elt2_r; + } + out[2 * i] = acc_r; + out[2 * i + 1] = acc_i; + } + return {out, 256}; +} + +/** Helper function for SVE instructions with the format `fcmla zda, pg/m, + * zn, zm, #imm`. + * T represents the type of operands (e.g. for zm.d, T = double). + * Returns correctly formatted RegisterValue. */ +template +RegisterValue sveFcmlaPredicated( + std::vector& operands, + const simeng::arch::aarch64::InstructionMetadata& metadata, + const uint16_t VL_bits) { + const T* da = operands[0].getAsVector(); + const uint64_t* p = operands[1].getAsVector(); + const T* n = operands[2].getAsVector(); + const T* m = operands[3].getAsVector(); + const uint32_t imm = metadata.operands[4].imm; + + const uint16_t partition_num = VL_bits / (sizeof(T) * 8); + T out[256 / sizeof(T)] = {0}; + + int sel_a = (imm == 0 || imm == 180) ? 
0 : 1; + int sel_b = (imm == 0 || imm == 180) ? 1 : 0; + bool neg_i = (imm == 180 || imm == 270) ? true : false; + bool neg_r = (imm == 90 || imm == 180) ? true : false; + for (int i = 0; i < (partition_num / 2); i++) { + T addend_r = da[2 * i]; + T addend_i = da[2 * i + 1]; + T elt1_a = n[2 * i + sel_a]; + T elt2_a = m[2 * i + sel_a]; + T elt2_b = m[2 * i + sel_b]; + uint64_t shifted_active1 = 1ull + << (((2 * i) % (64 / sizeof(T))) * sizeof(T)); + uint64_t shifted_active2 = + 1ull << (((2 * i + 1) % (64 / sizeof(T))) * sizeof(T)); + if (p[(2 * i) / (64 / sizeof(T))] & shifted_active1) { + if (neg_r) { + elt2_a = 0.0 - elt2_a; } - out[2 * i] = acc_r; - out[2 * i + 1] = acc_i; + addend_r = addend_r + (elt1_a * elt2_a); } - return {out, 256}; - } - - /** Helper function for SVE instructions with the format `fcmla zda, pg/m, - * zn, zm, #imm`. - * T represents the type of operands (e.g. for zm.d, T = double). - * Returns correctly formatted RegisterValue. */ - template - static RegisterValue sveFcmlaPredicated( - std::vector& operands, - const simeng::arch::aarch64::InstructionMetadata& metadata, - const uint16_t VL_bits) { - const T* da = operands[0].getAsVector(); - const uint64_t* p = operands[1].getAsVector(); - const T* n = operands[2].getAsVector(); - const T* m = operands[3].getAsVector(); - const uint32_t imm = metadata.operands[4].imm; - - const uint16_t partition_num = VL_bits / (sizeof(T) * 8); - T out[256 / sizeof(T)] = {0}; - - int sel_a = (imm == 0 || imm == 180) ? 0 : 1; - int sel_b = (imm == 0 || imm == 180) ? 1 : 0; - bool neg_i = (imm == 180 || imm == 270) ? true : false; - bool neg_r = (imm == 90 || imm == 180) ? 
true : false; - for (int i = 0; i < (partition_num / 2); i++) { - T addend_r = da[2 * i]; - T addend_i = da[2 * i + 1]; - T elt1_a = n[2 * i + sel_a]; - T elt2_a = m[2 * i + sel_a]; - T elt2_b = m[2 * i + sel_b]; - uint64_t shifted_active1 = 1ull - << (((2 * i) % (64 / sizeof(T))) * sizeof(T)); - uint64_t shifted_active2 = - 1ull << (((2 * i + 1) % (64 / sizeof(T))) * sizeof(T)); - if (p[(2 * i) / (64 / sizeof(T))] & shifted_active1) { - if (neg_r) { - elt2_a = 0.0 - elt2_a; - } - addend_r = addend_r + (elt1_a * elt2_a); + if (p[(2 * i + 1) / (64 / sizeof(T))] & shifted_active2) { + if (neg_i) { + elt2_b = 0.0 - elt2_b; } - if (p[(2 * i + 1) / (64 / sizeof(T))] & shifted_active2) { - if (neg_i) { - elt2_b = 0.0 - elt2_b; - } - addend_i = addend_i + (elt1_a * elt2_b); - } - out[2 * i] = addend_r; - out[2 * i + 1] = addend_i; + addend_i = addend_i + (elt1_a * elt2_b); + } + out[2 * i] = addend_r; + out[2 * i + 1] = addend_i; + } + return {out, 256}; +} + +/** Helper function for SVE instructions with the format `fcpy zd, pg/m, + * #const`. + * T represents the type of operands (e.g. for zd.d, T = double). + * Returns correctly formatted RegisterValue. */ +template +RegisterValue sveFcpy_imm( + std::vector& operands, + const simeng::arch::aarch64::InstructionMetadata& metadata, + const uint16_t VL_bits) { + const T* dn = operands[0].getAsVector(); + const uint64_t* p = operands[1].getAsVector(); + const T imm = metadata.operands[2].fp; + + const uint16_t partition_num = VL_bits / (sizeof(T) * 8); + T out[256 / sizeof(T)] = {0}; + + for (int i = 0; i < partition_num; i++) { + uint64_t shifted_active = 1ull << ((i % (64 / sizeof(T))) * sizeof(T)); + if (p[i / (64 / sizeof(T))] & shifted_active) { + out[i] = imm; + } else { + out[i] = dn[i]; } - return {out, 256}; } + return {out, 256}; +} - /** Helper function for SVE instructions with the format `fcpy zd, pg/m, - * #const`. - * T represents the type of operands (e.g. for zd.d, T = double). 
- * Returns correctly formatted RegisterValue. */ - template - static RegisterValue sveFcpy_imm( - std::vector& operands, - const simeng::arch::aarch64::InstructionMetadata& metadata, - const uint16_t VL_bits) { - const T* dn = operands[0].getAsVector(); - const uint64_t* p = operands[1].getAsVector(); - const T imm = metadata.operands[2].fp; - - const uint16_t partition_num = VL_bits / (sizeof(T) * 8); - T out[256 / sizeof(T)] = {0}; - - for (int i = 0; i < partition_num; i++) { - uint64_t shifted_active = 1ull << ((i % (64 / sizeof(T))) * sizeof(T)); - if (p[i / (64 / sizeof(T))] & shifted_active) { - out[i] = imm; - } else { - out[i] = dn[i]; - } - } - return {out, 256}; - } - - /** Helper function for SVE instructions with the format `fcvt zd, - * pg/m, zn`. - * D represents the destination vector register type (e.g. zd.s would be - * int32_t). - * N represents the source vector register type (e.g. zn.d would be double). - * Returns correctly formatted RegisterValue. */ - template - static RegisterValue sveFcvtPredicated(std::vector& operands, - const uint16_t VL_bits) { - const D* d = operands[0].getAsVector(); - const uint64_t* p = operands[1].getAsVector(); - const N* n = operands[2].getAsVector(); - - // Stores size of largest type out of D and N - int lts = std::max(sizeof(D), sizeof(N)); - bool sourceLarger = (sizeof(D) < sizeof(N)) ? true : false; - bool sameDandN = (sizeof(D) == sizeof(N)) ? true : false; - - const uint16_t partition_num = VL_bits / (lts * 8); - D out[256 / sizeof(D)] = {0}; - - for (int i = 0; i < partition_num; i++) { - uint64_t shifted_active = 1ull << ((i % (64 / lts)) * lts); - int indexOut = (sourceLarger) ? (2 * i) : i; - int indexN = (!sameDandN) && (!sourceLarger) ? 
(2 * i) : i; - - if (p[i / (64 / lts)] & shifted_active) { - if (n[indexN] > std::numeric_limits::max()) - out[indexOut] = std::numeric_limits::max(); - else if (n[indexN] < std::numeric_limits::lowest()) - out[indexOut] = std::numeric_limits::lowest(); - else - out[indexOut] = static_cast(n[indexN]); - } else { - out[indexOut] = d[indexOut]; - } - if (sourceLarger) out[indexOut + 1] = d[indexOut + 1]; - } - return {out, 256}; - } - - /** Helper function for SVE instructions with the format `fcvtzs zd, - * pg/m, zn`. - * D represents the destination vector register type (e.g. zd.s would be - * int32_t). - * N represents the source vector register type (e.g. zn.d would be double). - * Returns correctly formatted RegisterValue. */ - template - static RegisterValue sveFcvtzsPredicated(std::vector& operands, - const uint16_t VL_bits) { - const D* d = operands[0].getAsVector(); - const uint64_t* p = operands[1].getAsVector(); - const N* n = operands[2].getAsVector(); - - // Stores size of largest type out of D and N - int lts = std::max(sizeof(D), sizeof(N)); - bool sameType = (sizeof(D) == sizeof(N)) ? true : false; - bool sourceLarger = (sizeof(D) < sizeof(N)) ? true : false; - - const uint16_t partition_num = VL_bits / (lts * 8); - D out[256 / sizeof(D)] = {0}; - - for (int i = 0; i < partition_num; i++) { - uint64_t shifted_active = 1ull << ((i % (64 / lts)) * lts); - int indexOut = (sourceLarger) ? (2 * i) : i; - int indexN = ((!sourceLarger) & (!sameType)) ? (2 * i) : i; - - if (p[i / (64 / lts)] & shifted_active) { - if (n[indexN] > std::numeric_limits::max()) - out[indexOut] = std::numeric_limits::max(); - else if (n[indexN] < std::numeric_limits::lowest()) - out[indexOut] = std::numeric_limits::lowest(); - else - out[indexOut] = static_cast(std::trunc(n[indexN])); - // Can be set to 0xFFFFFFFF as will only occur when D=int32_t. - if (sourceLarger) out[indexOut + 1] = (n[indexN] < 0) ? 
0xFFFFFFFFu : 0; - } else { - out[indexOut] = d[indexOut]; - if (sourceLarger) out[indexOut + 1] = d[indexOut + 1]; - } - } - return {out, 256}; - } - - /** Helper function for SVE instructions with the format `fmad zd, pg/m, zn, - * zm`. - * T represents the type of operands (e.g. for zn.d, T = double). - * Returns correctly formatted RegisterValue. */ - template - static RegisterValue sveFmadPredicated_vecs( - std::vector& operands, const uint16_t VL_bits) { - const T* d = operands[0].getAsVector(); - const uint64_t* p = operands[1].getAsVector(); - const T* n = operands[2].getAsVector(); - const T* m = operands[3].getAsVector(); - - const uint16_t partition_num = VL_bits / (sizeof(T) * 8); - T out[256 / sizeof(T)] = {0}; - for (int i = 0; i < partition_num; i++) { - uint64_t shifted_active = 1ull << ((i % (64 / sizeof(T))) * sizeof(T)); - if (p[i / (64 / sizeof(T))] & shifted_active) - out[i] = m[i] + (d[i] * n[i]); - else - out[i] = d[i]; - } - return {out, 256}; - } - - /** Helper function for SVE instructions with the format `fmls zd, pg/m, zn, - * zm`. - * T represents the type of operands (e.g. for zn.d, T = double). - * Returns correctly formatted RegisterValue. */ - template - static RegisterValue sveFmlsPredicated_vecs( - std::vector& operands, const uint16_t VL_bits) { - const T* d = operands[0].getAsVector(); - const uint64_t* p = operands[1].getAsVector(); - const T* n = operands[2].getAsVector(); - const T* m = operands[3].getAsVector(); - - const uint16_t partition_num = VL_bits / (sizeof(T) * 8); - T out[256 / sizeof(T)] = {0}; - for (int i = 0; i < partition_num; i++) { - uint64_t shifted_active = 1ull << ((i % (64 / sizeof(T))) * sizeof(T)); - if (p[i / (64 / sizeof(T))] & shifted_active) - out[i] = d[i] + (-n[i] * m[i]); +/** Helper function for SVE instructions with the format `fcvt zd, + * pg/m, zn`. + * D represents the destination vector register type (e.g. zd.s would be + * int32_t). + * N represents the source vector register type (e.g. 
zn.d would be double). + * Returns correctly formatted RegisterValue. */ +template +RegisterValue sveFcvtPredicated(std::vector& operands, + const uint16_t VL_bits) { + const D* d = operands[0].getAsVector(); + const uint64_t* p = operands[1].getAsVector(); + const N* n = operands[2].getAsVector(); + + // Stores size of largest type out of D and N + int lts = std::max(sizeof(D), sizeof(N)); + bool sourceLarger = (sizeof(D) < sizeof(N)) ? true : false; + bool sameDandN = (sizeof(D) == sizeof(N)) ? true : false; + + const uint16_t partition_num = VL_bits / (lts * 8); + D out[256 / sizeof(D)] = {0}; + + for (int i = 0; i < partition_num; i++) { + uint64_t shifted_active = 1ull << ((i % (64 / lts)) * lts); + int indexOut = (sourceLarger) ? (2 * i) : i; + int indexN = (!sameDandN) && (!sourceLarger) ? (2 * i) : i; + + if (p[i / (64 / lts)] & shifted_active) { + if (n[indexN] > std::numeric_limits::max()) + out[indexOut] = std::numeric_limits::max(); + else if (n[indexN] < std::numeric_limits::lowest()) + out[indexOut] = std::numeric_limits::lowest(); else - out[i] = d[i]; - } - return {out, 256}; - } - - /** Helper function for SVE instructions with the format `fmsb zd, pg/m, zn, - * zm`. - * T represents the type of operands (e.g. for zn.d, T = double). - * Returns correctly formatted RegisterValue. 
*/ - template - static RegisterValue sveFmsbPredicated_vecs( - std::vector& operands, const uint16_t VL_bits) { - const T* d = operands[0].getAsVector(); - const uint64_t* p = operands[1].getAsVector(); - const T* n = operands[2].getAsVector(); - const T* m = operands[3].getAsVector(); - - const uint16_t partition_num = VL_bits / (sizeof(T) * 8); - T out[256 / sizeof(T)] = {0}; - for (int i = 0; i < partition_num; i++) { - uint64_t shifted_active = 1ull << ((i % (64 / sizeof(T))) * sizeof(T)); - if (p[i / (64 / sizeof(T))] & shifted_active) - out[i] = m[i] + (-d[i] * n[i]); + out[indexOut] = static_cast(n[indexN]); + } else { + out[indexOut] = d[indexOut]; + } + if (sourceLarger) out[indexOut + 1] = d[indexOut + 1]; + } + return {out, 256}; +} + +/** Helper function for SVE instructions with the format `fcvtzs zd, + * pg/m, zn`. + * D represents the destination vector register type (e.g. zd.s would be + * int32_t). + * N represents the source vector register type (e.g. zn.d would be double). + * Returns correctly formatted RegisterValue. */ +template +RegisterValue sveFcvtzsPredicated(std::vector& operands, + const uint16_t VL_bits) { + const D* d = operands[0].getAsVector(); + const uint64_t* p = operands[1].getAsVector(); + const N* n = operands[2].getAsVector(); + + // Stores size of largest type out of D and N + int lts = std::max(sizeof(D), sizeof(N)); + bool sameType = (sizeof(D) == sizeof(N)) ? true : false; + bool sourceLarger = (sizeof(D) < sizeof(N)) ? true : false; + + const uint16_t partition_num = VL_bits / (lts * 8); + D out[256 / sizeof(D)] = {0}; + + for (int i = 0; i < partition_num; i++) { + uint64_t shifted_active = 1ull << ((i % (64 / lts)) * lts); + int indexOut = (sourceLarger) ? (2 * i) : i; + int indexN = ((!sourceLarger) & (!sameType)) ? 
(2 * i) : i; + + if (p[i / (64 / lts)] & shifted_active) { + if (n[indexN] > std::numeric_limits::max()) + out[indexOut] = std::numeric_limits::max(); + else if (n[indexN] < std::numeric_limits::lowest()) + out[indexOut] = std::numeric_limits::lowest(); else - out[i] = d[i]; - } - return {out, 256}; - } - - /** Helper function for SVE instructions with the format `fmul zd, zn, zm`. - * T represents the type of operands (e.g. for zn.d, T = double). - * Returns correctly formatted RegisterValue. */ - template - static RegisterValue sveFmul_3ops(std::vector& operands, - const uint16_t VL_bits) { - const T* n = operands[0].getAsVector(); - const T* m = operands[1].getAsVector(); - - const uint16_t partition_num = VL_bits / (sizeof(T) * 8); - T out[256 / sizeof(T)] = {0}; - for (int i = 0; i < partition_num; i++) { - out[i] = n[i] * m[i]; + out[indexOut] = static_cast(std::trunc(n[indexN])); + // Can be set to 0xFFFFFFFF as will only occur when D=int32_t. + if (sourceLarger) out[indexOut + 1] = (n[indexN] < 0) ? 0xFFFFFFFFu : 0; + } else { + out[indexOut] = d[indexOut]; + if (sourceLarger) out[indexOut + 1] = d[indexOut + 1]; } - return {out, 256}; } + return {out, 256}; +} - /** Helper function for SVE instructions with the format `fneg zd, pg/m, zn`. - * T represents the type of operands (e.g. for zn.d, T = double). - * Returns correctly formatted RegisterValue. 
*/ - template - static RegisterValue sveFnegPredicated(std::vector& operands, - const uint16_t VL_bits) { - const T* d = operands[0].getAsVector(); - const uint64_t* p = operands[1].getAsVector(); - const T* n = operands[2].getAsVector(); - - const uint16_t partition_num = VL_bits / (sizeof(T) * 8); - T out[256 / sizeof(T)] = {0}; - - for (int i = 0; i < partition_num; i++) { - uint64_t shifted_active = 1ull << ((i % (64 / sizeof(T))) * sizeof(T)); - if (p[i / (64 / sizeof(T))] & shifted_active) - out[i] = -n[i]; - else - out[i] = d[i]; - } - return {out, 256}; +/** Helper function for SVE instructions with the format `fmad zd, pg/m, zn, + * zm`. + * T represents the type of operands (e.g. for zn.d, T = double). + * Returns correctly formatted RegisterValue. */ +template +RegisterValue sveFmadPredicated_vecs(std::vector& operands, + const uint16_t VL_bits) { + const T* d = operands[0].getAsVector(); + const uint64_t* p = operands[1].getAsVector(); + const T* n = operands[2].getAsVector(); + const T* m = operands[3].getAsVector(); + + const uint16_t partition_num = VL_bits / (sizeof(T) * 8); + T out[256 / sizeof(T)] = {0}; + for (int i = 0; i < partition_num; i++) { + uint64_t shifted_active = 1ull << ((i % (64 / sizeof(T))) * sizeof(T)); + if (p[i / (64 / sizeof(T))] & shifted_active) + out[i] = m[i] + (d[i] * n[i]); + else + out[i] = d[i]; } + return {out, 256}; +} - /** Helper function for SVE instructions with the format `fnmls zd, pg/m, zn, - * zm`. - * T represents the type of operands (e.g. for zn.d, T = double). - * Returns correctly formatted RegisterValue. 
*/ - template - static RegisterValue sveFnmlsPredicated(std::vector& operands, - const uint16_t VL_bits) { - const T* d = operands[0].getAsVector(); - const uint64_t* p = operands[1].getAsVector(); - const T* n = operands[2].getAsVector(); - const T* m = operands[3].getAsVector(); - - const uint16_t partition_num = VL_bits / (sizeof(T) * 8); - T out[256 / sizeof(T)] = {0}; - - for (int i = 0; i < partition_num; i++) { - uint64_t shifted_active = 1ull << ((i % (64 / sizeof(T))) * sizeof(T)); - if (p[i / (64 / sizeof(T))] & shifted_active) - out[i] = -d[i] + (n[i] * m[i]); - else - out[i] = d[i]; - } - return {out, 256}; +/** Helper function for SVE instructions with the format `fmls zd, pg/m, zn, + * zm`. + * T represents the type of operands (e.g. for zn.d, T = double). + * Returns correctly formatted RegisterValue. */ +template +RegisterValue sveFmlsPredicated_vecs(std::vector& operands, + const uint16_t VL_bits) { + const T* d = operands[0].getAsVector(); + const uint64_t* p = operands[1].getAsVector(); + const T* n = operands[2].getAsVector(); + const T* m = operands[3].getAsVector(); + + const uint16_t partition_num = VL_bits / (sizeof(T) * 8); + T out[256 / sizeof(T)] = {0}; + for (int i = 0; i < partition_num; i++) { + uint64_t shifted_active = 1ull << ((i % (64 / sizeof(T))) * sizeof(T)); + if (p[i / (64 / sizeof(T))] & shifted_active) + out[i] = d[i] + (-n[i] * m[i]); + else + out[i] = d[i]; } + return {out, 256}; +} - /** Helper function for SVE instructions with the format `fnmsb zdn, pg/m, zm, - * za`. - * T represents the type of operands (e.g. for zdn.d, T = double). - * Returns correctly formatted RegisterValue. 
*/ - template - static RegisterValue sveFnmsbPredicated(std::vector& operands, - const uint16_t VL_bits) { - const T* n = operands[0].getAsVector(); - const uint64_t* p = operands[1].getAsVector(); - const T* m = operands[2].getAsVector(); - const T* a = operands[3].getAsVector(); - - const uint16_t partition_num = VL_bits / (sizeof(T) * 8); - T out[256 / sizeof(T)] = {0}; +/** Helper function for SVE instructions with the format `fmsb zd, pg/m, zn, + * zm`. + * T represents the type of operands (e.g. for zn.d, T = double). + * Returns correctly formatted RegisterValue. */ +template +RegisterValue sveFmsbPredicated_vecs(std::vector& operands, + const uint16_t VL_bits) { + const T* d = operands[0].getAsVector(); + const uint64_t* p = operands[1].getAsVector(); + const T* n = operands[2].getAsVector(); + const T* m = operands[3].getAsVector(); + + const uint16_t partition_num = VL_bits / (sizeof(T) * 8); + T out[256 / sizeof(T)] = {0}; + for (int i = 0; i < partition_num; i++) { + uint64_t shifted_active = 1ull << ((i % (64 / sizeof(T))) * sizeof(T)); + if (p[i / (64 / sizeof(T))] & shifted_active) + out[i] = m[i] + (-d[i] * n[i]); + else + out[i] = d[i]; + } + return {out, 256}; +} + +/** Helper function for SVE instructions with the format `fmul zd, zn, zm`. + * T represents the type of operands (e.g. for zn.d, T = double). + * Returns correctly formatted RegisterValue. */ +template +RegisterValue sveFmul_3ops(std::vector& operands, + const uint16_t VL_bits) { + const T* n = operands[0].getAsVector(); + const T* m = operands[1].getAsVector(); + + const uint16_t partition_num = VL_bits / (sizeof(T) * 8); + T out[256 / sizeof(T)] = {0}; + for (int i = 0; i < partition_num; i++) { + out[i] = n[i] * m[i]; + } + return {out, 256}; +} + +/** Helper function for SVE instructions with the format `fneg zd, pg/m, zn`. + * T represents the type of operands (e.g. for zn.d, T = double). + * Returns correctly formatted RegisterValue. 
*/ +template +RegisterValue sveFnegPredicated(std::vector& operands, + const uint16_t VL_bits) { + const T* d = operands[0].getAsVector(); + const uint64_t* p = operands[1].getAsVector(); + const T* n = operands[2].getAsVector(); - for (int i = 0; i < partition_num; i++) { - uint64_t shifted_active = 1ull << ((i % (64 / sizeof(T))) * sizeof(T)); - if (p[i / (64 / sizeof(T))] & shifted_active) - out[i] = -a[i] + n[i] * m[i]; - else - out[i] = n[i]; - } - return {out, 256}; - } - - /** Helper function for SVE instructions with the format `frintn zd, pg/m, - * zn`. - * D represents the destination vector register type (e.g. zd.s would be - * int32_t). - * N represents the source vector register type (e.g. zn.d would be - * double). - * Returns correctly formatted RegisterValue. */ - template - static RegisterValue sveFrintnPredicated(std::vector& operands, - const uint16_t VL_bits) { - const D* d = operands[0].getAsVector(); - const uint64_t* p = operands[1].getAsVector(); - const N* n = operands[2].getAsVector(); - - const uint16_t partition_num = VL_bits / (sizeof(N) * 8); - D out[256 / sizeof(D)] = {0}; + const uint16_t partition_num = VL_bits / (sizeof(T) * 8); + T out[256 / sizeof(T)] = {0}; - for (int i = 0; i < partition_num; i++) { - uint64_t shifted_active = 1ull << ((i % (64 / sizeof(N))) * sizeof(N)); - if (p[i / (64 / sizeof(N))] & shifted_active) { - out[i] = AuxFunc::roundToNearestTiesToEven(n[i]); + for (int i = 0; i < partition_num; i++) { + uint64_t shifted_active = 1ull << ((i % (64 / sizeof(T))) * sizeof(T)); + if (p[i / (64 / sizeof(T))] & shifted_active) + out[i] = -n[i]; + else + out[i] = d[i]; + } + return {out, 256}; +} + +/** Helper function for SVE instructions with the format `fnmls zd, pg/m, zn, + * zm`. + * T represents the type of operands (e.g. for zn.d, T = double). + * Returns correctly formatted RegisterValue. 
*/ +template +RegisterValue sveFnmlsPredicated(std::vector& operands, + const uint16_t VL_bits) { + const T* d = operands[0].getAsVector(); + const uint64_t* p = operands[1].getAsVector(); + const T* n = operands[2].getAsVector(); + const T* m = operands[3].getAsVector(); + + const uint16_t partition_num = VL_bits / (sizeof(T) * 8); + T out[256 / sizeof(T)] = {0}; + + for (int i = 0; i < partition_num; i++) { + uint64_t shifted_active = 1ull << ((i % (64 / sizeof(T))) * sizeof(T)); + if (p[i / (64 / sizeof(T))] & shifted_active) + out[i] = -d[i] + (n[i] * m[i]); + else + out[i] = d[i]; + } + return {out, 256}; +} + +/** Helper function for SVE instructions with the format `fnmsb zdn, pg/m, zm, + * za`. + * T represents the type of operands (e.g. for zdn.d, T = double). + * Returns correctly formatted RegisterValue. */ +template +RegisterValue sveFnmsbPredicated(std::vector& operands, + const uint16_t VL_bits) { + const T* n = operands[0].getAsVector(); + const uint64_t* p = operands[1].getAsVector(); + const T* m = operands[2].getAsVector(); + const T* a = operands[3].getAsVector(); + + const uint16_t partition_num = VL_bits / (sizeof(T) * 8); + T out[256 / sizeof(T)] = {0}; + + for (int i = 0; i < partition_num; i++) { + uint64_t shifted_active = 1ull << ((i % (64 / sizeof(T))) * sizeof(T)); + if (p[i / (64 / sizeof(T))] & shifted_active) + out[i] = -a[i] + n[i] * m[i]; + else + out[i] = n[i]; + } + return {out, 256}; +} + +/** Helper function for SVE instructions with the format `frintn zd, pg/m, + * zn`. + * T represents the vector type (e.g. zd.s would be float). + * Returns correctly formatted RegisterValue. 
*/ +template +std::enable_if_t, RegisterValue> +sveFrintnPredicated(std::vector& operands, + const uint16_t VL_bits) { + const T* d = operands[0].getAsVector(); + const uint64_t* p = operands[1].getAsVector(); + const T* n = operands[2].getAsVector(); + + const uint16_t partition_num = VL_bits / (sizeof(T) * 8); + T out[256 / sizeof(T)] = {0}; + + for (int i = 0; i < partition_num; i++) { + uint64_t shifted_active = 1ull << ((i % (64 / sizeof(T))) * sizeof(T)); + if (p[i / (64 / sizeof(T))] & shifted_active) { + // Get truncation + T trunc = std::trunc(n[i]); + // On tie, round to nearest even + if (std::fabs(n[i] - trunc) == static_cast(0.5)) { + T addand = (trunc > static_cast(0.0)) ? static_cast(1) + : static_cast(-1); + // If odd, add the addand + out[i] = (std::fmod(trunc, static_cast(2.0)) == static_cast(0.0)) + ? trunc + : (trunc + addand); } else { - out[i] = d[i]; + // Else, round to nearest + out[i] = std::round(n[i]); } - } - return {out, 256}; - } - - /** Helper function for SVE instructions with the format `fsqrt zd, - * pg/m, zn`. - * T represents the type of operands (e.g. for zn.d, T = double). - * Returns correctly formatted RegisterValue. */ - template - static RegisterValue sveFsqrtPredicated_2vecs( - std::vector& operands, const uint16_t VL_bits) { - const T* d = operands[0].getAsVector(); - const uint64_t* p = operands[1].getAsVector(); - const T* n = operands[2].getAsVector(); - - const uint16_t partition_num = VL_bits / (sizeof(T) * 8); - T out[256 / sizeof(T)] = {0}; - for (int i = 0; i < partition_num; i++) { - uint64_t shifted_active = 1ull << ((i % (64 / sizeof(T))) * sizeof(T)); - if (p[i / (64 / sizeof(T))] & shifted_active) - out[i] = ::sqrt(n[i]); - else - out[i] = d[i]; - } - return {out, 256}; - } - - /** Helper function for SVE instructions with the format `inc - * xdn{, pattern{, MUL #imm}}`. - * T represents the type of operation (e.g. for INCB, T = int8_t). - * Returns single value of type int64_t. 
*/ - template - static int64_t sveInc_gprImm( - std::vector& operands, - const simeng::arch::aarch64::InstructionMetadata& metadata, - const uint16_t VL_bits) { - const int64_t n = operands[0].get(); - const uint8_t imm = static_cast(metadata.operands[1].imm); - const uint16_t elems = - AuxFunc::sveGetPattern(metadata.operandStr, sizeof(T) * 8, VL_bits); - int64_t out = n + (elems * imm); - return out; - } - - /** Helper function for SVE instructions with the format `inc - * zdn{, pattern{, #imm}}`. - * T represents the type of operands (e.g. for zdn.d, T = int64_t). - * Returns correctly formatted RegisterValue. */ - template - static RegisterValue sveInc_imm( - std::vector& operands, - const simeng::arch::aarch64::InstructionMetadata& metadata, - const uint16_t VL_bits) { - const T* n = operands[0].getAsVector(); - const uint8_t imm = static_cast(metadata.operands[1].imm); - - const uint16_t partition_num = VL_bits / (sizeof(T) * 8); - typename std::make_signed::type out[256 / sizeof(T)] = {0}; - const uint16_t elems = - AuxFunc::sveGetPattern(metadata.operandStr, sizeof(T) * 8, VL_bits); - - for (int i = 0; i < partition_num; i++) { - out[i] = n[i] + (elems * imm); - } - return {out, 256}; - } - - /** Helper function for SVE instructions with the format `incp xdn, pm`. - * T represents the type of operands (e.g. for pm.d, T = uint64_t). - * Returns single value of type uint64_t. 
*/ - template - static uint64_t sveIncp_gpr(std::vector& operands, - const uint16_t VL_bits) { - const uint64_t dn = operands[0].get(); - const uint64_t* p = operands[1].getAsVector(); - - const uint16_t partition_num = VL_bits / (sizeof(T) * 8); - uint64_t count = 0; - - for (int i = 0; i < partition_num; i++) { - uint64_t shifted_active = 1ull << ((i % (64 / sizeof(T))) * sizeof(T)); - if (p[i / (64 / sizeof(T))] & shifted_active) { - count++; + } else { + out[i] = d[i]; + } + } + return {out, 256}; +} + +/** Helper function for SVE instructions with the format `fsqrt zd, + * pg/m, zn`. + * T represents the type of operands (e.g. for zn.d, T = double). + * Returns correctly formatted RegisterValue. */ +template +RegisterValue sveFsqrtPredicated_2vecs(std::vector& operands, + const uint16_t VL_bits) { + const T* d = operands[0].getAsVector(); + const uint64_t* p = operands[1].getAsVector(); + const T* n = operands[2].getAsVector(); + + const uint16_t partition_num = VL_bits / (sizeof(T) * 8); + T out[256 / sizeof(T)] = {0}; + for (int i = 0; i < partition_num; i++) { + uint64_t shifted_active = 1ull << ((i % (64 / sizeof(T))) * sizeof(T)); + if (p[i / (64 / sizeof(T))] & shifted_active) + out[i] = ::sqrt(n[i]); + else + out[i] = d[i]; + } + return {out, 256}; +} + +/** Helper function for SVE instructions with the format `inc + * xdn{, pattern{, MUL #imm}}`. + * T represents the type of operation (e.g. for INCB, T = int8_t). + * Returns single value of type int64_t. */ +template +int64_t sveInc_gprImm( + std::vector& operands, + const simeng::arch::aarch64::InstructionMetadata& metadata, + const uint16_t VL_bits) { + const int64_t n = operands[0].get(); + const uint8_t imm = static_cast(metadata.operands[1].imm); + const uint16_t elems = + sveGetPattern(metadata.operandStr, sizeof(T) * 8, VL_bits); + int64_t out = n + (elems * imm); + return out; +} + +/** Helper function for SVE instructions with the format `inc + * zdn{, pattern{, #imm}}`. 
+ * T represents the type of operands (e.g. for zdn.d, T = int64_t). + * Returns correctly formatted RegisterValue. */ +template +RegisterValue sveInc_imm( + std::vector& operands, + const simeng::arch::aarch64::InstructionMetadata& metadata, + const uint16_t VL_bits) { + const T* n = operands[0].getAsVector(); + const uint8_t imm = static_cast(metadata.operands[1].imm); + + const uint16_t partition_num = VL_bits / (sizeof(T) * 8); + typename std::make_signed::type out[256 / sizeof(T)] = {0}; + const uint16_t elems = + sveGetPattern(metadata.operandStr, sizeof(T) * 8, VL_bits); + + for (int i = 0; i < partition_num; i++) { + out[i] = n[i] + (elems * imm); + } + return {out, 256}; +} + +/** Helper function for SVE instructions with the format `incp xdn, pm`. + * T represents the type of operands (e.g. for pm.d, T = uint64_t). + * Returns single value of type uint64_t. */ +template +uint64_t sveIncp_gpr(std::vector& operands, + const uint16_t VL_bits) { + const uint64_t dn = operands[0].get(); + const uint64_t* p = operands[1].getAsVector(); + + const uint16_t partition_num = VL_bits / (sizeof(T) * 8); + uint64_t count = 0; + + for (int i = 0; i < partition_num; i++) { + uint64_t shifted_active = 1ull << ((i % (64 / sizeof(T))) * sizeof(T)); + if (p[i / (64 / sizeof(T))] & shifted_active) { + count++; + } + } + return dn + count; +} + +/** Helper function for SVE instructions with the format `index zd, <#imm, + * rn>, <#imm, rm>`. + * D represents the vector register type (e.g. zd.b would be int8_t). + * N represents the GPR type (e.g. for xn, xm, D = int64). + * Returns correctly formatted RegisterValue. */ +template +RegisterValue sveIndex( + std::vector& operands, + const simeng::arch::aarch64::InstructionMetadata& metadata, + const uint16_t VL_bits, bool op1isImm, bool op2isImm) { + const int op2Index = op1isImm ? 0 : 1; + const auto n = op1isImm ? static_cast(metadata.operands[1].imm) + : static_cast(operands[0].get()); + const auto m = op2isImm ? 
static_cast(metadata.operands[2].imm) + : static_cast(operands[op2Index].get()); + + const uint16_t partition_num = VL_bits / (sizeof(D) * 8); + D out[256 / sizeof(D)] = {0}; + + for (int i = 0; i < partition_num; i++) { + out[i] = static_cast(n + (i * m)); + } + return {out, 256}; +} + +/** Helper function for SVE instructions with the format ` + * pd, pg/z, pn, pm`. + * T represents the type of operands (e.g. for pn.d, T = uint64_t). + * Returns correctly formatted RegisterValue. */ +template +std::array sveLogicOp_preds( + std::vector& operands, const uint16_t VL_bits, + std::function func) { + const uint64_t* p = operands[0].getAsVector(); + const uint64_t* n = operands[1].getAsVector(); + const uint64_t* m = operands[2].getAsVector(); + + const uint16_t partition_num = VL_bits / (sizeof(T) * 8); + std::array out = {0}; + + for (int i = 0; i < partition_num; i++) { + uint64_t shifted_active = 1ull << ((i % (64 / sizeof(T))) * sizeof(T)); + if (p[i / (64 / sizeof(T))] & shifted_active) { + out[i / (64 / sizeof(T))] |= + (func(n[i / (64 / sizeof(T))], m[i / (64 / sizeof(T))]) & + shifted_active); + } + } + return out; +} + +/** Helper function for SVE instructions with the format ` + * zd, pg/m, zn, zm`. + * T represents the type of operands (e.g. for zn.d, T = uint64_t). + * Returns correctly formatted RegisterValue. 
*/ +template +RegisterValue sveLogicOpPredicated_3vecs(std::vector& operands, + const uint16_t VL_bits, + std::function func) { + const uint64_t* p = operands[0].getAsVector(); + const T* dn = operands[1].getAsVector(); + const T* m = operands[2].getAsVector(); + + const uint16_t partition_num = VL_bits / (sizeof(T) * 8); + T out[256 / sizeof(T)] = {0}; + for (int i = 0; i < partition_num; i++) { + uint64_t shifted_active = 1ull << ((i % (64 / sizeof(T))) * sizeof(T)); + if (p[i / (64 / sizeof(T))] & shifted_active) + out[i] = func(dn[i], m[i]); + else + out[i] = dn[i]; + } + return {out, 256}; +} + +/** Helper function for SVE instructions with the format ` + * zd, zn, zm`. + * T represents the type of operands (e.g. for zn.d, T = uint64_t). + * Returns correctly formatted RegisterValue. */ +template +RegisterValue sveLogicOpUnPredicated_3vecs(std::vector& operands, + const uint16_t VL_bits, + std::function func) { + const T* n = operands[0].getAsVector(); + const T* m = operands[1].getAsVector(); + + const uint16_t partition_num = VL_bits / (sizeof(T) * 8); + T out[256 / sizeof(T)] = {0}; + for (int i = 0; i < partition_num; i++) { + out[i] = func(n[i], m[i]); + } + return {out, 256}; +} + +/** Helper function for SVE instructions with the format `lsl sz, zn, #imm`. + * T represents the type of operands (e.g. for zn.d, T = uint64_t). + * Returns correctly formatted RegisterValue. */ +template +RegisterValue sveLsl_imm( + std::vector& operands, + const simeng::arch::aarch64::InstructionMetadata& metadata, + const uint16_t VL_bits) { + const T* n = operands[0].getAsVector(); + const T imm = static_cast(metadata.operands[2].imm); + + const uint16_t partition_num = VL_bits / (sizeof(T) * 8); + typename std::make_signed::type out[256 / sizeof(T)] = {0}; + + for (int i = 0; i < partition_num; i++) { + out[i] = (n[i] << imm); + } + return {out, 256}; +} + +/** Helper function for SVE instructions with the format `max zdn, zdn, + * #imm`. 
+ * T represents the type of operands (e.g. for zdn.d, T = uint64_t). + * Returns correctly formatted RegisterValue. */ +template +RegisterValue sveMax_vecImm( + std::vector& operands, + const simeng::arch::aarch64::InstructionMetadata& metadata, + const uint16_t VL_bits) { + const T* n = operands[0].getAsVector(); + T imm = static_cast(metadata.operands[2].imm); + + const uint16_t partition_num = VL_bits / (sizeof(T) * 8); + T out[256 / sizeof(T)] = {0}; + + for (int i = 0; i < partition_num; i++) { + out[i] = std::max(n[i], imm); + } + return {out, 256}; +} + +/** Helper function for SVE instructions with the format `max zdn, zdn, + * #imm`. + * T represents the type of operands (e.g. for zdn.d, T = uint64_t). + * Returns correctly formatted RegisterValue. */ +template +RegisterValue sveMaxPredicated_vecs(std::vector& operands, + const uint16_t VL_bits) { + const T* d = operands[0].getAsVector(); + const uint64_t* p = operands[1].getAsVector(); + const T* n = operands[2].getAsVector(); + const T* m = operands[3].getAsVector(); + + const uint16_t partition_num = VL_bits / (sizeof(T) * 8); + T out[256 / sizeof(T)] = {0}; + + for (int i = 0; i < partition_num; i++) { + uint64_t shifted_active = 1ull << ((i % (64 / sizeof(T))) * sizeof(T)); + if (p[i / (64 / sizeof(T))] & shifted_active) { + out[i] = std::max(n[i], m[i]); + } else + out[i] = d[i]; + } + return {out, 256}; +} + +/** Helper function for SVE instructions with the format `fmla zd, pg/m, zn, + * zm`. + * T represents the type of operands (e.g. for zn.d, T = uint64_t). + * Returns correctly formatted RegisterValue. 
*/ +template +RegisterValue sveMlaPredicated_vecs(std::vector& operands, + const uint16_t VL_bits) { + const T* d = operands[0].getAsVector(); + const uint64_t* p = operands[1].getAsVector(); + const T* n = operands[2].getAsVector(); + const T* m = operands[3].getAsVector(); + + const uint16_t partition_num = VL_bits / (sizeof(T) * 8); + T out[256 / sizeof(T)] = {0}; + for (int i = 0; i < partition_num; i++) { + uint64_t shifted_active = 1ull << ((i % (64 / sizeof(T))) * sizeof(T)); + if (p[i / (64 / sizeof(T))] & shifted_active) + out[i] = d[i] + (n[i] * m[i]); + else + out[i] = d[i]; + } + return {out, 256}; +} + +/** Helper function for SVE instructions with the format `fmla zda, zn, + * zm[index]`. + * T represents the type of operands (e.g. for zn.d, T = uint64_t). + * Returns correctly formatted RegisterValue. */ +template +RegisterValue sveMlaIndexed_vecs( + std::vector& operands, + const simeng::arch::aarch64::InstructionMetadata& metadata, + const uint16_t VL_bits) { + const T* d = operands[0].getAsVector(); + const T* n = operands[1].getAsVector(); + const T* m = operands[2].getAsVector(); + const size_t index = static_cast(metadata.operands[2].vector_index); + + const uint16_t elemsPer128 = 128 / (sizeof(T) * 8); + const uint16_t partition_num = VL_bits / (sizeof(T) * 8); + T out[256 / sizeof(T)] = {0}; + + for (size_t i = 0; i < partition_num; i += elemsPer128) { + const T zm_elem = m[i + index]; + for (size_t j = 0; j < elemsPer128; j++) { + out[i + j] = d[i + j] + (n[i + j] * zm_elem); + } + } + + return {out, 256}; +} + +/** Helper function for SVE instructions with the format `movprfx zd, + * pg/z, zn`. + * T represents the type of operands (e.g. for zd.d, T = uint64_t). + * Returns correctly formatted RegisterValue. 
*/ +template +RegisterValue sveMovprfxPredicated_destToZero( + std::vector& operands, const uint16_t VL_bits) { + // TODO: Adopt hint logic of the MOVPRFX instruction + const uint64_t* p = operands[0].getAsVector(); + const T* n = operands[1].getAsVector(); + + const uint16_t partition_num = VL_bits / (sizeof(T) * 8); + T out[256 / sizeof(T)] = {0}; + + for (int i = 0; i < partition_num; i++) { + uint64_t shifted_active = 1ull << ((i % (64 / sizeof(T))) * sizeof(T)); + if (p[i / (64 / sizeof(T))] & shifted_active) { + out[i] = n[i]; + } else { + out[i] = 0; + } + } + return {out, 256}; +} + +/** Helper function for SVE instructions with the format `movprfx zd, + * pg/m, zn`. + * T represents the type of operands (e.g. for zn.d, T = uint64_t). + * Returns correctly formatted RegisterValue. */ +template +RegisterValue sveMovprfxPredicated_destUnchanged( + std::vector& operands, const uint16_t VL_bits) { + // TODO: Adopt hint logic of the MOVPRFX instruction + const T* d = operands[0].getAsVector(); + const uint64_t* p = operands[1].getAsVector(); + const T* n = operands[2].getAsVector(); + + const uint16_t partition_num = VL_bits / (sizeof(T) * 8); + T out[256 / sizeof(T)] = {0}; + + for (int i = 0; i < partition_num; i++) { + uint64_t shifted_active = 1ull << ((i % (64 / sizeof(T))) * sizeof(T)); + if (p[i / (64 / sizeof(T))] & shifted_active) { + out[i] = n[i]; + } else { + out[i] = d[i]; + } + } + return {out, 256}; +} + +/** Helper function for SVE instructions with the format `mul zdn, pg/m, zdn, + * `. + * T represents the type of operands (e.g. for zn.d, T = uint64_t). + * Returns correctly formatted RegisterValue. 
*/ +template +RegisterValue sveMulPredicated( + std::vector& operands, + const simeng::arch::aarch64::InstructionMetadata& metadata, + const uint16_t VL_bits, bool useImm) { + bool isFP = std::is_floating_point::value; + const uint64_t* p = operands[0].getAsVector(); + const T* n = operands[1].getAsVector(); + const T* m; + T imm; + if (useImm) + imm = isFP ? metadata.operands[3].fp : metadata.operands[3].imm; + else + m = operands[2].getAsVector(); + + const uint16_t partition_num = VL_bits / (sizeof(T) * 8); + T out[256 / sizeof(T)] = {0}; + + for (int i = 0; i < partition_num; i++) { + uint64_t shifted_active = 1ull << ((i % (64 / sizeof(T))) * sizeof(T)); + if (p[i / (64 / sizeof(T))] & shifted_active) { + out[i] = n[i] * (useImm ? imm : m[i]); + } else + out[i] = n[i]; + } + return {out, 256}; +} + +/** Helper function for SVE instructions with the format `mulh zdn, pg/m, zdn, + * zm`. + * T represents the type of operands (e.g. for zn.s, T = int32_t). + * TT represents the type twice the length of T (e.g. for T = int8_t, TT = + * int16_T). + * Returns correctly formatted RegisterValue. */ +// TODO : Support for int64_t mulh operations. +template +RegisterValue sveMulhPredicated(std::vector& operands, + const uint16_t VL_bits) { + const uint64_t* p = operands[0].getAsVector(); + const T* n = operands[1].getAsVector(); + const T* m = operands[2].getAsVector(); + + const uint16_t partition_num = VL_bits / (sizeof(T) * 8); + T out[256 / sizeof(T)] = {0}; + + for (int i = 0; i < partition_num; i++) { + uint64_t shifted_active = 1ull << ((i % (64 / sizeof(T))) * sizeof(T)); + if (p[i / (64 / sizeof(T))] & shifted_active) { + bool isNeg = false; + T a = n[i]; + T b = m[i]; + if (a < 0) { + isNeg = !isNeg; + a = 0 - a; } - } - return dn + count; - } - - /** Helper function for SVE instructions with the format `index zd, <#imm, - * rn>, <#imm, rm>`. - * D represents the vector register type (e.g. zd.b would be int8_t). - * N represents the GPR type (e.g. 
for xn, xm, D = int64). - * Returns correctly formatted RegisterValue. */ - template - static RegisterValue sveIndex( - std::vector& operands, - const simeng::arch::aarch64::InstructionMetadata& metadata, - const uint16_t VL_bits, bool op1isImm, bool op2isImm) { - const int op2Index = op1isImm ? 0 : 1; - const auto n = op1isImm ? static_cast(metadata.operands[1].imm) - : static_cast(operands[0].get()); - const auto m = op2isImm ? static_cast(metadata.operands[2].imm) - : static_cast(operands[op2Index].get()); - - const uint16_t partition_num = VL_bits / (sizeof(D) * 8); - D out[256 / sizeof(D)] = {0}; - - for (int i = 0; i < partition_num; i++) { - out[i] = static_cast(n + (i * m)); - } - return {out, 256}; - } - - /** Helper function for SVE instructions with the format ` - * pd, pg/z, pn, pm`. - * T represents the type of operands (e.g. for pn.d, T = uint64_t). - * Returns correctly formatted RegisterValue. */ - template - static std::array sveLogicOp_preds( - std::vector& operands, const uint16_t VL_bits, - std::function func) { - const uint64_t* p = operands[0].getAsVector(); - const uint64_t* n = operands[1].getAsVector(); - const uint64_t* m = operands[2].getAsVector(); - - const uint16_t partition_num = VL_bits / (sizeof(T) * 8); - std::array out = {0}; - - for (int i = 0; i < partition_num; i++) { - uint64_t shifted_active = 1ull << ((i % (64 / sizeof(T))) * sizeof(T)); - if (p[i / (64 / sizeof(T))] & shifted_active) { - out[i / (64 / sizeof(T))] |= - (func(n[i / (64 / sizeof(T))], m[i / (64 / sizeof(T))]) & - shifted_active); + if (b < 0) { + isNeg = !isNeg; + b = 0 - b; } - } - return out; - } - - /** Helper function for SVE instructions with the format ` - * zd, pg/m, zn, zm`. - * T represents the type of operands (e.g. for zn.d, T = uint64_t). - * Returns correctly formatted RegisterValue. 
*/ - template - static RegisterValue sveLogicOpPredicated_3vecs( - std::vector& operands, const uint16_t VL_bits, - std::function func) { - const uint64_t* p = operands[0].getAsVector(); - const T* dn = operands[1].getAsVector(); - const T* m = operands[2].getAsVector(); - - const uint16_t partition_num = VL_bits / (sizeof(T) * 8); - T out[256 / sizeof(T)] = {0}; - for (int i = 0; i < partition_num; i++) { + TT tmp = (static_cast(a) * static_cast(b)); + if (isNeg) tmp = 0 - tmp; + + out[i] = static_cast(tmp >> (sizeof(T) * 8)); + } else + out[i] = n[i]; + } + return {out, 256}; +} + +/** Helper function for SVE instructions with the format `orr zd, zn, + * zm`. + * T represents the type of operands (e.g. for zn.d, T = uint64_t). + * Returns correctly formatted RegisterValue. */ +template +RegisterValue sveOrr_3vecs(std::vector& operands, + const uint16_t VL_bits) { + const T* n = operands[0].getAsVector(); + const T* m = operands[1].getAsVector(); + + const uint16_t partition_num = VL_bits / (sizeof(T) * 8); + T out[256 / sizeof(T)] = {0}; + + for (int i = 0; i < partition_num; i++) { + out[i] = n[i] | m[i]; + } + return {out, 256}; +} + +/** Helper function for SVE2 instructions with the format `psel pd, pn, + * pm.t[wa, #imm]`. + * T represents the type of operands (e.g. for pm.d, T = + * uint64_t). Returns an array of 4 uint64_t elements. 
*/ +template +std::array svePsel( + std::vector& operands, + const simeng::arch::aarch64::InstructionMetadata& metadata, + const uint16_t VL_bits) { + const uint64_t* pn = operands[0].getAsVector(); + const uint64_t* pm = operands[1].getAsVector(); + const uint32_t wa = operands[2].get(); + const uint32_t imm = metadata.operands[2].sme_index.disp; + + const uint16_t partition_num = VL_bits / (sizeof(T) * 8); + + uint32_t index = (wa + imm) % partition_num; + uint64_t shifted_active = 1ull << ((index % (64 / sizeof(T))) * sizeof(T)); + + std::array out = {0, 0, 0, 0}; + if (pm[index / (64 / sizeof(T))] & shifted_active) { + out = {pn[0], pn[1], pn[2], pn[3]}; + } + + return out; +} + +/** Helper function for SVE instructions with the format `ptrue pd{, + * pattern}. + * T represents the type of operands (e.g. for pd.d, T = uint64_t). + * Returns an array of 4 uint64_t elements. */ +template +std::array svePtrue( + const simeng::arch::aarch64::InstructionMetadata& metadata, + const uint16_t VL_bits) { + const uint16_t partition_num = VL_bits / (sizeof(T) * 8); + std::array out = {0, 0, 0, 0}; + + // Get pattern + const uint16_t count = + sveGetPattern(metadata.operandStr, sizeof(T) * 8, VL_bits); + // Exit early if count == 0 + if (count == 0) return out; + + for (int i = 0; i < partition_num; i++) { + if (i < count) { uint64_t shifted_active = 1ull << ((i % (64 / sizeof(T))) * sizeof(T)); - if (p[i / (64 / sizeof(T))] & shifted_active) - out[i] = func(dn[i], m[i]); - else - out[i] = dn[i]; + out[i / (64 / sizeof(T))] |= shifted_active; } - return {out, 256}; - } - - /** Helper function for SVE instructions with the format ` - * zd, zn, zm`. - * T represents the type of operands (e.g. for zn.d, T = uint64_t). - * Returns correctly formatted RegisterValue. 
*/ - template - static RegisterValue sveLogicOpUnPredicated_3vecs( - std::vector& operands, const uint16_t VL_bits, - std::function func) { - const T* n = operands[0].getAsVector(); - const T* m = operands[1].getAsVector(); - - const uint16_t partition_num = VL_bits / (sizeof(T) * 8); - T out[256 / sizeof(T)] = {0}; - for (int i = 0; i < partition_num; i++) { - out[i] = func(n[i], m[i]); - } - return {out, 256}; } + return out; +} - /** Helper function for SVE instructions with the format `lsl sz, zn, #imm`. - * T represents the type of operands (e.g. for zn.d, T = uint64_t). - * Returns correctly formatted RegisterValue. */ - template - static RegisterValue sveLsl_imm( - std::vector& operands, - const simeng::arch::aarch64::InstructionMetadata& metadata, - const uint16_t VL_bits) { - const T* n = operands[0].getAsVector(); - const T imm = static_cast(metadata.operands[2].imm); +/** Helper function for SVE instructions with the format `punpk pd.h, + * pn.b`. + * If `isHI` = false, then PUNPKLO is performed. + * Returns an array of 4 uint64_t elements. */ +std::array svePunpk(std::vector& operands, + const uint16_t VL_bits, bool isHi) { + const uint64_t* n = operands[0].getAsVector(); - const uint16_t partition_num = VL_bits / (sizeof(T) * 8); - typename std::make_signed::type out[256 / sizeof(T)] = {0}; + const uint16_t partition_num = VL_bits / 8; + std::array out = {0, 0, 0, 0}; + uint16_t index = isHi ? (partition_num / 2) : 0; - for (int i = 0; i < partition_num; i++) { - out[i] = (n[i] << imm); + for (int i = 0; i < partition_num / 2; i++) { + if (n[index / 64] & 1ull << index % 64) { + out[i / 32] |= 1ull << ((i * 2) % 64); } - return {out, 256}; + index++; } + return out; +} - /** Helper function for SVE instructions with the format `max zdn, zdn, - * #imm`. - * T represents the type of operands (e.g. for zdn.d, T = uint64_t). - * Returns correctly formatted RegisterValue. 
*/ - template - static RegisterValue sveMax_vecImm( - std::vector& operands, - const simeng::arch::aarch64::InstructionMetadata& metadata, - const uint16_t VL_bits) { - const T* n = operands[0].getAsVector(); - T imm = static_cast(metadata.operands[2].imm); - - const uint16_t partition_num = VL_bits / (sizeof(T) * 8); - T out[256 / sizeof(T)] = {0}; - - for (int i = 0; i < partition_num; i++) { - out[i] = std::max(n[i], imm); - } - return {out, 256}; - } - - /** Helper function for SVE instructions with the format `max zdn, zdn, - * #imm`. - * T represents the type of operands (e.g. for zdn.d, T = uint64_t). - * Returns correctly formatted RegisterValue. */ - template - static RegisterValue sveMaxPredicated_vecs( - std::vector& operands, const uint16_t VL_bits) { - const T* d = operands[0].getAsVector(); - const uint64_t* p = operands[1].getAsVector(); - const T* n = operands[2].getAsVector(); - const T* m = operands[3].getAsVector(); - - const uint16_t partition_num = VL_bits / (sizeof(T) * 8); - T out[256 / sizeof(T)] = {0}; - - for (int i = 0; i < partition_num; i++) { - uint64_t shifted_active = 1ull << ((i % (64 / sizeof(T))) * sizeof(T)); - if (p[i / (64 / sizeof(T))] & shifted_active) { - out[i] = std::max(n[i], m[i]); - } else - out[i] = d[i]; - } - return {out, 256}; - } - - /** Helper function for SVE instructions with the format `fmla zd, pg/m, zn, - * zm`. - * T represents the type of operands (e.g. for zn.d, T = uint64_t). - * Returns correctly formatted RegisterValue. 
*/ - template - static RegisterValue sveMlaPredicated_vecs( - std::vector& operands, const uint16_t VL_bits) { - const T* d = operands[0].getAsVector(); - const uint64_t* p = operands[1].getAsVector(); - const T* n = operands[2].getAsVector(); - const T* m = operands[3].getAsVector(); - - const uint16_t partition_num = VL_bits / (sizeof(T) * 8); - T out[256 / sizeof(T)] = {0}; - for (int i = 0; i < partition_num; i++) { - uint64_t shifted_active = 1ull << ((i % (64 / sizeof(T))) * sizeof(T)); - if (p[i / (64 / sizeof(T))] & shifted_active) - out[i] = d[i] + (n[i] * m[i]); - else - out[i] = d[i]; - } - return {out, 256}; - } - - /** Helper function for SVE instructions with the format `fmla zda, zn, - * zm[index]`. - * T represents the type of operands (e.g. for zn.d, T = uint64_t). - * Returns correctly formatted RegisterValue. */ - template - static RegisterValue sveMlaIndexed_vecs( - std::vector& operands, - const simeng::arch::aarch64::InstructionMetadata& metadata, - const uint16_t VL_bits) { - const T* d = operands[0].getAsVector(); - const T* n = operands[1].getAsVector(); - const T* m = operands[2].getAsVector(); - const size_t index = static_cast(metadata.operands[2].vector_index); - - const uint16_t elemsPer128 = 128 / (sizeof(T) * 8); - const uint16_t partition_num = VL_bits / (sizeof(T) * 8); - T out[256 / sizeof(T)] = {0}; - - for (size_t i = 0; i < partition_num; i += elemsPer128) { - const T zm_elem = m[i + index]; - for (size_t j = 0; j < elemsPer128; j++) { - out[i + j] = d[i + j] + (n[i + j] * zm_elem); - } - } - - return {out, 256}; - } - - /** Helper function for SVE instructions with the format `movprfx zd, - * pg/z, zn`. - * T represents the type of operands (e.g. for zd.d, T = uint64_t). - * Returns correctly formatted RegisterValue. 
*/ - template - static RegisterValue sveMovprfxPredicated_destToZero( - std::vector& operands, const uint16_t VL_bits) { - // TODO: Adopt hint logic of the MOVPRFX instruction - const uint64_t* p = operands[0].getAsVector(); - const T* n = operands[1].getAsVector(); +/** Helper function for SVE instructions with the format `rev pd, pn`. + * T represents the type of operands (e.g. for pd.d, T = uint64_t). + * Returns an array of 4 uint64_t elements. */ +template +std::array sveRev_predicates(std::vector& operands, + const uint16_t VL_bits) { + const uint64_t* n = operands[0].getAsVector(); + + const uint16_t partition_num = VL_bits / (sizeof(T) * 8); + std::array out = {0, 0, 0, 0}; + uint16_t index = partition_num - 1; + + for (int i = 0; i < partition_num; i++) { + uint64_t rev_shifted_active = 1ull + << ((index % (64 / sizeof(T))) * sizeof(T)); + uint64_t shifted_active = 1ull << ((i % (64 / sizeof(T))) * sizeof(T)); + out[index / (64 / (sizeof(T)))] |= + ((n[i / (64 / (sizeof(T)))] & shifted_active) == shifted_active) + ? rev_shifted_active + : 0; + index--; + } + return out; +} + +/** Helper function for SVE instructions with the format `rev zd, zn`. + * T represents the type of operands (e.g. for zn.d, T = uint64_t). + * Returns correctly formatted RegisterValue. 
*/ +template +RegisterValue sveRev_vecs(std::vector& operands, + const uint16_t VL_bits) { + const T* n = operands[0].getAsVector(); - const uint16_t partition_num = VL_bits / (sizeof(T) * 8); - T out[256 / sizeof(T)] = {0}; + const uint16_t partition_num = VL_bits / (sizeof(T) * 8); + T out[256 / sizeof(T)] = {0}; + uint16_t index = partition_num - 1; - for (int i = 0; i < partition_num; i++) { - uint64_t shifted_active = 1ull << ((i % (64 / sizeof(T))) * sizeof(T)); - if (p[i / (64 / sizeof(T))] & shifted_active) { - out[i] = n[i]; - } else { - out[i] = 0; - } - } - return {out, 256}; + for (int i = 0; i < partition_num; i++) { + out[i] = n[index]; + index--; } + return {out, 256}; +} - /** Helper function for SVE instructions with the format `movprfx zd, - * pg/m, zn`. - * T represents the type of operands (e.g. for zn.d, T = uint64_t). - * Returns correctly formatted RegisterValue. */ - template - static RegisterValue sveMovprfxPredicated_destUnchanged( - std::vector& operands, const uint16_t VL_bits) { - // TODO: Adopt hint logic of the MOVPRFX instruction - const T* d = operands[0].getAsVector(); - const uint64_t* p = operands[1].getAsVector(); - const T* n = operands[2].getAsVector(); - - const uint16_t partition_num = VL_bits / (sizeof(T) * 8); - T out[256 / sizeof(T)] = {0}; - - for (int i = 0; i < partition_num; i++) { - uint64_t shifted_active = 1ull << ((i % (64 / sizeof(T))) * sizeof(T)); - if (p[i / (64 / sizeof(T))] & shifted_active) { - out[i] = n[i]; - } else { - out[i] = d[i]; - } - } - return {out, 256}; - } - - /** Helper function for SVE instructions with the format `mul zdn, pg/m, zdn, - * `. - * T represents the type of operands (e.g. for zn.d, T = uint64_t). - * Returns correctly formatted RegisterValue. 
*/ - template - static RegisterValue sveMulPredicated( - std::vector& operands, - const simeng::arch::aarch64::InstructionMetadata& metadata, - const uint16_t VL_bits, bool useImm) { - bool isFP = std::is_floating_point::value; - const uint64_t* p = operands[0].getAsVector(); - const T* n = operands[1].getAsVector(); - const T* m; - T imm; - if (useImm) - imm = isFP ? metadata.operands[3].fp : metadata.operands[3].imm; +/** Helper function for SVE instructions with the format `sel zd, pg, zn, + * zm`. + * T represents the type of operands (e.g. for zn.d, T = uint64_t). + * Returns correctly formatted RegisterValue. */ +template +RegisterValue sveSel_zpzz(std::vector& operands, + const uint16_t VL_bits) { + const uint64_t* p = operands[0].getAsVector(); + const T* n = operands[1].getAsVector(); + const T* m = operands[2].getAsVector(); + + const uint16_t partition_num = VL_bits / (sizeof(T) * 8); + T out[256 / sizeof(T)] = {0}; + for (int i = 0; i < partition_num; i++) { + uint64_t shifted_active = 1ull << ((i % (64 / sizeof(T))) * sizeof(T)); + if (p[i / (64 / sizeof(T))] & shifted_active) + out[i] = n[i]; else - m = operands[2].getAsVector(); - - const uint16_t partition_num = VL_bits / (sizeof(T) * 8); - T out[256 / sizeof(T)] = {0}; - - for (int i = 0; i < partition_num; i++) { - uint64_t shifted_active = 1ull << ((i % (64 / sizeof(T))) * sizeof(T)); - if (p[i / (64 / sizeof(T))] & shifted_active) { - out[i] = n[i] * (useImm ? imm : m[i]); - } else - out[i] = n[i]; - } - return {out, 256}; - } - - /** Helper function for SVE instructions with the format `mulh zdn, pg/m, zdn, - * zm`. - * T represents the type of operands (e.g. for zn.s, T = int32_t). - * TT represents the type twice the length of T (e.g. for T = int8_t, TT = - * int16_T). - * Returns correctly formatted RegisterValue. */ - // TODO : Support for int64_t mulh operations. 
- template - static RegisterValue sveMulhPredicated(std::vector& operands, - const uint16_t VL_bits) { - const uint64_t* p = operands[0].getAsVector(); - const T* n = operands[1].getAsVector(); - const T* m = operands[2].getAsVector(); - - const uint16_t partition_num = VL_bits / (sizeof(T) * 8); - T out[256 / sizeof(T)] = {0}; - - for (int i = 0; i < partition_num; i++) { - uint64_t shifted_active = 1ull << ((i % (64 / sizeof(T))) * sizeof(T)); - if (p[i / (64 / sizeof(T))] & shifted_active) { - bool isNeg = false; - T a = n[i]; - T b = m[i]; - if (a < 0) { - isNeg = !isNeg; - a = 0 - a; - } - if (b < 0) { - isNeg = !isNeg; - b = 0 - b; - } - TT tmp = (static_cast(a) * static_cast(b)); - if (isNeg) tmp = 0 - tmp; - - out[i] = static_cast(tmp >> (sizeof(T) * 8)); - } else - out[i] = n[i]; - } - return {out, 256}; - } - - /** Helper function for SVE instructions with the format `orr zd, zn, - * zm`. - * T represents the type of operands (e.g. for zn.d, T = uint64_t). - * Returns correctly formatted RegisterValue. */ - template - static RegisterValue sveOrr_3vecs(std::vector& operands, - const uint16_t VL_bits) { - const T* n = operands[0].getAsVector(); - const T* m = operands[1].getAsVector(); - - const uint16_t partition_num = VL_bits / (sizeof(T) * 8); - T out[256 / sizeof(T)] = {0}; - - for (int i = 0; i < partition_num; i++) { - out[i] = n[i] | m[i]; - } - return {out, 256}; - } - - /** Helper function for SVE2 instructions with the format `psel pd, pn, - * pm.t[wa, #imm]`. - * T represents the type of operands (e.g. for pm.d, T = - * uint64_t). Returns an array of 4 uint64_t elements. 
*/ - template - static std::array svePsel( - std::vector& operands, - const simeng::arch::aarch64::InstructionMetadata& metadata, - const uint16_t VL_bits) { - const uint64_t* pn = operands[0].getAsVector(); - const uint64_t* pm = operands[1].getAsVector(); - const uint32_t wa = operands[2].get(); - const uint32_t imm = metadata.operands[2].sme_index.disp; - - const uint16_t partition_num = VL_bits / (sizeof(T) * 8); - - uint32_t index = (wa + imm) % partition_num; + out[i] = m[i]; + } + return {out, 256}; +} + +/** Helper function for SVE instructions with the format `sminv rd, pg, zn`. + * T represents the type of operands (e.g. for zn.d, T = uint64_t). + * Returns correctly formatted RegisterValue. */ +template +RegisterValue sveSminv(std::vector& operands, + const uint16_t VL_bits) { + const uint64_t* p = operands[0].getAsVector(); + const T* n = operands[1].getAsVector(); + + const uint16_t partition_num = VL_bits / (sizeof(T) * 8); + T out = std::numeric_limits::max(); + + for (int i = 0; i < partition_num; i++) { + uint64_t shifted_active = 1ull << ((i % (64 / sizeof(T))) * sizeof(T)); + if (p[i / (64 / sizeof(T))] & shifted_active) out = std::min(out, n[i]); + } + return {out, 256}; +} + +/** Helper function for SVE instructions with the format `Sub zd, zn, + * zm`. + * T represents the type of operands (e.g. for zn.d, T = uint64_t). + * Returns correctly formatted RegisterValue. */ +template +RegisterValue sveSub_3vecs(std::vector& operands, + const uint16_t VL_bits) { + const T* n = operands[0].getAsVector(); + const T* m = operands[1].getAsVector(); + + const uint16_t partition_num = VL_bits / (sizeof(T) * 8); + T out[256 / sizeof(T)] = {0}; + + for (int i = 0; i < partition_num; i++) { + out[i] = n[i] - m[i]; + } + return {out, 256}; +} + +/** Helper function for SVE instructions with the format `Sub zdn, pg/m, zdn, + * zm`. + * T represents the type of operands (e.g. for zn.d, T = uint64_t). + * Returns correctly formatted RegisterValue. 
*/ +template +RegisterValue sveSubrPredicated_3vecs(std::vector& operands, + const uint16_t VL_bits) { + const uint64_t* p = operands[0].getAsVector(); + const T* dn = operands[1].getAsVector(); + const T* m = operands[2].getAsVector(); + + const uint16_t partition_num = VL_bits / (sizeof(T) * 8); + T out[256 / sizeof(T)] = {0}; + + for (int i = 0; i < partition_num; i++) { + uint64_t shifted_active = 1ull << ((i % (64 / sizeof(T))) * sizeof(T)); + if (p[i / (64 / sizeof(T))] & shifted_active) { + out[i] = m[i] - dn[i]; + } else { + out[i] = dn[i]; + } + } + return {out, 256}; +} + +/** Helper function for SVE instructions with the format `Sub zdn, pg/m, zdn, + * #imm`. + * T represents the type of operands (e.g. for zdn.d, T = uint64_t). + * Returns correctly formatted RegisterValue. */ +template +RegisterValue sveSubPredicated_imm( + std::vector& operands, + const simeng::arch::aarch64::InstructionMetadata& metadata, + const uint16_t VL_bits) { + bool isFP = std::is_floating_point::value; + const uint64_t* p = operands[0].getAsVector(); + const T* dn = operands[1].getAsVector(); + const auto imm = isFP ? metadata.operands[3].fp : metadata.operands[3].imm; + + const uint16_t partition_num = VL_bits / (sizeof(T) * 8); + T out[256 / sizeof(T)] = {0}; + + for (int i = 0; i < partition_num; i++) { + uint64_t shifted_active = 1ull << ((i % (64 / sizeof(T))) * sizeof(T)); + if (p[i / (64 / sizeof(T))] & shifted_active) { + out[i] = dn[i] - imm; + } else { + out[i] = dn[i]; + } + } + return {out, 256}; +} + +/** Helper function for SVE instructions with the format `sxt zd, pg, + * zn`. + * T represents the type of vector registers (e.g. for zd.d, T = int64_t). + * C represents the type of the cast required - is linked to instruction + * variant used (i.e. sxtw requires int32_t). + * Returns correctly formatted RegisterValue. 
*/ +template +RegisterValue sveSxtPredicated(std::vector& operands, + const uint16_t VL_bits) { + const T* d = operands[0].getAsVector(); + const uint64_t* p = operands[1].getAsVector(); + const T* n = operands[2].getAsVector(); + + const uint16_t partition_num = VL_bits / (sizeof(T) * 8); + T out[256 / sizeof(T)] = {0}; + for (int i = 0; i < partition_num; i++) { + uint64_t shifted_active = 1ull << ((i % (64 / sizeof(T))) * sizeof(T)); + if (p[i / (64 / sizeof(T))] & shifted_active) { + // Cast to C to get 'least significant sub-element' + // Then cast back to T to sign-extend this 'sub-element' + out[i] = static_cast(static_cast(n[i])); + } else { + out[i] = d[i]; + } + } + return {out, 256}; +} + +/** Helper function for SVE instructions with the format `trn1 zd, zn, zm`. + * T represents the type of operands (e.g. for zn.d, T = uint64_t). + * Returns correctly formatted RegisterValue. */ +template +RegisterValue sveTrn1_3vecs(std::vector& operands, + const uint16_t VL_bits) { + const T* n = operands[0].getAsVector(); + const T* m = operands[1].getAsVector(); + + const uint16_t partition_num = VL_bits / (sizeof(T) * 8); + T out[256 / sizeof(T)] = {0}; + + for (int i = 0; i < (partition_num / 2); i++) { + out[2 * i] = n[(2 * i)]; + out[(2 * i) + 1] = m[(2 * i)]; + } + return {out, 256}; +} + +/** Helper function for SVE instructions with the format `trn2 zd, zn, zm`. + * T represents the type of operands (e.g. for zn.d, T = uint64_t). + * Returns correctly formatted RegisterValue. 
*/ +template +RegisterValue sveTrn2_3vecs(std::vector& operands, + const uint16_t VL_bits) { + const T* n = operands[0].getAsVector(); + const T* m = operands[1].getAsVector(); + + const uint16_t partition_num = VL_bits / (sizeof(T) * 8); + T out[256 / sizeof(T)] = {0}; + + for (int i = 0; i < (partition_num / 2); i++) { + out[2 * i] = n[(2 * i) + 1]; + out[(2 * i) + 1] = m[(2 * i) + 1]; + } + return {out, 256}; +} + +/** Helper function for SVE instructions with the format `unpk>hi,lo> zd, + * zn`. + * D represents the type of the destination register (e.g. int32_t for + * zd.s). + * N represents the type of the source register (e.g. int8_t for zn.b). + * Returns correctly formatted RegisterValue. */ +template +RegisterValue sveUnpk_vecs(std::vector& operands, + const uint16_t VL_bits, bool isHi) { + const N* n = operands[0].getAsVector(); + + const uint16_t partition_num = VL_bits / (sizeof(D) * 8); + D out[256 / sizeof(D)] = {0}; + + for (int i = 0; i < partition_num; i++) { + int index = isHi ? (partition_num + i) : i; + out[i] = static_cast(n[index]); + } + return {out, 256}; +} + +/** Helper function for SVE instructions with the format `uqdec + * d{, pattern{, MUL #imm}}`. + * D represents the type of dest. register(e.g. uint32_t for wd). + * N represents the type of the operation (e.g. for UQDECH, N = 16u). + * Returns single value of type uint64_t. */ +template +uint64_t sveUqdec(std::vector& operands, + const simeng::arch::aarch64::InstructionMetadata& metadata, + const uint16_t VL_bits) { + const D d = operands[0].get(); + const uint8_t imm = metadata.operands[1].imm; + const uint16_t count = sveGetPattern(metadata.operandStr, N, VL_bits); + + // The range of possible values does not fit in the range of any integral + // type, so a double is used as an intermediate value. The end result must + // be saturated to fit in uint64_t. 
+ auto intermediate = double(d) - (imm * count); + if (intermediate < 0) { + return (uint64_t)0; + } + return (uint64_t)(d - (imm * count)); +} + +/** Helper function for SVE instructions with the format `uzp<1,2> zd, zn, + * zm`. + * T represents the type of operands (e.g. for zn.d, T = uint64_t). + * Returns correctly formatted RegisterValue. */ +template +RegisterValue sveUzp_vecs(std::vector& operands, + const uint16_t VL_bits, bool isUzp1) { + const T* n = operands[0].getAsVector(); + const T* m = operands[1].getAsVector(); + + const uint16_t partition_num = VL_bits / (sizeof(T) * 8); + T out[256 / sizeof(T)] = {0}; + + for (int i = 0; i < partition_num / 2; i++) { + // UZP1 concatenates even elements. UZP2 concatenates odd. + int index = isUzp1 ? (2 * i) : (2 * i) + 1; + out[i] = n[index]; + } + for (int i = 0; i < partition_num / 2; i++) { + int index = isUzp1 ? (2 * i) : (2 * i) + 1; + out[partition_num / 2 + i] = m[index]; + } + return {out, 256}; +} + +/** Helper function for SVE instructions with the format `whilelo pd, + * n, m`. + * T represents the type of operands n and m (e.g. for wn, T = uint32_t). + * P represents the type of operand p (e.g. for pd.b, P = uint8_t). + * Returns tuple of type [pred results (array of 4 uint64_t), nzcv]. */ +template +std::tuple, uint8_t> sveWhilelo( + std::vector& operands, const uint16_t VL_bits, + bool calcNZCV) { + const T n = operands[0].get(); + const T m = operands[1].get(); + + const uint16_t partition_num = VL_bits / (sizeof(P) * 8); + std::array out = {0, 0, 0, 0}; + uint16_t index = 0; + + for (int i = 0; i < partition_num; i++) { + // Determine whether lane should be active and shift to align with + // element in predicate register. + uint64_t shifted_active = + (n + i) < m ? 
1ull << ((i % (64 / (sizeof(P))) * (sizeof(P)))) : 0; + out[index / (64 / (sizeof(P)))] = + out[index / (64 / (sizeof(P)))] | shifted_active; + index++; + } + // Byte count = sizeof(P) as destination predicate is predicate of P + // bytes. + uint8_t nzcv = calcNZCV ? getNZCVfromPred(out, VL_bits, sizeof(P)) : 0; + return {out, nzcv}; +} + +/** Helper function for SVE instructions with the format `zip<1,2> pd, pn, + * pm`. + * T represents the type of operands (e.g. for pn.d, T = uint64_t). + * Returns an array of 4 uint64_t elements. */ +template +std::array sveZip_preds(std::vector& operands, + const uint16_t VL_bits, bool isZip2) { + const uint64_t* n = operands[0].getAsVector(); + const uint64_t* m = operands[1].getAsVector(); + + const uint16_t partition_num = VL_bits / (sizeof(T) * 8); + std::array out = {0, 0, 0, 0}; + + bool interleave = false; + int index = isZip2 ? (partition_num / 2) : 0; + for (int i = 0; i < partition_num; i++) { uint64_t shifted_active = 1ull << ((index % (64 / sizeof(T))) * sizeof(T)); - - std::array out = {0, 0, 0, 0}; - if (pm[index / (64 / sizeof(T))] & shifted_active) { - out = {pn[0], pn[1], pn[2], pn[3]}; - } - - return out; - } - - /** Helper function for SVE instructions with the format `ptrue pd{, - * pattern}. - * T represents the type of operands (e.g. for pd.d, T = uint64_t). - * Returns an array of 4 uint64_t elements. 
*/ - template - static std::array svePtrue( - const simeng::arch::aarch64::InstructionMetadata& metadata, - const uint16_t VL_bits) { - const uint16_t partition_num = VL_bits / (sizeof(T) * 8); - std::array out = {0, 0, 0, 0}; - - // Get pattern - const uint16_t count = - AuxFunc::sveGetPattern(metadata.operandStr, sizeof(T) * 8, VL_bits); - // Exit early if count == 0 - if (count == 0) return out; - - for (int i = 0; i < partition_num; i++) { - if (i < count) { - uint64_t shifted_active = 1ull << ((i % (64 / sizeof(T))) * sizeof(T)); - out[i / (64 / sizeof(T))] |= shifted_active; - } - } - return out; - } - - /** Helper function for SVE instructions with the format `punpk pd.h, - * pn.b`. - * If `isHI` = false, then PUNPKLO is performed. - * Returns an array of 4 uint64_t elements. */ - static std::array svePunpk(std::vector& operands, - const uint16_t VL_bits, bool isHi) { - const uint64_t* n = operands[0].getAsVector(); - - const uint16_t partition_num = VL_bits / 8; - std::array out = {0, 0, 0, 0}; - uint16_t index = isHi ? (partition_num / 2) : 0; - - for (int i = 0; i < partition_num / 2; i++) { - if (n[index / 64] & 1ull << index % 64) { - out[i / 32] |= 1ull << ((i * 2) % 64); - } + if (interleave) { + out[i / (64 / sizeof(T))] |= + ((m[index / (64 / sizeof(T))] & shifted_active) == shifted_active) + ? static_cast(1ull + << ((i % (64 / sizeof(T))) * sizeof(T))) + : 0; index++; - } - return out; - } - - /** Helper function for SVE instructions with the format `rev pd, pn`. - * T represents the type of operands (e.g. for pd.d, T = uint64_t). - * Returns an array of 4 uint64_t elements. 
*/ - template - static std::array sveRev_predicates( - std::vector& operands, const uint16_t VL_bits) { - const uint64_t* n = operands[0].getAsVector(); - - const uint16_t partition_num = VL_bits / (sizeof(T) * 8); - std::array out = {0, 0, 0, 0}; - uint16_t index = partition_num - 1; - - for (int i = 0; i < partition_num; i++) { - uint64_t rev_shifted_active = 1ull - << ((index % (64 / sizeof(T))) * sizeof(T)); - uint64_t shifted_active = 1ull << ((i % (64 / sizeof(T))) * sizeof(T)); - out[index / (64 / (sizeof(T)))] |= - ((n[i / (64 / (sizeof(T)))] & shifted_active) == shifted_active) - ? rev_shifted_active + } else { + out[i / (64 / sizeof(T))] |= + ((n[index / (64 / sizeof(T))] & shifted_active) == shifted_active) + ? static_cast(1ull + << ((i % (64 / sizeof(T))) * sizeof(T))) : 0; - index--; - } - return out; - } - - /** Helper function for SVE instructions with the format `rev zd, zn`. - * T represents the type of operands (e.g. for zn.d, T = uint64_t). - * Returns correctly formatted RegisterValue. */ - template - static RegisterValue sveRev_vecs(std::vector& operands, - const uint16_t VL_bits) { - const T* n = operands[0].getAsVector(); - - const uint16_t partition_num = VL_bits / (sizeof(T) * 8); - T out[256 / sizeof(T)] = {0}; - uint16_t index = partition_num - 1; - - for (int i = 0; i < partition_num; i++) { - out[i] = n[index]; - index--; } - return {out, 256}; - } - - /** Helper function for SVE instructions with the format `sel zd, pg, zn, - * zm`. - * T represents the type of operands (e.g. for zn.d, T = uint64_t). - * Returns correctly formatted RegisterValue. 
*/ - template - static RegisterValue sveSel_zpzz(std::vector& operands, - const uint16_t VL_bits) { - const uint64_t* p = operands[0].getAsVector(); - const T* n = operands[1].getAsVector(); - const T* m = operands[2].getAsVector(); - - const uint16_t partition_num = VL_bits / (sizeof(T) * 8); - T out[256 / sizeof(T)] = {0}; - for (int i = 0; i < partition_num; i++) { - uint64_t shifted_active = 1ull << ((i % (64 / sizeof(T))) * sizeof(T)); - if (p[i / (64 / sizeof(T))] & shifted_active) - out[i] = n[i]; - else - out[i] = m[i]; - } - return {out, 256}; - } - - /** Helper function for SVE instructions with the format `sminv rd, pg, zn`. - * T represents the type of operands (e.g. for zn.d, T = uint64_t). - * Returns correctly formatted RegisterValue. */ - template - static RegisterValue sveSminv(std::vector& operands, - const uint16_t VL_bits) { - const uint64_t* p = operands[0].getAsVector(); - const T* n = operands[1].getAsVector(); - - const uint16_t partition_num = VL_bits / (sizeof(T) * 8); - T out = std::numeric_limits::max(); - - for (int i = 0; i < partition_num; i++) { - uint64_t shifted_active = 1ull << ((i % (64 / sizeof(T))) * sizeof(T)); - if (p[i / (64 / sizeof(T))] & shifted_active) out = std::min(out, n[i]); - } - return {out, 256}; - } - - /** Helper function for SVE instructions with the format `Sub zd, zn, - * zm`. - * T represents the type of operands (e.g. for zn.d, T = uint64_t). - * Returns correctly formatted RegisterValue. */ - template - static RegisterValue sveSub_3vecs(std::vector& operands, - const uint16_t VL_bits) { - const T* n = operands[0].getAsVector(); - const T* m = operands[1].getAsVector(); - - const uint16_t partition_num = VL_bits / (sizeof(T) * 8); - T out[256 / sizeof(T)] = {0}; - - for (int i = 0; i < partition_num; i++) { - out[i] = n[i] - m[i]; - } - return {out, 256}; - } - - /** Helper function for SVE instructions with the format `Sub zdn, pg/m, zdn, - * zm`. - * T represents the type of operands (e.g. 
for zn.d, T = uint64_t). - * Returns correctly formatted RegisterValue. */ - template - static RegisterValue sveSubrPredicated_3vecs( - std::vector& operands, const uint16_t VL_bits) { - const uint64_t* p = operands[0].getAsVector(); - const T* dn = operands[1].getAsVector(); - const T* m = operands[2].getAsVector(); - - const uint16_t partition_num = VL_bits / (sizeof(T) * 8); - T out[256 / sizeof(T)] = {0}; - - for (int i = 0; i < partition_num; i++) { - uint64_t shifted_active = 1ull << ((i % (64 / sizeof(T))) * sizeof(T)); - if (p[i / (64 / sizeof(T))] & shifted_active) { - out[i] = m[i] - dn[i]; - } else { - out[i] = dn[i]; - } - } - return {out, 256}; - } - - /** Helper function for SVE instructions with the format `Sub zdn, pg/m, zdn, - * #imm`. - * T represents the type of operands (e.g. for zdn.d, T = uint64_t). - * Returns correctly formatted RegisterValue. */ - template - static RegisterValue sveSubPredicated_imm( - std::vector& operands, - const simeng::arch::aarch64::InstructionMetadata& metadata, - const uint16_t VL_bits) { - bool isFP = std::is_floating_point::value; - const uint64_t* p = operands[0].getAsVector(); - const T* dn = operands[1].getAsVector(); - const auto imm = isFP ? metadata.operands[3].fp : metadata.operands[3].imm; - - const uint16_t partition_num = VL_bits / (sizeof(T) * 8); - T out[256 / sizeof(T)] = {0}; - - for (int i = 0; i < partition_num; i++) { - uint64_t shifted_active = 1ull << ((i % (64 / sizeof(T))) * sizeof(T)); - if (p[i / (64 / sizeof(T))] & shifted_active) { - out[i] = dn[i] - imm; - } else { - out[i] = dn[i]; - } - } - return {out, 256}; - } - - /** Helper function for SVE instructions with the format `sxt zd, pg, - * zn`. - * T represents the type of vector registers (e.g. for zd.d, T = int64_t). - * C represents the type of the cast required - is linked to instruction - * variant used (i.e. sxtw requires int32_t). - * Returns correctly formatted RegisterValue. 
*/ - template - static RegisterValue sveSxtPredicated(std::vector& operands, - const uint16_t VL_bits) { - const T* d = operands[0].getAsVector(); - const uint64_t* p = operands[1].getAsVector(); - const T* n = operands[2].getAsVector(); - - const uint16_t partition_num = VL_bits / (sizeof(T) * 8); - T out[256 / sizeof(T)] = {0}; - for (int i = 0; i < partition_num; i++) { - uint64_t shifted_active = 1ull << ((i % (64 / sizeof(T))) * sizeof(T)); - if (p[i / (64 / sizeof(T))] & shifted_active) { - // Cast to C to get 'least significant sub-element' - // Then cast back to T to sign-extend this 'sub-element' - out[i] = static_cast(static_cast(n[i])); - } else { - out[i] = d[i]; - } - } - return {out, 256}; - } - - /** Helper function for SVE instructions with the format `trn1 zd, zn, zm`. - * T represents the type of operands (e.g. for zn.d, T = uint64_t). - * Returns correctly formatted RegisterValue. */ - template - static RegisterValue sveTrn1_3vecs(std::vector& operands, - const uint16_t VL_bits) { - const T* n = operands[0].getAsVector(); - const T* m = operands[1].getAsVector(); - - const uint16_t partition_num = VL_bits / (sizeof(T) * 8); - T out[256 / sizeof(T)] = {0}; - - for (int i = 0; i < (partition_num / 2); i++) { - out[2 * i] = n[(2 * i)]; - out[(2 * i) + 1] = m[(2 * i)]; - } - return {out, 256}; - } - - /** Helper function for SVE instructions with the format `trn2 zd, zn, zm`. - * T represents the type of operands (e.g. for zn.d, T = uint64_t). - * Returns correctly formatted RegisterValue. 
*/ - template - static RegisterValue sveTrn2_3vecs(std::vector& operands, - const uint16_t VL_bits) { - const T* n = operands[0].getAsVector(); - const T* m = operands[1].getAsVector(); - - const uint16_t partition_num = VL_bits / (sizeof(T) * 8); - T out[256 / sizeof(T)] = {0}; - - for (int i = 0; i < (partition_num / 2); i++) { - out[2 * i] = n[(2 * i) + 1]; - out[(2 * i) + 1] = m[(2 * i) + 1]; - } - return {out, 256}; - } - - /** Helper function for SVE instructions with the format `unpk>hi,lo> zd, - * zn`. - * D represents the type of the destination register (e.g. int32_t for - * zd.s). - * N represents the type of the source register (e.g. int8_t for zn.b). - * Returns correctly formatted RegisterValue. */ - template - static RegisterValue sveUnpk_vecs(std::vector& operands, - const uint16_t VL_bits, bool isHi) { - const N* n = operands[0].getAsVector(); - - const uint16_t partition_num = VL_bits / (sizeof(D) * 8); - D out[256 / sizeof(D)] = {0}; - - for (int i = 0; i < partition_num; i++) { - int index = isHi ? (partition_num + i) : i; - out[i] = static_cast(n[index]); - } - return {out, 256}; - } - - /** Helper function for SVE instructions with the format `uqdec - * d{, pattern{, MUL #imm}}`. - * D represents the type of dest. register(e.g. uint32_t for wd). - * N represents the type of the operation (e.g. for UQDECH, N = 16u). - * Returns single value of type uint64_t. */ - template - static uint64_t sveUqdec( - std::vector& operands, - const simeng::arch::aarch64::InstructionMetadata& metadata, - const uint16_t VL_bits) { - const D d = operands[0].get(); - const uint8_t imm = metadata.operands[1].imm; - const uint16_t count = - AuxFunc::sveGetPattern(metadata.operandStr, N, VL_bits); - - // The range of possible values does not fit in the range of any integral - // type, so a double is used as an intermediate value. The end result must - // be saturated to fit in uint64_t. 
- auto intermediate = double(d) - (imm * count); - if (intermediate < 0) { - return (uint64_t)0; - } - return (uint64_t)(d - (imm * count)); - } - - /** Helper function for SVE instructions with the format `uzp<1,2> zd, zn, - * zm`. - * T represents the type of operands (e.g. for zn.d, T = uint64_t). - * Returns correctly formatted RegisterValue. */ - template - static RegisterValue sveUzp_vecs(std::vector& operands, - const uint16_t VL_bits, bool isUzp1) { - const T* n = operands[0].getAsVector(); - const T* m = operands[1].getAsVector(); - - const uint16_t partition_num = VL_bits / (sizeof(T) * 8); - T out[256 / sizeof(T)] = {0}; - - for (int i = 0; i < partition_num / 2; i++) { - // UZP1 concatenates even elements. UZP2 concatenates odd. - int index = isUzp1 ? (2 * i) : (2 * i) + 1; - out[i] = n[index]; - } - for (int i = 0; i < partition_num / 2; i++) { - int index = isUzp1 ? (2 * i) : (2 * i) + 1; - out[partition_num / 2 + i] = m[index]; - } - return {out, 256}; - } - - /** Helper function for SVE instructions with the format `whilelo pd, - * n, m`. - * T represents the type of operands n and m (e.g. for wn, T = uint32_t). - * P represents the type of operand p (e.g. for pd.b, P = uint8_t). - * Returns tuple of type [pred results (array of 4 uint64_t), nzcv]. */ - template - static std::tuple, uint8_t> sveWhilelo( - std::vector& operands, const uint16_t VL_bits, - bool calcNZCV) { - const T n = operands[0].get(); - const T m = operands[1].get(); - - const uint16_t partition_num = VL_bits / (sizeof(P) * 8); - std::array out = {0, 0, 0, 0}; - uint16_t index = 0; - - for (int i = 0; i < partition_num; i++) { - // Determine whether lane should be active and shift to align with - // element in predicate register. - uint64_t shifted_active = - (n + i) < m ? 
1ull << ((i % (64 / (sizeof(P))) * (sizeof(P)))) : 0; - out[index / (64 / (sizeof(P)))] = - out[index / (64 / (sizeof(P)))] | shifted_active; + interleave = !interleave; + } + return out; +} + +/** Helper function for SVE instructions with the format `zip<1,2> zd, zn, + * zm`. + * T represents the type of operands (e.g. for zn.d, T = uint64_t). + * Returns correctly formatted RegisterValue. */ +template +RegisterValue sveZip_vecs(std::vector& operands, + const uint16_t VL_bits, bool isZip2) { + const T* n = operands[0].getAsVector(); + const T* m = operands[1].getAsVector(); + + const uint16_t partition_num = VL_bits / (sizeof(T) * 8); + T out[256 / sizeof(T)] = {0}; + + bool interleave = false; + int index = isZip2 ? (partition_num / 2) : 0; + for (int i = 0; i < partition_num; i++) { + if (interleave) { + out[i] = m[index]; index++; + } else { + out[i] = n[index]; } - // Byte count = sizeof(P) as destination predicate is predicate of P - // bytes. - uint8_t nzcv = - calcNZCV ? AuxFunc::getNZCVfromPred(out, VL_bits, sizeof(P)) : 0; - return {out, nzcv}; - } - - /** Helper function for SVE instructions with the format `zip<1,2> pd, pn, - * pm`. - * T represents the type of operands (e.g. for pn.d, T = uint64_t). - * Returns an array of 4 uint64_t elements. */ - template - static std::array sveZip_preds( - std::vector& operands, const uint16_t VL_bits, - bool isZip2) { - const uint64_t* n = operands[0].getAsVector(); - const uint64_t* m = operands[1].getAsVector(); - - const uint16_t partition_num = VL_bits / (sizeof(T) * 8); - std::array out = {0, 0, 0, 0}; - - bool interleave = false; - int index = isZip2 ? (partition_num / 2) : 0; - for (int i = 0; i < partition_num; i++) { - uint64_t shifted_active = 1ull - << ((index % (64 / sizeof(T))) * sizeof(T)); - if (interleave) { - out[i / (64 / sizeof(T))] |= - ((m[index / (64 / sizeof(T))] & shifted_active) == shifted_active) - ? 
static_cast(1ull - << ((i % (64 / sizeof(T))) * sizeof(T))) - : 0; - index++; - } else { - out[i / (64 / sizeof(T))] |= - ((n[index / (64 / sizeof(T))] & shifted_active) == shifted_active) - ? static_cast(1ull - << ((i % (64 / sizeof(T))) * sizeof(T))) - : 0; - } - interleave = !interleave; - } - return out; - } - - /** Helper function for SVE instructions with the format `zip<1,2> zd, zn, - * zm`. - * T represents the type of operands (e.g. for zn.d, T = uint64_t). - * Returns correctly formatted RegisterValue. */ - template - static RegisterValue sveZip_vecs(std::vector& operands, - const uint16_t VL_bits, bool isZip2) { - const T* n = operands[0].getAsVector(); - const T* m = operands[1].getAsVector(); - - const uint16_t partition_num = VL_bits / (sizeof(T) * 8); - T out[256 / sizeof(T)] = {0}; - - bool interleave = false; - int index = isZip2 ? (partition_num / 2) : 0; - for (int i = 0; i < partition_num; i++) { - if (interleave) { - out[i] = m[index]; - index++; - } else { - out[i] = n[index]; - } - interleave = !interleave; - } - return {out, 256}; - } - - /** Helper function for SVE instructions store instructions to merge - * consecutive active elements into blocks to be written. - * T represents the size of the vector elements (e.g. for zn.d, T = uint64_t). - * C represents the size of the memory elements (e.g. for st1w, C = uint32_t). - * Return a vector of RegisterValues. */ - template - static std::vector sve_merge_store_data(const T* d, - const uint64_t* p, - uint16_t vl_bits) { - std::vector outputData; - - uint16_t numVecElems = (vl_bits / (8 * sizeof(T))); - // Determine how many predicate elements are present per uint64_t. - uint16_t predsPer64 = (64 / sizeof(T)); - - // Determine size of array based on the size of the memory access (This is - // the C specifier in sve instructions) - std::array mData; - uint16_t mdSize = 0; - - for (uint16_t x = 0; x < numVecElems; x++) { - // Determine mask to get predication for active element. 
- uint64_t shiftedActive = 1ull << ((x % predsPer64) * sizeof(T)); - if (p[x / predsPer64] & shiftedActive) { - mData[mdSize] = static_cast(d[x]); - mdSize++; - } else if (mdSize) { - outputData.push_back( - RegisterValue((char*)mData.data(), mdSize * sizeof(C))); - mdSize = 0; - } - } - if (mdSize) { + interleave = !interleave; + } + return {out, 256}; +} + +/** Helper function for SVE instructions store instructions to merge + * consecutive active elements into blocks to be written. + * T represents the size of the vector elements (e.g. for zn.d, T = uint64_t). + * C represents the size of the memory elements (e.g. for st1w, C = uint32_t). + * Return a vector of RegisterValues. */ +template +std::vector sve_merge_store_data(const T* d, const uint64_t* p, + uint16_t vl_bits) { + std::vector outputData; + + uint16_t numVecElems = (vl_bits / (8 * sizeof(T))); + // Determine how many predicate elements are present per uint64_t. + uint16_t predsPer64 = (64 / sizeof(T)); + + // Determine size of array based on the size of the memory access (This is + // the C specifier in sve instructions) + std::array mData; + uint16_t mdSize = 0; + + for (uint16_t x = 0; x < numVecElems; x++) { + // Determine mask to get predication for active element. 
+ uint64_t shiftedActive = 1ull << ((x % predsPer64) * sizeof(T)); + if (p[x / predsPer64] & shiftedActive) { + mData[mdSize] = static_cast(d[x]); + mdSize++; + } else if (mdSize) { outputData.push_back( RegisterValue((char*)mData.data(), mdSize * sizeof(C))); + mdSize = 0; } - return outputData; } -}; + if (mdSize) { + outputData.push_back( + RegisterValue((char*)mData.data(), mdSize * sizeof(C))); + } + return outputData; +} + } // namespace aarch64 } // namespace arch } // namespace simeng diff --git a/src/include/simeng/arch/riscv/ExceptionHandler.hh b/src/include/simeng/arch/riscv/ExceptionHandler.hh index 02d29c93bb..c422f0e8b6 100644 --- a/src/include/simeng/arch/riscv/ExceptionHandler.hh +++ b/src/include/simeng/arch/riscv/ExceptionHandler.hh @@ -96,6 +96,16 @@ class ExceptionHandler : public simeng::arch::ExceptionHandler { static constexpr Register R3 = {RegisterType::GENERAL, 13}; static constexpr Register R4 = {RegisterType::GENERAL, 14}; static constexpr Register R5 = {RegisterType::GENERAL, 15}; + + /** Let the following ExceptionHandlerTest derived classes be a friend of this + * class to allow proper testing of `readStringThen()`, `readBufferThen()` and + * `printException()` functions. */ + friend class RiscVExceptionHandlerTest_readStringThen_Test; + friend class RiscVExceptionHandlerTest_readStringThen_maxLen0_Test; + friend class RiscVExceptionHandlerTest_readStringThen_maxLenReached_Test; + friend class RiscVExceptionHandlerTest_readBufferThen_Test; + friend class RiscVExceptionHandlerTest_readBufferThen_length0_Test; + friend class RiscVExceptionHandlerTest_printException_Test; }; } // namespace riscv diff --git a/src/include/simeng/arch/riscv/Instruction.hh b/src/include/simeng/arch/riscv/Instruction.hh index 2ba1c03d8c..b3064d6b48 100644 --- a/src/include/simeng/arch/riscv/Instruction.hh +++ b/src/include/simeng/arch/riscv/Instruction.hh @@ -23,6 +23,9 @@ const uint8_t GENERAL = 0; const uint8_t FLOAT = 1; /** The system registers. 
*/ const uint8_t SYSTEM = 2; + +/** A special register value representing the zero register. */ +const Register ZERO_REGISTER = {GENERAL, (uint16_t)0}; } // namespace RegisterType /** A struct holding user-defined execution information for a aarch64 @@ -42,7 +45,6 @@ struct executionInfo { enum class InstructionException { None = 0, EncodingUnallocated, - EncodingNotYetImplemented, ExecutionNotYetImplemented, AliasNotYetImplemented, MisalignedPC, @@ -167,11 +169,6 @@ class Instruction : public simeng::Instruction { /** Retrieve the instruction's associated architecture. */ const Architecture& getArchitecture() const; - /** A special register value representing the zero register. If passed to - * `setSourceRegisters`/`setDestinationRegisters`, the value will be - * automatically supplied as zero. */ - static const Register ZERO_REGISTER; - /** The maximum number of source registers any supported RISC-V instruction * can have. */ static const uint8_t MAX_SOURCE_REGISTERS = 3; diff --git a/src/include/simeng/config/ModelConfig.hh b/src/include/simeng/config/ModelConfig.hh index b16fae5585..8c18b7e0c9 100644 --- a/src/include/simeng/config/ModelConfig.hh +++ b/src/include/simeng/config/ModelConfig.hh @@ -15,6 +15,7 @@ #include "simeng/config/ExpectationNode.hh" #include "simeng/config/yaml/ryml.hh" +#include "simeng/version.hh" namespace simeng { namespace config { @@ -115,6 +116,9 @@ class ModelConfig { /** A string stream containing information about invalid values. */ std::ostringstream invalid_; + + /** The default special file directory. 
*/ + std::string defaultSpecialFilePath_ = SIMENG_BUILD_DIR "/specialFiles/"; }; // namespace ModelConfig } // namespace config diff --git a/src/include/simeng/config/SimInfo.hh b/src/include/simeng/config/SimInfo.hh index 7a247b843e..333014f168 100644 --- a/src/include/simeng/config/SimInfo.hh +++ b/src/include/simeng/config/SimInfo.hh @@ -51,7 +51,7 @@ class SimInfo { getInstance()->modelConfig_.reGenerateDefault(ISA::RV64, force); // Update config path to be the default string - getInstance()->setConfigPath(DEFAULT_STR); + getInstance()->configFilePath_ = DEFAULT_STR; // Replace the validated config with the new default config getInstance()->validatedConfig_ = getInstance()->modelConfig_.getConfig(); @@ -62,11 +62,6 @@ class SimInfo { /** A getter function to retrieve the config file path. */ static std::string getConfigPath() { return getInstance()->configFilePath_; } - /** A setter function to set the config file path. */ - static void setConfigPath(std::string path) { - getInstance()->configFilePath_ = path; - } - /** A getter function to retrieve the simulation mode of the current SimEng * instance. */ static SimulationMode getSimMode() { return getInstance()->mode_; } diff --git a/src/include/simeng/kernel/Linux.hh b/src/include/simeng/kernel/Linux.hh index 0908d59006..b8ec954599 100644 --- a/src/include/simeng/kernel/Linux.hh +++ b/src/include/simeng/kernel/Linux.hh @@ -130,6 +130,9 @@ struct linux_dirent64 { to Linux system calls. */ class Linux { public: + Linux(const std::string specialFiledirPath) + : specialFilesDir_(specialFiledirPath) {} + /** Create a new Linux process running above this kernel. */ void createProcess(const LinuxProcess& process); @@ -252,7 +255,7 @@ class Linux { std::unordered_map specialPathTranslations_; /** Path to the root of the replacement special files. 
*/ - const std::string specialFilesDir_ = SIMENG_BUILD_DIR "/specialFiles"; + const std::string specialFilesDir_; /** Vector of all currently supported special file paths & files.*/ std::vector supportedSpecialFiles_; diff --git a/src/include/simeng/pipeline/A64FXPortAllocator.hh b/src/include/simeng/pipeline/A64FXPortAllocator.hh index 74f27faf25..22261abad2 100644 --- a/src/include/simeng/pipeline/A64FXPortAllocator.hh +++ b/src/include/simeng/pipeline/A64FXPortAllocator.hh @@ -21,17 +21,21 @@ const uint8_t BR = 5; * described in the A64FX Microarchitecture manual. */ class A64FXPortAllocator : public PortAllocator { public: + /** Constructor for the A64FXPortAllocator object. */ A64FXPortAllocator(const std::vector>& portArrangement); + /** Allocate a port for the specified instruction group; returns the allocated + * port. */ uint16_t allocate(const std::vector& ports) override; + /** Inform the allocator that an instruction was issued to the specified port. + */ void issued(uint16_t port) override; + /** Inform the allocator that an instruction will not issue to its + * allocated port. */ void deallocate(uint16_t port) override; - /** A mapping from issye ports to instruction attribute */ - uint8_t attributeMapping(const std::vector& ports); - /** Set function from DispatchIssueUnit to retrieve reservation * station sizes during execution. */ void setRSSizeGetter( @@ -41,6 +45,9 @@ class A64FXPortAllocator : public PortAllocator { void tick() override; private: + /** A mapping from issue ports to instruction attribute */ + uint8_t attributeMapping(const std::vector& ports); + /** An approximate estimation of the index of an instruction within the input * buffer of the dispatch unit. 
Increments slot at each allocation thus cannot * account for nullptr entries in buffer.*/ diff --git a/src/include/simeng/pipeline/DispatchIssueUnit.hh b/src/include/simeng/pipeline/DispatchIssueUnit.hh index 2e533cf125..dd8654d921 100644 --- a/src/include/simeng/pipeline/DispatchIssueUnit.hh +++ b/src/include/simeng/pipeline/DispatchIssueUnit.hh @@ -30,8 +30,7 @@ struct ReservationStation { uint16_t capacity; /** Number of instructions that can be dispatched to this unit per cycle. */ uint16_t dispatchRate; - /** Current number of non-stalled instructions - * in reservation station */ + /** Current number of instructions in reservation station */ uint16_t currentSize; /** Issue ports belonging to reservation station */ std::vector ports; @@ -75,9 +74,6 @@ class DispatchIssueUnit { void forwardOperands(const span& destinations, const span& values); - /** Set the scoreboard entry for the provided register as ready. */ - void setRegisterReady(Register reg); - /** Clear the RS of all flushed instructions. */ void purgeFlushed(); diff --git a/src/include/simeng/pipeline/ExecuteUnit.hh b/src/include/simeng/pipeline/ExecuteUnit.hh index 8a480fac10..14d8b47e7c 100644 --- a/src/include/simeng/pipeline/ExecuteUnit.hh +++ b/src/include/simeng/pipeline/ExecuteUnit.hh @@ -48,9 +48,9 @@ class ExecuteUnit { * discovered misprediction. */ uint64_t getFlushAddress() const; - /** Retrieve the sequence ID associated with the most recently discovered + /** Retrieve the instruction ID associated with the most recently discovered * misprediction. */ - uint64_t getFlushSeqId() const; + uint64_t getFlushInsnId() const; /** Purge flushed instructions from the internal pipeline and clear any active * stall, if applicable. 
*/ diff --git a/src/include/simeng/pipeline/RegisterAliasTable.hh b/src/include/simeng/pipeline/RegisterAliasTable.hh index e3a30ea7b1..43b8e0db4c 100644 --- a/src/include/simeng/pipeline/RegisterAliasTable.hh +++ b/src/include/simeng/pipeline/RegisterAliasTable.hh @@ -15,7 +15,7 @@ class RegisterAliasTable { * structure, and the corresponding numbers of physical registers that should * be available. */ RegisterAliasTable(std::vector architecturalStructure, - std::vector physicalStructure); + std::vector physicalRegisterCounts); /** Retrieve the current physical register assigned to the provided * architectural register. */ @@ -43,9 +43,6 @@ class RegisterAliasTable { * is reinstated to the mapping table, and the provided register is freed. */ void rewind(Register physical); - /** Free the provided physical register. */ - void free(Register physical); - private: /** The register mapping tables. Holds a map of architectural -> physical * register mappings for each register type. */ diff --git a/src/include/simeng/pipeline/ReorderBuffer.hh b/src/include/simeng/pipeline/ReorderBuffer.hh index 179d9bb689..a98471c2e8 100644 --- a/src/include/simeng/pipeline/ReorderBuffer.hh +++ b/src/include/simeng/pipeline/ReorderBuffer.hh @@ -59,7 +59,7 @@ class ReorderBuffer { unsigned int commit(unsigned int maxCommitSize); /** Flush all instructions with a sequence ID greater than `afterSeqId`. */ - void flush(uint64_t afterSeqId); + void flush(uint64_t afterInsnId); /** Retrieve the current size of the ROB. */ unsigned int size() const; @@ -75,9 +75,9 @@ class ReorderBuffer { * discovered memory order violation. */ uint64_t getFlushAddress() const; - /** Retrieve the sequence ID associated with the most recently discovered + /** Retrieve the instruction ID associated with the most recently discovered * memory order violation. */ - uint64_t getFlushSeqId() const; + uint64_t getFlushInsnId() const; /** Get the number of instructions the ROB has committed. 
*/ uint64_t getInstructionsCommittedCount() const; diff --git a/src/lib/CoreInstance.cc b/src/lib/CoreInstance.cc index 7ab82ecf73..af4ee73b9d 100644 --- a/src/lib/CoreInstance.cc +++ b/src/lib/CoreInstance.cc @@ -5,16 +5,20 @@ namespace simeng { CoreInstance::CoreInstance(std::string executablePath, std::vector executableArgs, ryml::ConstNodeRef config) - : config_(config) { + : config_(config), + kernel_(kernel::Linux( + config_["CPU-Info"]["Special-File-Dir-Path"].as())) { generateCoreModel(executablePath, executableArgs); } CoreInstance::CoreInstance(char* assembledSource, size_t sourceSize, ryml::ConstNodeRef config) - : config_(config) { - source_ = assembledSource; - sourceSize_ = sourceSize; - assembledSource_ = true; + : config_(config), + kernel_(kernel::Linux( + config_["CPU-Info"]["Special-File-Dir-Path"].as())), + source_(assembledSource), + sourceSize_(sourceSize), + assembledSource_(true) { // Pass an empty string for executablePath and empty vector of strings for // executableArgs. generateCoreModel("", std::vector{}); diff --git a/src/lib/FixedLatencyMemoryInterface.cc b/src/lib/FixedLatencyMemoryInterface.cc index 6ccf48a147..4bf57f1f40 100644 --- a/src/lib/FixedLatencyMemoryInterface.cc +++ b/src/lib/FixedLatencyMemoryInterface.cc @@ -1,6 +1,6 @@ #include "simeng/FixedLatencyMemoryInterface.hh" -#include +#include namespace simeng { @@ -24,8 +24,12 @@ void FixedLatencyMemoryInterface::tick() { if (request.write) { // Write: write data directly to memory - assert(target.address + target.size <= size_ && - "Attempted to write beyond memory limit"); + if (target.address + target.size > size_) { + std::cerr << "[SimEng:FixedLatencyMemoryInterface] Attempted to write " + "beyond memory limit." 
+ << std::endl; + exit(1); + } auto ptr = memory_ + target.address; // Copy the data from the RegisterValue to memory diff --git a/src/lib/FlatMemoryInterface.cc b/src/lib/FlatMemoryInterface.cc index 8360626e3f..730e615101 100644 --- a/src/lib/FlatMemoryInterface.cc +++ b/src/lib/FlatMemoryInterface.cc @@ -1,6 +1,5 @@ #include "simeng/FlatMemoryInterface.hh" -#include #include namespace simeng { @@ -25,8 +24,12 @@ void FlatMemoryInterface::requestRead(const MemoryAccessTarget& target, void FlatMemoryInterface::requestWrite(const MemoryAccessTarget& target, const RegisterValue& data) { - assert(target.address + target.size <= size_ && - "Attempted to write beyond memory limit"); + if (target.address + target.size > size_) { + std::cerr << "[SimEng:FlatLatencyMemoryInterface] Attempted to write " + "beyond memory limit." + << std::endl; + exit(1); + } auto ptr = memory_ + target.address; // Copy the data from the RegisterValue to memory diff --git a/src/lib/SpecialFileDirGen.cc b/src/lib/SpecialFileDirGen.cc index 1c18ab51c3..0acf8984eb 100644 --- a/src/lib/SpecialFileDirGen.cc +++ b/src/lib/SpecialFileDirGen.cc @@ -5,7 +5,9 @@ namespace simeng { SpecialFileDirGen::SpecialFileDirGen(ryml::ConstNodeRef config) - : coreCount_(config["CPU-Info"]["Core-Count"].as()), + : specialFilesDir_( + config["CPU-Info"]["Special-File-Dir-Path"].as()), + coreCount_(config["CPU-Info"]["Core-Count"].as()), socketCount_(config["CPU-Info"]["Socket-Count"].as()), smt_(config["CPU-Info"]["SMT"].as()), bogoMIPS_(config["CPU-Info"]["BogoMIPS"].as()), @@ -23,12 +25,12 @@ void SpecialFileDirGen::RemoveExistingSFDir() { const std::string rm_input = "rm -r " + specialFilesDir_; system(rm_input.c_str()); } - const std::string mk_input = "mkdir " + specialFilesDir_; - system(mk_input.c_str()); return; } void SpecialFileDirGen::GenerateSFDir() { + // Create root special files directory + system(("mkdir -p " + specialFilesDir_).c_str()); // Define frequently accessed root directories in special 
file tree const std::string proc_dir = specialFilesDir_ + "/proc/"; const std::string online_dir = specialFilesDir_ + "/sys/devices/system/cpu/"; diff --git a/src/lib/arch/aarch64/Instruction.cc b/src/lib/arch/aarch64/Instruction.cc index a3e0e698ad..df4e55c07f 100644 --- a/src/lib/arch/aarch64/Instruction.cc +++ b/src/lib/arch/aarch64/Instruction.cc @@ -8,9 +8,6 @@ namespace simeng { namespace arch { namespace aarch64 { -const Register Instruction::ZERO_REGISTER = {RegisterType::GENERAL, - (uint16_t)-1}; - Instruction::Instruction(const Architecture& architecture, const InstructionMetadata& metadata, MicroOpInfo microOpInfo) @@ -37,7 +34,8 @@ Instruction::Instruction(const Architecture& architecture, InstructionException Instruction::getException() const { return exception_; } const span Instruction::getSourceRegisters() const { - return {const_cast(sourceRegisters.data()), sourceRegisterCount}; + return {const_cast(sourceRegisters.data()), + sourceRegisters.size()}; } const span Instruction::getSourceOperands() const { @@ -45,6 +43,10 @@ const span Instruction::getSourceOperands() const { } const span Instruction::getDestinationRegisters() const { + // The `destinationRegisterCount` is used here as the span count value because + // there may be n number of zero registers in the latter indexes of the + // `destinationRegisters` vector. These cannot be written to and hence + // shouldn't be included in the returned span. 
return {const_cast(destinationRegisters.data()), destinationRegisterCount}; } @@ -55,6 +57,7 @@ bool Instruction::isOperandReady(int index) const { void Instruction::renameSource(uint16_t i, Register renamed) { sourceRegisters[i] = renamed; } + void Instruction::renameDestination(uint16_t i, Register renamed) { destinationRegisters[i] = renamed; } @@ -95,6 +98,10 @@ span Instruction::getData() const { bool Instruction::canExecute() const { return (operandsPending == 0); } const span Instruction::getResults() const { + // The `destinationRegisterCount` is used here as the span count value because + // there may be n number of values attributed to zero registers in the latter + // indexes of the `results` vector. Zero registers cannot be written to and + // hence shouldn't be included in the returned span. return {const_cast(results.data()), destinationRegisterCount}; } @@ -185,6 +192,7 @@ void Instruction::setExecutionInfo(const ExecutionInfo& info) { stallCycles_ = info.stallCycles; supportedPorts_ = info.ports; } + const std::vector& Instruction::getSupportedPorts() { if (supportedPorts_.size() == 0) { exception_ = InstructionException::NoAvailablePort; @@ -199,64 +207,6 @@ const Architecture& Instruction::getArchitecture() const { return architecture_; } -/** Extend `value` according to `extendType`, and left-shift the result by - * `shift` */ -uint64_t Instruction::extendValue(uint64_t value, uint8_t extendType, - uint8_t shift) const { - if (extendType == ARM64_EXT_INVALID && shift == 0) { - // Special case: an invalid shift type with a shift amount of 0 implies an - // identity operation - return value; - } - - uint64_t extended; - switch (extendType) { - case ARM64_EXT_UXTB: - extended = static_cast(value); - break; - case ARM64_EXT_UXTH: - extended = static_cast(value); - break; - case ARM64_EXT_UXTW: - extended = static_cast(value); - break; - case ARM64_EXT_UXTX: - extended = value; - break; - case ARM64_EXT_SXTB: - extended = static_cast(value); - break; 
- case ARM64_EXT_SXTH: - extended = static_cast(value); - break; - case ARM64_EXT_SXTW: - extended = static_cast(value); - break; - case ARM64_EXT_SXTX: - extended = value; - break; - default: - assert(false && "Invalid extension type"); - return 0; - } - - return extended << shift; -} - -/** Extend `value` using extension/shifting rules defined in `op`. */ -uint64_t Instruction::extendOffset(uint64_t value, - const cs_arm64_op& op) const { - if (op.ext == 0) { - if (op.shift.value == 0) { - return value; - } - if (op.shift.type == 1) { - return extendValue(value, ARM64_EXT_UXTX, op.shift.value); - } - } - return extendValue(value, op.ext, op.shift.value); -} - } // namespace aarch64 } // namespace arch } // namespace simeng diff --git a/src/lib/arch/aarch64/InstructionMetadata.cc b/src/lib/arch/aarch64/InstructionMetadata.cc index 6421664ce0..59f529e3c6 100644 --- a/src/lib/arch/aarch64/InstructionMetadata.cc +++ b/src/lib/arch/aarch64/InstructionMetadata.cc @@ -1463,6 +1463,9 @@ InstructionMetadata::InstructionMetadata(const cs_insn& insn) case Opcode::AArch64_STRXui: operands[1].access = CS_AC_READ; break; + case Opcode::AArch64_PFALSE: + operands[0].access = CS_AC_WRITE; + break; case Opcode::AArch64_STR_PXI: [[fallthrough]]; case Opcode::AArch64_STR_ZXI: diff --git a/src/lib/arch/aarch64/Instruction_address.cc b/src/lib/arch/aarch64/Instruction_address.cc index 3878d80648..42b2fed7be 100644 --- a/src/lib/arch/aarch64/Instruction_address.cc +++ b/src/lib/arch/aarch64/Instruction_address.cc @@ -2,6 +2,7 @@ #include #include "InstructionMetadata.hh" +#include "simeng/arch/aarch64/helpers/auxiliaryFunctions.hh" namespace simeng { namespace arch { diff --git a/src/lib/arch/aarch64/Instruction_decode.cc b/src/lib/arch/aarch64/Instruction_decode.cc index 4e60ae1b62..bc90f8a2f9 100644 --- a/src/lib/arch/aarch64/Instruction_decode.cc +++ b/src/lib/arch/aarch64/Instruction_decode.cc @@ -17,6 +17,7 @@ namespace aarch64 { constexpr bool bit(uint32_t value, uint8_t start) 
{ return (value >> start) & 1; } + // Extract bits `start` to `start+width` of `value` constexpr uint32_t bits(uint32_t value, uint8_t start, uint8_t width) { return ((value >> start) & ((1 << width) - 1)); @@ -24,6 +25,7 @@ constexpr uint32_t bits(uint32_t value, uint8_t start, uint8_t width) { // Generate a general purpose register identifier with tag `tag` constexpr Register genReg(uint16_t tag) { return {RegisterType::GENERAL, tag}; } + // Generate a NZCV register identifier constexpr Register nzcvReg() { return {RegisterType::NZCV, 0}; } @@ -97,7 +99,7 @@ Register csRegToRegister(arm64_reg reg) { // ARM64_REG_WZR and _XZR are zero registers, and don't read if (reg == ARM64_REG_WZR || reg == ARM64_REG_XZR) { - return Instruction::ZERO_REGISTER; + return RegisterType::ZERO_REGISTER; } // ARM64_REG_SP and _WSP are stack pointer registers, stored in r31 of the @@ -197,55 +199,58 @@ void Instruction::decode() { sourceRegisters.push_back( csRegToRegister(static_cast(metadata.implicitSources[i]))); operandsPending++; - sourceRegisterCount++; } bool accessesMemory = false; + uint16_t zrDestRegs = 0; // Extract explicit register accesses for (size_t i = 0; i < metadata.operandCount; i++) { const auto& op = metadata.operands[i]; if (op.type == ARM64_OP_REG) { // Register operand - if ((op.access & cs_ac_type::CS_AC_WRITE) && op.reg != ARM64_REG_WZR && - op.reg != ARM64_REG_XZR) { - // Determine the data type the instruction operates on based on the - // register operand used - // Belongs to the predicate group if the destination register is a - // predicate - if (op.reg >= ARM64_REG_V0) { - isVectorData_ = true; - } else if (op.reg >= ARM64_REG_ZAB0 || op.reg == ARM64_REG_ZA) { - isSMEData_ = true; - } else if (op.reg >= ARM64_REG_Z0) { - isSVEData_ = true; - } else if (op.reg <= ARM64_REG_S31 && op.reg >= ARM64_REG_Q0) { - isScalarData_ = true; - } else if (op.reg <= ARM64_REG_P15 && op.reg >= ARM64_REG_P0) { - isPredicate_ = true; - } else if (op.reg <= ARM64_REG_H31 
&& op.reg >= ARM64_REG_B0) { - isScalarData_ = true; - } + if ((op.access & cs_ac_type::CS_AC_WRITE)) { + if (op.reg != ARM64_REG_WZR && op.reg != ARM64_REG_XZR) { + // Determine the data type the instruction operates on based on the + // register operand used + // Belongs to the predicate group if the destination register is a + // predicate + if (op.reg >= ARM64_REG_V0) { + isVectorData_ = true; + } else if (op.reg >= ARM64_REG_ZAB0 || op.reg == ARM64_REG_ZA) { + isSMEData_ = true; + } else if (op.reg >= ARM64_REG_Z0) { + isSVEData_ = true; + } else if (op.reg <= ARM64_REG_S31 && op.reg >= ARM64_REG_Q0) { + isScalarData_ = true; + } else if (op.reg <= ARM64_REG_P15 && op.reg >= ARM64_REG_P0) { + isPredicate_ = true; + } else if (op.reg <= ARM64_REG_H31 && op.reg >= ARM64_REG_B0) { + isScalarData_ = true; + } - if ((op.reg >= ARM64_REG_ZAB0 && op.reg < ARM64_REG_V0) || - (op.reg == ARM64_REG_ZA)) { - // Add all Matrix register rows as destination operands - std::vector regs = - getZARowVectors(op.reg, architecture_.getStreamingVectorLength()); - for (int i = 0; i < regs.size(); i++) { - destinationRegisters.push_back(regs[i]); + if ((op.reg >= ARM64_REG_ZAB0 && op.reg < ARM64_REG_V0) || + (op.reg == ARM64_REG_ZA)) { + // Add all Matrix register rows as destination operands + std::vector regs = getZARowVectors( + op.reg, architecture_.getStreamingVectorLength()); + for (int i = 0; i < regs.size(); i++) { + destinationRegisters.push_back(regs[i]); + destinationRegisterCount++; + // If WRITE, also need to add to source registers to maintain + // unaltered row values + sourceRegisters.push_back(regs[i]); + operandsPending++; + } + } else { + // Add register writes to destinations, but skip zero-register + // destinations + destinationRegisters.push_back(csRegToRegister(op.reg)); destinationRegisterCount++; - // If WRITE, also need to add to source registers to maintain - // unaltered row values - sourceRegisters.push_back(regs[i]); - sourceRegisterCount++; - 
operandsPending++; } } else { - // Add register writes to destinations, but skip zero-register - // destinations - destinationRegisters.push_back(csRegToRegister(op.reg)); - destinationRegisterCount++; + // Need to allocate extra space in results vector for zero destination + zrDestRegs++; } } if (op.access & cs_ac_type::CS_AC_READ) { @@ -256,14 +261,12 @@ void Instruction::decode() { getZARowVectors(op.reg, architecture_.getStreamingVectorLength()); for (int i = 0; i < regs.size(); i++) { sourceRegisters.push_back(regs[i]); - sourceRegisterCount++; operandsPending++; } } else { // Add register reads to destinations sourceRegisters.push_back(csRegToRegister(op.reg)); operandsPending++; - sourceRegisterCount++; } if (op.shift.value > 0) isNoShift_ = false; // Identify shift operands } @@ -271,7 +274,6 @@ void Instruction::decode() { accessesMemory = true; sourceRegisters.push_back(csRegToRegister(op.mem.base)); operandsPending++; - sourceRegisterCount++; if (metadata.writeback) { // Writeback instructions modify the base address @@ -282,7 +284,6 @@ void Instruction::decode() { // Register offset; add to sources sourceRegisters.push_back(csRegToRegister(op.mem.index)); operandsPending++; - sourceRegisterCount++; } } else if (op.type == ARM64_OP_SME_INDEX) { // SME instruction with index std::vector regs; @@ -297,7 +298,6 @@ void Instruction::decode() { // un-updated rows for (int i = 0; i < regs.size(); i++) { sourceRegisters.push_back(regs[i]); - sourceRegisterCount++; operandsPending++; if (op.access & cs_ac_type::CS_AC_WRITE) { destinationRegisters.push_back(regs[i]); @@ -314,25 +314,20 @@ void Instruction::decode() { } else if (op.access & cs_ac_type::CS_AC_READ) { sourceRegisters.push_back(csRegToRegister(op.sme_index.reg)); operandsPending++; - sourceRegisterCount++; } } // Register that is base of index will always be a source operand sourceRegisters.push_back(csRegToRegister(op.sme_index.base)); operandsPending++; - sourceRegisterCount++; } else if (op.type 
== ARM64_OP_REG_MRS) { int32_t sysRegTag = architecture_.getSystemRegisterTag(op.imm); if (sysRegTag == -1) { exceptionEncountered_ = true; exception_ = InstructionException::UnmappedSysReg; - // Clear any registered operands - sourceRegisterCount = 0; - destinationRegisterCount = 0; + return; } else { sourceRegisters.push_back( {RegisterType::SYSTEM, static_cast(sysRegTag)}); - sourceRegisterCount++; operandsPending++; } } else if (op.type == ARM64_OP_REG_MSR) { @@ -340,9 +335,7 @@ void Instruction::decode() { if (sysRegTag == -1) { exceptionEncountered_ = true; exception_ = InstructionException::UnmappedSysReg; - // Clear any registered operands - sourceRegisterCount = 0; - destinationRegisterCount = 0; + return; } else { destinationRegisters.push_back( {RegisterType::SYSTEM, static_cast(sysRegTag)}); @@ -638,15 +631,17 @@ void Instruction::decode() { } // Allocate enough entries in results vector - results.resize(destinationRegisterCount + 1); + results.resize(destinationRegisterCount + zrDestRegs); // Allocate enough entries in the operands vector - operands.resize(sourceRegisterCount + 1); + operands.resize(sourceRegisters.size()); // Catch zero register references and pre-complete those operands - for (uint16_t i = 0; i < sourceRegisterCount; i++) { - if (sourceRegisters[i] == Instruction::ZERO_REGISTER) { - operands[i] = RegisterValue(0, 8); - operandsPending--; + if (!(isSMEData_)) { + for (uint16_t i = 0; i < sourceRegisters.size(); i++) { + if (sourceRegisters[i] == RegisterType::ZERO_REGISTER) { + operands[i] = RegisterValue(0, 8); + operandsPending--; + } } } } diff --git a/src/lib/arch/aarch64/Instruction_execute.cc b/src/lib/arch/aarch64/Instruction_execute.cc index 03fe5a5410..7f75fb4340 100644 --- a/src/lib/arch/aarch64/Instruction_execute.cc +++ b/src/lib/arch/aarch64/Instruction_execute.cc @@ -10,11 +10,9 @@ #include "simeng/arch/aarch64/helpers/conditional.hh" #include "simeng/arch/aarch64/helpers/divide.hh" #include 
"simeng/arch/aarch64/helpers/float.hh" -#include "simeng/arch/aarch64/helpers/load.hh" #include "simeng/arch/aarch64/helpers/logical.hh" #include "simeng/arch/aarch64/helpers/multiply.hh" #include "simeng/arch/aarch64/helpers/neon.hh" -#include "simeng/arch/aarch64/helpers/store.hh" #include "simeng/arch/aarch64/helpers/sve.hh" namespace simeng { @@ -106,7 +104,7 @@ void Instruction::execute() { } else { switch (metadata.opcode) { case Opcode::AArch64_ADCXr: { // adc xd, xn, xm - auto [result, nzcv] = arithmeticHelp::addCarry_3ops(operands); + auto [result, nzcv] = addCarry_3ops(operands); results[0] = result; break; } @@ -119,56 +117,52 @@ void Instruction::execute() { break; } case Opcode::AArch64_ADDPv16i8: { // addp vd.16b, vn.16b, vm.16b - results[0] = neonHelp::vecAddp_3ops(operands); + results[0] = vecAddp_3ops(operands); break; } case Opcode::AArch64_ADDPv2i64: { // addp vd.2d, vn.2d, vm.2d - results[0] = neonHelp::vecAddp_3ops(operands); + results[0] = vecAddp_3ops(operands); break; } case Opcode::AArch64_ADDPv2i64p: { // addp dd, vn.2d - results[0] = neonHelp::vecSumElems_2ops(operands); + results[0] = vecSumElems_2ops(operands); break; } case Opcode::AArch64_ADDPv4i32: { // addp vd.4s, vn.4s, vm.4s - results[0] = neonHelp::vecAddp_3ops(operands); + results[0] = vecAddp_3ops(operands); break; } case Opcode::AArch64_ADDPv8i16: { // addp vd.8h, vn.8h, vm.8h - results[0] = neonHelp::vecAddp_3ops(operands); + results[0] = vecAddp_3ops(operands); break; } case Opcode::AArch64_ADDSWri: { // adds wd, wn, #imm{, shift} - auto [result, nzcv] = - arithmeticHelp::addShift_imm(operands, metadata, true); + auto [result, nzcv] = addShift_imm(operands, metadata, true); results[0] = nzcv; results[1] = {result, 8}; break; } case Opcode::AArch64_ADDSWrs: { // adds wd, wn, wm{, shift} - auto [result, nzcv] = - arithmeticHelp::addShift_3ops(operands, metadata, true); + auto [result, nzcv] = addShift_3ops(operands, metadata, true); results[0] = nzcv; results[1] = {result, 8}; 
break; } case Opcode::AArch64_ADDSWrx: { // adds wd, wn, wm{, extend {#amount}} auto [result, nzcv] = - arithmeticHelp::addExtend_3ops(operands, metadata, true); + addExtend_3ops(operands, metadata, true); results[0] = nzcv; results[1] = {result, 8}; break; } case Opcode::AArch64_ADDSXri: { // adds xd, xn, #imm{, shift} - auto [result, nzcv] = - arithmeticHelp::addShift_imm(operands, metadata, true); + auto [result, nzcv] = addShift_imm(operands, metadata, true); results[0] = nzcv; results[1] = result; break; } case Opcode::AArch64_ADDSXrs: { // adds xd, xn, xm{, shift} - auto [result, nzcv] = - arithmeticHelp::addShift_3ops(operands, metadata, true); + auto [result, nzcv] = addShift_3ops(operands, metadata, true); results[0] = nzcv; results[1] = result; break; @@ -176,7 +170,7 @@ void Instruction::execute() { case Opcode::AArch64_ADDSXrx: // adds xd, xn, wm{, extend {#amount}} case Opcode::AArch64_ADDSXrx64: { // adds xd, xn, xm{, extend {#amount}} auto [result, nzcv] = - arithmeticHelp::addExtend_3ops(operands, metadata, true); + addExtend_3ops(operands, metadata, true); results[0] = nzcv; results[1] = RegisterValue(result, 8); break; @@ -190,135 +184,130 @@ void Instruction::execute() { break; } case Opcode::AArch64_ADDVv4i16v: { // addv hd, vn.4h - results[0] = neonHelp::vecSumElems_2ops(operands); + results[0] = vecSumElems_2ops(operands); break; } case Opcode::AArch64_ADDVv4i32v: { // addv sd, vn.4s - results[0] = neonHelp::vecSumElems_2ops(operands); + results[0] = vecSumElems_2ops(operands); break; } case Opcode::AArch64_ADDVv8i8v: { // addv bd, vn.8b - results[0] = neonHelp::vecSumElems_2ops(operands); + results[0] = vecSumElems_2ops(operands); break; } case Opcode::AArch64_ADDWri: { // add wd, wn, #imm{, shift} - auto [result, nzcv] = - arithmeticHelp::addShift_imm(operands, metadata, false); + auto [result, nzcv] = addShift_imm(operands, metadata, false); results[0] = {result, 8}; break; } case Opcode::AArch64_ADDWrs: { // add wd, wn, wm{, shift #amount} 
auto [result, nzcv] = - arithmeticHelp::addShift_3ops(operands, metadata, false); + addShift_3ops(operands, metadata, false); results[0] = {result, 8}; break; } case Opcode::AArch64_ADDWrx: { // add wd, wn, wm{, extend #amount} auto [result, nzcv] = - arithmeticHelp::addExtend_3ops(operands, metadata, false); + addExtend_3ops(operands, metadata, false); results[0] = {result, 8}; break; } case Opcode::AArch64_ADDXri: { // add xd, xn, #imm{, shift} - auto [result, nzcv] = - arithmeticHelp::addShift_imm(operands, metadata, false); + auto [result, nzcv] = addShift_imm(operands, metadata, false); results[0] = result; break; } case Opcode::AArch64_ADDXrs: { // add xd, xn, xm, {shift #amount} auto [result, nzcv] = - arithmeticHelp::addShift_3ops(operands, metadata, false); + addShift_3ops(operands, metadata, false); results[0] = result; break; } case Opcode::AArch64_ADDXrx: // add xd, xn, wm{, extend {#amount}} case Opcode::AArch64_ADDXrx64: { // add xd, xn, xm{, extend {#amount}} auto [result, nzcv] = - arithmeticHelp::addExtend_3ops(operands, metadata, false); + addExtend_3ops(operands, metadata, false); results[0] = result; break; } case Opcode::AArch64_ADD_ZI_B: { // add zdn.b, zdn.b, imm{, shift} - results[0] = sveHelp::sveAdd_imm(operands, metadata, VL_bits); + results[0] = sveAdd_imm(operands, metadata, VL_bits); break; } case Opcode::AArch64_ADD_ZI_D: { // add zdn.d, zdn.d, imm{, shift} - results[0] = sveHelp::sveAdd_imm(operands, metadata, VL_bits); + results[0] = sveAdd_imm(operands, metadata, VL_bits); break; } case Opcode::AArch64_ADD_ZI_H: { // add zdn.h zdn.h, imm{, shift} - results[0] = sveHelp::sveAdd_imm(operands, metadata, VL_bits); + results[0] = sveAdd_imm(operands, metadata, VL_bits); break; } case Opcode::AArch64_ADD_ZI_S: { // add zdn.s, zdn.s, imm{, shift} - results[0] = sveHelp::sveAdd_imm(operands, metadata, VL_bits); + results[0] = sveAdd_imm(operands, metadata, VL_bits); break; } case Opcode::AArch64_ADD_ZPmZ_B: { // add zdn.b, pg/m, zdn.b, 
zm.b - results[0] = sveHelp::sveAddPredicated_vecs(operands, VL_bits); + results[0] = sveAddPredicated_vecs(operands, VL_bits); break; } case Opcode::AArch64_ADD_ZPmZ_D: { // add zdn.d, pg/m, zdn.d, zm.d - results[0] = - sveHelp::sveAddPredicated_vecs(operands, VL_bits); + results[0] = sveAddPredicated_vecs(operands, VL_bits); break; } case Opcode::AArch64_ADD_ZPmZ_H: { // add zdn.h, pg/m, zdn.h, zm.h - results[0] = - sveHelp::sveAddPredicated_vecs(operands, VL_bits); + results[0] = sveAddPredicated_vecs(operands, VL_bits); break; } case Opcode::AArch64_ADD_ZPmZ_S: { // add zdn.s, pg/m, zdn.s, zm.s - results[0] = - sveHelp::sveAddPredicated_vecs(operands, VL_bits); + results[0] = sveAddPredicated_vecs(operands, VL_bits); break; } case Opcode::AArch64_ADD_ZZZ_B: { // add zd.b, zn.b, zm.b - results[0] = sveHelp::sveAdd_3ops(operands, VL_bits); + results[0] = sveAdd_3ops(operands, VL_bits); break; } case Opcode::AArch64_ADD_ZZZ_D: { // add zd.d, zn.d, zm.d - results[0] = sveHelp::sveAdd_3ops(operands, VL_bits); + results[0] = sveAdd_3ops(operands, VL_bits); break; } case Opcode::AArch64_ADD_ZZZ_H: { // add zd.h, zn.h, zm.h - results[0] = sveHelp::sveAdd_3ops(operands, VL_bits); + results[0] = sveAdd_3ops(operands, VL_bits); break; } case Opcode::AArch64_ADD_ZZZ_S: { // add zd.s, zn.s, zm.s - results[0] = sveHelp::sveAdd_3ops(operands, VL_bits); + results[0] = sveAdd_3ops(operands, VL_bits); break; } case Opcode::AArch64_ADDv16i8: { // add vd.16b, vn.16b, vm.16b - results[0] = neonHelp::vecAdd_3ops(operands); + results[0] = vecAdd_3ops(operands); break; } case Opcode::AArch64_ADDv1i64: { // add dd, dn, dm - results[0] = neonHelp::vecAdd_3ops(operands); + results[0] = vecAdd_3ops(operands); break; } case Opcode::AArch64_ADDv2i32: { // add vd.2s, vn.2s, vm.2s - results[0] = neonHelp::vecAdd_3ops(operands); + results[0] = vecAdd_3ops(operands); break; } case Opcode::AArch64_ADDv2i64: { // add vd.2d, vn.2d, vm.2d - results[0] = neonHelp::vecAdd_3ops(operands); + results[0] 
= vecAdd_3ops(operands); break; } case Opcode::AArch64_ADDv4i16: { // add vd.4h, vn.4h, vm.4h - results[0] = neonHelp::vecAdd_3ops(operands); + results[0] = vecAdd_3ops(operands); break; } case Opcode::AArch64_ADDv4i32: { // add vd.4s, vn.4s, vm.4s - results[0] = neonHelp::vecAdd_3ops(operands); + results[0] = vecAdd_3ops(operands); break; } case Opcode::AArch64_ADDv8i16: { // add vd.8h, vn.8h, vm.8h - results[0] = neonHelp::vecAdd_3ops(operands); + results[0] = vecAdd_3ops(operands); break; } case Opcode::AArch64_ADDv8i8: { // add vd.8b, vn.8b, vm.8b - results[0] = neonHelp::vecAdd_3ops(operands); + results[0] = vecAdd_3ops(operands); break; } case Opcode::AArch64_ADR: { // adr xd, #imm @@ -336,20 +325,20 @@ void Instruction::execute() { case Opcode::AArch64_ADR_LSL_ZZZ_D_1: // adr zd.d, [zn.d, zm.d, lsl #1] case Opcode::AArch64_ADR_LSL_ZZZ_D_2: // adr zd.d, [zn.d, zm.d, lsl #2] case Opcode::AArch64_ADR_LSL_ZZZ_D_3: { // adr zd.d, [zn.d, zm.d, lsl #3] - results[0] = sveHelp::sveAdr_packedOffsets(operands, metadata, - VL_bits); + results[0] = + sveAdr_packedOffsets(operands, metadata, VL_bits); break; } case Opcode::AArch64_ADR_LSL_ZZZ_S_0: // adr zd.s, [zn.s, zm.s] case Opcode::AArch64_ADR_LSL_ZZZ_S_1: // adr zd.s, [zn.s, zm.s, lsl #1] case Opcode::AArch64_ADR_LSL_ZZZ_S_2: // adr zd.s, [zn.s, zm.s, lsl #2] case Opcode::AArch64_ADR_LSL_ZZZ_S_3: { // adr zd.s, [zn.s, zm.s, lsl #3] - results[0] = sveHelp::sveAdr_packedOffsets(operands, metadata, - VL_bits); + results[0] = + sveAdr_packedOffsets(operands, metadata, VL_bits); break; } case Opcode::AArch64_ANDSWri: { // ands wd, wn, #imm - auto [result, nzcv] = logicalHelp::logicOp_imm( + auto [result, nzcv] = logicOp_imm( operands, metadata, true, [](uint32_t x, uint32_t y) -> uint32_t { return x & y; }); results[0] = nzcv; @@ -357,7 +346,7 @@ void Instruction::execute() { break; } case Opcode::AArch64_ANDSWrs: { // ands wd, wn, wm{, shift #amount} - auto [result, nzcv] = logicalHelp::logicOpShift_3ops( + auto [result, 
nzcv] = logicOpShift_3ops( operands, metadata, true, [](uint32_t x, uint32_t y) -> uint32_t { return x & y; }); results[0] = nzcv; @@ -365,7 +354,7 @@ void Instruction::execute() { break; } case Opcode::AArch64_ANDSXri: { // ands xd, xn, #imm - auto [result, nzcv] = logicalHelp::logicOp_imm( + auto [result, nzcv] = logicOp_imm( operands, metadata, true, [](uint64_t x, uint64_t y) -> uint64_t { return x & y; }); results[0] = nzcv; @@ -373,7 +362,7 @@ void Instruction::execute() { break; } case Opcode::AArch64_ANDSXrs: { // ands xd, xn, xm{, shift #amount} - auto [result, nzcv] = logicalHelp::logicOpShift_3ops( + auto [result, nzcv] = logicOpShift_3ops( operands, metadata, true, [](uint64_t x, uint64_t y) -> uint64_t { return x & y; }); results[0] = nzcv; @@ -381,35 +370,35 @@ void Instruction::execute() { break; } case Opcode::AArch64_ANDWri: { // and wd, wn, #imm - auto [result, nzcv] = logicalHelp::logicOp_imm( + auto [result, nzcv] = logicOp_imm( operands, metadata, false, [](uint32_t x, uint32_t y) -> uint32_t { return x & y; }); results[0] = {result, 8}; break; } case Opcode::AArch64_ANDWrs: { // and wd, wn, wm{, shift #amount} - auto [result, nzcv] = logicalHelp::logicOpShift_3ops( + auto [result, nzcv] = logicOpShift_3ops( operands, metadata, false, [](uint32_t x, uint32_t y) -> uint32_t { return x & y; }); results[0] = {result, 8}; break; } case Opcode::AArch64_ANDXri: { // and xd, xn, #imm - auto [result, nzcv] = logicalHelp::logicOp_imm( + auto [result, nzcv] = logicOp_imm( operands, metadata, false, [](uint64_t x, uint64_t y) -> uint64_t { return x & y; }); results[0] = result; break; } case Opcode::AArch64_ANDXrs: { // and xd, xn, xm{, shift #amount} - auto [result, nzcv] = logicalHelp::logicOpShift_3ops( + auto [result, nzcv] = logicOpShift_3ops( operands, metadata, false, [](uint64_t x, uint64_t y) -> uint64_t { return x & y; }); results[0] = result; break; } case Opcode::AArch64_AND_PPzPP: { // and pd.b, pg/z, pn.b, pm.b - results[0] = 
sveHelp::sveLogicOp_preds( + results[0] = sveLogicOp_preds( operands, VL_bits, [](uint64_t x, uint64_t y) -> uint64_t { return x & y; }); break; @@ -427,45 +416,45 @@ void Instruction::execute() { break; } case Opcode::AArch64_AND_ZPmZ_B: { // and zdn.b, pg/m, zdn.b, zm.b - results[0] = sveHelp::sveLogicOpPredicated_3vecs( + results[0] = sveLogicOpPredicated_3vecs( operands, VL_bits, [](uint8_t x, uint8_t y) -> uint8_t { return x & y; }); break; } case Opcode::AArch64_AND_ZPmZ_D: { // and zdn.d, pg/m, zdn.d, zm.d - results[0] = sveHelp::sveLogicOpPredicated_3vecs( + results[0] = sveLogicOpPredicated_3vecs( operands, VL_bits, [](uint64_t x, uint64_t y) -> uint64_t { return x & y; }); break; } case Opcode::AArch64_AND_ZPmZ_H: { // and zdn.h, pg/m, zdn.h, zm.h - results[0] = sveHelp::sveLogicOpPredicated_3vecs( + results[0] = sveLogicOpPredicated_3vecs( operands, VL_bits, [](uint16_t x, uint16_t y) -> uint16_t { return x & y; }); break; } case Opcode::AArch64_AND_ZPmZ_S: { // and zdn.s, pg/m, zdn.s, zm.s - results[0] = sveHelp::sveLogicOpPredicated_3vecs( + results[0] = sveLogicOpPredicated_3vecs( operands, VL_bits, [](uint32_t x, uint32_t y) -> uint32_t { return x & y; }); break; } case Opcode::AArch64_ANDv16i8: { // and vd.16b, vn.16b, vm.16b - results[0] = neonHelp::vecLogicOp_3vecs( + results[0] = vecLogicOp_3vecs( operands, [](uint8_t x, uint8_t y) -> uint8_t { return x & y; }); break; } case Opcode::AArch64_ANDv8i8: { // and vd.8b, vn.8b, vm.8b - results[0] = neonHelp::vecLogicOp_3vecs( + results[0] = vecLogicOp_3vecs( operands, [](uint8_t x, uint8_t y) -> uint8_t { return x & y; }); break; } case Opcode::AArch64_ASRVWr: { // asrv wd, wn, wm - results[0] = {logicalHelp::asrv_3gpr(operands), 8}; + results[0] = {asrv_3gpr(operands), 8}; break; } case Opcode::AArch64_ASRVXr: { // asrv xd, xn, xm - results[0] = logicalHelp::asrv_3gpr(operands); + results[0] = asrv_3gpr(operands); break; } case Opcode::AArch64_B: { // b label @@ -474,68 +463,63 @@ void 
Instruction::execute() { break; } case Opcode::AArch64_BFMWri: { // bfm wd, wn, #immr, #imms - results[0] = { - bitmanipHelp::bfm_2imms(operands, metadata, false, false), - 8}; + results[0] = {bfm_2imms(operands, metadata, false, false), 8}; break; } case Opcode::AArch64_BFMXri: { // bfm xd, xn, #immr, #imms - results[0] = - bitmanipHelp::bfm_2imms(operands, metadata, false, false); + results[0] = bfm_2imms(operands, metadata, false, false); break; } case Opcode::AArch64_BICSWrs: { // bics wd, wn, wm{, shift #amount} - auto [result, nzcv] = - logicalHelp::bicShift_3ops(operands, metadata, true); + auto [result, nzcv] = bicShift_3ops(operands, metadata, true); results[0] = nzcv; results[1] = {result, 8}; break; } case Opcode::AArch64_BICSXrs: { // bics xd, xn, xm{, shift #amount} - auto [result, nzcv] = - logicalHelp::bicShift_3ops(operands, metadata, true); + auto [result, nzcv] = bicShift_3ops(operands, metadata, true); results[0] = nzcv; results[1] = result; break; } case Opcode::AArch64_BICWrs: { // bic wd, wn, wm{, shift #amount} auto [result, nzcv] = - logicalHelp::bicShift_3ops(operands, metadata, false); + bicShift_3ops(operands, metadata, false); results[0] = {result, 8}; break; } case Opcode::AArch64_BICXrs: { // bic xd, xn, xm{, shift #amount} auto [result, nzcv] = - logicalHelp::bicShift_3ops(operands, metadata, false); + bicShift_3ops(operands, metadata, false); results[0] = result; break; } case Opcode::AArch64_BICv16i8: { // bic vd.16b, vn.16b, vm.16b - results[0] = neonHelp::vecBic_3ops(operands); + results[0] = vecBic_3ops(operands); break; } case Opcode::AArch64_BICv4i32: { // bic vd.4s, #imm{, lsl #shift} - results[0] = neonHelp::vecBicShift_imm(operands, metadata); + results[0] = vecBicShift_imm(operands, metadata); break; } case Opcode::AArch64_BICv8i16: { // bic vd.8h, #imm{, lsl #shift} - results[0] = neonHelp::vecBicShift_imm(operands, metadata); + results[0] = vecBicShift_imm(operands, metadata); break; } case Opcode::AArch64_BICv8i8: { // 
bic vd.8b, vn.8b, vm.8b - results[0] = neonHelp::vecBic_3ops(operands); + results[0] = vecBic_3ops(operands); break; } case Opcode::AArch64_BIFv16i8: { // bif vd.16b, vn.16b, vm.16b - results[0] = neonHelp::vecBitwiseInsert<16>(operands, true); + results[0] = vecBitwiseInsert<16>(operands, true); break; } case Opcode::AArch64_BITv16i8: { // bit vd.16b, vn.16b, vm.16b - results[0] = neonHelp::vecBitwiseInsert<16>(operands, false); + results[0] = vecBitwiseInsert<16>(operands, false); break; } case Opcode::AArch64_BITv8i8: { // bit vd.8b, vn.8b, vm.8b - results[0] = neonHelp::vecBitwiseInsert<8>(operands, false); + results[0] = vecBitwiseInsert<8>(operands, false); break; } case Opcode::AArch64_BL: { // bl #imm @@ -560,11 +544,11 @@ void Instruction::execute() { break; } case Opcode::AArch64_BSLv16i8: { // bsl vd.16b, vn.16b, vm.16b - results[0] = neonHelp::vecBsl<16>(operands); + results[0] = vecBsl<16>(operands); break; } case Opcode::AArch64_Bcc: { // b.cond label - if (AuxFunc::conditionHolds(metadata.cc, operands[0].get())) { + if (conditionHolds(metadata.cc, operands[0].get())) { branchTaken_ = true; branchAddress_ = instructionAddress_ + metadata.operands[0].imm; } else { @@ -590,7 +574,7 @@ void Instruction::execute() { break; } case Opcode::AArch64_CBNZW: { // cbnz wn, #imm - auto [taken, addr] = conditionalHelp::condBranch_cmpToZero( + auto [taken, addr] = condBranch_cmpToZero( operands, metadata, instructionAddress_, [](uint32_t x) -> bool { return x != 0; }); branchTaken_ = taken; @@ -598,7 +582,7 @@ void Instruction::execute() { break; } case Opcode::AArch64_CBNZX: { // cbnz xn, #imm - auto [taken, addr] = conditionalHelp::condBranch_cmpToZero( + auto [taken, addr] = condBranch_cmpToZero( operands, metadata, instructionAddress_, [](uint64_t x) -> bool { return x != 0; }); branchTaken_ = taken; @@ -606,7 +590,7 @@ void Instruction::execute() { break; } case Opcode::AArch64_CBZW: { // cbz wn, #imm - auto [taken, addr] = 
conditionalHelp::condBranch_cmpToZero( + auto [taken, addr] = condBranch_cmpToZero( operands, metadata, instructionAddress_, [](uint32_t x) -> bool { return x == 0; }); branchTaken_ = taken; @@ -614,7 +598,7 @@ void Instruction::execute() { break; } case Opcode::AArch64_CBZX: { // cbz xn, #imm - auto [taken, addr] = conditionalHelp::condBranch_cmpToZero( + auto [taken, addr] = condBranch_cmpToZero( operands, metadata, instructionAddress_, [](uint64_t x) -> bool { return x == 0; }); branchTaken_ = taken; @@ -622,77 +606,77 @@ void Instruction::execute() { break; } case Opcode::AArch64_CCMNWi: { // ccmn wn, #imm, #nzcv, cc - results[0] = conditionalHelp::ccmn_imm(operands, metadata); + results[0] = ccmn_imm(operands, metadata); break; } case Opcode::AArch64_CCMNXi: { // ccmn xn, #imm, #nzcv, cc - results[0] = conditionalHelp::ccmn_imm(operands, metadata); + results[0] = ccmn_imm(operands, metadata); break; } case Opcode::AArch64_CCMPWi: { // ccmp wn, #imm, #nzcv, cc - results[0] = conditionalHelp::ccmp_imm(operands, metadata); + results[0] = ccmp_imm(operands, metadata); break; } case Opcode::AArch64_CCMPWr: { // ccmp wn, wm, #nzcv, cc - results[0] = conditionalHelp::ccmp_reg(operands, metadata); + results[0] = ccmp_reg(operands, metadata); break; } case Opcode::AArch64_CCMPXi: { // ccmp xn, #imm, #nzcv, cc - results[0] = conditionalHelp::ccmp_imm(operands, metadata); + results[0] = ccmp_imm(operands, metadata); break; } case Opcode::AArch64_CCMPXr: { // ccmp xn, xm, #nzcv, cc - results[0] = conditionalHelp::ccmp_reg(operands, metadata); + results[0] = ccmp_reg(operands, metadata); break; } case Opcode::AArch64_CLZXr: { // clz xd, xn - results[0] = arithmeticHelp::clz_reg(operands); + results[0] = clz_reg(operands); break; } case Opcode::AArch64_CMEQv16i8: { // cmeq vd.16b, vn.16b, vm.16b - results[0] = neonHelp::vecCompare( + results[0] = vecCompare( operands, false, [](uint8_t x, uint8_t y) -> bool { return (x == y); }); break; } case Opcode::AArch64_CMEQv16i8rz: { 
// cmeq vd.16b, vn.16b, #0 - results[0] = neonHelp::vecCompare( + results[0] = vecCompare( operands, true, [](uint8_t x, uint8_t y) -> bool { return (x == y); }); break; } case Opcode::AArch64_CMEQv4i32: { // cmeq vd.4s, vn.4s, vm.4s - results[0] = neonHelp::vecCompare( + results[0] = vecCompare( operands, false, [](uint32_t x, uint32_t y) -> bool { return (x == y); }); break; } case Opcode::AArch64_CMEQv8i8: { // cmeq vd.8b, vn.8b, vm.8b - results[0] = neonHelp::vecCompare( + results[0] = vecCompare( operands, false, [](int8_t x, int8_t y) -> bool { return (x == y); }); break; } case Opcode::AArch64_CMEQv8i8rz: { // cmeq vd.8b, vn.8b, #0 - results[0] = neonHelp::vecCompare( + results[0] = vecCompare( operands, true, [](int8_t x, int8_t y) -> bool { return (x == y); }); break; } case Opcode::AArch64_CMHIv4i32: { // cmhi vd.4s, vn.4s, vm.4s - results[0] = neonHelp::vecCompare( + results[0] = vecCompare( operands, false, [](uint32_t x, uint32_t y) -> bool { return (x > y); }); break; } case Opcode::AArch64_CMHSv16i8: { // cmhs vd.16b, vn.16b, vm.16b - results[0] = neonHelp::vecCompare( + results[0] = vecCompare( operands, false, [](int8_t x, int8_t y) -> bool { return (x >= y); }); break; } case Opcode::AArch64_CMPEQ_PPzZI_B: { // cmpeq pd.b, pg/z, zn.b, #imm - auto [output, nzcv] = sveHelp::sveCmpPredicated_toPred( + auto [output, nzcv] = sveCmpPredicated_toPred( operands, metadata, VL_bits, true, [](uint8_t x, uint8_t y) -> bool { return x == y; }); results[0] = nzcv; @@ -700,7 +684,7 @@ void Instruction::execute() { break; } case Opcode::AArch64_CMPEQ_PPzZI_D: { // cmpeq pd.d, pg/z, zn.d, #imm - auto [output, nzcv] = sveHelp::sveCmpPredicated_toPred( + auto [output, nzcv] = sveCmpPredicated_toPred( operands, metadata, VL_bits, true, [](uint64_t x, uint64_t y) -> bool { return x == y; }); results[0] = nzcv; @@ -708,7 +692,7 @@ void Instruction::execute() { break; } case Opcode::AArch64_CMPEQ_PPzZI_H: { // cmpeq pd.h, pg/z, zn.h, #imm - auto [output, nzcv] = 
sveHelp::sveCmpPredicated_toPred( + auto [output, nzcv] = sveCmpPredicated_toPred( operands, metadata, VL_bits, true, [](uint16_t x, uint16_t y) -> bool { return x == y; }); results[0] = nzcv; @@ -716,7 +700,7 @@ void Instruction::execute() { break; } case Opcode::AArch64_CMPEQ_PPzZI_S: { // cmpeq pd.s, pg/z, zn.s, #imm - auto [output, nzcv] = sveHelp::sveCmpPredicated_toPred( + auto [output, nzcv] = sveCmpPredicated_toPred( operands, metadata, VL_bits, true, [](uint32_t x, uint32_t y) -> bool { return x == y; }); results[0] = nzcv; @@ -724,7 +708,7 @@ void Instruction::execute() { break; } case Opcode::AArch64_CMPEQ_PPzZZ_B: { // cmpeq pd.b, pg/z, zn.b, zm.b - auto [output, nzcv] = sveHelp::sveCmpPredicated_toPred( + auto [output, nzcv] = sveCmpPredicated_toPred( operands, metadata, VL_bits, false, [](uint8_t x, uint8_t y) -> bool { return x == y; }); results[0] = nzcv; @@ -732,7 +716,7 @@ void Instruction::execute() { break; } case Opcode::AArch64_CMPEQ_PPzZZ_D: { // cmpeq pd.d, pg/z, zn.d, zm.d - auto [output, nzcv] = sveHelp::sveCmpPredicated_toPred( + auto [output, nzcv] = sveCmpPredicated_toPred( operands, metadata, VL_bits, false, [](uint64_t x, uint64_t y) -> bool { return x == y; }); results[0] = nzcv; @@ -740,7 +724,7 @@ void Instruction::execute() { break; } case Opcode::AArch64_CMPEQ_PPzZZ_H: { // cmpeq pd.h, pg/z, zn.h, zm.h - auto [output, nzcv] = sveHelp::sveCmpPredicated_toPred( + auto [output, nzcv] = sveCmpPredicated_toPred( operands, metadata, VL_bits, false, [](uint16_t x, uint16_t y) -> bool { return x == y; }); results[0] = nzcv; @@ -748,7 +732,7 @@ void Instruction::execute() { break; } case Opcode::AArch64_CMPEQ_PPzZZ_S: { // cmpeq pd.s, pg/z, zn.s, zm.s - auto [output, nzcv] = sveHelp::sveCmpPredicated_toPred( + auto [output, nzcv] = sveCmpPredicated_toPred( operands, metadata, VL_bits, false, [](uint32_t x, uint32_t y) -> bool { return x == y; }); results[0] = nzcv; @@ -756,7 +740,7 @@ void Instruction::execute() { break; } case 
Opcode::AArch64_CMPGT_PPzZZ_B: { // cmpgt pd.b, pg/z, zn.b, zm.b - auto [output, nzcv] = sveHelp::sveCmpPredicated_toPred( + auto [output, nzcv] = sveCmpPredicated_toPred( operands, metadata, VL_bits, false, [](int8_t x, int8_t y) -> bool { return x > y; }); results[0] = nzcv; @@ -764,7 +748,7 @@ void Instruction::execute() { break; } case Opcode::AArch64_CMPGT_PPzZZ_D: { // cmpgt pd.d, pg/z, zn.d, zm.d - auto [output, nzcv] = sveHelp::sveCmpPredicated_toPred( + auto [output, nzcv] = sveCmpPredicated_toPred( operands, metadata, VL_bits, false, [](int64_t x, int64_t y) -> bool { return x > y; }); results[0] = nzcv; @@ -772,7 +756,7 @@ void Instruction::execute() { break; } case Opcode::AArch64_CMPGT_PPzZZ_H: { // cmpgt pd.h, pg/z, zn.h, zm.h - auto [output, nzcv] = sveHelp::sveCmpPredicated_toPred( + auto [output, nzcv] = sveCmpPredicated_toPred( operands, metadata, VL_bits, false, [](int16_t x, int16_t y) -> bool { return x > y; }); results[0] = nzcv; @@ -780,7 +764,7 @@ void Instruction::execute() { break; } case Opcode::AArch64_CMPGT_PPzZZ_S: { // cmpgt pd.s, pg/z, zn.s, zm.s - auto [output, nzcv] = sveHelp::sveCmpPredicated_toPred( + auto [output, nzcv] = sveCmpPredicated_toPred( operands, metadata, VL_bits, false, [](int32_t x, int32_t y) -> bool { return x > y; }); results[0] = nzcv; @@ -788,7 +772,7 @@ void Instruction::execute() { break; } case Opcode::AArch64_CMPHI_PPzZZ_B: { // cmphi pd.b, pg/z, zn.b, zm.b - auto [output, nzcv] = sveHelp::sveCmpPredicated_toPred( + auto [output, nzcv] = sveCmpPredicated_toPred( operands, metadata, VL_bits, false, [](uint8_t x, uint8_t y) -> bool { return x > y; }); results[0] = nzcv; @@ -796,7 +780,7 @@ void Instruction::execute() { break; } case Opcode::AArch64_CMPHI_PPzZZ_D: { // cmphi pd.d, pg/z, zn.d, zm.d - auto [output, nzcv] = sveHelp::sveCmpPredicated_toPred( + auto [output, nzcv] = sveCmpPredicated_toPred( operands, metadata, VL_bits, false, [](uint64_t x, uint64_t y) -> bool { return x > y; }); results[0] = nzcv; 
@@ -804,7 +788,7 @@ void Instruction::execute() { break; } case Opcode::AArch64_CMPHI_PPzZZ_H: { // cmphi pd.h, pg/z, zn.h, zm.h - auto [output, nzcv] = sveHelp::sveCmpPredicated_toPred( + auto [output, nzcv] = sveCmpPredicated_toPred( operands, metadata, VL_bits, false, [](uint16_t x, uint16_t y) -> bool { return x > y; }); results[0] = nzcv; @@ -812,7 +796,7 @@ void Instruction::execute() { break; } case Opcode::AArch64_CMPHI_PPzZZ_S: { // cmphi pd.s, pg/z, zn.s, zm.s - auto [output, nzcv] = sveHelp::sveCmpPredicated_toPred( + auto [output, nzcv] = sveCmpPredicated_toPred( operands, metadata, VL_bits, false, [](uint32_t x, uint32_t y) -> bool { return x > y; }); results[0] = nzcv; @@ -820,7 +804,7 @@ void Instruction::execute() { break; } case Opcode::AArch64_CMPNE_PPzZI_B: { // cmpne pd.b, pg/z. zn.b, #imm - auto [output, nzcv] = sveHelp::sveCmpPredicated_toPred( + auto [output, nzcv] = sveCmpPredicated_toPred( operands, metadata, VL_bits, true, [](int8_t x, int8_t y) -> bool { return x != y; }); results[0] = nzcv; @@ -828,7 +812,7 @@ void Instruction::execute() { break; } case Opcode::AArch64_CMPNE_PPzZI_D: { // cmpne pd.d, pg/z. zn.d, #imm - auto [output, nzcv] = sveHelp::sveCmpPredicated_toPred( + auto [output, nzcv] = sveCmpPredicated_toPred( operands, metadata, VL_bits, true, [](int64_t x, int64_t y) -> bool { return x != y; }); results[0] = nzcv; @@ -836,7 +820,7 @@ void Instruction::execute() { break; } case Opcode::AArch64_CMPNE_PPzZI_H: { // cmpne pd.h, pg/z. zn.h, #imm - auto [output, nzcv] = sveHelp::sveCmpPredicated_toPred( + auto [output, nzcv] = sveCmpPredicated_toPred( operands, metadata, VL_bits, true, [](int16_t x, int16_t y) -> bool { return x != y; }); results[0] = nzcv; @@ -844,7 +828,7 @@ void Instruction::execute() { break; } case Opcode::AArch64_CMPNE_PPzZI_S: { // cmpne pd.s, pg/z. 
zn.s, #imm - auto [output, nzcv] = sveHelp::sveCmpPredicated_toPred( + auto [output, nzcv] = sveCmpPredicated_toPred( operands, metadata, VL_bits, true, [](int32_t x, int32_t y) -> bool { return x != y; }); results[0] = nzcv; @@ -852,7 +836,7 @@ void Instruction::execute() { break; } case Opcode::AArch64_CMPNE_PPzZZ_B: { // cmpne pd.b, pg/z, zn.b, zm.b - auto [output, nzcv] = sveHelp::sveCmpPredicated_toPred( + auto [output, nzcv] = sveCmpPredicated_toPred( operands, metadata, VL_bits, false, [](int8_t x, int8_t y) -> bool { return x != y; }); results[0] = nzcv; @@ -860,7 +844,7 @@ void Instruction::execute() { break; } case Opcode::AArch64_CMPNE_PPzZZ_D: { // cmpne pd.d, pg/z, zn.d, zm.d - auto [output, nzcv] = sveHelp::sveCmpPredicated_toPred( + auto [output, nzcv] = sveCmpPredicated_toPred( operands, metadata, VL_bits, false, [](int64_t x, int64_t y) -> bool { return x != y; }); results[0] = nzcv; @@ -868,7 +852,7 @@ void Instruction::execute() { break; } case Opcode::AArch64_CMPNE_PPzZZ_H: { // cmpne pd.h, pg/z, zn.h, zm.h - auto [output, nzcv] = sveHelp::sveCmpPredicated_toPred( + auto [output, nzcv] = sveCmpPredicated_toPred( operands, metadata, VL_bits, false, [](int16_t x, int16_t y) -> bool { return x != y; }); results[0] = nzcv; @@ -876,7 +860,7 @@ void Instruction::execute() { break; } case Opcode::AArch64_CMPNE_PPzZZ_S: { // cmpne pd.s, pg/z, zn.s, zm.s - auto [output, nzcv] = sveHelp::sveCmpPredicated_toPred( + auto [output, nzcv] = sveCmpPredicated_toPred( operands, metadata, VL_bits, false, [](int32_t x, int32_t y) -> bool { return x != y; }); results[0] = nzcv; @@ -884,123 +868,118 @@ void Instruction::execute() { break; } case Opcode::AArch64_CNTB_XPiI: { // cntb xd{, pattern{, #imm}} - results[0] = sveHelp::sveCnt_gpr(metadata, VL_bits); + results[0] = sveCnt_gpr(metadata, VL_bits); break; } case Opcode::AArch64_CNTD_XPiI: { // cntd xd{, pattern{, #imm}} - results[0] = sveHelp::sveCnt_gpr(metadata, VL_bits); + results[0] = sveCnt_gpr(metadata, 
VL_bits); break; } case Opcode::AArch64_CNTH_XPiI: { // cnth xd{, pattern{, #imm}} - results[0] = sveHelp::sveCnt_gpr(metadata, VL_bits); + results[0] = sveCnt_gpr(metadata, VL_bits); break; } case Opcode::AArch64_CNTP_XPP_B: { // cntp xd, pg, pn.b - results[0] = sveHelp::sveCntp(operands, VL_bits); + results[0] = sveCntp(operands, VL_bits); break; } case Opcode::AArch64_CNTP_XPP_D: { // cntp xd, pg, pn.d - results[0] = sveHelp::sveCntp(operands, VL_bits); + results[0] = sveCntp(operands, VL_bits); break; } case Opcode::AArch64_CNTP_XPP_H: { // cntp xd, pg, pn.h - results[0] = sveHelp::sveCntp(operands, VL_bits); + results[0] = sveCntp(operands, VL_bits); break; } case Opcode::AArch64_CNTP_XPP_S: { // cntp xd, pg, pn.s - results[0] = sveHelp::sveCntp(operands, VL_bits); + results[0] = sveCntp(operands, VL_bits); break; } case Opcode::AArch64_CNTW_XPiI: { // cntw xd{, pattern{, #imm}} - results[0] = sveHelp::sveCnt_gpr(metadata, VL_bits); + results[0] = sveCnt_gpr(metadata, VL_bits); break; } case Opcode::AArch64_CNTv8i8: { // cnt vd.8b, vn.8b - results[0] = neonHelp::vecCountPerByte(operands); + results[0] = vecCountPerByte(operands); break; } case Opcode::AArch64_CPY_ZPzI_B: { // cpy zd.b, pg/z, #imm{, shift} - results[0] = sveHelp::sveCpy_imm(operands, metadata, VL_bits); + results[0] = sveCpy_imm(operands, metadata, VL_bits); break; } case Opcode::AArch64_CPY_ZPzI_D: { // cpy zd.d, pg/z, #imm{, shift} - results[0] = sveHelp::sveCpy_imm(operands, metadata, VL_bits); + results[0] = sveCpy_imm(operands, metadata, VL_bits); break; } case Opcode::AArch64_CPY_ZPzI_H: { // cpy zd.h, pg/z, #imm{, shift} - results[0] = sveHelp::sveCpy_imm(operands, metadata, VL_bits); + results[0] = sveCpy_imm(operands, metadata, VL_bits); break; } case Opcode::AArch64_CPY_ZPzI_S: { // cpy zd.s, pg/z, #imm{, shift} - results[0] = sveHelp::sveCpy_imm(operands, metadata, VL_bits); + results[0] = sveCpy_imm(operands, metadata, VL_bits); break; } case Opcode::AArch64_DUPi32: { // dup vd, 
vn.s[index] - results[0] = - neonHelp::vecDup_gprOrIndex(operands, metadata, false); + results[0] = vecDup_gprOrIndex(operands, metadata, false); break; } case Opcode::AArch64_DUPi64: { // dup vd, vn.d[index] - results[0] = - neonHelp::vecDup_gprOrIndex(operands, metadata, false); + results[0] = vecDup_gprOrIndex(operands, metadata, false); break; } case Opcode::AArch64_CSELWr: { // csel wd, wn, wm, cc results[0] = { - conditionalHelp::cs_4ops( - operands, metadata, [](uint32_t x) -> uint32_t { return x; }), + cs_4ops(operands, metadata, + [](uint32_t x) -> uint32_t { return x; }), 8}; break; } case Opcode::AArch64_CSELXr: { // csel xd, xn, xm, cc - results[0] = conditionalHelp::cs_4ops( + results[0] = cs_4ops( operands, metadata, [](uint64_t x) -> uint64_t { return x; }); break; } case Opcode::AArch64_CSINCWr: { // csinc wd, wn, wm, cc - results[0] = {conditionalHelp::cs_4ops( - operands, metadata, - [](uint32_t x) -> uint32_t { return x + 1; }), - 8}; + results[0] = { + cs_4ops(operands, metadata, + [](uint32_t x) -> uint32_t { return x + 1; }), + 8}; break; } case Opcode::AArch64_CSINCXr: { // csinc xd, xn, xm, cc - results[0] = conditionalHelp::cs_4ops( + results[0] = cs_4ops( operands, metadata, [](uint64_t x) -> uint64_t { return x + 1; }); break; } case Opcode::AArch64_CSINVWr: { // csinv wd, wn, wm, cc results[0] = { - conditionalHelp::cs_4ops( - operands, metadata, [](uint32_t x) -> uint32_t { return ~x; }), + cs_4ops(operands, metadata, + [](uint32_t x) -> uint32_t { return ~x; }), 8}; break; } case Opcode::AArch64_CSINVXr: { // csinv xd, xn, xm, cc - results[0] = conditionalHelp::cs_4ops( + results[0] = cs_4ops( operands, metadata, [](uint64_t x) -> uint64_t { return ~x; }); break; } case Opcode::AArch64_CSNEGWr: { // csneg wd, wn, wm, cc - results[0] = { - conditionalHelp::cs_4ops( - operands, metadata, [](int32_t x) -> int32_t { return -x; }), - 8}; + results[0] = {cs_4ops(operands, metadata, + [](int32_t x) -> int32_t { return -x; }), + 8}; break; } 
case Opcode::AArch64_CSNEGXr: { // csneg xd, xn, xm, cc - results[0] = conditionalHelp::cs_4ops( + results[0] = cs_4ops( operands, metadata, [](uint64_t x) -> uint64_t { return -x; }); break; } case Opcode::AArch64_DECB_XPiI: { // decb xdn{, pattern{, MUL #imm}} - results[0] = - sveHelp::sveDec_scalar(operands, metadata, VL_bits); + results[0] = sveDec_scalar(operands, metadata, VL_bits); break; } case Opcode::AArch64_DECD_XPiI: { // decd xdn{, pattern{, MUL #imm}} - results[0] = - sveHelp::sveDec_scalar(operands, metadata, VL_bits); + results[0] = sveDec_scalar(operands, metadata, VL_bits); break; } case Opcode::AArch64_DMB: { // dmb option|#imm @@ -1017,48 +996,47 @@ void Instruction::execute() { break; } case Opcode::AArch64_DUP_ZI_B: { // dup zd.b, #imm{, shift} - results[0] = sveHelp::sveDup_immOrScalar(operands, metadata, - VL_bits, true); + results[0] = + sveDup_immOrScalar(operands, metadata, VL_bits, true); break; } case Opcode::AArch64_DUP_ZI_D: { // dup zd.d, #imm{, shift} - results[0] = sveHelp::sveDup_immOrScalar(operands, metadata, - VL_bits, true); + results[0] = + sveDup_immOrScalar(operands, metadata, VL_bits, true); break; } case Opcode::AArch64_DUP_ZI_H: { // dup zd.h, #imm{, shift} - results[0] = sveHelp::sveDup_immOrScalar(operands, metadata, - VL_bits, true); + results[0] = + sveDup_immOrScalar(operands, metadata, VL_bits, true); break; } case Opcode::AArch64_DUP_ZI_S: { // dup zd.s, #imm{, shift} - results[0] = sveHelp::sveDup_immOrScalar(operands, metadata, - VL_bits, true); + results[0] = + sveDup_immOrScalar(operands, metadata, VL_bits, true); break; } case Opcode::AArch64_DUP_ZR_B: { // dup zd.b, wn - results[0] = sveHelp::sveDup_immOrScalar(operands, metadata, - VL_bits, false); + results[0] = + sveDup_immOrScalar(operands, metadata, VL_bits, false); break; } case Opcode::AArch64_DUP_ZR_D: { // dup zd.d, xn - results[0] = sveHelp::sveDup_immOrScalar(operands, metadata, - VL_bits, false); + results[0] = + sveDup_immOrScalar(operands, 
metadata, VL_bits, false); break; } case Opcode::AArch64_DUP_ZR_H: { // dup zd.h, wn - results[0] = sveHelp::sveDup_immOrScalar(operands, metadata, - VL_bits, false); + results[0] = + sveDup_immOrScalar(operands, metadata, VL_bits, false); break; } case Opcode::AArch64_DUP_ZR_S: { // dup zd.s, wn - results[0] = sveHelp::sveDup_immOrScalar(operands, metadata, - VL_bits, false); + results[0] = + sveDup_immOrScalar(operands, metadata, VL_bits, false); break; } case Opcode::AArch64_DUP_ZZI_D: { // dup zd.d, zn.d[#imm] - results[0] = - sveHelp::sveDup_vecIndexed(operands, metadata, VL_bits); + results[0] = sveDup_vecIndexed(operands, metadata, VL_bits); break; } case Opcode::AArch64_DUP_ZZI_Q: { // dup zd.q, zn.q[#imm] @@ -1083,126 +1061,116 @@ void Instruction::execute() { break; } case Opcode::AArch64_DUP_ZZI_S: { // dup zd.s, zn.s[#imm] - results[0] = - sveHelp::sveDup_vecIndexed(operands, metadata, VL_bits); + results[0] = sveDup_vecIndexed(operands, metadata, VL_bits); break; } case Opcode::AArch64_DUPv16i8gpr: { // dup vd.16b, wn - results[0] = - neonHelp::vecDup_gprOrIndex(operands, metadata, true); + results[0] = vecDup_gprOrIndex(operands, metadata, true); break; } case Opcode::AArch64_DUPv2i32gpr: { // dup vd.2s, wn - results[0] = - neonHelp::vecDup_gprOrIndex(operands, metadata, true); + results[0] = vecDup_gprOrIndex(operands, metadata, true); break; } case Opcode::AArch64_DUPv2i32lane: { // dup vd.2s, vn.s[index] - results[0] = - neonHelp::vecDup_gprOrIndex(operands, metadata, false); + results[0] = vecDup_gprOrIndex(operands, metadata, false); break; } case Opcode::AArch64_DUPv2i64gpr: { // dup vd.2d, xn - results[0] = - neonHelp::vecDup_gprOrIndex(operands, metadata, true); + results[0] = vecDup_gprOrIndex(operands, metadata, true); break; } case Opcode::AArch64_DUPv2i64lane: { // dup vd.2d, vn.d[index] - results[0] = - neonHelp::vecDup_gprOrIndex(operands, metadata, false); + results[0] = vecDup_gprOrIndex(operands, metadata, false); break; } case 
Opcode::AArch64_DUPv4i16gpr: { // dup vd.4h, wn - results[0] = - neonHelp::vecDup_gprOrIndex(operands, metadata, true); + results[0] = vecDup_gprOrIndex(operands, metadata, true); break; } case Opcode::AArch64_DUPv4i32gpr: { // dup vd.4s, wn - results[0] = - neonHelp::vecDup_gprOrIndex(operands, metadata, true); + results[0] = vecDup_gprOrIndex(operands, metadata, true); break; } case Opcode::AArch64_DUPv4i32lane: { // dup vd.4s, vn.s[index] - results[0] = - neonHelp::vecDup_gprOrIndex(operands, metadata, false); + results[0] = vecDup_gprOrIndex(operands, metadata, false); break; } case Opcode::AArch64_DUPv8i16gpr: { // dup vd.8h, wn - results[0] = - neonHelp::vecDup_gprOrIndex(operands, metadata, true); + results[0] = vecDup_gprOrIndex(operands, metadata, true); break; } case Opcode::AArch64_EORWri: { // eor wd, wn, #imm - auto [result, nzcv] = logicalHelp::logicOp_imm( + auto [result, nzcv] = logicOp_imm( operands, metadata, false, [](uint32_t x, uint32_t y) -> uint32_t { return x ^ y; }); results[0] = {result, 8}; break; } case Opcode::AArch64_EORWrs: { // eor wd, wn, wm{, shift #imm} - auto [result, nzcv] = logicalHelp::logicOpShift_3ops( + auto [result, nzcv] = logicOpShift_3ops( operands, metadata, false, [](uint32_t x, uint32_t y) -> uint32_t { return x ^ y; }); results[0] = {result, 8}; break; } case Opcode::AArch64_EORXri: { // eor xd, xn, #imm - auto [result, nzcv] = logicalHelp::logicOp_imm( + auto [result, nzcv] = logicOp_imm( operands, metadata, false, [](uint64_t x, uint64_t y) -> uint64_t { return x ^ y; }); results[0] = result; break; } case Opcode::AArch64_EORXrs: { // eor xd, xn, xm{, shift #amount} - auto [result, nzcv] = logicalHelp::logicOpShift_3ops( + auto [result, nzcv] = logicOpShift_3ops( operands, metadata, false, [](uint64_t x, uint64_t y) -> uint64_t { return x ^ y; }); results[0] = result; break; } case Opcode::AArch64_EOR_PPzPP: { - results[0] = sveHelp::sveLogicOp_preds( + results[0] = sveLogicOp_preds( operands, VL_bits, [](uint64_t 
x, uint64_t y) -> uint64_t { return x ^ y; }); break; } case Opcode::AArch64_EOR_ZPmZ_B: { // eor zdn.b, pg/m, zdn.b, zm.b - results[0] = sveHelp::sveLogicOpPredicated_3vecs( + results[0] = sveLogicOpPredicated_3vecs( operands, VL_bits, [](uint8_t x, uint8_t y) -> uint8_t { return x ^ y; }); break; } case Opcode::AArch64_EOR_ZPmZ_D: { // eor zdn.d, pg/m, zdn.d, zm.d - results[0] = sveHelp::sveLogicOpPredicated_3vecs( + results[0] = sveLogicOpPredicated_3vecs( operands, VL_bits, [](uint64_t x, uint64_t y) -> uint64_t { return x ^ y; }); break; } case Opcode::AArch64_EOR_ZPmZ_H: { // eor zdn.h, pg/m, zdn.h, zm.h - results[0] = sveHelp::sveLogicOpPredicated_3vecs( + results[0] = sveLogicOpPredicated_3vecs( operands, VL_bits, [](uint16_t x, uint16_t y) -> uint16_t { return x ^ y; }); break; } case Opcode::AArch64_EOR_ZPmZ_S: { // eor zdn.s, pg/m, zdn.s, zm.s - results[0] = sveHelp::sveLogicOpPredicated_3vecs( + results[0] = sveLogicOpPredicated_3vecs( operands, VL_bits, [](uint32_t x, uint32_t y) -> uint32_t { return x ^ y; }); break; } case Opcode::AArch64_EOR_ZZZ: { // eor zd.d, zn.d, zm.d - results[0] = sveHelp::sveLogicOpUnPredicated_3vecs( + results[0] = sveLogicOpUnPredicated_3vecs( operands, VL_bits, [](uint64_t x, uint64_t y) -> uint64_t { return x ^ y; }); break; } case Opcode::AArch64_EORv16i8: { // eor vd.16b, vn.16b, vm.16b - results[0] = neonHelp::vecLogicOp_3vecs( + results[0] = vecLogicOp_3vecs( operands, [](uint8_t x, uint8_t y) -> uint8_t { return x ^ y; }); break; } case Opcode::AArch64_EORv8i8: { // eor vd.8b, vn.8b, vm.8b - results[0] = neonHelp::vecLogicOp_3vecs( + results[0] = vecLogicOp_3vecs( operands, [](uint8_t x, uint8_t y) -> uint8_t { return x ^ y; }); break; } @@ -1235,355 +1203,347 @@ void Instruction::execute() { break; } case Opcode::AArch64_EXTRWrri: { // extr wd, wn, wm, #lsb - results[0] = { - bitmanipHelp::extrLSB_registers(operands, metadata), 8}; + results[0] = {extrLSB_registers(operands, metadata), 8}; break; } case 
Opcode::AArch64_EXTRXrri: { // extr xd, xn, xm, #lsb - results[0] = - bitmanipHelp::extrLSB_registers(operands, metadata); + results[0] = extrLSB_registers(operands, metadata); break; } case Opcode::AArch64_EXTv16i8: { // ext vd.16b, vn.16b, vm.16b, #index - results[0] = - neonHelp::vecExtVecs_index(operands, metadata); + results[0] = vecExtVecs_index(operands, metadata); break; } case Opcode::AArch64_EXTv8i8: { // ext vd.8b, vn.8b, vm.8b, #index - results[0] = neonHelp::vecExtVecs_index(operands, metadata); + results[0] = vecExtVecs_index(operands, metadata); break; } case Opcode::AArch64_FABDv2f64: { // fabd vd.2d, vn.2d, vm.2d - results[0] = neonHelp::vecFabd(operands); + results[0] = vecFabd(operands); break; } case Opcode::AArch64_FABDv4f32: { // fabd vd.4s, vn.4s, vm.4s - results[0] = neonHelp::vecFabd(operands); + results[0] = vecFabd(operands); break; } case Opcode::AArch64_FABD32: { // fabd sd, sn, sm - results[0] = floatHelp::fabd_3ops(operands); + results[0] = fabd_3ops(operands); break; } case Opcode::AArch64_FABD64: { // fabd dd, dn, dm - results[0] = floatHelp::fabd_3ops(operands); + results[0] = fabd_3ops(operands); break; } case Opcode::AArch64_FABSDr: { // fabs dd, dn - results[0] = floatHelp::fabs_2ops(operands); + results[0] = fabs_2ops(operands); break; } case Opcode::AArch64_FABSSr: { // fabs sd, sn - results[0] = floatHelp::fabs_2ops(operands); + results[0] = fabs_2ops(operands); break; } case Opcode::AArch64_FABS_ZPmZ_D: { // fabs zd.d, pg/m, zn.d - results[0] = sveHelp::sveFabsPredicated(operands, VL_bits); + results[0] = sveFabsPredicated(operands, VL_bits); break; } case Opcode::AArch64_FABS_ZPmZ_S: { // fabs zd.s, pg/m, zn.s - results[0] = sveHelp::sveFabsPredicated(operands, VL_bits); + results[0] = sveFabsPredicated(operands, VL_bits); break; } case Opcode::AArch64_FABSv2f64: { // fabs vd.2d, vn.2d - results[0] = neonHelp::vecFabs_2ops(operands); + results[0] = vecFabs_2ops(operands); break; } case Opcode::AArch64_FABSv4f32: { // fabs 
vd.4s, vn.4s - results[0] = neonHelp::vecFabs_2ops(operands); + results[0] = vecFabs_2ops(operands); break; } case Opcode::AArch64_FADDA_VPZ_D: { // fadda dd, pg/m, dn, zm.d - results[0] = sveHelp::sveFaddaPredicated(operands, VL_bits); + results[0] = sveFaddaPredicated(operands, VL_bits); break; } case Opcode::AArch64_FADDA_VPZ_S: { // fadda sd, pg/m, sn, zm.s - results[0] = sveHelp::sveFaddaPredicated(operands, VL_bits); + results[0] = sveFaddaPredicated(operands, VL_bits); break; } case Opcode::AArch64_FADDDrr: { // fadd dd, dn, dm - results[0] = {arithmeticHelp::add_3ops(operands), 256}; + results[0] = {add_3ops(operands), 256}; break; } case Opcode::AArch64_FADDPv2f32: { // faddp vd.2s, vn.2s, vm.2s - results[0] = neonHelp::vecAddp_3ops(operands); + results[0] = vecAddp_3ops(operands); break; } case Opcode::AArch64_FADDPv2f64: { // faddp vd.2d, vn.2d, vm.2d - results[0] = neonHelp::vecAddp_3ops(operands); + results[0] = vecAddp_3ops(operands); break; } case Opcode::AArch64_FADDPv2i32p: { // faddp dd, vn.2s - results[0] = neonHelp::vecSumElems_2ops(operands); + results[0] = vecSumElems_2ops(operands); break; } case Opcode::AArch64_FADDPv2i64p: { // faddp dd, vn.2d - results[0] = neonHelp::vecSumElems_2ops(operands); + results[0] = vecSumElems_2ops(operands); break; } case Opcode::AArch64_FADDPv4f32: { // faddp vd.4s, vn.4s, vm.4s - results[0] = neonHelp::vecAddp_3ops(operands); + results[0] = vecAddp_3ops(operands); break; } case Opcode::AArch64_FADDSrr: { // fadd sd, sn, sm - results[0] = {arithmeticHelp::add_3ops(operands), 256}; + results[0] = {add_3ops(operands), 256}; break; } case Opcode::AArch64_FADD_ZPmI_D: { // fadd zdn.d, pg/m, zdn.d, const - results[0] = sveHelp::sveAddPredicated_const(operands, metadata, - VL_bits); + results[0] = + sveAddPredicated_const(operands, metadata, VL_bits); break; } case Opcode::AArch64_FADD_ZPmI_S: { // fadd zdn.s, pg/m, zdn.s, const - results[0] = - sveHelp::sveAddPredicated_const(operands, metadata, VL_bits); + 
results[0] = sveAddPredicated_const(operands, metadata, VL_bits); break; } case Opcode::AArch64_FADD_ZPmZ_D: { // fadd zdn.d, pg/m, zdn.d, zm.d - results[0] = sveHelp::sveAddPredicated_vecs(operands, VL_bits); + results[0] = sveAddPredicated_vecs(operands, VL_bits); break; } case Opcode::AArch64_FADD_ZPmZ_S: { // fadd zdn.s, pg/m, zdn.s, zm.s - results[0] = sveHelp::sveAddPredicated_vecs(operands, VL_bits); + results[0] = sveAddPredicated_vecs(operands, VL_bits); break; } case Opcode::AArch64_FADD_ZZZ_D: { // fadd zd.d, zn.d, zm.d - results[0] = sveHelp::sveAdd_3ops(operands, VL_bits); + results[0] = sveAdd_3ops(operands, VL_bits); break; } case Opcode::AArch64_FADD_ZZZ_S: { // fadd zd.s, zn.s, zm.s - results[0] = sveHelp::sveAdd_3ops(operands, VL_bits); + results[0] = sveAdd_3ops(operands, VL_bits); break; } case Opcode::AArch64_FADDv2f32: { // fadd vd.2s, vn.2s, vm.2s - results[0] = neonHelp::vecAdd_3ops(operands); + results[0] = vecAdd_3ops(operands); break; } case Opcode::AArch64_FADDv2f64: { // fadd vd.2d, vn.2d, vm.2d - results[0] = neonHelp::vecAdd_3ops(operands); + results[0] = vecAdd_3ops(operands); break; } case Opcode::AArch64_FADDv4f32: { // fadd vd.4s, vn.4s, vm.4s - results[0] = neonHelp::vecAdd_3ops(operands); + results[0] = vecAdd_3ops(operands); break; } case Opcode::AArch64_FCADD_ZPmZ_D: { // fcadd zdn.d, pg/m, zdn.d, zm.d, // #imm - results[0] = - sveHelp::sveFcaddPredicated(operands, metadata, VL_bits); + results[0] = sveFcaddPredicated(operands, metadata, VL_bits); break; } case Opcode::AArch64_FCCMPDrr: // fccmp sn, sm, #nzcv, cc case Opcode::AArch64_FCCMPEDrr: { // fccmpe sn, sm, #nzcv, cc - results[0] = floatHelp::fccmp(operands, metadata); + results[0] = fccmp(operands, metadata); break; } case Opcode::AArch64_FCCMPESrr: { // fccmpe sn, sm, #nzcv, cc - results[0] = floatHelp::fccmp(operands, metadata); + results[0] = fccmp(operands, metadata); break; } case Opcode::AArch64_FCCMPSrr: { // fccmp sn, sm, #nzcv, cc - results[0] = 
floatHelp::fccmp(operands, metadata); + results[0] = fccmp(operands, metadata); break; } case Opcode::AArch64_FCMEQv2i32rz: { // fcmeq vd.2s, vd.2s, #0.0 - results[0] = neonHelp::vecFCompare( + results[0] = vecFCompare( operands, true, [](float x, float y) -> bool { return x == y; }); break; } case Opcode::AArch64_FCMEQv4i32rz: { // fcmeq vd.4s vn.4s, #0.0 - results[0] = neonHelp::vecFCompare( + results[0] = vecFCompare( operands, true, [](float x, float y) -> bool { return x == y; }); break; } case Opcode::AArch64_FCMGE_PPzZ0_D: { // fcmge pd.d, pg/z, zn.d, #0.0 - results[0] = sveHelp::sveComparePredicated_vecsToPred( + results[0] = sveComparePredicated_vecsToPred( operands, metadata, VL_bits, true, [](double x, double y) -> bool { return x >= y; }); break; } case Opcode::AArch64_FCMGE_PPzZ0_S: { // fcmge pd.s, pg/z, zn.s, #0.0 - results[0] = sveHelp::sveComparePredicated_vecsToPred( + results[0] = sveComparePredicated_vecsToPred( operands, metadata, VL_bits, true, [](float x, float y) -> bool { return x >= y; }); break; } case Opcode::AArch64_FCMGE_PPzZZ_D: { // fcmge pd.d, pg/z, zn.d, zm.d - results[0] = sveHelp::sveComparePredicated_vecsToPred( + results[0] = sveComparePredicated_vecsToPred( operands, metadata, VL_bits, false, [](double x, double y) -> bool { return x >= y; }); break; } case Opcode::AArch64_FCMGE_PPzZZ_S: { // fcmge pd.s, pg/z, zn.s, zm.s - results[0] = sveHelp::sveComparePredicated_vecsToPred( + results[0] = sveComparePredicated_vecsToPred( operands, metadata, VL_bits, false, [](float x, float y) -> bool { return x >= y; }); break; } case Opcode::AArch64_FCMGEv2f32: { // fcmge vd.2s, vn.2s, vm.2s - results[0] = neonHelp::vecFCompare( + results[0] = vecFCompare( operands, false, [](float x, float y) -> bool { return x >= y; }); break; } case Opcode::AArch64_FCMGEv2f64: { // fcmge vd.2d, vn.2d, vm.2d - results[0] = neonHelp::vecFCompare( + results[0] = vecFCompare( operands, false, [](float x, double y) -> bool { return x >= y; }); break; } case 
Opcode::AArch64_FCMGEv2i64rz: { // fcmge vd.2d, vn.2d, 0.0 - results[0] = neonHelp::vecFCompare( + results[0] = vecFCompare( operands, true, [](double x, double y) -> bool { return x >= y; }); break; } case Opcode::AArch64_FCMGEv4f32: { // fcmge vd.4s, vn.4s, vm.4s - results[0] = neonHelp::vecFCompare( + results[0] = vecFCompare( operands, false, [](float x, float y) -> bool { return x >= y; }); break; } case Opcode::AArch64_FCMGEv4i32rz: { // fcmge vd.4s, vn.4s, 0.0 - results[0] = neonHelp::vecFCompare( + results[0] = vecFCompare( operands, true, [](float x, float y) -> bool { return x >= y; }); break; } case Opcode::AArch64_FCMGT_PPzZ0_D: { // fcmgt pd.d, pg/z, zn.d, #0.0 - results[0] = sveHelp::sveComparePredicated_vecsToPred( + results[0] = sveComparePredicated_vecsToPred( operands, metadata, VL_bits, true, [](double x, double y) -> bool { return x > y; }); break; } case Opcode::AArch64_FCMGT_PPzZ0_S: { // fcmgt pd.s, pg/z, zn.s, #0.0 - results[0] = sveHelp::sveComparePredicated_vecsToPred( + results[0] = sveComparePredicated_vecsToPred( operands, metadata, VL_bits, true, [](float x, float y) -> bool { return x > y; }); break; } case Opcode::AArch64_FCMGT_PPzZZ_D: { // fcmgt pd.d, pg/z, zn.d, zm.d - results[0] = sveHelp::sveComparePredicated_vecsToPred( + results[0] = sveComparePredicated_vecsToPred( operands, metadata, VL_bits, false, [](double x, double y) -> bool { return x > y; }); break; } case Opcode::AArch64_FCMGT_PPzZZ_S: { // fcmgt pd.s, pg/z, zn.s, zm. 
- results[0] = sveHelp::sveComparePredicated_vecsToPred( + results[0] = sveComparePredicated_vecsToPred( operands, metadata, VL_bits, false, [](float x, float y) -> bool { return x > y; }); break; } case Opcode::AArch64_FCMGTv2i32rz: { // fcmgt vd.2s, vn.2s, #0.0 - results[0] = neonHelp::vecFCompare( + results[0] = vecFCompare( operands, true, [](float x, float y) -> bool { return x > y; }); break; } case Opcode::AArch64_FCMGTv2i64rz: { // fcmgt vd.2d, vn.2d, #0.0 - results[0] = neonHelp::vecFCompare( + results[0] = vecFCompare( operands, true, [](double x, double y) -> bool { return x > y; }); break; } case Opcode::AArch64_FCMGTv2f64: { // fcmgt vd.2d, vn.2d, vm.2d - results[0] = neonHelp::vecFCompare( + results[0] = vecFCompare( operands, false, [](double x, double y) -> bool { return x > y; }); break; } case Opcode::AArch64_FCMGTv4f32: { // fcmgt vd.4s, vn.4s, vm.4s - results[0] = neonHelp::vecFCompare( + results[0] = vecFCompare( operands, false, [](float x, float y) -> bool { return x > y; }); break; } case Opcode::AArch64_FCMGTv4i32rz: { // fcmgt vd.4s, vn.4s, #0.0 - results[0] = neonHelp::vecFCompare( + results[0] = vecFCompare( operands, true, [](float x, float y) -> bool { return x > y; }); break; } case Opcode::AArch64_FCMLA_ZPmZZ_D: { // fcmla zda, pg/m, zn, zm, #imm - results[0] = - sveHelp::sveFcmlaPredicated(operands, metadata, VL_bits); + results[0] = sveFcmlaPredicated(operands, metadata, VL_bits); break; } case Opcode::AArch64_FCMLE_PPzZ0_D: { // fcmle pd.d, pg/z, zn.d, #0.0 - results[0] = sveHelp::sveComparePredicated_vecsToPred( + results[0] = sveComparePredicated_vecsToPred( operands, metadata, VL_bits, true, [](double x, double y) -> bool { return x <= y; }); break; } case Opcode::AArch64_FCMLE_PPzZ0_S: { // fcmle pd.s, pg/z, zn.s, #0.0 - results[0] = sveHelp::sveComparePredicated_vecsToPred( + results[0] = sveComparePredicated_vecsToPred( operands, metadata, VL_bits, true, [](float x, float y) -> bool { return x <= y; }); break; } case 
Opcode::AArch64_FCMLT_PPzZ0_S: { // fcmlt pd.s, pg/z, zn.s, #0.0 - results[0] = sveHelp::sveComparePredicated_vecsToPred( + results[0] = sveComparePredicated_vecsToPred( operands, metadata, VL_bits, true, [](float x, float y) -> bool { return x < y; }); break; } case Opcode::AArch64_FCMLTv2i32rz: { // fcmlt vd.2s, vn.2s, #0.0 - results[0] = neonHelp::vecFCompare( + results[0] = vecFCompare( operands, true, [](float x, float y) -> bool { return x < y; }); break; } case Opcode::AArch64_FCMLTv2i64rz: { // fcmlt vd.2d, vn.2d, #0.0 - results[0] = neonHelp::vecFCompare( + results[0] = vecFCompare( operands, true, [](double x, double y) -> bool { return x < y; }); break; } case Opcode::AArch64_FCMLTv4i32rz: { // fcmlt vd.4s, vn.4s, #0.0 - results[0] = neonHelp::vecFCompare( + results[0] = vecFCompare( operands, true, [](float x, float y) -> bool { return x < y; }); break; } case Opcode::AArch64_FCMPDri: { // fcmp dn, #imm - results[0] = floatHelp::fcmp(operands, true); + results[0] = fcmp(operands, true); break; } case Opcode::AArch64_FCMPDrr: { // fcmp dn, dm - results[0] = floatHelp::fcmp(operands, false); + results[0] = fcmp(operands, false); break; } case Opcode::AArch64_FCMPEDri: { // fcmpe dn, #imm - results[0] = floatHelp::fcmp(operands, true); + results[0] = fcmp(operands, true); break; } case Opcode::AArch64_FCMPEDrr: { // fcmpe dn, dm - results[0] = floatHelp::fcmp(operands, false); + results[0] = fcmp(operands, false); break; } case Opcode::AArch64_FCMPESri: { // fcmpe sn, #imm - results[0] = floatHelp::fcmp(operands, true); + results[0] = fcmp(operands, true); break; } case Opcode::AArch64_FCMPESrr: { // fcmpe sn, sm - results[0] = floatHelp::fcmp(operands, false); + results[0] = fcmp(operands, false); break; } case Opcode::AArch64_FCMPSri: { // fcmp sn, #imm - results[0] = floatHelp::fcmp(operands, true); + results[0] = fcmp(operands, true); break; } case Opcode::AArch64_FCMPSrr: { // fcmp sn, sm - results[0] = floatHelp::fcmp(operands, false); + results[0] = 
fcmp(operands, false); break; } case Opcode::AArch64_FCPY_ZPmI_D: { // fcpy zd.d, pg/m, #const - results[0] = sveHelp::sveFcpy_imm(operands, metadata, VL_bits); + results[0] = sveFcpy_imm(operands, metadata, VL_bits); break; } case Opcode::AArch64_FCPY_ZPmI_S: { // fcpy zd.s, pg/m, #const - results[0] = sveHelp::sveFcpy_imm(operands, metadata, VL_bits); + results[0] = sveFcpy_imm(operands, metadata, VL_bits); break; } case Opcode::AArch64_FCSELDrrr: { // fcsel dd, dn, dm, cond - results[0] = { - conditionalHelp::cs_4ops( - operands, metadata, [](double x) -> double { return x; }), - 256}; + results[0] = {cs_4ops(operands, metadata, + [](double x) -> double { return x; }), + 256}; break; } case Opcode::AArch64_FCSELSrrr: { // fcsel sd, sn, sm, cond - results[0] = { - conditionalHelp::cs_4ops(operands, metadata, - [](float x) -> float { return x; }), - 256}; + results[0] = {cs_4ops(operands, metadata, + [](float x) -> float { return x; }), + 256}; break; } case Opcode::AArch64_FCVTASUWDr: { // fcvtas wd, dn @@ -1597,28 +1557,28 @@ void Instruction::execute() { } case Opcode::AArch64_FCVTDSr: { // fcvt dd, sn // TODO: Handle NaNs, denorms, and saturation? 
- results[0] = neonHelp::vecFcvtl(operands, false); + results[0] = vecFcvtl(operands, false); break; } case Opcode::AArch64_FCVTLv2i32: { // fcvtl vd.2d, vn.2s - results[0] = neonHelp::vecFcvtl(operands, false); + results[0] = vecFcvtl(operands, false); break; } case Opcode::AArch64_FCVTLv4i32: { // fcvtl2 vd.2d, vn.4s - results[0] = neonHelp::vecFcvtl(operands, true); + results[0] = vecFcvtl(operands, true); break; } case Opcode::AArch64_FCVTNv2i32: { // fcvtn vd.2s, vn.2d - results[0] = neonHelp::vecFcvtn(operands, false); + results[0] = vecFcvtn(operands, false); break; } case Opcode::AArch64_FCVTNv4i32: { // fcvtn2 vd.4s, vn.2d - results[0] = neonHelp::vecFcvtn(operands, true); + results[0] = vecFcvtn(operands, true); break; } case Opcode::AArch64_FCVTSDr: { // fcvt sd, dn // TODO: Handle NaNs, denorms, and saturation? - results[0] = neonHelp::vecFcvtl(operands, false); + results[0] = vecFcvtl(operands, false); break; } case Opcode::AArch64_FCVTZSUWDr: { // fcvtzs wd, dn @@ -1640,234 +1600,212 @@ void Instruction::execute() { break; } case Opcode::AArch64_FCVTZS_ZPmZ_DtoD: { // fcvtzs zd.d, pg/m, zn.d - results[0] = - sveHelp::sveFcvtzsPredicated(operands, VL_bits); + results[0] = sveFcvtzsPredicated(operands, VL_bits); break; } case Opcode::AArch64_FCVTZS_ZPmZ_DtoS: { // fcvtzs zd.s, pg/m, zn.d - results[0] = - sveHelp::sveFcvtzsPredicated(operands, VL_bits); + results[0] = sveFcvtzsPredicated(operands, VL_bits); break; } case Opcode::AArch64_FCVTZS_ZPmZ_StoD: { // fcvtzs zd.d, pg/m, zn.s - results[0] = - sveHelp::sveFcvtzsPredicated(operands, VL_bits); + results[0] = sveFcvtzsPredicated(operands, VL_bits); break; } case Opcode::AArch64_FCVTZS_ZPmZ_StoS: { // fcvtzs zd.s, pg/m, zn.s - results[0] = - sveHelp::sveFcvtzsPredicated(operands, VL_bits); + results[0] = sveFcvtzsPredicated(operands, VL_bits); break; } case Opcode::AArch64_FCVTZSv2f64: { // fcvtzs vd.2d, vn.2d - results[0] = neonHelp::vecFcvtzs(operands); + results[0] = vecFcvtzs(operands); break; } 
case Opcode::AArch64_FCVTZUUWDr: { // fcvtzu wd, dn - // TODO: Handle NaNs, denorms, and saturation - results[0] = { - static_cast(std::trunc(operands[0].get())), 8}; + results[0] = {fcvtzu_integer(operands), 8}; break; } case Opcode::AArch64_FCVTZUUWSr: { // fcvtzu wd, sn - // TODO: Handle NaNs, denorms, and saturation - results[0] = { - static_cast(std::trunc(operands[0].get())), 8}; + results[0] = {fcvtzu_integer(operands), 8}; break; } case Opcode::AArch64_FCVTZUUXDr: { // fcvtzu xd, dn - // TODO: Handle NaNs, denorms, and saturation - results[0] = - static_cast(std::trunc(operands[0].get())); + results[0] = {fcvtzu_integer(operands), 8}; break; } case Opcode::AArch64_FCVTZUUXSr: { // fcvtzu xd, sn - // TODO: Handle NaNs, denorms, and saturation - results[0] = static_cast(std::trunc(operands[0].get())); + results[0] = {fcvtzu_integer(operands), 8}; break; } case Opcode::AArch64_FCVTZUv1i64: { // fcvtzu dd, dn - // TODO: Handle NaNs, denorms, and saturation - results[0] = { - static_cast(std::trunc(operands[0].get())), 256}; + results[0] = {fcvtzu_integer(operands), 256}; break; } case Opcode::AArch64_FCVT_ZPmZ_DtoS: { // fcvt zd.s, pg/m, zn.d - results[0] = - sveHelp::sveFcvtPredicated(operands, VL_bits); + results[0] = sveFcvtPredicated(operands, VL_bits); break; } case Opcode::AArch64_FCVT_ZPmZ_StoD: { // fcvt zd.d, pg/m, zn.s - results[0] = - sveHelp::sveFcvtPredicated(operands, VL_bits); + results[0] = sveFcvtPredicated(operands, VL_bits); break; } case Opcode::AArch64_FDIVDrr: { // fdiv dd, dn, dm - results[0] = {divideHelp::div_3ops(operands), 256}; + results[0] = {div_3ops(operands), 256}; break; } case Opcode::AArch64_FDIVR_ZPmZ_D: { // fdivr zdn.d, pg/m, zdn.d, zm.d - results[0] = sveHelp::sveLogicOpPredicated_3vecs( + results[0] = sveLogicOpPredicated_3vecs( operands, VL_bits, [](double x, double y) -> double { return (y / x); }); break; } case Opcode::AArch64_FDIVR_ZPmZ_S: { // fdivr zdn.s, pg/m, zdn.s, zm.s - results[0] = 
sveHelp::sveLogicOpPredicated_3vecs( + results[0] = sveLogicOpPredicated_3vecs( operands, VL_bits, [](float x, float y) -> float { return (y / x); }); break; } case Opcode::AArch64_FDIVSrr: { // fdiv sd, sn, sm - results[0] = {divideHelp::div_3ops(operands), 256}; + results[0] = {div_3ops(operands), 256}; break; } case Opcode::AArch64_FDIV_ZPmZ_D: { // fdiv zdn.d, pg/m, zdn.d, zm.d - results[0] = sveHelp::sveLogicOpPredicated_3vecs( + results[0] = sveLogicOpPredicated_3vecs( operands, VL_bits, [](double x, double y) -> double { return (x / y); }); break; } case Opcode::AArch64_FDIVv2f64: { // fdiv vd.2d, vn.2d, vm.2d - results[0] = neonHelp::vecLogicOp_3vecs( + results[0] = vecLogicOp_3vecs( operands, [](double x, double y) -> double { return x / y; }); break; } case Opcode::AArch64_FDUP_ZI_D: { // fdup zd.d, #imm - results[0] = sveHelp::sveDup_immOrScalar(operands, metadata, - VL_bits, true); + results[0] = + sveDup_immOrScalar(operands, metadata, VL_bits, true); break; } case Opcode::AArch64_FDUP_ZI_S: { // fdup zd.s, #imm - results[0] = sveHelp::sveDup_immOrScalar(operands, metadata, - VL_bits, true); + results[0] = + sveDup_immOrScalar(operands, metadata, VL_bits, true); break; } case Opcode::AArch64_FMADDDrrr: { // fmadd dn, dm, da - results[0] = {multiplyHelp::madd_4ops(operands), 256}; + results[0] = {madd_4ops(operands), 256}; break; } case Opcode::AArch64_FMADDSrrr: { // fmadd sn, sm, sa - results[0] = {multiplyHelp::madd_4ops(operands), 256}; + results[0] = {madd_4ops(operands), 256}; break; } case Opcode::AArch64_FMAD_ZPmZZ_D: { // fmad zd.d, pg/m, zn.d, zm.d - results[0] = sveHelp::sveFmadPredicated_vecs(operands, VL_bits); + results[0] = sveFmadPredicated_vecs(operands, VL_bits); break; } case Opcode::AArch64_FMAD_ZPmZZ_S: { // fmad zd.s, pg/m, zn.s, zm.s - results[0] = sveHelp::sveFmadPredicated_vecs(operands, VL_bits); + results[0] = sveFmadPredicated_vecs(operands, VL_bits); break; } case Opcode::AArch64_FMAXNMDrr: { // fmaxnm dd, dn, dm - 
results[0] = floatHelp::fmaxnm_3ops(operands); + results[0] = fmaxnm_3ops(operands); break; } case Opcode::AArch64_FMAXNMPv2i64p: { // fmaxnmp dd, vd.2d - results[0] = neonHelp::vecMaxnmp_2ops(operands); + results[0] = vecMaxnmp_2ops(operands); break; } case Opcode::AArch64_FMAXNMSrr: { // fmaxnm sd, sn, sm - results[0] = floatHelp::fmaxnm_3ops(operands); + results[0] = fmaxnm_3ops(operands); break; } case Opcode::AArch64_FMAXNMv2f64: { // fmaxnm vd.2d, vn.2d, vm.2d - results[0] = neonHelp::vecLogicOp_3vecs( + results[0] = vecLogicOp_3vecs( operands, [](double x, double y) -> double { return std::fmax(x, y); }); break; } case Opcode::AArch64_FMINNMDrr: { // fminnm dd, dn, dm - results[0] = floatHelp::fminnm_3ops(operands); + results[0] = fminnm_3ops(operands); break; } case Opcode::AArch64_FMINNMPv2i64p: { // fminnmp dd, vd.2d - results[0] = neonHelp::vecMinv_2ops(operands); + results[0] = vecMinv_2ops(operands); break; } case Opcode::AArch64_FMINNMSrr: { // fminnm sd, sn, sm - results[0] = floatHelp::fminnm_3ops(operands); + results[0] = fminnm_3ops(operands); break; } case Opcode::AArch64_FMINNMv2f64: { // fminnm vd.2d, vn.2d, vm.2d - results[0] = neonHelp::vecLogicOp_3vecs( + results[0] = vecLogicOp_3vecs( operands, [](double x, double y) -> double { return std::fmin(x, y); }); break; } case Opcode::AArch64_FMLA_ZPmZZ_D: { // fmla zd.d, pg/m, zn.d, zm.d - results[0] = sveHelp::sveMlaPredicated_vecs(operands, VL_bits); + results[0] = sveMlaPredicated_vecs(operands, VL_bits); break; } case Opcode::AArch64_FMLA_ZPmZZ_S: { // fmla zd.s, pg/m, zn.s, zm.s - results[0] = sveHelp::sveMlaPredicated_vecs(operands, VL_bits); + results[0] = sveMlaPredicated_vecs(operands, VL_bits); break; } case Opcode::AArch64_FMLAv2f32: { // fmla vd.2s, vn.2s, vm.2s - results[0] = neonHelp::vecFmla_3vecs(operands); + results[0] = vecFmla_3vecs(operands); break; } case Opcode::AArch64_FMLA_ZZZI_D: { // fmla zda.d, zn.d, zm.d[index] - results[0] = - sveHelp::sveMlaIndexed_vecs(operands, 
metadata, VL_bits); + results[0] = sveMlaIndexed_vecs(operands, metadata, VL_bits); break; } case Opcode::AArch64_FMLA_ZZZI_S: { // fmla zda.s, zn.s, zm.s[index] - results[0] = - sveHelp::sveMlaIndexed_vecs(operands, metadata, VL_bits); + results[0] = sveMlaIndexed_vecs(operands, metadata, VL_bits); break; } case Opcode::AArch64_FMLAv2f64: { // fmla vd.2d, vn.2d, vm.2d - results[0] = neonHelp::vecFmla_3vecs(operands); + results[0] = vecFmla_3vecs(operands); break; } case Opcode::AArch64_FMLAv2i32_indexed: { // fmla vd.2s, vn.2s, // vm.2s[index] - results[0] = - neonHelp::vecFmlaIndexed_3vecs(operands, metadata); + results[0] = vecFmlaIndexed_3vecs(operands, metadata); break; } case Opcode::AArch64_FMLAv2i64_indexed: { // fmla vd.2d, vn.2d, // vm.d[index] - results[0] = - neonHelp::vecFmlaIndexed_3vecs(operands, metadata); + results[0] = vecFmlaIndexed_3vecs(operands, metadata); break; } case Opcode::AArch64_FMLAv4f32: { // fmla vd.4s, vn.4s, vm.4s - results[0] = neonHelp::vecFmla_3vecs(operands); + results[0] = vecFmla_3vecs(operands); break; } case Opcode::AArch64_FMLAv4i32_indexed: { // fmla vd.4s, vn.4s, // vm.s[index] - results[0] = - neonHelp::vecFmlaIndexed_3vecs(operands, metadata); + results[0] = vecFmlaIndexed_3vecs(operands, metadata); break; } case Opcode::AArch64_FMLS_ZPmZZ_D: { // fmls zd.d, pg/m, zn.d, zm.d - results[0] = sveHelp::sveFmlsPredicated_vecs(operands, VL_bits); + results[0] = sveFmlsPredicated_vecs(operands, VL_bits); break; } case Opcode::AArch64_FMLS_ZPmZZ_S: { // fmls zd.s, pg/m, zn.s, zm.s - results[0] = sveHelp::sveFmlsPredicated_vecs(operands, VL_bits); + results[0] = sveFmlsPredicated_vecs(operands, VL_bits); break; } case Opcode::AArch64_FMLSv2f64: { // fmls vd.2d, vn.2d, vm.2d - results[0] = neonHelp::vecFmls_3vecs(operands); + results[0] = vecFmls_3vecs(operands); break; } case Opcode::AArch64_FMLSv2i64_indexed: { - results[0] = - neonHelp::vecFmlsIndexed_3vecs(operands, metadata); + results[0] = vecFmlsIndexed_3vecs(operands, 
metadata); break; } case Opcode::AArch64_FMLSv4f32: { // fmls vd.4s, vn.4s, vm.4s - results[0] = neonHelp::vecFmls_3vecs(operands); + results[0] = vecFmls_3vecs(operands); break; } case Opcode::AArch64_FMLSv4i32_indexed: { // fmls vd.4s, vn.4s, // vm.s[index] - results[0] = - neonHelp::vecFmlsIndexed_3vecs(operands, metadata); + results[0] = vecFmlsIndexed_3vecs(operands, metadata); break; } case Opcode::AArch64_FMOPA_MPPZZ_D: { // fmopa zada.d, pn/m, pm/m, zn.d, @@ -1974,110 +1912,104 @@ void Instruction::execute() { break; } case Opcode::AArch64_FMOVv2f32_ns: { // fmov vd.2s, #imm - results[0] = neonHelp::vecMovi_imm(metadata); + results[0] = vecMovi_imm(metadata); break; } case Opcode::AArch64_FMOVv2f64_ns: { // fmov vd.2d, #imm - results[0] = neonHelp::vecMovi_imm(metadata); + results[0] = vecMovi_imm(metadata); break; } case Opcode::AArch64_FMOVv4f32_ns: { // fmov vd.4s, #imm - results[0] = neonHelp::vecMovi_imm(metadata); + results[0] = vecMovi_imm(metadata); break; } case Opcode::AArch64_FMSB_ZPmZZ_D: { // fmsb zd.d, pg/m, zn.d, zm.d - results[0] = sveHelp::sveFmsbPredicated_vecs(operands, VL_bits); + results[0] = sveFmsbPredicated_vecs(operands, VL_bits); break; } case Opcode::AArch64_FMSB_ZPmZZ_S: { // fmsb zd.s, pg/m, zn.s, zm.s - results[0] = sveHelp::sveFmsbPredicated_vecs(operands, VL_bits); + results[0] = sveFmsbPredicated_vecs(operands, VL_bits); break; } case Opcode::AArch64_FMSUBDrrr: { // fmsub dn, dm, da - results[0] = {multiplyHelp::msub_4ops(operands), 256}; + results[0] = {msub_4ops(operands), 256}; break; } case Opcode::AArch64_FMSUBSrrr: { // fmsub sn, sm, sa - results[0] = {multiplyHelp::msub_4ops(operands), 256}; + results[0] = {msub_4ops(operands), 256}; break; } case Opcode::AArch64_FMULDrr: { // fmul dd, dn, dm - results[0] = {multiplyHelp::mul_3ops(operands), 256}; + results[0] = {mul_3ops(operands), 256}; break; } case Opcode::AArch64_FMULSrr: { // fmul sd, sn, sm - results[0] = {multiplyHelp::mul_3ops(operands), 256}; + results[0] = 
{mul_3ops(operands), 256}; break; } case Opcode::AArch64_FMUL_ZPmI_D: { // fmul zd.d, pg/m, zn.d, #imm - results[0] = sveHelp::sveMulPredicated(operands, metadata, - VL_bits, true); + results[0] = + sveMulPredicated(operands, metadata, VL_bits, true); break; } case Opcode::AArch64_FMUL_ZPmI_S: { // fmul zd.s, pg/m, zn.s, #imm - results[0] = - sveHelp::sveMulPredicated(operands, metadata, VL_bits, true); + results[0] = sveMulPredicated(operands, metadata, VL_bits, true); break; } case Opcode::AArch64_FMUL_ZPmZ_D: { // fmul zdn.d, pg/m, zdn.d, zm.d - results[0] = sveHelp::sveMulPredicated(operands, metadata, - VL_bits, false); + results[0] = + sveMulPredicated(operands, metadata, VL_bits, false); break; } case Opcode::AArch64_FMUL_ZPmZ_S: { // fmul zdn.s, pg/m, zdn.s, zm.s - results[0] = sveHelp::sveMulPredicated(operands, metadata, - VL_bits, false); + results[0] = + sveMulPredicated(operands, metadata, VL_bits, false); break; } case Opcode::AArch64_FMUL_ZZZ_D: { // fmul zd.d, zn.d, zm.d - results[0] = sveHelp::sveFmul_3ops(operands, VL_bits); + results[0] = sveFmul_3ops(operands, VL_bits); break; } case Opcode::AArch64_FMUL_ZZZ_S: { // fmul zd.s, zn.s, zm.s - results[0] = sveHelp::sveFmul_3ops(operands, VL_bits); + results[0] = sveFmul_3ops(operands, VL_bits); break; } case Opcode::AArch64_FMULv1i32_indexed: { // fmul sd, sn, vm.s[index] - results[0] = - neonHelp::vecFmulIndexed_vecs(operands, metadata); + results[0] = vecFmulIndexed_vecs(operands, metadata); break; } case Opcode::AArch64_FMULv1i64_indexed: { // fmul dd, dn, vm.d[index] - results[0] = - neonHelp::vecFmulIndexed_vecs(operands, metadata); + results[0] = vecFmulIndexed_vecs(operands, metadata); break; } case Opcode::AArch64_FMULv2f32: { // fmul vd.2s, vn.2s, vm.2s - results[0] = neonHelp::vecLogicOp_3vecs( + results[0] = vecLogicOp_3vecs( operands, [](float x, float y) -> float { return x * y; }); break; } case Opcode::AArch64_FMULv2f64: { // fmul vd.2d, vn.2d, vm.2d - results[0] = 
neonHelp::vecLogicOp_3vecs( + results[0] = vecLogicOp_3vecs( operands, [](double x, double y) -> double { return x * y; }); break; } case Opcode::AArch64_FMULv2i32_indexed: { // fmul vd.2s, vn.2s, // vm.s[index] - results[0] = - neonHelp::vecFmulIndexed_vecs(operands, metadata); + results[0] = vecFmulIndexed_vecs(operands, metadata); break; } case Opcode::AArch64_FMULv2i64_indexed: { // fmul vd.2d, vn.2d, // vm.d[index] - results[0] = - neonHelp::vecFmulIndexed_vecs(operands, metadata); + results[0] = vecFmulIndexed_vecs(operands, metadata); break; } case Opcode::AArch64_FMULv4f32: { // fmul vd.4s, vn.4s, vm.4s - results[0] = neonHelp::vecLogicOp_3vecs( + results[0] = vecLogicOp_3vecs( operands, [](float x, float y) -> float { return x * y; }); break; } case Opcode::AArch64_FMULv4i32_indexed: { // fmul vd.4s, vn.4s, // vm.s[index] - results[0] = - neonHelp::vecFmulIndexed_vecs(operands, metadata); + results[0] = vecFmulIndexed_vecs(operands, metadata); break; } case Opcode::AArch64_FNEGDr: { // fneg dd, dn @@ -2089,60 +2021,60 @@ void Instruction::execute() { break; } case Opcode::AArch64_FNEG_ZPmZ_D: { // fneg zd.d, pg/m, zn.d - results[0] = sveHelp::sveFnegPredicated(operands, VL_bits); + results[0] = sveFnegPredicated(operands, VL_bits); break; } case Opcode::AArch64_FNEG_ZPmZ_S: { // fneg zd.s, pg/m, zn.s - results[0] = sveHelp::sveFnegPredicated(operands, VL_bits); + results[0] = sveFnegPredicated(operands, VL_bits); break; } case Opcode::AArch64_FNEGv2f64: { // fneg vd.2d, vn.2d - results[0] = neonHelp::vecFneg_2ops(operands); + results[0] = vecFneg_2ops(operands); break; } case Opcode::AArch64_FNEGv4f32: { // fneg vd.4s, vn.4s - results[0] = neonHelp::vecFneg_2ops(operands); + results[0] = vecFneg_2ops(operands); break; } case Opcode::AArch64_FNMADDDrrr: { // fnmadd dd, dn, dm, da - results[0] = floatHelp::fnmadd_4ops(operands); + results[0] = fnmadd_4ops(operands); break; } case Opcode::AArch64_FNMADDSrrr: { // fnmadd sd, sn, sm, sa - results[0] = 
floatHelp::fnmadd_4ops(operands); + results[0] = fnmadd_4ops(operands); break; } case Opcode::AArch64_FNMLS_ZPmZZ_D: { // fnmls zd.d, pg/m, zn.d, zm.d - results[0] = sveHelp::sveFnmlsPredicated(operands, VL_bits); + results[0] = sveFnmlsPredicated(operands, VL_bits); break; } case Opcode::AArch64_FNMLS_ZPmZZ_S: { // fnmls zd.s, pg/m, zn.s, zm.s - results[0] = sveHelp::sveFnmlsPredicated(operands, VL_bits); + results[0] = sveFnmlsPredicated(operands, VL_bits); break; } case Opcode::AArch64_FNMSB_ZPmZZ_D: { // fnmsb zdn.d, pg/m, zm.d, za.d - results[0] = sveHelp::sveFnmsbPredicated(operands, VL_bits); + results[0] = sveFnmsbPredicated(operands, VL_bits); break; } case Opcode::AArch64_FNMSB_ZPmZZ_S: { // fnmsb zdn.s, pg/m, zm.s, za.s - results[0] = sveHelp::sveFnmsbPredicated(operands, VL_bits); + results[0] = sveFnmsbPredicated(operands, VL_bits); break; } case Opcode::AArch64_FNMSUBDrrr: { // fnmsub dd, dn, dm, da - results[0] = floatHelp::fnmsub_4ops(operands); + results[0] = fnmsub_4ops(operands); break; } case Opcode::AArch64_FNMSUBSrrr: { // fnmsub sd, sn, sm, sa - results[0] = floatHelp::fnmsub_4ops(operands); + results[0] = fnmsub_4ops(operands); break; } case Opcode::AArch64_FNMULDrr: { // fnmul dd, dn, dm - results[0] = neonHelp::vecLogicOp_3vecs( + results[0] = vecLogicOp_3vecs( operands, [](double x, double y) -> double { return -(x * y); }); break; } case Opcode::AArch64_FNMULSrr: { // fnmul sd, sn, sm - results[0] = neonHelp::vecLogicOp_3vecs( + results[0] = vecLogicOp_3vecs( operands, [](float x, float y) -> float { return -(x * y); }); break; } @@ -2151,61 +2083,59 @@ void Instruction::execute() { break; } case Opcode::AArch64_FRINTN_ZPmZ_D: { // frintn zd.d, pg/m, zn.d - results[0] = - sveHelp::sveFrintnPredicated(operands, VL_bits); + results[0] = sveFrintnPredicated(operands, VL_bits); break; } case Opcode::AArch64_FRINTN_ZPmZ_S: { // frintn zd.s, pg/m, zn.s - results[0] = - sveHelp::sveFrintnPredicated(operands, VL_bits); + results[0] = 
sveFrintnPredicated(operands, VL_bits); break; } case Opcode::AArch64_FRINTPDr: { // frintp dd, dn - results[0] = floatHelp::frintpScalar_2ops(operands); + results[0] = frintpScalar_2ops(operands); break; } case Opcode::AArch64_FRINTPSr: { // frintp sd, sn - results[0] = floatHelp::frintpScalar_2ops(operands); + results[0] = frintpScalar_2ops(operands); break; } case Opcode::AArch64_FRSQRTEv1i32: { // frsqrte sd, sn - results[0] = neonHelp::vecFrsqrte_2ops(operands); + results[0] = vecFrsqrte_2ops(operands); break; } case Opcode::AArch64_FRSQRTEv1i64: { // frsqrte dd, dn - results[0] = neonHelp::vecFrsqrte_2ops(operands); + results[0] = vecFrsqrte_2ops(operands); break; } case Opcode::AArch64_FRSQRTEv2f32: { // frsqrte vd.2s, vn.2s - results[0] = neonHelp::vecFrsqrte_2ops(operands); + results[0] = vecFrsqrte_2ops(operands); break; } case Opcode::AArch64_FRSQRTEv2f64: { // frsqrte vd.2d, vn.2d - results[0] = neonHelp::vecFrsqrte_2ops(operands); + results[0] = vecFrsqrte_2ops(operands); break; } case Opcode::AArch64_FRSQRTEv4f32: { // frsqrte vd.4s, vn.4s - results[0] = neonHelp::vecFrsqrte_2ops(operands); + results[0] = vecFrsqrte_2ops(operands); break; } case Opcode::AArch64_FRSQRTS32: { // frsqrts sd, sn, sm - results[0] = neonHelp::vecFrsqrts_3ops(operands); + results[0] = vecFrsqrts_3ops(operands); break; } case Opcode::AArch64_FRSQRTS64: { // frsqrts dd, dn, dm - results[0] = neonHelp::vecFrsqrts_3ops(operands); + results[0] = vecFrsqrts_3ops(operands); break; } case Opcode::AArch64_FRSQRTSv2f32: { // frsqrts vd.2s, vn.2s, vn.2s - results[0] = neonHelp::vecFrsqrts_3ops(operands); + results[0] = vecFrsqrts_3ops(operands); break; } case Opcode::AArch64_FRSQRTSv2f64: { // frsqrts vd.2d, vn.2d, vm.2d - results[0] = neonHelp::vecFrsqrts_3ops(operands); + results[0] = vecFrsqrts_3ops(operands); break; } case Opcode::AArch64_FRSQRTSv4f32: { // frsqrts vd.4s, vn.4s, vm.4s - results[0] = neonHelp::vecFrsqrts_3ops(operands); + results[0] = vecFrsqrts_3ops(operands); 
break; } case Opcode::AArch64_FSQRTDr: { // fsqrt dd, dn @@ -2217,83 +2147,78 @@ void Instruction::execute() { break; } case Opcode::AArch64_FSQRT_ZPmZ_D: { // fsqrt zd.d, pg/m, zn.d - results[0] = - sveHelp::sveFsqrtPredicated_2vecs(operands, VL_bits); + results[0] = sveFsqrtPredicated_2vecs(operands, VL_bits); break; } case Opcode::AArch64_FSQRT_ZPmZ_S: { // fsqrt zd.s, pg/m, zn.s - results[0] = - sveHelp::sveFsqrtPredicated_2vecs(operands, VL_bits); + results[0] = sveFsqrtPredicated_2vecs(operands, VL_bits); break; } case Opcode::AArch64_FSQRTv2f64: { // fsqrt vd.2d, vn.2d - results[0] = neonHelp::vecFsqrt_2ops(operands); + results[0] = vecFsqrt_2ops(operands); break; } case Opcode::AArch64_FSQRTv4f32: { // fsqrt vd.4s, vn.4s - results[0] = neonHelp::vecFsqrt_2ops(operands); + results[0] = vecFsqrt_2ops(operands); break; } case Opcode::AArch64_FSUBDrr: { // fsub dd, dn, dm - results[0] = neonHelp::vecLogicOp_3vecs( + results[0] = vecLogicOp_3vecs( operands, [](double x, double y) -> double { return x - y; }); break; } case Opcode::AArch64_FSUBR_ZPmZ_D: { // fsubr zdn.d, pg/m, zdn.d, zm.d - results[0] = - sveHelp::sveSubrPredicated_3vecs(operands, VL_bits); + results[0] = sveSubrPredicated_3vecs(operands, VL_bits); break; } case Opcode::AArch64_FSUBR_ZPmZ_S: { // fsubr zdn.s, pg/m, zdn.s, zm.s - results[0] = sveHelp::sveSubrPredicated_3vecs(operands, VL_bits); + results[0] = sveSubrPredicated_3vecs(operands, VL_bits); break; } case Opcode::AArch64_FSUBSrr: { // fsub ss, sn, sm - results[0] = neonHelp::vecLogicOp_3vecs( + results[0] = vecLogicOp_3vecs( operands, [](double x, double y) -> double { return x - y; }); break; } case Opcode::AArch64_FSUB_ZPmI_D: { // fsub zdn.d, pg/m, zdn.d, #imm - results[0] = - sveHelp::sveSubPredicated_imm(operands, metadata, VL_bits); + results[0] = sveSubPredicated_imm(operands, metadata, VL_bits); break; } case Opcode::AArch64_FSUB_ZPmI_S: { // fsub zdn.s, pg/m, zdn.s, #imm - results[0] = - sveHelp::sveSubPredicated_imm(operands, 
metadata, VL_bits); + results[0] = sveSubPredicated_imm(operands, metadata, VL_bits); break; } case Opcode::AArch64_FSUB_ZPmZ_D: { // fsub zdn.d, pg/m, zdn.d, zm.d - results[0] = sveHelp::sveLogicOpPredicated_3vecs( + results[0] = sveLogicOpPredicated_3vecs( operands, VL_bits, [](double x, double y) -> double { return x - y; }); break; } case Opcode::AArch64_FSUB_ZPmZ_S: { // fsub zdn.s, pg/m, zdn.s, zm.s - results[0] = sveHelp::sveLogicOpPredicated_3vecs( + results[0] = sveLogicOpPredicated_3vecs( operands, VL_bits, [](float x, float y) -> float { return x - y; }); break; } case Opcode::AArch64_FSUB_ZZZ_D: { // fsub zd.d, zn.d, zm.d - results[0] = sveHelp::sveSub_3vecs(operands, VL_bits); + results[0] = sveSub_3vecs(operands, VL_bits); break; } case Opcode::AArch64_FSUB_ZZZ_S: { // fsub zd.s, zn.s, zm.s - results[0] = sveHelp::sveSub_3vecs(operands, VL_bits); + results[0] = sveSub_3vecs(operands, VL_bits); break; } case Opcode::AArch64_FSUBv2f32: { - results[0] = neonHelp::vecLogicOp_3vecs( + results[0] = vecLogicOp_3vecs( operands, [](float x, float y) -> float { return x - y; }); break; } case Opcode::AArch64_FSUBv2f64: { // fsub vd.2d, vn.2d, vm.2d - results[0] = neonHelp::vecLogicOp_3vecs( + results[0] = vecLogicOp_3vecs( operands, [](double x, double y) -> double { return x - y; }); break; } case Opcode::AArch64_FSUBv4f32: { // fsub vd.4s, vn.4s, vm.4s - results[0] = neonHelp::vecLogicOp_3vecs( + results[0] = vecLogicOp_3vecs( operands, [](float x, float y) -> float { return x - y; }); break; } @@ -2397,159 +2322,147 @@ void Instruction::execute() { break; } case Opcode::AArch64_INCB_XPiI: { // incb xdn{, pattern{, #imm}} - results[0] = - sveHelp::sveInc_gprImm(operands, metadata, VL_bits); + results[0] = sveInc_gprImm(operands, metadata, VL_bits); break; } case Opcode::AArch64_INCD_XPiI: { // incd xdn{, pattern{, #imm}} - results[0] = - sveHelp::sveInc_gprImm(operands, metadata, VL_bits); + results[0] = sveInc_gprImm(operands, metadata, VL_bits); break; } 
case Opcode::AArch64_INCD_ZPiI: { // incd zdn.d{, pattern{, #imm}} - results[0] = sveHelp::sveInc_imm(operands, metadata, VL_bits); + results[0] = sveInc_imm(operands, metadata, VL_bits); break; } case Opcode::AArch64_INCH_XPiI: { // inch xdn{, pattern{, #imm}} - results[0] = - sveHelp::sveInc_gprImm(operands, metadata, VL_bits); + results[0] = sveInc_gprImm(operands, metadata, VL_bits); break; } case Opcode::AArch64_INCH_ZPiI: { // inch zdn.h{, pattern{, #imm}} - results[0] = sveHelp::sveInc_imm(operands, metadata, VL_bits); + results[0] = sveInc_imm(operands, metadata, VL_bits); break; } case Opcode::AArch64_INCP_XP_B: { // incp xdn, pm.b - results[0] = sveHelp::sveIncp_gpr(operands, VL_bits); + results[0] = sveIncp_gpr(operands, VL_bits); break; } case Opcode::AArch64_INCP_XP_D: { // incp xdn, pm.d - results[0] = sveHelp::sveIncp_gpr(operands, VL_bits); + results[0] = sveIncp_gpr(operands, VL_bits); break; } case Opcode::AArch64_INCP_XP_H: { // incp xdn, pm.h - results[0] = sveHelp::sveIncp_gpr(operands, VL_bits); + results[0] = sveIncp_gpr(operands, VL_bits); break; } case Opcode::AArch64_INCP_XP_S: { // incp xdn, pm.s - results[0] = sveHelp::sveIncp_gpr(operands, VL_bits); + results[0] = sveIncp_gpr(operands, VL_bits); break; } case Opcode::AArch64_INCW_XPiI: { // incw xdn{, pattern{, #imm}} - results[0] = - sveHelp::sveInc_gprImm(operands, metadata, VL_bits); + results[0] = sveInc_gprImm(operands, metadata, VL_bits); break; } case Opcode::AArch64_INCW_ZPiI: { // incw zdn.s{, pattern{, #imm}} - results[0] = sveHelp::sveInc_imm(operands, metadata, VL_bits); + results[0] = sveInc_imm(operands, metadata, VL_bits); break; } case Opcode::AArch64_INDEX_II_B: { // index zd.b, #imm, #imm - results[0] = - sveHelp::sveIndex(operands, metadata, VL_bits, true, true); + results[0] = sveIndex(operands, metadata, VL_bits, true, true); break; } case Opcode::AArch64_INDEX_II_D: { // index zd.d, #imm, #imm - results[0] = - sveHelp::sveIndex(operands, metadata, VL_bits, true, 
true); + results[0] = sveIndex(operands, metadata, VL_bits, true, true); break; } case Opcode::AArch64_INDEX_II_H: { // index zd.h, #imm, #imm - results[0] = - sveHelp::sveIndex(operands, metadata, VL_bits, true, true); + results[0] = sveIndex(operands, metadata, VL_bits, true, true); break; } case Opcode::AArch64_INDEX_II_S: { // index zd.s, #imm, #imm - results[0] = - sveHelp::sveIndex(operands, metadata, VL_bits, true, true); + results[0] = sveIndex(operands, metadata, VL_bits, true, true); break; } case Opcode::AArch64_INDEX_IR_B: { // index zd.b, #imm, wn - results[0] = sveHelp::sveIndex(operands, metadata, - VL_bits, true, false); + results[0] = + sveIndex(operands, metadata, VL_bits, true, false); break; } case Opcode::AArch64_INDEX_IR_D: { // index zd.d, #imm, xn - results[0] = sveHelp::sveIndex(operands, metadata, - VL_bits, true, false); + results[0] = sveIndex(operands, metadata, VL_bits, + true, false); break; } case Opcode::AArch64_INDEX_IR_H: { // index zd.h, #imm, wn - results[0] = sveHelp::sveIndex(operands, metadata, - VL_bits, true, false); + results[0] = sveIndex(operands, metadata, VL_bits, + true, false); break; } case Opcode::AArch64_INDEX_IR_S: { // index zd.s, #imm, wn - results[0] = sveHelp::sveIndex(operands, metadata, - VL_bits, true, false); + results[0] = sveIndex(operands, metadata, VL_bits, + true, false); break; } case Opcode::AArch64_INDEX_RI_B: { // index zd.b, wn, #imm - results[0] = sveHelp::sveIndex(operands, metadata, - VL_bits, false, true); + results[0] = + sveIndex(operands, metadata, VL_bits, false, true); break; } case Opcode::AArch64_INDEX_RI_D: { // index zd.d, xn, #imm - results[0] = sveHelp::sveIndex(operands, metadata, - VL_bits, false, true); + results[0] = sveIndex(operands, metadata, VL_bits, + false, true); break; } case Opcode::AArch64_INDEX_RI_H: { // index zd.h, wn, #imm - results[0] = sveHelp::sveIndex(operands, metadata, - VL_bits, false, true); + results[0] = sveIndex(operands, metadata, VL_bits, + false, 
true); break; } case Opcode::AArch64_INDEX_RI_S: { // index zd.s, wn, #imm - results[0] = sveHelp::sveIndex(operands, metadata, - VL_bits, false, true); + results[0] = sveIndex(operands, metadata, VL_bits, + false, true); break; } case Opcode::AArch64_INDEX_RR_B: { // index zd.b, wn, wm - results[0] = sveHelp::sveIndex(operands, metadata, - VL_bits, false, false); + results[0] = sveIndex(operands, metadata, VL_bits, + false, false); break; } case Opcode::AArch64_INDEX_RR_D: { // index zd.d, xn, xm - results[0] = sveHelp::sveIndex(operands, metadata, - VL_bits, false, false); + results[0] = sveIndex(operands, metadata, VL_bits, + false, false); break; } case Opcode::AArch64_INDEX_RR_H: { // index zd.h, wn, wm - results[0] = sveHelp::sveIndex(operands, metadata, - VL_bits, false, false); + results[0] = sveIndex(operands, metadata, VL_bits, + false, false); break; } case Opcode::AArch64_INDEX_RR_S: { // index zd.s, wn, wm - results[0] = sveHelp::sveIndex(operands, metadata, - VL_bits, false, false); + results[0] = sveIndex(operands, metadata, VL_bits, + false, false); break; } case Opcode::AArch64_INSvi16gpr: { // ins vd.h[index], wn - results[0] = neonHelp::vecInsIndex_gpr(operands, - metadata); + results[0] = vecInsIndex_gpr(operands, metadata); break; } case Opcode::AArch64_INSvi32gpr: { // ins vd.s[index], wn - results[0] = neonHelp::vecInsIndex_gpr(operands, - metadata); + results[0] = vecInsIndex_gpr(operands, metadata); break; } case Opcode::AArch64_INSvi32lane: { // ins vd.s[index1], vn.s[index2] - results[0] = neonHelp::vecIns_2Index(operands, metadata); + results[0] = vecIns_2Index(operands, metadata); break; } case Opcode::AArch64_INSvi64gpr: { // ins vd.d[index], xn - results[0] = neonHelp::vecInsIndex_gpr(operands, - metadata); + results[0] = vecInsIndex_gpr(operands, metadata); break; } case Opcode::AArch64_INSvi64lane: { // ins vd.d[index1], vn.d[index2] - results[0] = neonHelp::vecIns_2Index(operands, metadata); + results[0] = vecIns_2Index(operands, 
metadata); break; } case Opcode::AArch64_INSvi8gpr: { // ins vd.b[index], wn - results[0] = neonHelp::vecInsIndex_gpr(operands, - metadata); + results[0] = vecInsIndex_gpr(operands, metadata); break; } case Opcode::AArch64_LD1_MXIPXX_H_D: { // ld1d {zath.d[ws, #imm]}, pg/z, @@ -3682,53 +3595,47 @@ void Instruction::execute() { break; } case Opcode::AArch64_LSLVWr: { // lslv wd, wn, wm - results[0] = { - logicalHelp::logicalShiftLR_3ops(operands, true), 8}; + results[0] = {logicalShiftLR_3ops(operands, true), 8}; break; } case Opcode::AArch64_LSLVXr: { // lslv xd, xn, xm - results[0] = logicalHelp::logicalShiftLR_3ops(operands, true); + results[0] = logicalShiftLR_3ops(operands, true); break; } case Opcode::AArch64_LSL_ZZI_S: { // lsl zd.s, zn.s, #imm - results[0] = sveHelp::sveLsl_imm(operands, metadata, VL_bits); + results[0] = sveLsl_imm(operands, metadata, VL_bits); break; } case Opcode::AArch64_LSRVWr: { // lsrv wd, wn, wm - results[0] = { - logicalHelp::logicalShiftLR_3ops(operands, false), 8}; + results[0] = {logicalShiftLR_3ops(operands, false), 8}; break; } case Opcode::AArch64_LSRVXr: { // lsrv xd, xn, xm - results[0] = - logicalHelp::logicalShiftLR_3ops(operands, false); + results[0] = logicalShiftLR_3ops(operands, false); break; } case Opcode::AArch64_MADDWrrr: { // madd wd, wn, wm, wa - results[0] = {multiplyHelp::madd_4ops(operands), 8}; + results[0] = {madd_4ops(operands), 8}; break; } case Opcode::AArch64_MADDXrrr: { // madd xd, xn, xm, xa - results[0] = multiplyHelp::madd_4ops(operands); + results[0] = madd_4ops(operands); break; } case Opcode::AArch64_MLA_ZPmZZ_B: { // mla zda.b, pg/m, zn.b, zm.b - results[0] = sveHelp::sveMlaPredicated_vecs(operands, VL_bits); + results[0] = sveMlaPredicated_vecs(operands, VL_bits); break; } case Opcode::AArch64_MLA_ZPmZZ_D: { // mla zda.d, pg/m, zn.d, zm.d - results[0] = - sveHelp::sveMlaPredicated_vecs(operands, VL_bits); + results[0] = sveMlaPredicated_vecs(operands, VL_bits); break; } case 
Opcode::AArch64_MLA_ZPmZZ_H: { // mla zda.h, pg/m, zn.h, zm.h - results[0] = - sveHelp::sveMlaPredicated_vecs(operands, VL_bits); + results[0] = sveMlaPredicated_vecs(operands, VL_bits); break; } case Opcode::AArch64_MLA_ZPmZZ_S: { // mla zda.s, pg/m, zn.s, zm.s - results[0] = - sveHelp::sveMlaPredicated_vecs(operands, VL_bits); + results[0] = sveMlaPredicated_vecs(operands, VL_bits); break; } case Opcode::AArch64_MOVID: { // movi dd, #imm @@ -3736,59 +3643,57 @@ void Instruction::execute() { break; } case Opcode::AArch64_MOVIv16b_ns: { // movi vd.16b, #imm - results[0] = neonHelp::vecMovi_imm(metadata); + results[0] = vecMovi_imm(metadata); break; } case Opcode::AArch64_MOVIv2d_ns: { // movi vd.2d, #imm - results[0] = neonHelp::vecMovi_imm(metadata); + results[0] = vecMovi_imm(metadata); break; } case Opcode::AArch64_MOVIv2i32: { // movi vd.2s, #imm{, lsl #shift} - results[0] = neonHelp::vecMoviShift_imm(metadata, false); + results[0] = vecMoviShift_imm(metadata, false); break; } case Opcode::AArch64_MOVIv4i32: { // movi vd.4s, #imm{, LSL #shift} - results[0] = neonHelp::vecMoviShift_imm(metadata, false); + results[0] = vecMoviShift_imm(metadata, false); break; } case Opcode::AArch64_MOVIv8b_ns: { // movi vd.8b, #imm - results[0] = neonHelp::vecMovi_imm(metadata); + results[0] = vecMovi_imm(metadata); break; } case Opcode::AArch64_MOVKWi: { // movk wd, #imm - results[0] = { - arithmeticHelp::movkShift_imm(operands, metadata), 8}; + results[0] = {movkShift_imm(operands, metadata), 8}; break; } case Opcode::AArch64_MOVKXi: { // movk xd, #imm - results[0] = - arithmeticHelp::movkShift_imm(operands, metadata); + results[0] = movkShift_imm(operands, metadata); break; } case Opcode::AArch64_MOVNWi: { // movn wd, #imm{, LSL #shift} - results[0] = {arithmeticHelp::movnShift_imm( + results[0] = {movnShift_imm( metadata, [](uint64_t x) -> uint32_t { return ~x; }), 8}; break; } case Opcode::AArch64_MOVNXi: { // movn xd, #imm{, LSL #shift} - results[0] = 
arithmeticHelp::movnShift_imm( + results[0] = movnShift_imm( metadata, [](uint64_t x) -> uint64_t { return ~x; }); break; } case Opcode::AArch64_MOVPRFX_ZPmZ_D: { // movprfx zd.d, pg/m, zn.d - results[0] = sveHelp::sveMovprfxPredicated_destUnchanged( - operands, VL_bits); + results[0] = + sveMovprfxPredicated_destUnchanged(operands, VL_bits); break; } case Opcode::AArch64_MOVPRFX_ZPzZ_D: { // movprfx zd.d, pg/z, zn.d - results[0] = sveHelp::sveMovprfxPredicated_destToZero( - operands, VL_bits); + results[0] = + sveMovprfxPredicated_destToZero(operands, VL_bits); break; } case Opcode::AArch64_MOVPRFX_ZPzZ_S: { // movprfx zd.s, pg/z, zn.s - results[0] = sveHelp::sveMovprfxPredicated_destToZero( - operands, VL_bits); + results[0] = + sveMovprfxPredicated_destToZero(operands, VL_bits); break; } case Opcode::AArch64_MOVPRFX_ZZ: { // movprfx zd, zn @@ -3797,13 +3702,13 @@ void Instruction::execute() { break; } case Opcode::AArch64_MOVZWi: { // movz wd, #imm - results[0] = {arithmeticHelp::movnShift_imm( + results[0] = {movnShift_imm( metadata, [](uint64_t x) -> uint32_t { return x; }), 8}; break; } case Opcode::AArch64_MOVZXi: { // movz xd, #imm - results[0] = arithmeticHelp::movnShift_imm( + results[0] = movnShift_imm( metadata, [](uint64_t x) -> uint64_t { return x; }); break; } @@ -3823,11 +3728,11 @@ void Instruction::execute() { break; } case Opcode::AArch64_MSUBWrrr: { // msub wd, wn, wm, wa - results[0] = {multiplyHelp::msub_4ops(operands), 8}; + results[0] = {msub_4ops(operands), 8}; break; } case Opcode::AArch64_MSUBXrrr: { // msub xd, xn, xm, xa - results[0] = multiplyHelp::msub_4ops(operands); + results[0] = msub_4ops(operands); break; } case Opcode::AArch64_MSRpstatesvcrImm1: { // msr svcr, #imm @@ -3850,118 +3755,116 @@ void Instruction::execute() { } } case Opcode::AArch64_MUL_ZPmZ_B: { // mul zdn.b, pg/m, zdn.b, zm.b - results[0] = sveHelp::sveMulPredicated(operands, metadata, - VL_bits, false); + results[0] = + sveMulPredicated(operands, metadata, 
VL_bits, false); break; } case Opcode::AArch64_MUL_ZPmZ_D: { // mul zdn.d, pg/m, zdn.d, zm.d - results[0] = sveHelp::sveMulPredicated(operands, metadata, - VL_bits, false); + results[0] = + sveMulPredicated(operands, metadata, VL_bits, false); break; } case Opcode::AArch64_MUL_ZPmZ_H: { // mul zdn.h, pg/m, zdn.h, zm.h - results[0] = sveHelp::sveMulPredicated(operands, metadata, - VL_bits, false); + results[0] = + sveMulPredicated(operands, metadata, VL_bits, false); break; } case Opcode::AArch64_MUL_ZPmZ_S: { // mul zdn.s, pg/m, zdn.s, zm.s - results[0] = sveHelp::sveMulPredicated(operands, metadata, - VL_bits, false); + results[0] = + sveMulPredicated(operands, metadata, VL_bits, false); break; } case Opcode::AArch64_MVNIv2i32: { // mvni vd.2s, #imm{, lsl #shift} - results[0] = neonHelp::vecMoviShift_imm(metadata, true); + results[0] = vecMoviShift_imm(metadata, true); break; } case Opcode::AArch64_MVNIv2s_msl: { // mvni vd.2s, #imm, msl #amount - results[0] = neonHelp::vecMoviShift_imm(metadata, true); + results[0] = vecMoviShift_imm(metadata, true); break; } case Opcode::AArch64_MVNIv4i16: { // mvni vd.4h, #imm{, lsl #shift} - results[0] = neonHelp::vecMoviShift_imm(metadata, true); + results[0] = vecMoviShift_imm(metadata, true); break; } case Opcode::AArch64_MVNIv4i32: { // mvni vd.4s, #imm{, lsl #shift} - results[0] = neonHelp::vecMoviShift_imm(metadata, true); + results[0] = vecMoviShift_imm(metadata, true); break; } case Opcode::AArch64_MVNIv4s_msl: { // mvni vd.4s #imm, msl #amount - results[0] = neonHelp::vecMoviShift_imm(metadata, true); + results[0] = vecMoviShift_imm(metadata, true); break; } case Opcode::AArch64_MVNIv8i16: { // mvni vd.8h, #imm{, lsl #shift} - results[0] = neonHelp::vecMoviShift_imm(metadata, true); + results[0] = vecMoviShift_imm(metadata, true); break; } case Opcode::AArch64_NEGv2i64: { // neg vd.2d, vn.2d - results[0] = neonHelp::vecFneg_2ops(operands); + results[0] = vecFneg_2ops(operands); break; } case Opcode::AArch64_NOTv16i8: 
{ // not vd.16b, vn.16b - results[0] = neonHelp::vecLogicOp_2vecs( + results[0] = vecLogicOp_2vecs( operands, [](uint8_t x) -> uint8_t { return ~x; }); break; } case Opcode::AArch64_NOTv8i8: { // not vd.8b, vn.8b - results[0] = neonHelp::vecLogicOp_2vecs( + results[0] = vecLogicOp_2vecs( operands, [](uint8_t x) -> uint8_t { return ~x; }); break; } case Opcode::AArch64_ORNWrs: { // orn wd, wn, wm{, shift{ #amount}} - auto [result, nzcv] = logicalHelp::logicOpShift_3ops( + auto [result, nzcv] = logicOpShift_3ops( operands, metadata, false, [](uint32_t x, uint32_t y) -> uint32_t { return x | (~y); }); results[0] = {result, 8}; break; } case Opcode::AArch64_ORNXrs: { // orn xd, xn, xm{, shift{ #amount}} - auto [result, nzcv] = logicalHelp::logicOpShift_3ops( + auto [result, nzcv] = logicOpShift_3ops( operands, metadata, false, [](uint64_t x, uint64_t y) -> uint64_t { return x | (~y); }); results[0] = result; break; } case Opcode::AArch64_ORRWri: { // orr wd, wn, #imm - auto [result, nzcv] = logicalHelp::logicOp_imm( + auto [result, nzcv] = logicOp_imm( operands, metadata, false, [](uint32_t x, uint32_t y) -> uint32_t { return x | y; }); results[0] = {result, 8}; break; } case Opcode::AArch64_ORRWrs: { // orr wd, wn, wm{, shift{ #amount}} - results[0] = { - comparisonHelp::orrShift_3ops(operands, metadata), 8}; + results[0] = {orrShift_3ops(operands, metadata), 8}; break; } case Opcode::AArch64_ORRXri: { // orr xd, xn, #imm - auto [result, nzcv] = logicalHelp::logicOp_imm( + auto [result, nzcv] = logicOp_imm( operands, metadata, false, [](uint64_t x, uint64_t y) -> uint64_t { return x | y; }); results[0] = {result, 8}; break; } case Opcode::AArch64_ORRXrs: { // orr xd, xn, xm{, shift{ #amount}} - results[0] = - comparisonHelp::orrShift_3ops(operands, metadata); + results[0] = orrShift_3ops(operands, metadata); break; } case Opcode::AArch64_ORR_PPzPP: { // orr pd.b, pg/z, pn.b, pm.b - results[0] = sveHelp::sveLogicOp_preds( + results[0] = sveLogicOp_preds( operands, 
VL_bits, [](uint64_t x, uint64_t y) -> uint64_t { return x | y; }); break; } case Opcode::AArch64_ORR_ZZZ: { // orr zd.d, zn.d, zm.d - results[0] = sveHelp::sveOrr_3vecs(operands, VL_bits); + results[0] = sveOrr_3vecs(operands, VL_bits); break; } case Opcode::AArch64_ORRv16i8: { // orr vd.16b, Vn.16b, Vm.16b - results[0] = neonHelp::vecLogicOp_3vecs( + results[0] = vecLogicOp_3vecs( operands, [](uint8_t x, uint8_t y) -> uint8_t { return x | y; }); break; } case Opcode::AArch64_ORRv8i8: { // orr vd.8b, vn.8b, vm.8b - results[0] = neonHelp::vecLogicOp_3vecs( + results[0] = vecLogicOp_3vecs( operands, [](uint8_t x, uint8_t y) -> uint8_t { return x | y; }); break; } @@ -3974,19 +3877,19 @@ void Instruction::execute() { break; } case Opcode::AArch64_PSEL_PPPRI_B: { // psel pd, pn, pm.b[wa, #imm] - results[0] = sveHelp::svePsel(operands, metadata, VL_bits); + results[0] = svePsel(operands, metadata, VL_bits); break; } case Opcode::AArch64_PSEL_PPPRI_D: { // psel pd, pn, pm.d[wa, #imm] - results[0] = sveHelp::svePsel(operands, metadata, VL_bits); + results[0] = svePsel(operands, metadata, VL_bits); break; } case Opcode::AArch64_PSEL_PPPRI_H: { // psel pd, pn, pm.h[wa, #imm] - results[0] = sveHelp::svePsel(operands, metadata, VL_bits); + results[0] = svePsel(operands, metadata, VL_bits); break; } case Opcode::AArch64_PSEL_PPPRI_S: { // psel pd, pn, pm.s[wa, #imm] - results[0] = sveHelp::svePsel(operands, metadata, VL_bits); + results[0] = svePsel(operands, metadata, VL_bits); break; } case Opcode::AArch64_PTEST_PP: { // ptest pg, pn.b @@ -3995,39 +3898,39 @@ void Instruction::execute() { std::array masked_n = {(g[0] & s[0]), (g[1] & s[1]), (g[2] & s[2]), (g[3] & s[3])}; // Byte count = 1 as destination predicate is regarding single bytes. 
- results[0] = AuxFunc::getNZCVfromPred(masked_n, VL_bits, 1); + results[0] = getNZCVfromPred(masked_n, VL_bits, 1); break; } case Opcode::AArch64_PTRUE_B: { // ptrue pd.b{, pattern} - results[0] = sveHelp::svePtrue(metadata, VL_bits); + results[0] = svePtrue(metadata, VL_bits); break; } case Opcode::AArch64_PTRUE_D: { // ptrue pd.d{, pattern} - results[0] = sveHelp::svePtrue(metadata, VL_bits); + results[0] = svePtrue(metadata, VL_bits); break; } case Opcode::AArch64_PTRUE_H: { // ptrue pd.h{, pattern} - results[0] = sveHelp::svePtrue(metadata, VL_bits); + results[0] = svePtrue(metadata, VL_bits); break; } case Opcode::AArch64_PTRUE_S: { // ptrue pd.s{, pattern} - results[0] = sveHelp::svePtrue(metadata, VL_bits); + results[0] = svePtrue(metadata, VL_bits); break; } case Opcode::AArch64_PUNPKHI_PP: { // punpkhi pd.h, pn.b - results[0] = sveHelp::svePunpk(operands, VL_bits, true); + results[0] = svePunpk(operands, VL_bits, true); break; } case Opcode::AArch64_PUNPKLO_PP: { // punpklo pd.h, pn.b - results[0] = sveHelp::svePunpk(operands, VL_bits, false); + results[0] = svePunpk(operands, VL_bits, false); break; } case Opcode::AArch64_RBITWr: { // rbit wd, wn - results[0] = {bitmanipHelp::rbit(operands, metadata), 8}; + results[0] = {rbit(operands, metadata), 8}; break; } case Opcode::AArch64_RBITXr: { // rbit xd, xn - results[0] = bitmanipHelp::rbit(operands, metadata); + results[0] = rbit(operands, metadata); break; } case Opcode::AArch64_RDVLI_XI: { // rdvl xd, #imm @@ -4041,129 +3944,123 @@ void Instruction::execute() { break; } case Opcode::AArch64_REV16v16i8: { // rev16 Vd.16b, Vn.16b - results[0] = neonHelp::vecRev(operands); + results[0] = vecRev(operands); break; } case Opcode::AArch64_REV16v8i8: { // rev16 Vd.8b, Vn.8b - results[0] = neonHelp::vecRev(operands); + results[0] = vecRev(operands); break; } case Opcode::AArch64_REV32v16i8: { // rev32 Vd.16b, Vn.16b - results[0] = neonHelp::vecRev(operands); + results[0] = vecRev(operands); break; } case 
Opcode::AArch64_REV32v4i16: { // rev32 Vd.4h, Vn.4h - results[0] = neonHelp::vecRev(operands); + results[0] = vecRev(operands); break; } case Opcode::AArch64_REV32v8i16: { // rev32 Vd.8h, Vn.8h - results[0] = neonHelp::vecRev(operands); + results[0] = vecRev(operands); break; } case Opcode::AArch64_REV32v8i8: { // rev32 Vd.8b, Vn.8b - results[0] = neonHelp::vecRev(operands); + results[0] = vecRev(operands); break; } case Opcode::AArch64_REV64v16i8: { // rev64 Vd.16b, Vn.16b - results[0] = neonHelp::vecRev(operands); + results[0] = vecRev(operands); break; } case Opcode::AArch64_REV64v2i32: { // rev64 Vd.2s, Vn.2s - results[0] = neonHelp::vecRev(operands); + results[0] = vecRev(operands); break; } case Opcode::AArch64_REV64v4i16: { // rev64 Vd.4h, Vn.4h - results[0] = neonHelp::vecRev(operands); + results[0] = vecRev(operands); break; } case Opcode::AArch64_REV64v4i32: { // rev64 Vd.4s, Vn.4s - results[0] = neonHelp::vecRev(operands); + results[0] = vecRev(operands); break; } case Opcode::AArch64_REV64v8i16: { // rev64 Vd.8h, Vn.8h - results[0] = neonHelp::vecRev(operands); + results[0] = vecRev(operands); break; } case Opcode::AArch64_REV64v8i8: { // rev64 Vd.8b Vn.8b - results[0] = neonHelp::vecRev(operands); + results[0] = vecRev(operands); break; } case Opcode::AArch64_REVXr: { // rev xd, xn - results[0] = bitmanipHelp::rev(operands); + results[0] = rev(operands); break; } case Opcode::AArch64_REV_PP_B: { // rev pd.b, pn.b - results[0] = sveHelp::sveRev_predicates(operands, VL_bits); + results[0] = sveRev_predicates(operands, VL_bits); break; } case Opcode::AArch64_REV_PP_D: { // rev pd.d, pn.d - results[0] = sveHelp::sveRev_predicates(operands, VL_bits); + results[0] = sveRev_predicates(operands, VL_bits); break; } case Opcode::AArch64_REV_PP_H: { // rev pd.h, pn.h - results[0] = sveHelp::sveRev_predicates(operands, VL_bits); + results[0] = sveRev_predicates(operands, VL_bits); break; } case Opcode::AArch64_REV_PP_S: { // rev pd.s, pn.s - results[0] = 
sveHelp::sveRev_predicates(operands, VL_bits); + results[0] = sveRev_predicates(operands, VL_bits); break; } case Opcode::AArch64_REV_ZZ_B: { // rev zd.b, zn.b - results[0] = sveHelp::sveRev_vecs(operands, VL_bits); + results[0] = sveRev_vecs(operands, VL_bits); break; } case Opcode::AArch64_REV_ZZ_D: { // rev zd.d, zn.d - results[0] = sveHelp::sveRev_vecs(operands, VL_bits); + results[0] = sveRev_vecs(operands, VL_bits); break; } case Opcode::AArch64_REV_ZZ_H: { // rev zd.h, zn.h - results[0] = sveHelp::sveRev_vecs(operands, VL_bits); + results[0] = sveRev_vecs(operands, VL_bits); break; } case Opcode::AArch64_REV_ZZ_S: { // rev zd.s, zn.s - results[0] = sveHelp::sveRev_vecs(operands, VL_bits); + results[0] = sveRev_vecs(operands, VL_bits); break; } case Opcode::AArch64_RORVWr: { // rorv wd, wn, wm - results[0] = {logicalHelp::rorv_3ops(operands), 8}; + results[0] = {rorv_3ops(operands), 8}; break; } case Opcode::AArch64_RORVXr: { // rorv xd, xn, xm - results[0] = logicalHelp::rorv_3ops(operands); + results[0] = rorv_3ops(operands); break; } case Opcode::AArch64_SBCWr: { // sbc wd, wn, wm - results[0] = {arithmeticHelp::sbc(operands), 8}; + results[0] = {sbc(operands), 8}; break; } case Opcode::AArch64_SBCXr: { // sbc xd, xn, xm - results[0] = arithmeticHelp::sbc(operands); + results[0] = sbc(operands); break; } case Opcode::AArch64_SBFMWri: { // sbfm wd, wn, #immr, #imms - results[0] = { - bitmanipHelp::bfm_2imms(operands, metadata, true, true), - 8}; + results[0] = {bfm_2imms(operands, metadata, true, true), 8}; break; } case Opcode::AArch64_SBFMXri: { // sbfm xd, xn, #immr, #imms - results[0] = - bitmanipHelp::bfm_2imms(operands, metadata, true, true); + results[0] = bfm_2imms(operands, metadata, true, true); break; } case Opcode::AArch64_SCVTFSWSri: { // scvtf sd, wn, #fbits - results[0] = - floatHelp::scvtf_FixedPoint(operands, metadata); + results[0] = scvtf_FixedPoint(operands, metadata); break; } case Opcode::AArch64_SCVTFSXDri: { // scvtf dd, xn, #fbits - 
results[0] = - floatHelp::scvtf_FixedPoint(operands, metadata); + results[0] = scvtf_FixedPoint(operands, metadata); break; } case Opcode::AArch64_SCVTFSXSri: { // scvtf sd, xn, #fbits - results[0] = - floatHelp::scvtf_FixedPoint(operands, metadata); + results[0] = scvtf_FixedPoint(operands, metadata); break; } case Opcode::AArch64_SCVTFUWDri: { // scvtf dd, wn @@ -4183,23 +4080,19 @@ void Instruction::execute() { break; } case Opcode::AArch64_SCVTF_ZPmZ_DtoD: { // scvtf zd.d, pg/m, zn.d - results[0] = - sveHelp::sveFcvtPredicated(operands, VL_bits); + results[0] = sveFcvtPredicated(operands, VL_bits); break; } case Opcode::AArch64_SCVTF_ZPmZ_DtoS: { // scvtf zd.s, pg/m, zn.d - results[0] = - sveHelp::sveFcvtPredicated(operands, VL_bits); + results[0] = sveFcvtPredicated(operands, VL_bits); break; } case Opcode::AArch64_SCVTF_ZPmZ_StoD: { // scvtf zd.d, pg/m, zn.s - results[0] = - sveHelp::sveFcvtPredicated(operands, VL_bits); + results[0] = sveFcvtPredicated(operands, VL_bits); break; } case Opcode::AArch64_SCVTF_ZPmZ_StoS: { // scvtf zd.s, pg/m, zn.s - results[0] = - sveHelp::sveFcvtPredicated(operands, VL_bits); + results[0] = sveFcvtPredicated(operands, VL_bits); break; } case Opcode::AArch64_SCVTFv1i32: { // scvtf sd, sn @@ -4211,128 +4104,121 @@ void Instruction::execute() { break; } case Opcode::AArch64_SCVTFv2f32: { // scvtf vd.2s, vn.2s - results[0] = neonHelp::vecScvtf_2vecs( + results[0] = vecScvtf_2vecs( operands, [](int32_t x) -> float { return static_cast(x); }); break; } case Opcode::AArch64_SCVTFv2f64: { // scvtf vd.2d, vn.2d - results[0] = neonHelp::vecScvtf_2vecs( + results[0] = vecScvtf_2vecs( operands, [](int64_t x) -> double { return static_cast(x); }); break; } case Opcode::AArch64_SCVTFv4f32: { // scvtf vd.4s, vn.4s - results[0] = neonHelp::vecScvtf_2vecs( + results[0] = vecScvtf_2vecs( operands, [](int32_t x) -> float { return static_cast(x); }); break; } case Opcode::AArch64_SDIVWr: { // sdiv wd, wn, wm - results[0] = 
{divideHelp::div_3ops(operands), 8}; + results[0] = {div_3ops(operands), 8}; break; } case Opcode::AArch64_SDIVXr: { // sdiv xd, xn, xm - results[0] = {divideHelp::div_3ops(operands), 8}; + results[0] = {div_3ops(operands), 8}; break; } case Opcode::AArch64_SEL_ZPZZ_D: { // sel zd.d, pg, zn.d, zm.d - results[0] = sveHelp::sveSel_zpzz(operands, VL_bits); + results[0] = sveSel_zpzz(operands, VL_bits); break; } case Opcode::AArch64_SEL_ZPZZ_S: { // sel zd.s, pg, zn.s, zm.s - results[0] = sveHelp::sveSel_zpzz(operands, VL_bits); + results[0] = sveSel_zpzz(operands, VL_bits); break; } case Opcode::AArch64_SHLd: { // shl dd, dn #imm - results[0] = - neonHelp::vecShlShift_vecImm(operands, metadata); + results[0] = vecShlShift_vecImm(operands, metadata); break; } case Opcode::AArch64_SHLv4i32_shift: { // shl vd.4s, vn.4s, #imm - results[0] = - neonHelp::vecShlShift_vecImm(operands, metadata); + results[0] = vecShlShift_vecImm(operands, metadata); break; } case Opcode::AArch64_SMADDLrrr: { // smaddl xd, wn, wm, xa - results[0] = multiplyHelp::maddl_4ops(operands); + results[0] = maddl_4ops(operands); break; } case Opcode::AArch64_SMAX_ZI_S: { // smax zdn.s, zdn.s, #imm - results[0] = - sveHelp::sveMax_vecImm(operands, metadata, VL_bits); + results[0] = sveMax_vecImm(operands, metadata, VL_bits); break; } case Opcode::AArch64_SMAX_ZPmZ_S: { // smax zd.s, pg/m, zn.s, zm.s - results[0] = sveHelp::sveMaxPredicated_vecs(operands, VL_bits); + results[0] = sveMaxPredicated_vecs(operands, VL_bits); break; } case Opcode::AArch64_SMAXv4i32: { // smax vd.4s, vn.4s, vm.4s - results[0] = neonHelp::vecLogicOp_3vecs( + results[0] = vecLogicOp_3vecs( operands, [](int32_t x, int32_t y) -> int32_t { return std::max(x, y); }); break; } case Opcode::AArch64_SMINV_VPZ_S: { // sminv sd, pg, zn.s - results[0] = sveHelp::sveSminv(operands, VL_bits); + results[0] = sveSminv(operands, VL_bits); break; } case Opcode::AArch64_SMINVv4i32v: { // sminv sd, vn.4s - results[0] = 
neonHelp::vecMinv_2ops(operands); + results[0] = vecMinv_2ops(operands); break; } case Opcode::AArch64_SMIN_ZPmZ_S: { // smin zd.s, pg/m, zn.s, zm.s - results[0] = sveHelp::sveLogicOpPredicated_3vecs( + results[0] = sveLogicOpPredicated_3vecs( operands, VL_bits, [](int32_t x, int32_t y) -> int32_t { return std::min(x, y); }); break; } case Opcode::AArch64_SMINv4i32: { // smin vd.4s, vn.4s, vm.4s - results[0] = neonHelp::vecLogicOp_3vecs( + results[0] = vecLogicOp_3vecs( operands, [](int32_t x, int32_t y) -> int32_t { return std::min(x, y); }); break; } case Opcode::AArch64_SMSUBLrrr: { // smsubl xd, wn, wm, xa - results[0] = arithmeticHelp::msubl_4ops(operands); + results[0] = msubl_4ops(operands); break; } case Opcode::AArch64_SMULH_ZPmZ_B: { // smulh zdn.b, pg/m, zdn.b, zm.b - results[0] = - sveHelp::sveMulhPredicated(operands, VL_bits); + results[0] = sveMulhPredicated(operands, VL_bits); break; } case Opcode::AArch64_SMULH_ZPmZ_H: { // smulh zdn.h, pg/m, zdn.h, zm.h - results[0] = - sveHelp::sveMulhPredicated(operands, VL_bits); + results[0] = sveMulhPredicated(operands, VL_bits); break; } case Opcode::AArch64_SMULH_ZPmZ_S: { // smulh zdn.s, pg/m, zdn.s, zm.s - results[0] = - sveHelp::sveMulhPredicated(operands, VL_bits); + results[0] = sveMulhPredicated(operands, VL_bits); break; } case Opcode::AArch64_SMULHrr: { // smulh xd, xn, xm // TODO: signed - results[0] = AuxFunc::mulhi(operands[0].get(), - operands[1].get()); + results[0] = + mulhi(operands[0].get(), operands[1].get()); break; } case Opcode::AArch64_SSHLLv2i32_shift: { // sshll vd.2d, vn.2s, #imm - results[0] = neonHelp::vecShllShift_vecImm( - operands, metadata, false); + results[0] = + vecShllShift_vecImm(operands, metadata, false); break; } case Opcode::AArch64_SSHLLv4i32_shift: { // sshll2 vd.2d, vn.4s, #imm - results[0] = neonHelp::vecShllShift_vecImm( - operands, metadata, true); + results[0] = + vecShllShift_vecImm(operands, metadata, true); break; } case Opcode::AArch64_SHRNv8i8_shift: { // 
shrn vd.8b, vn.8h, #imm - results[0] = neonHelp::vecShrnShift_imm(operands, - metadata); + results[0] = vecShrnShift_imm(operands, metadata); break; } case Opcode::AArch64_SSHRv4i32_shift: { // sshr vd.4s, vn.4s, #imm - results[0] = neonHelp::vecSshrShift_imm(operands, metadata); + results[0] = vecSshrShift_imm(operands, metadata); break; } case Opcode::AArch64_SST1B_D_REAL: { // st1b {zd.d}, pg, [xn, zm.d] @@ -4416,8 +4302,7 @@ void Instruction::execute() { (ws + metadata.operands[0].sme_index.disp) % partition_num; const uint64_t* tileSlice = operands[sliceNum].getAsVector(); - memoryData = - sveHelp::sve_merge_store_data(tileSlice, pg, VL_bits); + memoryData = sve_merge_store_data(tileSlice, pg, VL_bits); break; } @@ -4470,8 +4355,7 @@ void Instruction::execute() { (ws + metadata.operands[0].sme_index.disp) % partition_num; const uint32_t* tileSlice = operands[sliceNum].getAsVector(); - memoryData = - sveHelp::sve_merge_store_data(tileSlice, pg, VL_bits); + memoryData = sve_merge_store_data(tileSlice, pg, VL_bits); break; } @@ -4547,7 +4431,7 @@ void Instruction::execute() { const uint8_t* d = operands[0].getAsVector(); const uint64_t* p = operands[1].getAsVector(); - memoryData = sveHelp::sve_merge_store_data(d, p, VL_bits); + memoryData = sve_merge_store_data(d, p, VL_bits); break; } case Opcode::AArch64_ST1B_IMM: { // st1b {zt.b}, pg, [xn{, #imm, mul vl}] @@ -4555,7 +4439,7 @@ void Instruction::execute() { const uint8_t* d = operands[0].getAsVector(); const uint64_t* p = operands[1].getAsVector(); - memoryData = sveHelp::sve_merge_store_data(d, p, VL_bits); + memoryData = sve_merge_store_data(d, p, VL_bits); break; } case Opcode::AArch64_ST1D: { // st1d {zt.d}, pg, [xn, xm, lsl #3] @@ -4563,7 +4447,7 @@ void Instruction::execute() { const uint64_t* d = operands[0].getAsVector(); const uint64_t* p = operands[1].getAsVector(); - memoryData = sveHelp::sve_merge_store_data(d, p, VL_bits); + memoryData = sve_merge_store_data(d, p, VL_bits); break; } case 
Opcode::AArch64_ST1D_IMM: { // st1d {zt.d}, pg, [xn{, #imm, mul vl}] @@ -4571,7 +4455,7 @@ void Instruction::execute() { const uint64_t* d = operands[0].getAsVector(); const uint64_t* p = operands[1].getAsVector(); - memoryData = sveHelp::sve_merge_store_data(d, p, VL_bits); + memoryData = sve_merge_store_data(d, p, VL_bits); break; } case Opcode::AArch64_ST1Fourv16b: { // st1 {vt.16b, vt2.16b, vt3.16b, @@ -4732,7 +4616,7 @@ void Instruction::execute() { const uint32_t* d = operands[0].getAsVector(); const uint64_t* p = operands[1].getAsVector(); - memoryData = sveHelp::sve_merge_store_data(d, p, VL_bits); + memoryData = sve_merge_store_data(d, p, VL_bits); break; } case Opcode::AArch64_ST1W_D: { // st1w {zt.d}, pg, [xn, xm, lsl #2] @@ -4740,8 +4624,7 @@ void Instruction::execute() { const uint64_t* d = operands[0].getAsVector(); const uint64_t* p = operands[1].getAsVector(); - memoryData = - sveHelp::sve_merge_store_data(d, p, VL_bits); + memoryData = sve_merge_store_data(d, p, VL_bits); break; } case Opcode::AArch64_ST1W_IMM: { // st1w {zt.s}, pg, [xn{, #imm, mul vl}] @@ -4749,7 +4632,7 @@ void Instruction::execute() { const uint32_t* d = operands[0].getAsVector(); const uint64_t* p = operands[1].getAsVector(); - memoryData = sveHelp::sve_merge_store_data(d, p, VL_bits); + memoryData = sve_merge_store_data(d, p, VL_bits); break; } case Opcode::AArch64_ST1i16: { // st1 {vt.h}[index], [xn] @@ -5113,36 +4996,32 @@ void Instruction::execute() { break; } case Opcode::AArch64_SUBSWri: { // subs wd, wn, #imm - auto [result, nzcv] = - arithmeticHelp::subShift_imm(operands, metadata, true); + auto [result, nzcv] = subShift_imm(operands, metadata, true); results[0] = nzcv; results[1] = {result, 8}; break; } case Opcode::AArch64_SUBSWrs: { // subs wd, wn, wm{, shift #amount} - auto [result, nzcv] = - arithmeticHelp::subShift_3ops(operands, metadata, true); + auto [result, nzcv] = subShift_3ops(operands, metadata, true); results[0] = nzcv; results[1] = {result, 8}; break; } 
case Opcode::AArch64_SUBSWrx: { // subs wd, wn, wm{, extend #amount} auto [result, nzcv] = - arithmeticHelp::subExtend_3ops(operands, metadata, true); + subExtend_3ops(operands, metadata, true); results[0] = nzcv; results[1] = {result, 8}; break; } case Opcode::AArch64_SUBSXri: { // subs xd, xn, #imm - auto [result, nzcv] = - arithmeticHelp::subShift_imm(operands, metadata, true); + auto [result, nzcv] = subShift_imm(operands, metadata, true); results[0] = nzcv; results[1] = result; break; } case Opcode::AArch64_SUBSXrs: { // subs xd, xn, xm{, shift #amount} - auto [result, nzcv] = - arithmeticHelp::subShift_3ops(operands, metadata, true); + auto [result, nzcv] = subShift_3ops(operands, metadata, true); results[0] = nzcv; results[1] = result; break; @@ -5150,95 +5029,93 @@ void Instruction::execute() { case Opcode::AArch64_SUBSXrx: // subs xd, xn, wm{, extend #amount} case Opcode::AArch64_SUBSXrx64: { // subs xd, xn, xm{, extend #amount} auto [result, nzcv] = - arithmeticHelp::subExtend_3ops(operands, metadata, true); + subExtend_3ops(operands, metadata, true); results[0] = nzcv; results[1] = result; break; } case Opcode::AArch64_SUBWri: { // sub wd, wn, #imm{, } - auto [result, nzcv] = - arithmeticHelp::subShift_imm(operands, metadata, false); + auto [result, nzcv] = subShift_imm(operands, metadata, false); results[0] = {result, 8}; break; } case Opcode::AArch64_SUBWrs: { // sub wd, wn, wm{, shift #amount} auto [result, nzcv] = - arithmeticHelp::subShift_3ops(operands, metadata, false); + subShift_3ops(operands, metadata, false); results[0] = {result, 8}; break; } case Opcode::AArch64_SUBXri: { // sub xd, xn, #imm{, } - auto [result, nzcv] = - arithmeticHelp::subShift_imm(operands, metadata, false); + auto [result, nzcv] = subShift_imm(operands, metadata, false); results[0] = result; break; } case Opcode::AArch64_SUBXrs: { // sub xd, xn, xm{, shift #amount} auto [result, nzcv] = - arithmeticHelp::subShift_3ops(operands, metadata, false); + subShift_3ops(operands, 
metadata, false); results[0] = result; break; } case Opcode::AArch64_SUBXrx: // sub xd, xn, wm{, extend #amount} case Opcode::AArch64_SUBXrx64: { // sub xd, xn, xm{, extend #amount} auto [result, nzcv] = - arithmeticHelp::subExtend_3ops(operands, metadata, false); + subExtend_3ops(operands, metadata, false); results[0] = result; break; } case Opcode::AArch64_SUB_ZZZ_B: { // sub zd.b, zn.b, zm.b - results[0] = sveHelp::sveSub_3vecs(operands, VL_bits); + results[0] = sveSub_3vecs(operands, VL_bits); break; } case Opcode::AArch64_SUB_ZZZ_D: { // sub zd.d, zn.d, zm.d - results[0] = sveHelp::sveSub_3vecs(operands, VL_bits); + results[0] = sveSub_3vecs(operands, VL_bits); break; } case Opcode::AArch64_SUB_ZZZ_H: { // sub zd.h, zn.h, zm.h - results[0] = sveHelp::sveSub_3vecs(operands, VL_bits); + results[0] = sveSub_3vecs(operands, VL_bits); break; } case Opcode::AArch64_SUB_ZZZ_S: { // sub zd.s, zn.s, zm.s - results[0] = sveHelp::sveSub_3vecs(operands, VL_bits); + results[0] = sveSub_3vecs(operands, VL_bits); break; } case Opcode::AArch64_SUBv16i8: { // sub vd.16b, vn.16b, vm.16b - results[0] = neonHelp::vecLogicOp_3vecs( + results[0] = vecLogicOp_3vecs( operands, [](uint8_t x, uint8_t y) -> uint8_t { return x - y; }); break; } case Opcode::AArch64_SUBv1i64: { // sub dd, dn, dm - results[0] = neonHelp::vecLogicOp_3vecs( + results[0] = vecLogicOp_3vecs( operands, [](uint64_t x, uint64_t y) -> uint64_t { return x - y; }); break; } case Opcode::AArch64_SUBv2i32: { // sub vd.2s, vn.2s, vm.2s - results[0] = neonHelp::vecLogicOp_3vecs( + results[0] = vecLogicOp_3vecs( operands, [](uint32_t x, uint32_t y) -> uint32_t { return x - y; }); break; } case Opcode::AArch64_SUBv2i64: { // sub vd.2d, vn.2d, vm.2d - results[0] = neonHelp::vecLogicOp_3vecs( + results[0] = vecLogicOp_3vecs( operands, [](uint64_t x, uint64_t y) -> uint64_t { return x - y; }); break; } case Opcode::AArch64_SUBv4i16: { // sub vd.4h, vn.4h, vm.4h - results[0] = neonHelp::vecLogicOp_3vecs( + results[0] = 
vecLogicOp_3vecs( operands, [](uint64_t x, uint16_t y) -> uint16_t { return x - y; }); break; } case Opcode::AArch64_SUBv4i32: { // sub vd.4s, vn.4s, vm.4s - results[0] = neonHelp::vecLogicOp_3vecs( + results[0] = vecLogicOp_3vecs( operands, [](uint32_t x, uint32_t y) -> uint32_t { return x - y; }); break; } case Opcode::AArch64_SUBv8i16: { // sub vd.8h, vn.8h, vm.8h - results[0] = neonHelp::vecLogicOp_3vecs( + results[0] = vecLogicOp_3vecs( operands, [](uint16_t x, uint16_t y) -> uint16_t { return x - y; }); break; } case Opcode::AArch64_SUBv8i8: { // sub vd.8b, vn.8b, vm.8b - results[0] = neonHelp::vecLogicOp_3vecs( + results[0] = vecLogicOp_3vecs( operands, [](uint8_t x, uint8_t y) -> uint8_t { return x - y; }); break; } @@ -5248,8 +5125,7 @@ void Instruction::execute() { break; } case Opcode::AArch64_SXTW_ZPmZ_D: { // sxtw zd.d, pg/m, zn.d - results[0] = - sveHelp::sveSxtPredicated(operands, VL_bits); + results[0] = sveSxtPredicated(operands, VL_bits); break; } case Opcode::AArch64_SYSxt: { // sys #, cn, cm, #{, xt} @@ -5267,182 +5143,179 @@ void Instruction::execute() { } case Opcode::AArch64_TBLv16i8Four: { // tbl Vd.16b {Vn.16b, Vn+1.16b, // Vn+2.16b,Vn+3.16b } Vm.16b - results[0] = neonHelp::vecTbl<16>(operands, metadata); + results[0] = vecTbl<16>(operands, metadata); break; } case Opcode::AArch64_TBLv16i8One: { // tbl Vd.16b {Vn.16b} Vm.16b - results[0] = neonHelp::vecTbl<16>(operands, metadata); + results[0] = vecTbl<16>(operands, metadata); break; } case Opcode::AArch64_TBLv16i8Three: { // tbl Vd.16b {Vn.16b, Vn+1.16b, // Vn+2.16b } Vm.16b - results[0] = neonHelp::vecTbl<16>(operands, metadata); + results[0] = vecTbl<16>(operands, metadata); break; } case Opcode::AArch64_TBLv16i8Two: { // tbl Vd.16b {Vn.16b, Vn+1.16b } // Vm.16b - results[0] = neonHelp::vecTbl<16>(operands, metadata); + results[0] = vecTbl<16>(operands, metadata); break; } case Opcode::AArch64_TBLv8i8Four: { // tbl Vd.8b {Vn.16b, Vn+1.16b, // Vn+2.16b,Vn+3.16b } Vm.8b - results[0] = 
neonHelp::vecTbl<8>(operands, metadata); + results[0] = vecTbl<8>(operands, metadata); break; } case Opcode::AArch64_TBLv8i8One: { // tbl Vd.8b {Vn.16b} Vm.8b - results[0] = neonHelp::vecTbl<8>(operands, metadata); + results[0] = vecTbl<8>(operands, metadata); break; } case Opcode::AArch64_TBLv8i8Three: { // tbl Vd.8b {Vn.16b, Vn+1.16b, // Vn+2.16b } Vm.8b - results[0] = neonHelp::vecTbl<8>(operands, metadata); + results[0] = vecTbl<8>(operands, metadata); break; } case Opcode::AArch64_TBLv8i8Two: { // tbl Vd.8b {Vn.16b, Vn+1.16b } Vm.8b - results[0] = neonHelp::vecTbl<8>(operands, metadata); + results[0] = vecTbl<8>(operands, metadata); break; } case Opcode::AArch64_TBNZW: { // tbnz wn, #imm, label - auto [taken, addr] = conditionalHelp::tbnz_tbz( - operands, metadata, instructionAddress_, true); + auto [taken, addr] = + tbnz_tbz(operands, metadata, instructionAddress_, true); branchTaken_ = taken; branchAddress_ = addr; break; } case Opcode::AArch64_TBNZX: { // tbnz xn, #imm, label - auto [taken, addr] = conditionalHelp::tbnz_tbz( - operands, metadata, instructionAddress_, true); + auto [taken, addr] = + tbnz_tbz(operands, metadata, instructionAddress_, true); branchTaken_ = taken; branchAddress_ = addr; break; } case Opcode::AArch64_TBZW: { // tbz wn, #imm, label - auto [taken, addr] = conditionalHelp::tbnz_tbz( - operands, metadata, instructionAddress_, false); + auto [taken, addr] = + tbnz_tbz(operands, metadata, instructionAddress_, false); branchTaken_ = taken; branchAddress_ = addr; break; } case Opcode::AArch64_TBZX: { // tbz xn, #imm, label - auto [taken, addr] = conditionalHelp::tbnz_tbz( - operands, metadata, instructionAddress_, false); + auto [taken, addr] = + tbnz_tbz(operands, metadata, instructionAddress_, false); branchTaken_ = taken; branchAddress_ = addr; break; } case Opcode::AArch64_TRN1_ZZZ_B: { // trn1 zd.b, zn.b, zm.b - results[0] = sveHelp::sveTrn1_3vecs(operands, VL_bits); + results[0] = sveTrn1_3vecs(operands, VL_bits); break; } case 
Opcode::AArch64_TRN1_ZZZ_D: { // trn1 zd.d, zn.d, zm.d - results[0] = sveHelp::sveTrn1_3vecs(operands, VL_bits); + results[0] = sveTrn1_3vecs(operands, VL_bits); break; } case Opcode::AArch64_TRN1_ZZZ_H: { // trn1 zd.h, zn.h, zm.h - results[0] = sveHelp::sveTrn1_3vecs(operands, VL_bits); + results[0] = sveTrn1_3vecs(operands, VL_bits); break; } case Opcode::AArch64_TRN1_ZZZ_S: { // trn1 zd.s, zn.s, zm.s - results[0] = sveHelp::sveTrn1_3vecs(operands, VL_bits); + results[0] = sveTrn1_3vecs(operands, VL_bits); break; } case Opcode::AArch64_TRN1v16i8: { // trn1 vd.16b, vn.16b, vm.16b - results[0] = neonHelp::vecTrn1(operands); + results[0] = vecTrn1(operands); break; } case Opcode::AArch64_TRN1v2i32: { // trn1 vd.2s, vn.2s, vm.2s - results[0] = neonHelp::vecTrn1(operands); + results[0] = vecTrn1(operands); break; } case Opcode::AArch64_TRN1v2i64: { // trn1 vd.2d, vn.2d, vm.2d - results[0] = neonHelp::vecTrn1(operands); + results[0] = vecTrn1(operands); break; } case Opcode::AArch64_TRN1v4i16: { // trn1 vd.4h, vn.4h, vm.4h - results[0] = neonHelp::vecTrn1(operands); + results[0] = vecTrn1(operands); break; } case Opcode::AArch64_TRN1v4i32: { // trn1 vd.4s, vn.4s, vm.4s - results[0] = neonHelp::vecTrn1(operands); + results[0] = vecTrn1(operands); break; } case Opcode::AArch64_TRN1v8i16: { // trn1 vd.8h, vn.8h, vm.8h - results[0] = neonHelp::vecTrn1(operands); + results[0] = vecTrn1(operands); break; } case Opcode::AArch64_TRN1v8i8: { // trn1 vd.8b, vn.8b, vm.8b - results[0] = neonHelp::vecTrn1(operands); + results[0] = vecTrn1(operands); break; } case Opcode::AArch64_TRN2_ZZZ_B: { // trn2 zd.b, zn.b, zm.b - results[0] = sveHelp::sveTrn2_3vecs(operands, VL_bits); + results[0] = sveTrn2_3vecs(operands, VL_bits); break; } case Opcode::AArch64_TRN2_ZZZ_D: { // trn2 zd.d, zn.d, zm.d - results[0] = sveHelp::sveTrn2_3vecs(operands, VL_bits); + results[0] = sveTrn2_3vecs(operands, VL_bits); break; } case Opcode::AArch64_TRN2_ZZZ_H: { // trn2 zd.h, zn.h, zm.h - results[0] = 
sveHelp::sveTrn2_3vecs(operands, VL_bits); + results[0] = sveTrn2_3vecs(operands, VL_bits); break; } case Opcode::AArch64_TRN2_ZZZ_S: { // trn2 zd.s, zn.s, zm.s - results[0] = sveHelp::sveTrn2_3vecs(operands, VL_bits); + results[0] = sveTrn2_3vecs(operands, VL_bits); break; } case Opcode::AArch64_TRN2v16i8: { // trn2 vd.16b, vn.16b, vm.16b - results[0] = neonHelp::vecTrn2(operands); + results[0] = vecTrn2(operands); break; } case Opcode::AArch64_TRN2v2i32: { // trn2 vd.2s, vn.2s, vm.2s - results[0] = neonHelp::vecTrn2(operands); + results[0] = vecTrn2(operands); break; } case Opcode::AArch64_TRN2v2i64: { // trn2 vd.2d, vn.2d, vm.2d - results[0] = neonHelp::vecTrn2(operands); + results[0] = vecTrn2(operands); break; } case Opcode::AArch64_TRN2v4i16: { // trn2 vd.4h, vn.4h, vm.4h - results[0] = neonHelp::vecTrn2(operands); + results[0] = vecTrn2(operands); break; } case Opcode::AArch64_TRN2v4i32: { // trn2 vd.4s, vn.4s, vm.4s - results[0] = neonHelp::vecTrn2(operands); + results[0] = vecTrn2(operands); break; } case Opcode::AArch64_TRN2v8i16: { // trn2 vd.8h, vn.8h, vm.8h - results[0] = neonHelp::vecTrn2(operands); + results[0] = vecTrn2(operands); break; } case Opcode::AArch64_TRN2v8i8: { // trn2 vd.8b, vn.8b, vm.8b - results[0] = neonHelp::vecTrn2(operands); + results[0] = vecTrn2(operands); break; } case Opcode::AArch64_UADDV_VPZ_B: { // uaddv dd, pg, zn.b - results[0] = sveHelp::sveAddvPredicated(operands, VL_bits); + results[0] = sveAddvPredicated(operands, VL_bits); break; } case Opcode::AArch64_UADDV_VPZ_D: { // uaddv dd, pg, zn.d - results[0] = sveHelp::sveAddvPredicated(operands, VL_bits); + results[0] = sveAddvPredicated(operands, VL_bits); break; } case Opcode::AArch64_UADDV_VPZ_H: { // uaddv dd, pg, zn.h - results[0] = sveHelp::sveAddvPredicated(operands, VL_bits); + results[0] = sveAddvPredicated(operands, VL_bits); break; } case Opcode::AArch64_UADDV_VPZ_S: { // uaddv dd, pg, zn.s - results[0] = sveHelp::sveAddvPredicated(operands, VL_bits); + 
results[0] = sveAddvPredicated(operands, VL_bits); break; } case Opcode::AArch64_UBFMWri: { // ubfm wd, wn, #immr, #imms - results[0] = { - bitmanipHelp::bfm_2imms(operands, metadata, false, true), - 8}; + results[0] = {bfm_2imms(operands, metadata, false, true), 8}; break; } case Opcode::AArch64_UBFMXri: { // ubfm xd, xn, #immr, #imms - results[0] = - bitmanipHelp::bfm_2imms(operands, metadata, false, true); + results[0] = bfm_2imms(operands, metadata, false, true); break; } case Opcode::AArch64_UCVTFUWDri: { // ucvtf dd, wn @@ -5470,23 +5343,23 @@ void Instruction::execute() { break; } case Opcode::AArch64_UDIVWr: { // udiv wd, wn, wm - results[0] = {divideHelp::div_3ops(operands), 8}; + results[0] = {div_3ops(operands), 8}; break; } case Opcode::AArch64_UDIVXr: { // udiv xd, xn, xm - results[0] = {divideHelp::div_3ops(operands), 8}; + results[0] = {div_3ops(operands), 8}; break; } case Opcode::AArch64_UMADDLrrr: { // umaddl xd, wn, wm, xa - results[0] = multiplyHelp::maddl_4ops(operands); + results[0] = maddl_4ops(operands); break; } case Opcode::AArch64_UMAXPv16i8: { // umaxp vd.16b, vn.16b, vm.16b - results[0] = neonHelp::vecUMaxP(operands); + results[0] = vecUMaxP(operands); break; } case Opcode::AArch64_UMINPv16i8: { // uminp vd.16b, vn.16b, vm.16b - results[0] = neonHelp::vecUMinP(operands); + results[0] = vecUMinP(operands); break; } case Opcode::AArch64_UMOVvi32_idx0: // umov wd, vn.s[0] @@ -5508,224 +5381,214 @@ void Instruction::execute() { break; } case Opcode::AArch64_UMSUBLrrr: { // umsubl xd, wn, wm, xa - results[0] = arithmeticHelp::msubl_4ops(operands); + results[0] = msubl_4ops(operands); break; } case Opcode::AArch64_UMULHrr: { // umulh xd, xn, xm - results[0] = AuxFunc::mulhi(operands[0].get(), - operands[1].get()); + results[0] = + mulhi(operands[0].get(), operands[1].get()); break; } case Opcode::AArch64_UQDECD_WPiI: { // uqdecd wd{, pattern{, MUL #imm}} - results[0] = - sveHelp::sveUqdec(operands, metadata, VL_bits); + results[0] = 
sveUqdec(operands, metadata, VL_bits); break; } case Opcode::AArch64_UQDECD_XPiI: { // uqdecd xd{, pattern{, MUL #imm}} - results[0] = - sveHelp::sveUqdec(operands, metadata, VL_bits); + results[0] = sveUqdec(operands, metadata, VL_bits); break; } case Opcode::AArch64_UQDECH_XPiI: { // uqdech xd{, pattern{, MUL #imm}} - results[0] = - sveHelp::sveUqdec(operands, metadata, VL_bits); + results[0] = sveUqdec(operands, metadata, VL_bits); break; } case Opcode::AArch64_UQDECW_XPiI: { // uqdecw xd{, pattern{, MUL #imm}} - results[0] = - sveHelp::sveUqdec(operands, metadata, VL_bits); + results[0] = sveUqdec(operands, metadata, VL_bits); break; } case Opcode::AArch64_USHLLv16i8_shift: { // ushll2 vd.8h, vn.16b, #imm - results[0] = neonHelp::vecShllShift_vecImm( - operands, metadata, true); + results[0] = + vecShllShift_vecImm(operands, metadata, true); break; } case Opcode::AArch64_USHLLv4i16_shift: { // ushll vd.4s, vn.4h, #imm - results[0] = neonHelp::vecShllShift_vecImm( + results[0] = vecShllShift_vecImm( operands, metadata, false); break; } case Opcode::AArch64_USHLLv8i16_shift: { // ushll2 vd.4s, vn.8h, #imm - results[0] = neonHelp::vecShllShift_vecImm( - operands, metadata, true); + results[0] = vecShllShift_vecImm(operands, + metadata, true); break; } case Opcode::AArch64_USHLLv8i8_shift: { // ushll vd.8h, vn.8b, #imm - results[0] = neonHelp::vecShllShift_vecImm( - operands, metadata, false); + results[0] = vecShllShift_vecImm(operands, + metadata, false); break; } case Opcode::AArch64_UUNPKHI_ZZ_D: { // uunpkhi zd.d, zn.s - results[0] = - sveHelp::sveUnpk_vecs(operands, VL_bits, true); + results[0] = sveUnpk_vecs(operands, VL_bits, true); break; } case Opcode::AArch64_UUNPKHI_ZZ_H: { // uunpkhi zd.h, zn.b - results[0] = - sveHelp::sveUnpk_vecs(operands, VL_bits, true); + results[0] = sveUnpk_vecs(operands, VL_bits, true); break; } case Opcode::AArch64_UUNPKHI_ZZ_S: { // uunpkhi zd.s, zn.h - results[0] = - sveHelp::sveUnpk_vecs(operands, VL_bits, true); + 
results[0] = sveUnpk_vecs(operands, VL_bits, true); break; } case Opcode::AArch64_UUNPKLO_ZZ_D: { // uunpklo zd.d, zn.s - results[0] = - sveHelp::sveUnpk_vecs(operands, VL_bits, false); + results[0] = sveUnpk_vecs(operands, VL_bits, false); break; } case Opcode::AArch64_UUNPKLO_ZZ_H: { // uunpklo zd.h, zn.b - results[0] = - sveHelp::sveUnpk_vecs(operands, VL_bits, false); + results[0] = sveUnpk_vecs(operands, VL_bits, false); break; } case Opcode::AArch64_UUNPKLO_ZZ_S: { // uunpklo zd.s, zn.h - results[0] = - sveHelp::sveUnpk_vecs(operands, VL_bits, false); + results[0] = sveUnpk_vecs(operands, VL_bits, false); break; } case Opcode::AArch64_UZP1_ZZZ_S: { // uzp1 zd.s, zn.s, zm.s - results[0] = sveHelp::sveUzp_vecs(operands, VL_bits, true); + results[0] = sveUzp_vecs(operands, VL_bits, true); break; } case Opcode::AArch64_UZP1v16i8: { // uzp1 vd.16b, vn.16b, vm.16b - results[0] = neonHelp::vecUzp(operands, true); + results[0] = vecUzp(operands, true); break; } case Opcode::AArch64_UZP1v2i32: { // uzp1 vd.2s, vn.2s, vm.2s - results[0] = neonHelp::vecUzp(operands, true); + results[0] = vecUzp(operands, true); break; } case Opcode::AArch64_UZP1v2i64: { // uzp1 vd.2d, vn.2d, vm.2d - results[0] = neonHelp::vecUzp(operands, true); + results[0] = vecUzp(operands, true); break; } case Opcode::AArch64_UZP1v4i16: { // uzp1 vd.4h, vn.4h, vm.4h - results[0] = neonHelp::vecUzp(operands, true); + results[0] = vecUzp(operands, true); break; } case Opcode::AArch64_UZP1v4i32: { // uzp1 vd.4s, vn.4s, vm.4s - results[0] = neonHelp::vecUzp(operands, true); + results[0] = vecUzp(operands, true); break; } case Opcode::AArch64_UZP1v8i16: { // uzp1 vd.8h, vn.8h, vm.8h - results[0] = neonHelp::vecUzp(operands, true); + results[0] = vecUzp(operands, true); break; } case Opcode::AArch64_UZP1v8i8: { // uzp1 vd.8b, vn.8b, vm.8b - results[0] = neonHelp::vecUzp(operands, true); + results[0] = vecUzp(operands, true); break; } case Opcode::AArch64_UZP2v16i8: { // uzp2 vd.16b, vn.16b, vm.16b - 
results[0] = neonHelp::vecUzp(operands, false); + results[0] = vecUzp(operands, false); break; } case Opcode::AArch64_UZP2v2i32: { // uzp2 vd.2s, vn.2s, vm.2s - results[0] = neonHelp::vecUzp(operands, false); + results[0] = vecUzp(operands, false); break; } case Opcode::AArch64_UZP2v2i64: { // uzp2 vd.2d, vn.2d, vm.2d - results[0] = neonHelp::vecUzp(operands, false); + results[0] = vecUzp(operands, false); break; } case Opcode::AArch64_UZP2v4i16: { // uzp2 vd.4h, vn.4h, vm.4h - results[0] = neonHelp::vecUzp(operands, false); + results[0] = vecUzp(operands, false); break; } case Opcode::AArch64_UZP2v4i32: { // uzp2 vd.4s, vn.4s, vm.4s - results[0] = neonHelp::vecUzp(operands, false); + results[0] = vecUzp(operands, false); break; } case Opcode::AArch64_UZP2v8i16: { // uzp2 vd.8h, vn.8h, vm.8h - results[0] = neonHelp::vecUzp(operands, false); + results[0] = vecUzp(operands, false); break; } case Opcode::AArch64_UZP2v8i8: { // uzp2 vd.8b, vn.8b, vm.8b - results[0] = neonHelp::vecUzp(operands, false); + results[0] = vecUzp(operands, false); break; } case Opcode::AArch64_WHILELO_PWW_B: { // whilelo pd.b, wn, wm auto [output, nzcv] = - sveHelp::sveWhilelo(operands, VL_bits, true); + sveWhilelo(operands, VL_bits, true); results[0] = nzcv; results[1] = output; break; } case Opcode::AArch64_WHILELO_PWW_D: { // whilelo pd.d, wn, wm auto [output, nzcv] = - sveHelp::sveWhilelo(operands, VL_bits, true); + sveWhilelo(operands, VL_bits, true); results[0] = nzcv; results[1] = output; break; } case Opcode::AArch64_WHILELO_PWW_H: { // whilelo pd.h, wn, wm auto [output, nzcv] = - sveHelp::sveWhilelo(operands, VL_bits, true); + sveWhilelo(operands, VL_bits, true); results[0] = nzcv; results[1] = output; break; } case Opcode::AArch64_WHILELO_PWW_S: { // whilelo pd.s, wn, wm auto [output, nzcv] = - sveHelp::sveWhilelo(operands, VL_bits, true); + sveWhilelo(operands, VL_bits, true); results[0] = nzcv; results[1] = output; break; } case Opcode::AArch64_WHILELO_PXX_B: { // whilelo pd.b, 
xn, xm auto [output, nzcv] = - sveHelp::sveWhilelo(operands, VL_bits, true); + sveWhilelo(operands, VL_bits, true); results[0] = nzcv; results[1] = output; break; } case Opcode::AArch64_WHILELO_PXX_D: { // whilelo pd.d, xn, xm auto [output, nzcv] = - sveHelp::sveWhilelo(operands, VL_bits, true); + sveWhilelo(operands, VL_bits, true); results[0] = nzcv; results[1] = output; break; } case Opcode::AArch64_WHILELO_PXX_H: { // whilelo pd.h, xn, xm auto [output, nzcv] = - sveHelp::sveWhilelo(operands, VL_bits, true); + sveWhilelo(operands, VL_bits, true); results[0] = nzcv; results[1] = output; break; } case Opcode::AArch64_WHILELO_PXX_S: { // whilelo pd.s, xn, xm auto [output, nzcv] = - sveHelp::sveWhilelo(operands, VL_bits, true); + sveWhilelo(operands, VL_bits, true); results[0] = nzcv; results[1] = output; break; } case Opcode::AArch64_WHILELT_PXX_B: { // whilelt pd.b, xn, xm auto [output, nzcv] = - sveHelp::sveWhilelo(operands, VL_bits, true); + sveWhilelo(operands, VL_bits, true); results[0] = nzcv; results[1] = output; break; } case Opcode::AArch64_WHILELT_PXX_D: { // whilelt pd.d, xn, xm auto [output, nzcv] = - sveHelp::sveWhilelo(operands, VL_bits, true); + sveWhilelo(operands, VL_bits, true); results[0] = nzcv; results[1] = output; break; } case Opcode::AArch64_WHILELT_PXX_H: { // whilelt pd.h, xn, xm auto [output, nzcv] = - sveHelp::sveWhilelo(operands, VL_bits, true); + sveWhilelo(operands, VL_bits, true); results[0] = nzcv; results[1] = output; break; } case Opcode::AArch64_WHILELT_PXX_S: { // whilelt pd.s, xn, xm auto [output, nzcv] = - sveHelp::sveWhilelo(operands, VL_bits, true); + sveWhilelo(operands, VL_bits, true); results[0] = nzcv; results[1] = output; break; @@ -5735,119 +5598,119 @@ void Instruction::execute() { break; } case Opcode::AArch64_XTNv2i32: { // xtn vd.2s, vn.2d - results[0] = neonHelp::vecXtn(operands, false); + results[0] = vecXtn(operands, false); break; } case Opcode::AArch64_XTNv4i16: { // xtn vd.4h, vn.4s - results[0] = 
neonHelp::vecXtn(operands, false); + results[0] = vecXtn(operands, false); break; } case Opcode::AArch64_XTNv4i32: { // xtn2 vd.4s, vn.2d - results[0] = neonHelp::vecXtn(operands, true); + results[0] = vecXtn(operands, true); break; } case Opcode::AArch64_ZIP1_PPP_B: { // zip1 pd.b, pn.b, pm.b - results[0] = sveHelp::sveZip_preds(operands, VL_bits, false); + results[0] = sveZip_preds(operands, VL_bits, false); break; } case Opcode::AArch64_ZIP1_PPP_D: { // zip1 pd.d, pn.d, pm.d - results[0] = sveHelp::sveZip_preds(operands, VL_bits, false); + results[0] = sveZip_preds(operands, VL_bits, false); break; } case Opcode::AArch64_ZIP1_PPP_H: { // zip1 pd.h, pn.h, pm.h - results[0] = sveHelp::sveZip_preds(operands, VL_bits, false); + results[0] = sveZip_preds(operands, VL_bits, false); break; } case Opcode::AArch64_ZIP1_PPP_S: { // zip1 pd.s, pn.s, pm.s - results[0] = sveHelp::sveZip_preds(operands, VL_bits, false); + results[0] = sveZip_preds(operands, VL_bits, false); break; } case Opcode::AArch64_ZIP1_ZZZ_D: { // zip1 zd.d, zn.d, zm.d - results[0] = sveHelp::sveZip_vecs(operands, VL_bits, false); + results[0] = sveZip_vecs(operands, VL_bits, false); break; } case Opcode::AArch64_ZIP1_ZZZ_S: { // zip1 zd.s, zn.s, zm.s - results[0] = sveHelp::sveZip_vecs(operands, VL_bits, false); + results[0] = sveZip_vecs(operands, VL_bits, false); break; } case Opcode::AArch64_ZIP1v16i8: { // zip1 vd.16b, vn.16b, vm.16b - results[0] = neonHelp::vecZip(operands, false); + results[0] = vecZip(operands, false); break; } case Opcode::AArch64_ZIP1v2i32: { // zip1 vd.2s, vn.2s, vm.2s - results[0] = neonHelp::vecZip(operands, false); + results[0] = vecZip(operands, false); break; } case Opcode::AArch64_ZIP1v2i64: { // zip1 vd.2d, vn.2d, vm.2d - results[0] = neonHelp::vecZip(operands, false); + results[0] = vecZip(operands, false); break; } case Opcode::AArch64_ZIP1v4i16: { // zip1 vd.4h, vn.4h, vm.4h - results[0] = neonHelp::vecZip(operands, false); + results[0] = vecZip(operands, false); 
break; } case Opcode::AArch64_ZIP1v4i32: { // zip1 vd.4s, vn.4s, vm.4s - results[0] = neonHelp::vecZip(operands, false); + results[0] = vecZip(operands, false); break; } case Opcode::AArch64_ZIP1v8i16: { // zip1 vd.8h, vn.8h, vm.8h - results[0] = neonHelp::vecZip(operands, false); + results[0] = vecZip(operands, false); break; } case Opcode::AArch64_ZIP1v8i8: { // zip1 vd.8b, vn.8b, vm.8b - results[0] = neonHelp::vecZip(operands, false); + results[0] = vecZip(operands, false); break; } case Opcode::AArch64_ZIP2_PPP_B: { // zip2 pd.b, pn.b, pm.b - results[0] = sveHelp::sveZip_preds(operands, VL_bits, true); + results[0] = sveZip_preds(operands, VL_bits, true); break; } case Opcode::AArch64_ZIP2_PPP_D: { // zip2 pd.d, pn.d, pm.d - results[0] = sveHelp::sveZip_preds(operands, VL_bits, true); + results[0] = sveZip_preds(operands, VL_bits, true); break; } case Opcode::AArch64_ZIP2_PPP_H: { // zip2 pd.h, pn.h, pm.h - results[0] = sveHelp::sveZip_preds(operands, VL_bits, true); + results[0] = sveZip_preds(operands, VL_bits, true); break; } case Opcode::AArch64_ZIP2_PPP_S: { // zip2 pd.s, pn.s, pm.s - results[0] = sveHelp::sveZip_preds(operands, VL_bits, true); + results[0] = sveZip_preds(operands, VL_bits, true); break; } case Opcode::AArch64_ZIP2_ZZZ_D: { // zip2 zd.d, zn.d, zm.d - results[0] = sveHelp::sveZip_vecs(operands, VL_bits, true); + results[0] = sveZip_vecs(operands, VL_bits, true); break; } case Opcode::AArch64_ZIP2_ZZZ_S: { // zip2 zd.s, zn.s, zm.s - results[0] = sveHelp::sveZip_vecs(operands, VL_bits, true); + results[0] = sveZip_vecs(operands, VL_bits, true); break; } case Opcode::AArch64_ZIP2v16i8: { // zip2 vd.16b, vn.16b, vm.16b - results[0] = neonHelp::vecZip(operands, true); + results[0] = vecZip(operands, true); break; } case Opcode::AArch64_ZIP2v2i32: { // zip2 vd.2s, vn.2s, vm.2s - results[0] = neonHelp::vecZip(operands, true); + results[0] = vecZip(operands, true); break; } case Opcode::AArch64_ZIP2v2i64: { // zip2 vd.2d, vn.2d, vm.2d - results[0] 
= neonHelp::vecZip(operands, true); + results[0] = vecZip(operands, true); break; } case Opcode::AArch64_ZIP2v4i16: { // zip2 vd.4h, vn.4h, vm.4h - results[0] = neonHelp::vecZip(operands, true); + results[0] = vecZip(operands, true); break; } case Opcode::AArch64_ZIP2v4i32: { // zip2 vd.4s, vn.4s, vm.4s - results[0] = neonHelp::vecZip(operands, true); + results[0] = vecZip(operands, true); break; } case Opcode::AArch64_ZIP2v8i16: { // zip2 vd.8h, vn.8h, vm.8h - results[0] = neonHelp::vecZip(operands, true); + results[0] = vecZip(operands, true); break; } case Opcode::AArch64_ZIP2v8i8: { // zip2 vd.8b, vn.8b, vm.8b - results[0] = neonHelp::vecZip(operands, true); + results[0] = vecZip(operands, true); break; } case Opcode::AArch64_ZERO_M: { // zero {mask} diff --git a/src/lib/arch/riscv/Architecture.cc b/src/lib/arch/riscv/Architecture.cc index 8765e3dee8..b641f8fbaa 100644 --- a/src/lib/arch/riscv/Architecture.cc +++ b/src/lib/arch/riscv/Architecture.cc @@ -235,7 +235,7 @@ int32_t Architecture::getSystemRegisterTag(uint16_t reg) const { // Check below is done for speculative instructions that may be passed into // the function but will not be executed. If such invalid speculative // instructions get through they can cause an out-of-range error. 
- if (!systemRegisterMap_.count(reg)) return 0; + if (!systemRegisterMap_.count(reg)) return -1; return systemRegisterMap_.at(reg); } diff --git a/src/lib/arch/riscv/ExceptionHandler.cc b/src/lib/arch/riscv/ExceptionHandler.cc index 07b1e5aac3..b6ebcbb924 100644 --- a/src/lib/arch/riscv/ExceptionHandler.cc +++ b/src/lib/arch/riscv/ExceptionHandler.cc @@ -874,7 +874,7 @@ void ExceptionHandler::printException(const Instruction& insn) const { std::cout << "execution not-yet-implemented"; break; case InstructionException::AliasNotYetImplemented: - std::cout << "alias not-yet-implemented" << std::endl; + std::cout << "alias not-yet-implemented"; break; case InstructionException::MisalignedPC: std::cout << "misaligned program counter"; diff --git a/src/lib/arch/riscv/Instruction.cc b/src/lib/arch/riscv/Instruction.cc index 1af879f577..2c2eaf78cb 100644 --- a/src/lib/arch/riscv/Instruction.cc +++ b/src/lib/arch/riscv/Instruction.cc @@ -10,8 +10,6 @@ namespace simeng { namespace arch { namespace riscv { -const Register Instruction::ZERO_REGISTER = {RegisterType::GENERAL, 0}; - Instruction::Instruction(const Architecture& architecture, const InstructionMetadata& metadata) : architecture_(architecture), @@ -36,7 +34,7 @@ const span Instruction::getSourceRegisters() const { } const span Instruction::getSourceOperands() const { - return {const_cast(operands.data()), operands.size()}; + return {const_cast(operands.data()), sourceRegisterCount}; } const span Instruction::getDestinationRegisters() const { diff --git a/src/lib/arch/riscv/Instruction_decode.cc b/src/lib/arch/riscv/Instruction_decode.cc index 18c024eded..d997ff5b7d 100644 --- a/src/lib/arch/riscv/Instruction_decode.cc +++ b/src/lib/arch/riscv/Instruction_decode.cc @@ -52,7 +52,7 @@ Register csRegToRegister(unsigned int reg) { if (reg == RISCV_REG_X0) { // Zero register - return Instruction::ZERO_REGISTER; + return RegisterType::ZERO_REGISTER; } assert(false && "Decoding failed due to unknown register identifier"); 
@@ -148,7 +148,7 @@ void Instruction::decode() { sourceRegisters[sourceRegisterCount] = csRegToRegister(op.reg); if (sourceRegisters[sourceRegisterCount] == - Instruction::ZERO_REGISTER) { + RegisterType::ZERO_REGISTER) { // Catch zero register references and pre-complete those operands operands[sourceRegisterCount] = RegisterValue(0, 8); } else { @@ -157,7 +157,7 @@ void Instruction::decode() { sourceRegisterCount++; } else { - if (csRegToRegister(op.reg) != Instruction::ZERO_REGISTER) { + if (csRegToRegister(op.reg) != RegisterType::ZERO_REGISTER) { destinationRegisters[destinationRegisterCount] = csRegToRegister(op.reg); @@ -190,7 +190,7 @@ void Instruction::decode() { // Second or third operand sourceRegisters[sourceRegisterCount] = csRegToRegister(op.reg); - if (sourceRegisters[sourceRegisterCount] == Instruction::ZERO_REGISTER) { + if (sourceRegisters[sourceRegisterCount] == RegisterType::ZERO_REGISTER) { // Catch zero register references and pre-complete those operands operands[sourceRegisterCount] = RegisterValue(0, 8); } else { diff --git a/src/lib/config/ModelConfig.cc b/src/lib/config/ModelConfig.cc index f04dc4677c..697c14c7c4 100644 --- a/src/lib/config/ModelConfig.cc +++ b/src/lib/config/ModelConfig.cc @@ -649,10 +649,14 @@ void ModelConfig::setExpectations(bool isDefault) { expectations_.addChild(ExpectationNode::createExpectation("CPU-Info")); expectations_["CPU-Info"].addChild(ExpectationNode::createExpectation( - false, "Generate-Special-Dir", true)); + true, "Generate-Special-Dir", true)); expectations_["CPU-Info"]["Generate-Special-Dir"].setValueSet( std::vector{false, true}); + expectations_["CPU-Info"].addChild( + ExpectationNode::createExpectation( + defaultSpecialFilePath_, "Special-File-Dir-Path", true)); + expectations_["CPU-Info"].addChild( ExpectationNode::createExpectation(1, "Core-Count", true)); expectations_["CPU-Info"]["Core-Count"].setValueBounds(1, @@ -887,6 +891,18 @@ void ModelConfig::postValidation() { for (const auto& prt : 
portnames) invalid_ << "\t- " << prt << " has no associated reservation station\n"; + // Ensure that given special file directory exists iff auto-generation is + // False + if (!configTree_["CPU-Info"]["Generate-Special-Dir"].as() && + !std::ifstream( + configTree_["CPU-Info"]["Special-File-Dir-Path"].as()) + .good()) { + invalid_ + << "\t- Special File Directory '" + << configTree_["CPU-Info"]["Special-File-Dir-Path"].as() + << "' does not exist\n"; + } + // Ensure the L1-[Data|Instruction]-Memory:Interface-Type restrictions are // enforced std::string simMode = diff --git a/src/lib/models/emulation/Core.cc b/src/lib/models/emulation/Core.cc index a37b8e038a..bfd0c6c6f6 100644 --- a/src/lib/models/emulation/Core.cc +++ b/src/lib/models/emulation/Core.cc @@ -171,8 +171,6 @@ void Core::execute(std::shared_ptr& uop) { } if (uop->isStoreData()) { - auto results = uop->getResults(); - auto destinations = uop->getDestinationRegisters(); auto data = uop->getData(); for (size_t i = 0; i < previousAddresses_.size(); i++) { dataMemory_.requestWrite(previousAddresses_[i], data[i]); diff --git a/src/lib/models/outoforder/Core.cc b/src/lib/models/outoforder/Core.cc index ca4d5ce564..83a9dd9cda 100644 --- a/src/lib/models/outoforder/Core.cc +++ b/src/lib/models/outoforder/Core.cc @@ -168,11 +168,11 @@ void Core::flushIfNeeded() { // Check for flush bool euFlush = false; uint64_t targetAddress = 0; - uint64_t lowestSeqId = 0; + uint64_t lowestInsnId = 0; for (const auto& eu : executionUnits_) { - if (eu.shouldFlush() && (!euFlush || eu.getFlushSeqId() < lowestSeqId)) { + if (eu.shouldFlush() && (!euFlush || eu.getFlushInsnId() < lowestInsnId)) { euFlush = true; - lowestSeqId = eu.getFlushSeqId(); + lowestInsnId = eu.getFlushInsnId(); targetAddress = eu.getFlushAddress(); } } @@ -182,10 +182,10 @@ void Core::flushIfNeeded() { // Rename/Dispatch) if (reorderBuffer_.shouldFlush() && - (!euFlush || reorderBuffer_.getFlushSeqId() < lowestSeqId)) { + (!euFlush || 
reorderBuffer_.getFlushInsnId() < lowestInsnId)) { // If the reorder buffer found an older instruction to flush up to, do // that instead - lowestSeqId = reorderBuffer_.getFlushSeqId(); + lowestInsnId = reorderBuffer_.getFlushInsnId(); targetAddress = reorderBuffer_.getFlushAddress(); } @@ -201,7 +201,7 @@ void Core::flushIfNeeded() { renameToDispatchBuffer_.stall(false); // Flush everything younger than the bad instruction from the ROB - reorderBuffer_.flush(lowestSeqId); + reorderBuffer_.flush(lowestInsnId); decodeUnit_.purgeFlushed(); dispatchIssueUnit_.purgeFlushed(); loadStoreQueue_.purgeFlushed(); diff --git a/src/lib/pipeline/A64FXPortAllocator.cc b/src/lib/pipeline/A64FXPortAllocator.cc index d7cb9626ee..95a55cdb9b 100644 --- a/src/lib/pipeline/A64FXPortAllocator.cc +++ b/src/lib/pipeline/A64FXPortAllocator.cc @@ -145,6 +145,7 @@ uint16_t A64FXPortAllocator::allocate(const std::vector& ports) { } void A64FXPortAllocator::issued(uint16_t port) {} + void A64FXPortAllocator::deallocate(uint16_t port) { issued(port); }; uint8_t A64FXPortAllocator::attributeMapping( diff --git a/src/lib/pipeline/DispatchIssueUnit.cc b/src/lib/pipeline/DispatchIssueUnit.cc index e8019986bf..afd7390215 100644 --- a/src/lib/pipeline/DispatchIssueUnit.cc +++ b/src/lib/pipeline/DispatchIssueUnit.cc @@ -199,10 +199,6 @@ void DispatchIssueUnit::forwardOperands(const span& registers, } } -void DispatchIssueUnit::setRegisterReady(Register reg) { - scoreboard_[reg.type][reg.tag] = true; -} - void DispatchIssueUnit::purgeFlushed() { for (size_t i = 0; i < reservationStations_.size(); i++) { // Search the ready queues for flushed instructions and remove them diff --git a/src/lib/pipeline/ExecuteUnit.cc b/src/lib/pipeline/ExecuteUnit.cc index 7d789b34bd..c87c2e1845 100644 --- a/src/lib/pipeline/ExecuteUnit.cc +++ b/src/lib/pipeline/ExecuteUnit.cc @@ -165,7 +165,7 @@ void ExecuteUnit::execute(std::shared_ptr& uop) { bool ExecuteUnit::shouldFlush() const { return shouldFlush_; } uint64_t 
ExecuteUnit::getFlushAddress() const { return pc_; } -uint64_t ExecuteUnit::getFlushSeqId() const { return flushAfter_; } +uint64_t ExecuteUnit::getFlushInsnId() const { return flushAfter_; } void ExecuteUnit::purgeFlushed() { if (pipeline_.size() == 0) { diff --git a/src/lib/pipeline/RegisterAliasTable.cc b/src/lib/pipeline/RegisterAliasTable.cc index 4aba7852f9..23bcac3980 100644 --- a/src/lib/pipeline/RegisterAliasTable.cc +++ b/src/lib/pipeline/RegisterAliasTable.cc @@ -93,6 +93,7 @@ void RegisterAliasTable::commit(Register physical) { auto oldTag = historyTable_[physical.type][physical.tag]; freeQueues_[physical.type].push(oldTag); } + void RegisterAliasTable::rewind(Register physical) { assert(physical.renamed && "Attempted to rewind a physical register which hasn't been subject to " @@ -105,9 +106,6 @@ void RegisterAliasTable::rewind(Register physical) { // Add the rewound physical tag back to the free queue freeQueues_[physical.type].push(physical.tag); } -void RegisterAliasTable::free(Register physical) { - freeQueues_[physical.type].push(physical.tag); -} } // namespace pipeline } // namespace simeng diff --git a/src/lib/pipeline/ReorderBuffer.cc b/src/lib/pipeline/ReorderBuffer.cc index c2b17909b5..a706bab866 100644 --- a/src/lib/pipeline/ReorderBuffer.cc +++ b/src/lib/pipeline/ReorderBuffer.cc @@ -155,12 +155,12 @@ unsigned int ReorderBuffer::commit(unsigned int maxCommitSize) { return n; } -void ReorderBuffer::flush(uint64_t afterSeqId) { +void ReorderBuffer::flush(uint64_t afterInsnId) { // Iterate backwards from the tail of the queue to find and remove ops newer - // than `afterSeqId` + // than `afterInsnId` while (!buffer_.empty()) { auto& uop = buffer_.back(); - if (uop->getInstructionId() <= afterSeqId) { + if (uop->getInstructionId() <= afterInsnId) { break; } @@ -193,7 +193,7 @@ unsigned int ReorderBuffer::getFreeSpace() const { bool ReorderBuffer::shouldFlush() const { return shouldFlush_; } uint64_t ReorderBuffer::getFlushAddress() const { 
return pc_; } -uint64_t ReorderBuffer::getFlushSeqId() const { return flushAfter_; } +uint64_t ReorderBuffer::getFlushInsnId() const { return flushAfter_; } uint64_t ReorderBuffer::getInstructionsCommittedCount() const { return instructionsCommitted_; diff --git a/src/tools/simeng/main.cc b/src/tools/simeng/main.cc index 9e6bd05eef..32d1a11629 100644 --- a/src/tools/simeng/main.cc +++ b/src/tools/simeng/main.cc @@ -99,6 +99,11 @@ int main(int argc, char** argv) { else std::cout << "False"; std::cout << std::endl; + std::cout << "[SimEng] Special File directory used: " + << simeng::config::SimInfo::getConfig()["CPU-Info"] + ["Special-File-Dir-Path"] + .as() + << std::endl; std::cout << "[SimEng] Number of Cores: " << simeng::config::SimInfo::getConfig()["CPU-Info"]["Core-Count"] .as() diff --git a/sst/SimEngCoreWrapper.cc b/sst/SimEngCoreWrapper.cc index 338af9e002..b33af204e4 100644 --- a/sst/SimEngCoreWrapper.cc +++ b/sst/SimEngCoreWrapper.cc @@ -354,6 +354,11 @@ void SimEngCoreWrapper::fabricateSimEngCore() { else std::cout << "False"; std::cout << std::endl; + std::cout << "[SimEng] Special File directory used: " + << simeng::config::SimInfo::getConfig()["CPU-Info"] + ["Special-File-Dir-Path"] + .as() + << std::endl; std::cout << "[SimEng] Number of Cores: " << simeng::config::SimInfo::getConfig()["CPU-Info"]["Core-Count"] .as() diff --git a/test/integration/ConfigTest.cc b/test/integration/ConfigTest.cc index f121c0fb07..ed0f4124de 100644 --- a/test/integration/ConfigTest.cc +++ b/test/integration/ConfigTest.cc @@ -3,6 +3,7 @@ #include "gtest/gtest.h" #include "simeng/config/SimInfo.hh" +#include "simeng/version.hh" namespace { @@ -11,6 +12,7 @@ TEST(ConfigTest, Default) { // Test key default values exposed in SimInfo EXPECT_EQ(simeng::config::SimInfo::getConfigPath(), "Default"); EXPECT_EQ(simeng::config::SimInfo::getISA(), simeng::config::ISA::AArch64); + EXPECT_EQ(simeng::config::SimInfo::getISAString(), "AArch64"); 
EXPECT_EQ(simeng::config::SimInfo::getSimMode(), simeng::config::SimulationMode::Emulation); EXPECT_EQ(simeng::config::SimInfo::getSimModeStr(), "Emulation"); @@ -66,7 +68,8 @@ TEST(ConfigTest, Default) { " 'Instruction-Groups':\n - NONE\n 'Instruction-Opcodes':\n " " - 6343\n 'Execution-Latency': 1\n 'Execution-Throughput': 1\n " " 'Instruction-Group-Nums':\n - 87\n'CPU-Info':\n " - "'Generate-Special-Dir': 0\n 'Core-Count': 1\n 'Socket-Count': 1\n " + "'Generate-Special-Dir': 1\n 'Special-File-Dir-Path': " SIMENG_BUILD_DIR + "/specialFiles/\n 'Core-Count': 1\n 'Socket-Count': 1\n " "SMT: 1\n BogoMIPS: 0\n Features: ''\n 'CPU-Implementer': 0x0\n " "'CPU-Architecture': 0\n 'CPU-Variant': 0x0\n 'CPU-Part': 0x0\n " "'CPU-Revision': 0\n 'Package-Count': 1\n"; @@ -77,6 +80,7 @@ TEST(ConfigTest, Default) { // Test SimInfo exposed have correctly changed EXPECT_EQ(simeng::config::SimInfo::getISA(), simeng::config::ISA::RV64); + EXPECT_EQ(simeng::config::SimInfo::getISAString(), "rv64"); sysRegisterEnums = {simeng::arch::riscv::riscv_sysreg::RISCV_SYSREG_FFLAGS, simeng::arch::riscv::riscv_sysreg::RISCV_SYSREG_FRM, simeng::arch::riscv::riscv_sysreg::RISCV_SYSREG_FCSR, @@ -119,7 +123,8 @@ TEST(ConfigTest, Default) { " 'Instruction-Groups':\n - NONE\n 'Instruction-Opcodes':\n " " - 450\n 'Execution-Latency': 1\n 'Execution-Throughput': 1\n " "'Instruction-Group-Nums':\n - 24\n'CPU-Info':\n " - "'Generate-Special-Dir': 0\n 'Core-Count': 1\n 'Socket-Count': 1\n " + "'Generate-Special-Dir': 1\n 'Special-File-Dir-Path': " SIMENG_BUILD_DIR + "/specialFiles/\n 'Core-Count': 1\n 'Socket-Count': 1\n " "SMT: 1\n BogoMIPS: 0\n Features: ''\n 'CPU-Implementer': 0x0\n " "'CPU-Architecture': 0\n 'CPU-Variant': 0x0\n 'CPU-Part': 0x0\n " "'CPU-Revision': 0\n 'Package-Count': 1\n"; @@ -359,4 +364,47 @@ TEST(ConfigTest, multipleWildNodes) { "instance of key HEAD"); } +// Test that, using a file path, a config can be set from a yaml file +TEST(ConfigTest, configFromFile) { + std::string 
filePath = SIMENG_SOURCE_DIR "/configs/a64fx.yaml"; + simeng::config::SimInfo::setConfig(filePath); + EXPECT_EQ(simeng::config::SimInfo::getConfigPath(), filePath); + EXPECT_EQ(simeng::config::SimInfo::getISA(), simeng::config::ISA::AArch64); + EXPECT_EQ(simeng::config::SimInfo::getISAString(), "AArch64"); + EXPECT_EQ(simeng::config::SimInfo::getSimMode(), + simeng::config::SimulationMode::Outoforder); + EXPECT_EQ(simeng::config::SimInfo::getSimModeStr(), "Out-of-Order"); + std::vector sysRegisterEnums = { + arm64_sysreg::ARM64_SYSREG_DCZID_EL0, + arm64_sysreg::ARM64_SYSREG_FPCR, + arm64_sysreg::ARM64_SYSREG_FPSR, + arm64_sysreg::ARM64_SYSREG_TPIDR_EL0, + arm64_sysreg::ARM64_SYSREG_MIDR_EL1, + arm64_sysreg::ARM64_SYSREG_CNTVCT_EL0, + arm64_sysreg::ARM64_SYSREG_PMCCNTR_EL0, + arm64_sysreg::ARM64_SYSREG_SVCR}; + EXPECT_EQ(simeng::config::SimInfo::getSysRegVec(), sysRegisterEnums); + std::vector archRegStruct = { + {8, 32}, + {256, 32}, + {32, 17}, + {1, 1}, + {8, static_cast(sysRegisterEnums.size())}, + {256, 16}}; + EXPECT_EQ(simeng::config::SimInfo::getArchRegStruct(), archRegStruct); + std::vector physRegStruct = { + {8, 96}, + {256, 128}, + {32, 48}, + {1, 128}, + {8, static_cast(sysRegisterEnums.size())}, + {256, 16}}; + EXPECT_EQ(simeng::config::SimInfo::getPhysRegStruct(), physRegStruct); + std::vector physRegQuants = { + 96, 128, 48, 128, static_cast(sysRegisterEnums.size()), 16}; + EXPECT_EQ(simeng::config::SimInfo::getPhysRegQuantities(), physRegQuants); +} +// getPhysRegStruct() +// getPhysRegQuantities() + } // namespace diff --git a/test/regression/RegressionTest.cc b/test/regression/RegressionTest.cc index 37655d01e3..5b43ac6cd0 100644 --- a/test/regression/RegressionTest.cc +++ b/test/regression/RegressionTest.cc @@ -75,7 +75,9 @@ void RegressionTest::run(const char* source, const char* triple, std::unique_ptr dataMemory; // Create the OS kernel and the process - simeng::kernel::Linux kernel; + simeng::kernel::Linux kernel( + 
simeng::config::SimInfo::getConfig()["CPU-Info"]["Special-File-Dir-Path"] + .as()); kernel.createProcess(*process_); // Populate the heap with initial data (specified by the test being run). diff --git a/test/regression/aarch64/instructions/float.cc b/test/regression/aarch64/instructions/float.cc index 10d3150e1c..23c174407f 100644 --- a/test/regression/aarch64/instructions/float.cc +++ b/test/regression/aarch64/instructions/float.cc @@ -533,12 +533,18 @@ TEST_P(InstFloat, fcvt) { } TEST_P(InstFloat, fcvtzu) { - initialHeapData_.resize(32); + initialHeapData_.resize(80); double* dheap = reinterpret_cast(initialHeapData_.data()); dheap[0] = 1.0; dheap[1] = -42.76; dheap[2] = -0.125; dheap[3] = 321.5; + dheap[4] = std::nan(0); + dheap[5] = -std::nan(0); + dheap[6] = INFINITY; + dheap[7] = -INFINITY; + dheap[8] = 4294967296.0; // uint32_max + 1 + dheap[9] = 18446744073709551616.0; // uint64_max + 1 // Double to uint32 RUN_AARCH64(R"( @@ -549,15 +555,30 @@ TEST_P(InstFloat, fcvtzu) { ldp d0, d1, [x0] ldp d2, d3, [x0, #16] + ldp d4, d5, [x0, #32] + ldp d6, d7, [x0, #48] + ldp d8, d9, [x0, #64] fcvtzu w0, d0 fcvtzu w1, d1 fcvtzu w2, d2 fcvtzu w3, d3 + fcvtzu w4, d4 + fcvtzu w5, d5 + fcvtzu w6, d6 + fcvtzu w7, d7 + fcvtzu w8, d8 + fcvtzu w9, d9 )"); EXPECT_EQ((getGeneralRegister(0)), 1); - EXPECT_EQ((getGeneralRegister(1)), -42); + EXPECT_EQ((getGeneralRegister(1)), 0); EXPECT_EQ((getGeneralRegister(2)), 0); EXPECT_EQ((getGeneralRegister(3)), 321); + EXPECT_EQ((getGeneralRegister(4)), 0); + EXPECT_EQ((getGeneralRegister(5)), 0); + EXPECT_EQ((getGeneralRegister(6)), UINT32_MAX); + EXPECT_EQ((getGeneralRegister(7)), 0); + EXPECT_EQ((getGeneralRegister(8)), UINT32_MAX); + EXPECT_EQ((getGeneralRegister(9)), UINT32_MAX); // Double to uint64 RUN_AARCH64(R"( @@ -568,17 +589,32 @@ TEST_P(InstFloat, fcvtzu) { ldp d0, d1, [x0] ldp d2, d3, [x0, #16] + ldp d4, d5, [x0, #32] + ldp d6, d7, [x0, #48] + ldp d8, d9, [x0, #64] fcvtzu x0, d0 fcvtzu x1, d1 fcvtzu x2, d2 fcvtzu x3, d3 + 
fcvtzu x4, d4 + fcvtzu x5, d5 + fcvtzu x6, d6 + fcvtzu x7, d7 + fcvtzu x8, d8 + fcvtzu x9, d9 )"); EXPECT_EQ((getGeneralRegister(0)), 1); - EXPECT_EQ((getGeneralRegister(1)), -42); + EXPECT_EQ((getGeneralRegister(1)), 0); EXPECT_EQ((getGeneralRegister(2)), 0); EXPECT_EQ((getGeneralRegister(3)), 321); + EXPECT_EQ((getGeneralRegister(4)), 0); + EXPECT_EQ((getGeneralRegister(5)), 0); + EXPECT_EQ((getGeneralRegister(6)), UINT64_MAX); + EXPECT_EQ((getGeneralRegister(7)), 0); + EXPECT_EQ((getGeneralRegister(8)), 4294967296); + EXPECT_EQ((getGeneralRegister(9)), UINT64_MAX); - // Double to implicit_cast(uint64) + // Double to scalar uint64 RUN_AARCH64(R"( # Get heap address mov x0, 0 @@ -587,25 +623,44 @@ TEST_P(InstFloat, fcvtzu) { ldp d0, d1, [x0] ldp d2, d3, [x0, #16] + ldp d4, d5, [x0, #32] + ldp d6, d7, [x0, #48] + ldp d8, d9, [x0, #64] fcvtzu d10, d0 fcvtzu d11, d1 fcvtzu d12, d2 fcvtzu d13, d3 + fcvtzu d14, d4 + fcvtzu d15, d5 + fcvtzu d16, d6 + fcvtzu d17, d7 + fcvtzu d18, d8 + fcvtzu d19, d9 )"); // Values verified on A64FX via simple assembly test kernel - double a = 4.9406564584124654e-324; - double b = 0.0; - double c = 1.5859507231504014e-321; - CHECK_NEON(10, double, {a, 0.0}); - CHECK_NEON(11, double, {b, 0.0}); - CHECK_NEON(12, double, {b, 0.0}); - CHECK_NEON(13, double, {c, 0.0}); + CHECK_NEON(10, uint64_t, {1, 0}); + CHECK_NEON(11, uint64_t, {0, 0}); + CHECK_NEON(12, uint64_t, {0, 0}); + CHECK_NEON(13, uint64_t, {321, 0}); + CHECK_NEON(14, uint64_t, {0, 0}); + CHECK_NEON(15, uint64_t, {0, 0}); + CHECK_NEON(16, uint64_t, {UINT64_MAX, 0}); + CHECK_NEON(17, uint64_t, {0, 0}); + CHECK_NEON(18, uint64_t, {4294967296, 0}); + CHECK_NEON(19, uint64_t, {UINT64_MAX, 0}); float* fheap = reinterpret_cast(initialHeapData_.data()); - fheap[0] = 1.0; - fheap[1] = -42.76; - fheap[2] = -0.125; - fheap[3] = 321.5; + fheap[0] = 1.0f; + fheap[1] = -42.76f; + fheap[2] = -0.125f; + fheap[3] = 321.5f; + fheap[4] = std::nanf(0); + fheap[5] = -std::nanf(0); + fheap[6] = 
INFINITY; + fheap[7] = -INFINITY; + fheap[8] = 4294967296.0; // uint32_max + 1 + fheap[9] = 18446744073709551616.0; // uint64_max + 1 + // Float to uint32 RUN_AARCH64(R"( # Get heap address @@ -615,15 +670,30 @@ TEST_P(InstFloat, fcvtzu) { ldp s0, s1, [x0] ldp s2, s3, [x0, #8] + ldp s4, s5, [x0, #16] + ldp s6, s7, [x0, #24] + ldp s8, s9, [x0, #32] fcvtzu w0, s0 fcvtzu w1, s1 fcvtzu w2, s2 fcvtzu w3, s3 + fcvtzu w4, s4 + fcvtzu w5, s5 + fcvtzu w6, s6 + fcvtzu w7, s7 + fcvtzu w8, s8 + fcvtzu w9, s9 )"); EXPECT_EQ((getGeneralRegister(0)), 1); - EXPECT_EQ((getGeneralRegister(1)), -42); + EXPECT_EQ((getGeneralRegister(1)), 0); EXPECT_EQ((getGeneralRegister(2)), 0); EXPECT_EQ((getGeneralRegister(3)), 321); + EXPECT_EQ((getGeneralRegister(4)), 0); + EXPECT_EQ((getGeneralRegister(5)), 0); + EXPECT_EQ((getGeneralRegister(6)), UINT32_MAX); + EXPECT_EQ((getGeneralRegister(7)), 0); + EXPECT_EQ((getGeneralRegister(8)), UINT32_MAX); + EXPECT_EQ((getGeneralRegister(9)), UINT32_MAX); // Float to uint64 RUN_AARCH64(R"( @@ -634,15 +704,30 @@ TEST_P(InstFloat, fcvtzu) { ldp s0, s1, [x0] ldp s2, s3, [x0, #8] + ldp s4, s5, [x0, #16] + ldp s6, s7, [x0, #24] + ldp s8, s9, [x0, #32] fcvtzu x0, s0 fcvtzu x1, s1 fcvtzu x2, s2 fcvtzu x3, s3 + fcvtzu x4, s4 + fcvtzu x5, s5 + fcvtzu x6, s6 + fcvtzu x7, s7 + fcvtzu x8, s8 + fcvtzu x9, s9 )"); EXPECT_EQ((getGeneralRegister(0)), 1); - EXPECT_EQ((getGeneralRegister(1)), -42); + EXPECT_EQ((getGeneralRegister(1)), 0); EXPECT_EQ((getGeneralRegister(2)), 0); EXPECT_EQ((getGeneralRegister(3)), 321); + EXPECT_EQ((getGeneralRegister(4)), 0); + EXPECT_EQ((getGeneralRegister(5)), 0); + EXPECT_EQ((getGeneralRegister(6)), UINT64_MAX); + EXPECT_EQ((getGeneralRegister(7)), 0); + EXPECT_EQ((getGeneralRegister(8)), 4294967296); + EXPECT_EQ((getGeneralRegister(9)), UINT64_MAX); } TEST_P(InstFloat, fdiv) { diff --git a/test/regression/aarch64/instructions/sve.cc b/test/regression/aarch64/instructions/sve.cc index 6982fb8c9e..39a95ee4a3 100644 --- 
a/test/regression/aarch64/instructions/sve.cc +++ b/test/regression/aarch64/instructions/sve.cc @@ -4065,7 +4065,7 @@ TEST_P(InstSve, frintn) { initialHeapData_.resize(VL / 8); float* fheap = reinterpret_cast(initialHeapData_.data()); std::vector fsrcA = {1.0f, -42.5f, -0.125f, 0.0f, - 40.5f, -684.72f, -0.15f, 107.86f}; + 41.5f, -684.72f, -0.15f, 107.86f}; std::vector fsrcB = {-34.5f, -0.917f, 0.0f, 80.72f, -125.5f, -0.01f, 701.90f, 7.5f}; fillHeapCombined(fheap, fsrcA, fsrcB, VL / 32); @@ -4084,18 +4084,19 @@ TEST_P(InstSve, frintn) { ptrue p0.s whilelo p1.s, xzr, x2 - dup z0.s, #15 - dup z1.s, #13 + fdup z0.s, #2.0 + fdup z1.s, #3.0 ld1w {z2.s}, p0/z, [x0, x1, lsl #2] frintn z0.s, p0/m, z2.s frintn z1.s, p1/m, z2.s )"); - std::vector results32A = {1, -42, 0, 0, 40, -685, 0, 108}; - std::vector results32B = {-34, -1, 0, 81, -126, 0, 702, 8}; - CHECK_NEON(0, int32_t, - fillNeonCombined(results32A, results32B, VL / 8)); - CHECK_NEON(1, int32_t, fillNeonCombined(results32A, {13}, VL / 8)); + std::vector results32A = {1.0f, -42.0f, 0.0f, 0.0f, + 42.0f, -685.0f, 0.0f, 108.0f}; + std::vector results32B = {-34.0f, -1.0f, 0.0f, 81.0f, + -126.0f, 0.0f, 702.0f, 8.0f}; + CHECK_NEON(0, float, fillNeonCombined(results32A, results32B, VL / 8)); + CHECK_NEON(1, float, fillNeonCombined(results32A, {3.0}, VL / 8)); // 64-bit initialHeapData_.resize(VL / 8); @@ -4118,18 +4119,18 @@ TEST_P(InstSve, frintn) { ptrue p0.d whilelo p1.d, xzr, x2 - dup z0.d, #15 - dup z1.d, #13 + fdup z0.d, #2.0 + fdup z1.d, #3.0 ld1d {z2.d}, p0/z, [x0, x1, lsl #3] frintn z0.d, p0/m, z2.d frintn z1.d, p1/m, z2.d )"); - std::vector results64A = {1, -42, 0, 0}; - std::vector results64B = {40, -685, -4, 108}; - CHECK_NEON(0, int64_t, - fillNeonCombined(results64A, results64B, VL / 8)); - CHECK_NEON(1, int64_t, fillNeonCombined(results64A, {13}, VL / 8)); + std::vector results64A = {1.0, -42.0, 0.0, 0.0}; + std::vector results64B = {40.0, -685.0, -4.0, 108.0}; + CHECK_NEON(0, double, + 
fillNeonCombined(results64A, results64B, VL / 8)); + CHECK_NEON(1, double, fillNeonCombined(results64A, {3.0}, VL / 8)); } TEST_P(InstSve, fsqrt) { diff --git a/test/unit/ArchitecturalRegisterFileSetTest.cc b/test/unit/ArchitecturalRegisterFileSetTest.cc new file mode 100644 index 0000000000..1529ef1cea --- /dev/null +++ b/test/unit/ArchitecturalRegisterFileSetTest.cc @@ -0,0 +1,41 @@ +#include "gtest/gtest.h" +#include "simeng/ArchitecturalRegisterFileSet.hh" + +namespace simeng { +namespace pipeline { + +class ArchitecturalRegisterFileSetTest : public ::testing::Test { + public: + ArchitecturalRegisterFileSetTest() + : physRegFileSet(regFileStruct), archRegFileSet(physRegFileSet) {} + + protected: + const std::vector regFileStruct = { + {8, 10}, {24, 15}, {256, 31}}; + + RegisterFileSet physRegFileSet; + + ArchitecturalRegisterFileSet archRegFileSet; +}; + +// Ensure we can read and write values to the architectural register file +TEST_F(ArchitecturalRegisterFileSetTest, readWrite) { + for (uint8_t i = 0; i < regFileStruct.size(); i++) { + const uint16_t regSize = regFileStruct[i].bytes; + const uint16_t maxRegTag = regFileStruct[i].quantity - 1; + const Register r0 = {i, 0}; + const Register rMax = {i, maxRegTag}; + + EXPECT_EQ(archRegFileSet.get(r0), RegisterValue(0, regSize)); + EXPECT_EQ(archRegFileSet.get(rMax), RegisterValue(0, regSize)); + + archRegFileSet.set(r0, RegisterValue(20, regSize)); + archRegFileSet.set(rMax, RegisterValue(40, regSize)); + + EXPECT_EQ(archRegFileSet.get(r0), RegisterValue(20, regSize)); + EXPECT_EQ(archRegFileSet.get(rMax), RegisterValue(40, regSize)); + } +} + +} // namespace pipeline +} // namespace simeng \ No newline at end of file diff --git a/test/unit/CMakeLists.txt b/test/unit/CMakeLists.txt index a3e400aad2..fd1e4f9882 100644 --- a/test/unit/CMakeLists.txt +++ b/test/unit/CMakeLists.txt @@ -1,25 +1,45 @@ set(TEST_SOURCES + aarch64/ArchInfoTest.cc + aarch64/ArchitectureTest.cc + aarch64/AuxiliaryFunctionsTest.cc + 
aarch64/ExceptionHandlerTest.cc + aarch64/InstructionTest.cc + riscv/ArchInfoTest.cc + riscv/ArchitectureTest.cc + riscv/ExceptionHandlerTest.cc + riscv/InstructionTest.cc pipeline/A64FXPortAllocatorTest.cc pipeline/BalancedPortAllocatorTest.cc - pipeline/ExecuteUnitTest.cc pipeline/DecodeUnitTest.cc + pipeline/DispatchIssueUnitTest.cc pipeline/ExecuteUnitTest.cc pipeline/FetchUnitTest.cc pipeline/LoadStoreQueueTest.cc + pipeline/M1PortAllocatorTest.cc + pipeline/MappedRegisterFileSetTest.cc pipeline/PipelineBufferTest.cc pipeline/RegisterAliasTableTest.cc + pipeline/RenameUnitTest.cc pipeline/ReorderBufferTest.cc pipeline/WritebackUnitTest.cc + ArchitecturalRegisterFileSetTest.cc + ElfTest.cc + FixedLatencyMemoryInterfaceTest.cc + FlatMemoryInterfaceTest.cc GenericPredictorTest.cc - ISATest.cc - RegisterValueTest.cc + OSTest.cc PoolTest.cc - ShiftValueTest.cc - LatencyMemoryInterfaceTest.cc + ProcessTest.cc + RegisterFileSetTest.cc + RegisterValueTest.cc + SpecialFileDirGenTest.cc ) add_executable(unittests ${TEST_SOURCES}) +configure_file(${capstone_SOURCE_DIR}/arch/AArch64/AArch64GenInstrInfo.inc AArch64GenInstrInfo.inc COPYONLY) +configure_file(${capstone_SOURCE_DIR}/arch/RISCV/RISCVGenInstrInfo.inc RISCVGenInstrInfo.inc COPYONLY) + target_include_directories(unittests PUBLIC ${CMAKE_CURRENT_BINARY_DIR}) target_include_directories(unittests PUBLIC ${PROJECT_SOURCE_DIR}/src/lib) target_link_libraries(unittests libsimeng) diff --git a/test/unit/ConfigInit.hh b/test/unit/ConfigInit.hh new file mode 100644 index 0000000000..32b3c6ef6a --- /dev/null +++ b/test/unit/ConfigInit.hh @@ -0,0 +1,18 @@ +#pragma once + +#include "simeng/config/SimInfo.hh" +#include "simeng/version.hh" + +namespace simeng { + +// This small class' purpose is to initialise the SimInfo config before the +// initialisation of a test class +class ConfigInit { + public: + ConfigInit(config::ISA isa, std::string configAdditions) { + config::SimInfo::generateDefault(isa, true); + 
config::SimInfo::addToConfig(configAdditions); + } +}; + +} // namespace simeng \ No newline at end of file diff --git a/test/unit/ElfTest.cc b/test/unit/ElfTest.cc new file mode 100644 index 0000000000..9635304bf3 --- /dev/null +++ b/test/unit/ElfTest.cc @@ -0,0 +1,67 @@ +#include "gmock/gmock.h" +#include "simeng/Elf.hh" +#include "simeng/version.hh" + +using ::testing::_; +using ::testing::HasSubstr; +using ::testing::Return; + +namespace simeng { + +class ElfTest : public testing::Test { + public: + ElfTest() {} + + protected: + const std::string knownElfFilePath = + SIMENG_SOURCE_DIR "/test/unit/data/stream-aarch64.elf"; + + const uint64_t known_entryPoint = 4206008; + const uint16_t known_e_phentsize = 56; + const uint16_t known_e_phnum = 6; + const uint64_t known_phdrTableAddress = 4194368; + const uint64_t known_processImageSize = 5040480; + + char* unwrappedProcImgPtr; +}; + +// Test that a valid ELF file can be created +TEST_F(ElfTest, validElf) { + Elf elf(knownElfFilePath, &unwrappedProcImgPtr); + + EXPECT_TRUE(elf.isValid()); + EXPECT_EQ(elf.getEntryPoint(), known_entryPoint); + EXPECT_EQ(elf.getPhdrEntrySize(), known_e_phentsize); + EXPECT_EQ(elf.getNumPhdr(), known_e_phnum); + EXPECT_EQ(elf.getPhdrTableAddress(), known_phdrTableAddress); + EXPECT_EQ(elf.getProcessImageSize(), known_processImageSize); +} + +// Test that wrong filepath results in invalid ELF +TEST_F(ElfTest, invalidElf) { + Elf elf(SIMENG_SOURCE_DIR "/test/bogus_file_path___--__--__", + &unwrappedProcImgPtr); + EXPECT_FALSE(elf.isValid()); +} + +// Test that non-ELF file is not accepted +TEST_F(ElfTest, nonElf) { + testing::internal::CaptureStderr(); + Elf elf(SIMENG_SOURCE_DIR "/test/unit/ElfTest.cc", &unwrappedProcImgPtr); + EXPECT_FALSE(elf.isValid()); + EXPECT_THAT(testing::internal::GetCapturedStderr(), + HasSubstr("[SimEng:Elf] Elf magic does not match")); +} + +// Check that 32-bit ELF is not accepted +TEST_F(ElfTest, format32Elf) { + testing::internal::CaptureStderr(); + Elf 
elf(SIMENG_SOURCE_DIR "/test/unit/data/stream.rv32ima.elf", + &unwrappedProcImgPtr); + EXPECT_FALSE(elf.isValid()); + EXPECT_THAT( + testing::internal::GetCapturedStderr(), + HasSubstr("[SimEng:Elf] Unsupported architecture detected in Elf")); +} + +} // namespace simeng \ No newline at end of file diff --git a/test/unit/FixedLatencyMemoryInterfaceTest.cc b/test/unit/FixedLatencyMemoryInterfaceTest.cc new file mode 100644 index 0000000000..f5ea0d7581 --- /dev/null +++ b/test/unit/FixedLatencyMemoryInterfaceTest.cc @@ -0,0 +1,141 @@ +#include "gtest/gtest.h" +#include "simeng/FixedLatencyMemoryInterface.hh" + +namespace { + +class FixedLatencyMemoryInterfaceTest + : public testing::TestWithParam { + public: + FixedLatencyMemoryInterfaceTest() + : memory(memoryData.data(), memorySize, GetParam()) {} + + protected: + static constexpr uint16_t memorySize = 4; + std::array memoryData = {(char)0xFE, (char)0xCA, (char)0xBA, + (char)0xAB}; + + simeng::RegisterValue value = {0xDEADBEEF, 4}; + simeng::RegisterValue value_oversized = {0xDEADBEEFDEADBEEF, 8}; + simeng::MemoryAccessTarget target = {0, 4}; + simeng::MemoryAccessTarget target_OutOfBound1 = {1000, 4}; + simeng::MemoryAccessTarget target_OutOfBound2 = {0, 8}; + + const std::string writeOverflowStr = + "Attempted to write beyond memory limit."; + + simeng::FixedLatencyMemoryInterface memory; +}; + +// Test that we can read data and it completes after n cycles. 
+TEST_P(FixedLatencyMemoryInterfaceTest, FixedReadData) { + // Read a 32-bit value + memory.requestRead(target, 1); + EXPECT_TRUE(memory.hasPendingRequests()); + + // Tick n-1 times - request should still be pending + uint16_t latency = GetParam(); + for (int n = 0; n < latency - 1; n++) { + memory.tick(); + EXPECT_TRUE(memory.hasPendingRequests()); + } + + // Tick again - request should have completed + memory.tick(); + EXPECT_FALSE(memory.hasPendingRequests()); + + auto entries = memory.getCompletedReads(); + EXPECT_EQ(entries.size(), 1); + EXPECT_EQ(entries[0].requestId, 1); + EXPECT_EQ(entries[0].data, simeng::RegisterValue(0xABBACAFE, 4)); + EXPECT_EQ(entries[0].target, target); +} + +// Test that we can write data and it completes after n cycles. +TEST_P(FixedLatencyMemoryInterfaceTest, FixedWriteData) { + // Write a 32-bit value to memory + memory.requestWrite(target, value); + EXPECT_TRUE(memory.hasPendingRequests()); + + // Tick n-1 times - request should still be pending + uint16_t latency = GetParam(); + for (int n = 0; n < latency - 1; n++) { + memory.tick(); + EXPECT_TRUE(memory.hasPendingRequests()); + } + + // Tick again - request should have completed + memory.tick(); + EXPECT_FALSE(memory.hasPendingRequests()); + EXPECT_EQ(reinterpret_cast(memoryData.data())[0], 0xDEADBEEF); +} + +// Test that out-of-bounds memory reads are correctly handled. 
+TEST_P(FixedLatencyMemoryInterfaceTest, OutofBoundsRead) { + // Create a target such that address + size will overflow + memory.requestRead(target_OutOfBound1, 1); + + // Create a regular out-of-bounds target + memory.requestRead(target_OutOfBound2, 2); + + // Tick n-1 times - request shouldn't have completed + uint16_t latency = GetParam(); + for (int n = 0; n < latency - 1; n++) { + memory.tick(); + EXPECT_TRUE(memory.hasPendingRequests()); + } + + // Tick again - request should have completed + memory.tick(); + EXPECT_FALSE(memory.hasPendingRequests()); + + auto entries = memory.getCompletedReads(); + EXPECT_EQ(entries.size(), 2); + + auto overflowResult = entries[0]; + EXPECT_EQ(overflowResult.requestId, 1); + EXPECT_FALSE(overflowResult.data); + EXPECT_EQ(overflowResult.target, target_OutOfBound1); + + overflowResult = entries[1]; + EXPECT_EQ(overflowResult.requestId, 2); + EXPECT_FALSE(overflowResult.data); + EXPECT_EQ(overflowResult.target, target_OutOfBound2); +} + +// Test that out-of-bounds memory writes are correctly handled. +TEST_P(FixedLatencyMemoryInterfaceTest, OutofBoundsWrite_1) { + // Create a target such that address + size will overflow + memory.requestWrite(target_OutOfBound1, value); + + // Tick n-1 times - request shouldn't have completed + uint16_t latency = GetParam(); + for (int n = 0; n < latency - 1; n++) { + memory.tick(); + EXPECT_TRUE(memory.hasPendingRequests()); + } + + // Tick again - simulation should have come to a stop + ASSERT_DEATH(memory.tick(), writeOverflowStr); +} + +// Test that out-of-bounds memory writes are correctly handled. 
+TEST_P(FixedLatencyMemoryInterfaceTest, OutofBoundsWrite_2) { + // Create a regular out-of-bounds target + memory.requestWrite(target_OutOfBound2, value_oversized); + + // Tick n-1 times - request shouldn't have completed + uint16_t latency = GetParam(); + for (int n = 0; n < latency - 1; n++) { + memory.tick(); + EXPECT_TRUE(memory.hasPendingRequests()); + } + + // Tick again - simulation should have come to a stop + ASSERT_DEATH(memory.tick(), writeOverflowStr); +} + +INSTANTIATE_TEST_SUITE_P(FixedLatencyMemoryInterfaceTests, + FixedLatencyMemoryInterfaceTest, + ::testing::Values(2, 4)); + +} // namespace diff --git a/test/unit/FlatMemoryInterfaceTest.cc b/test/unit/FlatMemoryInterfaceTest.cc new file mode 100644 index 0000000000..2348588ec5 --- /dev/null +++ b/test/unit/FlatMemoryInterfaceTest.cc @@ -0,0 +1,81 @@ +#include "gtest/gtest.h" +#include "simeng/FlatMemoryInterface.hh" + +namespace { + +class FlatMemoryInterfaceTest : public testing::Test { + public: + FlatMemoryInterfaceTest() : memory(memoryData.data(), memorySize) {} + + protected: + static constexpr uint16_t memorySize = 4; + std::array memoryData = {(char)0xFE, (char)0xCA, (char)0xBA, + (char)0xAB}; + + simeng::RegisterValue value = {0xDEADBEEF, 4}; + simeng::RegisterValue value_oversized = {0xDEADBEEFDEADBEEF, 8}; + simeng::MemoryAccessTarget target = {0, 4}; + simeng::MemoryAccessTarget target_OutOfBound1 = {1000, 4}; + simeng::MemoryAccessTarget target_OutOfBound2 = {0, 8}; + + const std::string writeOverflowStr = + "Attempted to write beyond memory limit."; + + simeng::FlatMemoryInterface memory; +}; + +// Test that we can read data and it completes after zero cycles. 
+TEST_F(FlatMemoryInterfaceTest, FixedReadData) { + // Read a 32-bit value + memory.requestRead(target, 1); + auto entries = memory.getCompletedReads(); + EXPECT_EQ(entries.size(), 1); + EXPECT_EQ(entries[0].requestId, 1); + EXPECT_EQ(entries[0].data, simeng::RegisterValue(0xABBACAFE, 4)); + EXPECT_EQ(entries[0].target, target); +} + +// Test that we can write data and it completes after zero cycles. +TEST_F(FlatMemoryInterfaceTest, FixedWriteData) { + // Write a 32-bit value to memory + memory.requestWrite(target, value); + EXPECT_EQ(reinterpret_cast(memoryData.data())[0], 0xDEADBEEF); +} + +// Test that out-of-bounds memory reads are correctly handled. +TEST_F(FlatMemoryInterfaceTest, OutofBoundsRead) { + // Create a target such that address + size will overflow + memory.requestRead(target_OutOfBound1, 1); + + // Create a regular out-of-bounds target + memory.requestRead(target_OutOfBound2, 2); + + auto entries = memory.getCompletedReads(); + EXPECT_EQ(entries.size(), 2); + + auto overflowResult = entries[0]; + EXPECT_EQ(overflowResult.requestId, 1); + EXPECT_FALSE(overflowResult.data); + EXPECT_EQ(overflowResult.target, target_OutOfBound1); + + overflowResult = entries[1]; + EXPECT_EQ(overflowResult.requestId, 2); + EXPECT_FALSE(overflowResult.data); + EXPECT_EQ(overflowResult.target, target_OutOfBound2); +} + +// Test that out-of-bounds memory writes are correctly handled. +TEST_F(FlatMemoryInterfaceTest, OutofBoundsWrite_1) { + // Create a target such that address + size will overflow + ASSERT_DEATH(memory.requestWrite(target_OutOfBound1, value), + writeOverflowStr); +} + +// Test that out-of-bounds memory writes are correctly handled. 
+TEST_F(FlatMemoryInterfaceTest, OutofBoundsWrite_2) { + // Create a regular out-of-bounds target + ASSERT_DEATH(memory.requestWrite(target_OutOfBound2, value_oversized), + writeOverflowStr); +} + +} // namespace diff --git a/test/unit/GenericPredictorTest.cc b/test/unit/GenericPredictorTest.cc index e5ba2a665e..898e7e93e2 100644 --- a/test/unit/GenericPredictorTest.cc +++ b/test/unit/GenericPredictorTest.cc @@ -157,4 +157,42 @@ TEST_F(GenericPredictorTest, GlobalIndexing) { predictor.update(0x1F, true, 0xBA, BranchType::Conditional); } +// Test Flush of RAS functionality +TEST_F(GenericPredictorTest, flush) { + simeng::config::SimInfo::addToConfig( + "{Branch-Predictor: {BTB-Tag-Bits: 11, Saturating-Count-Bits: 2, " + "Global-History-Length: 10, RAS-entries: 10, Fallback-Static-Predictor: " + "Always-Taken}}"); + auto predictor = simeng::GenericPredictor(); + // Add some entries to the RAS + auto prediction = predictor.predict(8, BranchType::SubroutineCall, 8); + EXPECT_TRUE(prediction.taken); + EXPECT_EQ(prediction.target, 16); + prediction = predictor.predict(24, BranchType::SubroutineCall, 8); + EXPECT_TRUE(prediction.taken); + EXPECT_EQ(prediction.target, 32); + prediction = predictor.predict(40, BranchType::SubroutineCall, 8); + EXPECT_TRUE(prediction.taken); + EXPECT_EQ(prediction.target, 48); + + // Start getting entries from RAS + prediction = predictor.predict(52, BranchType::Return, 0); + EXPECT_TRUE(prediction.taken); + EXPECT_EQ(prediction.target, 44); + prediction = predictor.predict(36, BranchType::Return, 0); + EXPECT_TRUE(prediction.taken); + EXPECT_EQ(prediction.target, 28); + + // Flush address + predictor.flush(36); + + // Continue getting entries from RAS + prediction = predictor.predict(20, BranchType::Return, 0); + EXPECT_TRUE(prediction.taken); + EXPECT_EQ(prediction.target, 28); + prediction = predictor.predict(16, BranchType::Return, 0); + EXPECT_TRUE(prediction.taken); + EXPECT_EQ(prediction.target, 12); +} + } // namespace simeng diff 
--git a/test/unit/ISATest.cc b/test/unit/ISATest.cc deleted file mode 100644 index e3fe5584a8..0000000000 --- a/test/unit/ISATest.cc +++ /dev/null @@ -1,27 +0,0 @@ -#include "gtest/gtest.h" -#include "simeng/RegisterFileSet.hh" -#include "simeng/arch/aarch64/Architecture.hh" -#include "simeng/arch/aarch64/Instruction.hh" - -namespace { - -// Test that we can create an AArch64 Architecture object -TEST(ISATest, CreateAArch64) { - simeng::kernel::Linux kernel; - simeng::config::SimInfo::addToConfig("{Core: {Micro-Operations: True}}"); - - std::unique_ptr isa = - std::make_unique(kernel); -} - -// Test that we can set a value in a register file set -TEST(ISATest, CreateRegisterFileSet) { - auto registerFileSet = simeng::RegisterFileSet({{8, 32}, {16, 32}, {1, 1}}); - auto reg = simeng::Register{simeng::arch::aarch64::RegisterType::GENERAL, 0}; - - registerFileSet.set(reg, static_cast(42)); - - EXPECT_TRUE(registerFileSet.get(reg)); -} - -} // namespace diff --git a/test/unit/LatencyMemoryInterfaceTest.cc b/test/unit/LatencyMemoryInterfaceTest.cc deleted file mode 100644 index e728b9bf76..0000000000 --- a/test/unit/LatencyMemoryInterfaceTest.cc +++ /dev/null @@ -1,63 +0,0 @@ -#include "gtest/gtest.h" -#include "simeng/FixedLatencyMemoryInterface.hh" - -namespace { - -// Test that we can write data and it completes after a number of cycles. 
-TEST(LatencyMemoryInterfaceTest, FixedWriteData) { - // Create a memory interface with a two cycle latency - uint32_t memoryData = 0; - simeng::FixedLatencyMemoryInterface memory( - reinterpret_cast(&memoryData), 4, 2); - EXPECT_FALSE(memory.hasPendingRequests()); - - // Write a 32-bit value to memory - // Should ignore the 7 cycle latency and opt for the interface defined latency - simeng::MemoryAccessTarget target = {0, 4}; - simeng::RegisterValue value = (uint32_t)0xDEADBEEF; - memory.requestWrite(target, value); - EXPECT_TRUE(memory.hasPendingRequests()); - - // Tick once - request should still be pending - memory.tick(); - EXPECT_TRUE(memory.hasPendingRequests()); - - // Tick again - request should have completed - memory.tick(); - EXPECT_FALSE(memory.hasPendingRequests()); - EXPECT_EQ(memoryData, 0xDEADBEEF); -} - -// Test that out-of-bounds memory reads are correctly handled. -TEST(LatencyMemoryInterfaceTest, OutofBoundsRead) { - uint32_t memoryData = 0; - simeng::FixedLatencyMemoryInterface memory( - reinterpret_cast(&memoryData), 4, 1); - - // Create a target such that address + size will overflow - simeng::MemoryAccessTarget overflowTarget = {UINT64_MAX, 4}; - memory.requestRead(overflowTarget, 1); - - // Create a regular out-of-bounds target - simeng::MemoryAccessTarget target = {0, 8}; - memory.requestRead(target, 2); - - // Tick once - request should have completed - memory.tick(); - EXPECT_FALSE(memory.hasPendingRequests()); - - auto entries = memory.getCompletedReads(); - EXPECT_EQ(entries.size(), 2); - - auto overflowResult = entries[0]; - EXPECT_EQ(overflowResult.requestId, 1); - EXPECT_EQ(overflowResult.data, simeng::RegisterValue()); - EXPECT_EQ(overflowResult.target, overflowTarget); - - auto result = entries[1]; - EXPECT_EQ(result.requestId, 2); - EXPECT_EQ(result.data, simeng::RegisterValue()); - EXPECT_EQ(result.target, target); -} - -} // namespace diff --git a/test/unit/MockCore.hh b/test/unit/MockCore.hh new file mode 100644 index 
0000000000..524f9f5d20 --- /dev/null +++ b/test/unit/MockCore.hh @@ -0,0 +1,20 @@ +#pragma once + +#include "gmock/gmock.h" +#include "simeng/Core.hh" + +namespace simeng { + +/** Mock implementation of the `Core` interface. */ +class MockCore : public Core { + public: + MOCK_METHOD0(tick, void()); + MOCK_CONST_METHOD0(hasHalted, bool()); + MOCK_CONST_METHOD0(getArchitecturalRegisterFileSet, + const ArchitecturalRegisterFileSet&()); + MOCK_CONST_METHOD0(getInstructionsRetiredCount, uint64_t()); + MOCK_CONST_METHOD0(getSystemTimer, uint64_t()); + MOCK_CONST_METHOD0(getStats, std::map()); +}; + +} // namespace simeng diff --git a/test/unit/MockInstruction.hh b/test/unit/MockInstruction.hh index 4d9757c24f..883c753413 100644 --- a/test/unit/MockInstruction.hh +++ b/test/unit/MockInstruction.hh @@ -8,7 +8,6 @@ namespace simeng { /** Mock implementation of the `Instruction` interface. */ class MockInstruction : public Instruction { public: - MOCK_CONST_METHOD0(getException, InstructionException()); MOCK_CONST_METHOD0(getSourceRegisters, const span()); MOCK_CONST_METHOD0(getSourceOperands, const span()); MOCK_CONST_METHOD0(getDestinationRegisters, const span()); @@ -22,6 +21,7 @@ class MockInstruction : public Instruction { MOCK_METHOD0(generateAddresses, span()); MOCK_METHOD2(supplyData, void(uint64_t address, const RegisterValue& data)); MOCK_CONST_METHOD0(getGeneratedAddresses, span()); + MOCK_CONST_METHOD0(hasAllData, bool()); MOCK_CONST_METHOD0(getData, span()); MOCK_CONST_METHOD0(checkEarlyBranchMisprediction, @@ -33,10 +33,10 @@ class MockInstruction : public Instruction { MOCK_CONST_METHOD0(isStoreData, bool()); MOCK_CONST_METHOD0(isLoad, bool()); MOCK_CONST_METHOD0(isBranch, bool()); - MOCK_CONST_METHOD0(isASIMD, bool()); - MOCK_CONST_METHOD0(isPredicate, bool()); MOCK_CONST_METHOD0(getGroup, uint16_t()); + MOCK_CONST_METHOD0(getLSQLatency, uint16_t()); + MOCK_METHOD0(getSupportedPorts, const std::vector&()); void setBranchResults(bool wasTaken, uint64_t 
targetAddress) { @@ -54,7 +54,13 @@ class MockInstruction : public Instruction { void setLatency(uint16_t cycles) { latency_ = cycles; } + void setLSQLatency(uint16_t cycles) { lsqExecutionLatency_ = cycles; } + void setStallCycles(uint16_t cycles) { stallCycles_ = cycles; } + + void setIsMicroOp(bool isMicroOp) { isMicroOp_ = isMicroOp; } + + void setIsLastMicroOp(bool isLastOp) { isLastMicroOp_ = isLastOp; } }; } // namespace simeng diff --git a/test/unit/MockPortAllocator.hh b/test/unit/MockPortAllocator.hh new file mode 100644 index 0000000000..19d7142b74 --- /dev/null +++ b/test/unit/MockPortAllocator.hh @@ -0,0 +1,21 @@ +#pragma once + +#include "gmock/gmock.h" +#include "simeng/pipeline/PortAllocator.hh" + +namespace simeng { +namespace pipeline { + +/** Mock implementation of the `PortAllocator` interface. */ +class MockPortAllocator : public pipeline::PortAllocator { + public: + MOCK_METHOD1(allocate, uint16_t(const std::vector& ports)); + MOCK_METHOD1(issued, void(uint16_t port)); + MOCK_METHOD1(deallocate, void(uint16_t port)); + MOCK_METHOD1(setRSSizeGetter, + void(std::function&)> rsSizes)); + MOCK_METHOD0(tick, void()); +}; + +} // namespace pipeline +} // namespace simeng diff --git a/test/unit/OSTest.cc b/test/unit/OSTest.cc new file mode 100644 index 0000000000..1b2f7803eb --- /dev/null +++ b/test/unit/OSTest.cc @@ -0,0 +1,77 @@ +#include "ConfigInit.hh" +#include "gtest/gtest.h" +#include "simeng/kernel/Linux.hh" +#include "simeng/kernel/LinuxProcess.hh" +#include "simeng/span.hh" + +namespace simeng { + +class OSTest : public testing::Test { + public: + OSTest() + : os(config::SimInfo::getConfig()["CPU-Info"]["Special-File-Dir-Path"] + .as()), + proc_elf(simeng::kernel::LinuxProcess(cmdLine)), + proc_hex(simeng::span(reinterpret_cast(demoHex), + sizeof(demoHex))) {} + + protected: + ConfigInit configInit = ConfigInit( + config::ISA::AArch64, + R"YAML({Process-Image: {Heap-Size: 1073741824, Stack-Size: 1048576}})YAML"); + + const std::vector 
cmdLine = { + SIMENG_SOURCE_DIR "/test/unit/data/stream-aarch64.elf"}; + + simeng::kernel::Linux os; + simeng::kernel::LinuxProcess proc_elf; + simeng::kernel::LinuxProcess proc_hex; + + // A simple program used to test the functionality of creating a process with + // a stream of hex instructions. + uint32_t demoHex[7] = { + 0x320C03E0, // orr w0, wzr, #1048576 + 0x320003E1, // orr w0, wzr, #1 + 0x71000400, // subs w0, w0, #1 + 0x54FFFFC1, // b.ne -8 + // .exit: + 0xD2800000, // mov x0, #0 + 0xD2800BC8, // mov x8, #94 + 0xD4000001, // svc #0 + }; +}; + +// These tests verify the functionality of both the `createProcess()` and +// `getInitialStackPointer()` functions. All other functions for this class are +// syscalls and are tested in the Regression suite. +TEST_F(OSTest, processElf_stackPointer) { + os.createProcess(proc_elf); + // cmdLine[0] length will change depending on the host system so final stack + // pointer needs to be calculated manually + // cmdLineSize + 1 for null separator + const uint64_t cmdLineSize = cmdLine[0].size() + 1; + // "OMP_NUM_THREADS=1" + 1 for null separator + const uint64_t envStringsSize = 18; + // Size of initial stack frame as per LinuxProcess.cc:createStack() + // - (17 push_backs) * 8 + // https://www.win.tue.nl/~aeb/linux/hh/stack-layout.html + const uint64_t stackFrameSize = 17 * 8; + // cmd + Env needs +1 for null separator + const uint64_t stackPointer = + proc_elf.getStackStart() - + kernel::alignToBoundary(cmdLineSize + envStringsSize + 1, 32) - + kernel::alignToBoundary(stackFrameSize, 32); + EXPECT_EQ(os.getInitialStackPointer(), stackPointer); + EXPECT_EQ(os.getInitialStackPointer(), proc_elf.getStackPointer()); +} + +TEST_F(OSTest, processHex_stackPointer) { + os.createProcess(proc_hex); + EXPECT_EQ(os.getInitialStackPointer(), 1074790240); + EXPECT_EQ(os.getInitialStackPointer(), proc_hex.getStackPointer()); +} + +// createProcess +// getInitialStackPointer + +} // namespace simeng diff --git 
a/test/unit/ProcessTest.cc b/test/unit/ProcessTest.cc new file mode 100644 index 0000000000..26858164e5 --- /dev/null +++ b/test/unit/ProcessTest.cc @@ -0,0 +1,119 @@ +#include "ConfigInit.hh" +#include "gtest/gtest.h" +#include "simeng/kernel/LinuxProcess.hh" +#include "simeng/version.hh" + +namespace simeng { + +class ProcessTest : public testing::Test { + public: + ProcessTest() {} + + protected: + ConfigInit configInit = ConfigInit( + config::ISA::AArch64, + R"YAML({Process-Image: {Heap-Size: 1073741824, Stack-Size: 1048576}})YAML"); + + const std::vector cmdLine = { + SIMENG_SOURCE_DIR "/test/unit/data/stream-aarch64.elf"}; + + // Program used when no executable is provided; counts down from + // 1024*1024, with an independent `orr` at the start of each branch. + uint32_t demoHex[7] = { + 0x320C03E0, // orr w0, wzr, #1048576 + 0x320003E1, // orr w0, wzr, #1 + 0x71000400, // subs w0, w0, #1 + 0x54FFFFC1, // b.ne -8 + // .exit: + 0xD2800000, // mov x0, #0 + 0xD2800BC8, // mov x8, #94 + 0xD4000001, // svc #0 + }; +}; + +TEST_F(ProcessTest, alignToBoundary) { + EXPECT_EQ(kernel::alignToBoundary(63, 64), 64); + EXPECT_EQ(kernel::alignToBoundary(1, 64), 64); + EXPECT_EQ(kernel::alignToBoundary(65, 64), 128); +} + +// Tests createProcess(), isValid(), and getPath() functions. +TEST_F(ProcessTest, createProcess_elf) { + kernel::LinuxProcess proc = kernel::LinuxProcess(cmdLine); + EXPECT_TRUE(proc.isValid()); + EXPECT_EQ(proc.getPath(), + SIMENG_SOURCE_DIR "/test/unit/data/stream-aarch64.elf"); +} + +// Tests createProcess(), isValid(), and getPath() functions. 
+TEST_F(ProcessTest, createProcess_hex) { + kernel::LinuxProcess proc = kernel::LinuxProcess( + span(reinterpret_cast(demoHex), sizeof(demoHex))); + EXPECT_TRUE(proc.isValid()); + EXPECT_EQ(proc.getPath(), "\0"); +} + +// Tests get{Heap, Stack, Mmap}Start() functions +TEST_F(ProcessTest, get_x_Start) { + kernel::LinuxProcess proc = kernel::LinuxProcess(cmdLine); + EXPECT_TRUE(proc.isValid()); + const uint64_t heapStart = 5040480; + uint64_t heapSize = + config::SimInfo::getConfig()["Process-Image"]["Heap-Size"].as(); + uint64_t stackSize = + config::SimInfo::getConfig()["Process-Image"]["Stack-Size"] + .as(); + EXPECT_EQ(proc.getHeapStart(), heapStart); + EXPECT_EQ(proc.getMmapStart(), + kernel::alignToBoundary(heapStart + ((heapSize + stackSize) / 2), + proc.getPageSize())); + EXPECT_EQ(proc.getStackStart(), heapStart + heapSize + stackSize); +} + +TEST_F(ProcessTest, getPageSize) { + kernel::LinuxProcess proc = kernel::LinuxProcess(cmdLine); + EXPECT_TRUE(proc.isValid()); + EXPECT_EQ(proc.getPageSize(), 4096); +} + +TEST_F(ProcessTest, getProcessImage) { + kernel::LinuxProcess proc = kernel::LinuxProcess(cmdLine); + EXPECT_TRUE(proc.isValid()); + EXPECT_NE(proc.getProcessImage(), nullptr); +} + +TEST_F(ProcessTest, getProcessImageSize) { + kernel::LinuxProcess proc = kernel::LinuxProcess(cmdLine); + EXPECT_TRUE(proc.isValid()); + EXPECT_EQ(proc.getProcessImageSize(), 1079830880); +} + +TEST_F(ProcessTest, getEntryPoint) { + kernel::LinuxProcess proc = kernel::LinuxProcess(cmdLine); + EXPECT_TRUE(proc.isValid()); + EXPECT_EQ(proc.getEntryPoint(), 4206008); +} + +TEST_F(ProcessTest, getStackPointer) { + kernel::LinuxProcess proc = kernel::LinuxProcess(cmdLine); + EXPECT_TRUE(proc.isValid()); + // cmdLine[0] length will change depending on the host system so final stack + // pointer needs to be calculated manually + // cmdLineSize + 1 for null separator + const uint64_t cmdLineSize = cmdLine[0].size() + 1; + // "OMP_NUM_THREADS=1" + 1 for null separator + const 
uint64_t envStringsSize = 18; + // Size of initial stack frame (17 push_backs) * 8 + const uint64_t stackFrameSize = 17 * 8; + // cmd + Env needs +1 for null separator + const uint64_t stackPointer = + proc.getStackStart() - + kernel::alignToBoundary(cmdLineSize + envStringsSize + 1, 32) - + kernel::alignToBoundary(stackFrameSize, 32); + EXPECT_EQ(proc.getStackPointer(), stackPointer); +} + +} // namespace simeng + +// getEntryPoint +// getStackPointer \ No newline at end of file diff --git a/test/unit/RegisterFileSetTest.cc b/test/unit/RegisterFileSetTest.cc new file mode 100644 index 0000000000..ed8485eb61 --- /dev/null +++ b/test/unit/RegisterFileSetTest.cc @@ -0,0 +1,48 @@ +#include "gtest/gtest.h" +#include "simeng/RegisterFileSet.hh" + +namespace simeng { +namespace pipeline { + +class RegisterFileSetTest : public ::testing::Test { + public: + RegisterFileSetTest() : regFileSet(regFileStruct) {} + + protected: + const std::vector regFileStruct = { + {8, 10}, {24, 15}, {256, 31}}; + + RegisterFileSet regFileSet; +}; + +// Ensure RegisterFileSet is constructed correctly +TEST_F(RegisterFileSetTest, validConstruction) { + for (uint8_t i = 0; i < regFileStruct.size(); i++) { + for (uint16_t j = 0; j < regFileStruct[i].quantity; j++) { + const Register reg = {i, j}; + EXPECT_EQ(regFileSet.get(reg), RegisterValue(0, regFileStruct[i].bytes)); + } + } +} + +// Ensure we can read and write values to the register file +TEST_F(RegisterFileSetTest, readWrite) { + for (uint8_t i = 0; i < regFileStruct.size(); i++) { + const uint16_t regSize = regFileStruct[i].bytes; + const uint16_t maxRegTag = regFileStruct[i].quantity - 1; + const Register r0 = {i, 0}; + const Register rMax = {i, maxRegTag}; + + EXPECT_EQ(regFileSet.get(r0), RegisterValue(0, regSize)); + EXPECT_EQ(regFileSet.get(rMax), RegisterValue(0, regSize)); + + regFileSet.set(r0, RegisterValue(20, regSize)); + regFileSet.set(rMax, RegisterValue(40, regSize)); + + EXPECT_EQ(regFileSet.get(r0), RegisterValue(20, 
regSize)); + EXPECT_EQ(regFileSet.get(rMax), RegisterValue(40, regSize)); + } +} + +} // namespace pipeline +} // namespace simeng \ No newline at end of file diff --git a/test/unit/ShiftValueTest.cc b/test/unit/ShiftValueTest.cc deleted file mode 100644 index 8cfe74e731..0000000000 --- a/test/unit/ShiftValueTest.cc +++ /dev/null @@ -1,33 +0,0 @@ -#include "gtest/gtest.h" -#include "simeng/arch/aarch64/Instruction.hh" - -namespace { - -TEST(ShiftValueTest, ROR) { - const auto ARM64_SFT_ROR = 5; - - // 32-bit - const uint32_t a = 0x0000FFFF; - EXPECT_EQ(simeng::arch::aarch64::shiftValue(a, ARM64_SFT_ROR, 16), - 0xFFFF0000); - - const uint32_t b = 0xFFFF0000; - EXPECT_EQ(simeng::arch::aarch64::shiftValue(b, ARM64_SFT_ROR, 31), - 0xFFFE0001); - - EXPECT_EQ(simeng::arch::aarch64::shiftValue(b, ARM64_SFT_ROR, 0), 0xFFFF0000); - - // 64-bit - const uint64_t c = 0x00000000FFFFFFFF; - EXPECT_EQ(simeng::arch::aarch64::shiftValue(c, ARM64_SFT_ROR, 32), - 0xFFFFFFFF00000000); - - const uint64_t d = 0xFFFFFFFF00000000; - EXPECT_EQ(simeng::arch::aarch64::shiftValue(d, ARM64_SFT_ROR, 63), - 0xFFFFFFFE00000001); - - EXPECT_EQ(simeng::arch::aarch64::shiftValue(d, ARM64_SFT_ROR, 0), - 0xFFFFFFFF00000000); -} - -} // namespace \ No newline at end of file diff --git a/test/unit/SpecialFileDirGenTest.cc b/test/unit/SpecialFileDirGenTest.cc new file mode 100644 index 0000000000..745750b90f --- /dev/null +++ b/test/unit/SpecialFileDirGenTest.cc @@ -0,0 +1,136 @@ +#include "ConfigInit.hh" +#include "gmock/gmock.h" +#include "simeng/SpecialFileDirGen.hh" +#include "simeng/version.hh" + +namespace simeng { + +#define TEST_SPEC_FILE_DIR SIMENG_SOURCE_DIR "/test/unit/specialFiles/" + +class SpecialFileDirGenTest : public testing::Test { + public: + SpecialFileDirGenTest() {} + + protected: + ConfigInit configInit = ConfigInit(config::ISA::AArch64, + R"YAML({ + CPU-Info: { + Generate-Special-Dir: True, + Special-File-Dir-Path: )YAML" TEST_SPEC_FILE_DIR R"YAML(, + Core-Count: 1, + 
Socket-Count: 1, + SMT: 1, + BogoMIPS: 200.00, + Features: fp asimd evtstrm sha1 sha2 crc32 atomics fphp asimdhp cpuid asimdrdm fcma dcpop sve, + CPU-Implementer: 0x46, + CPU-Architecture: 8, + CPU-Variant: 0x1, + CPU-Part: 0x001, + CPU-Revision: 0, + Package-Count: 1 + } + })YAML"); + + SpecialFileDirGen specFile; + + const std::vector>> + allFiles_names_Lines = { + std::pair>( + "proc/cpuinfo", + {"processor : 0", "BogoMIPS : 200.00", + "Features : fp asimd evtstrm sha1 sha2 " + "crc32 atomics fphp asimdhp cpuid " + "asimdrdm fcma dcpop sve", + "CPU implementer : 0x46", "CPU architecture: 8", + "CPU variant : 0x1", "CPU part : 0x001", + "CPU revision : 0", ""}), + std::pair>( + "proc/stat", + {"cpu 0 0 0 0 0 0 0 0 0 0", "cpu0 0 0 0 0 0 0 0 0 0 0", + "intr 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 " + "0 0 0 0 " + "0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 " + "0 0 0 0 0 " + "0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 " + "0 0 0 0 0 " + "0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 " + "0 0 0 0 0 " + "0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 " + "0 0 0 0 0 " + "0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0", + "ctxt 0", "btime 0", "processes 0", "procs_running 1", + "procs_blocked 0", "softirq 0 0 0 0 0 0 0 0 0 0 0"}), + std::pair>( + "sys/devices/system/cpu/cpu0/topology/core_id", {"0"}), + std::pair>( + "sys/devices/system/cpu/cpu0/topology/physical_package_id", + {"0"}), + std::pair>( + "sys/devices/system/cpu/online", {"0-0"})}; +}; + +// Test that we can generate and delete special files to a custom directory +// (i.e. 
the one defined in the YAML string above) +TEST_F(SpecialFileDirGenTest, genAndDelete) { + // Make sure files currently do not exist + for (int i = 0; i < allFiles_names_Lines.size(); i++) { + EXPECT_FALSE( + std::ifstream(TEST_SPEC_FILE_DIR + std::get<0>(allFiles_names_Lines[i])) + .good()); + } + + // Generate files + specFile.GenerateSFDir(); + + // Validate files exist and are correct + for (int i = 0; i < allFiles_names_Lines.size(); i++) { + EXPECT_TRUE( + std::ifstream(TEST_SPEC_FILE_DIR + std::get<0>(allFiles_names_Lines[i])) + .good()); + std::ifstream file(TEST_SPEC_FILE_DIR + + std::get<0>(allFiles_names_Lines[i])); + const std::vector& knownLines = + std::get<1>(allFiles_names_Lines[i]); + std::string line; + int numOfLines = 0; + while (std::getline(file, line)) { + if (numOfLines > knownLines.size()) { + break; + } + EXPECT_EQ(line, knownLines[numOfLines]); + numOfLines++; + } + EXPECT_EQ(numOfLines, knownLines.size()); + } + + // Delete files + specFile.RemoveExistingSFDir(); + + // Make sure files don't exist + for (int i = 0; i < allFiles_names_Lines.size(); i++) { + EXPECT_FALSE( + std::ifstream(TEST_SPEC_FILE_DIR + std::get<0>(allFiles_names_Lines[i])) + .good()); + } +} + +// Test that a non-existent non-default special file directory causes the user +// to be notified when generation is set to False +TEST_F(SpecialFileDirGenTest, doesntExist) { + // Reset SimInfo Config + ASSERT_DEATH( + config::SimInfo::addToConfig( + "CPU-Info: {Generate-Special-Dir: False, " + "Special-File-Dir-Path: " SIMENG_BUILD_DIR "/thisDoesntExistDir/" + ", Core-Count: 1, Socket-Count: 1, SMT: 1, BogoMIPS: 200.00, " + "Features: " + "fp asimd evtstrm sha1 sha2 crc32 atomics fphp asimdhp cpuid " + "asimdrdm " + "fcma dcpop sve, CPU-Implementer: 0x46, CPU-Architecture: 8, " + "CPU-Variant: 0x1, CPU-Part: 0x001, CPU-Revision: 0, Package-Count: " + "1}}"), + "- Special File Directory '" SIMENG_BUILD_DIR + "/thisDoesntExistDir/' does not exist"); +} + +} // namespace simeng 
\ No newline at end of file diff --git a/test/unit/aarch64/ArchInfoTest.cc b/test/unit/aarch64/ArchInfoTest.cc new file mode 100644 index 0000000000..13978639e5 --- /dev/null +++ b/test/unit/aarch64/ArchInfoTest.cc @@ -0,0 +1,74 @@ +#include "gtest/gtest.h" +#include "simeng/arch/aarch64/ArchInfo.hh" +#include "simeng/config/SimInfo.hh" +#include "simeng/version.hh" + +namespace simeng { +namespace arch { +namespace aarch64 { + +class AArch64ArchInfoTest : public ::testing::Test { + public: + AArch64ArchInfoTest() { + simeng::config::SimInfo::setConfig(SIMENG_SOURCE_DIR + "/configs/a64fx_SME.yaml"); + } + + protected: + const std::vector sysRegisterEnums = { + arm64_sysreg::ARM64_SYSREG_DCZID_EL0, + arm64_sysreg::ARM64_SYSREG_FPCR, + arm64_sysreg::ARM64_SYSREG_FPSR, + arm64_sysreg::ARM64_SYSREG_TPIDR_EL0, + arm64_sysreg::ARM64_SYSREG_MIDR_EL1, + arm64_sysreg::ARM64_SYSREG_CNTVCT_EL0, + arm64_sysreg::ARM64_SYSREG_PMCCNTR_EL0, + arm64_sysreg::ARM64_SYSREG_SVCR}; + + const std::vector archRegStruct = { + {8, 32}, + {256, 32}, + {32, 17}, + {1, 1}, + {8, static_cast(sysRegisterEnums.size())}, + {256, 64}}; + + const std::vector physRegStruct = { + {8, 96}, + {256, 128}, + {32, 48}, + {1, 128}, + {8, static_cast(sysRegisterEnums.size())}, + {256, 128}}; + + const std::vector physRegQuants = { + 96, 128, 48, 128, static_cast(sysRegisterEnums.size()), 128}; +}; + +// Test for the getSysRegEnums() function +TEST_F(AArch64ArchInfoTest, getSysRegEnums) { + ArchInfo info = ArchInfo(config::SimInfo::getConfig()); + EXPECT_EQ(info.getSysRegEnums(), sysRegisterEnums); +} + +// Test for the getArchRegStruct() function +TEST_F(AArch64ArchInfoTest, getArchRegStruct) { + ArchInfo info = ArchInfo(config::SimInfo::getConfig()); + EXPECT_EQ(info.getArchRegStruct(), archRegStruct); +} + +// Test for the getPhysRegStruct() function +TEST_F(AArch64ArchInfoTest, getPhysRegStruct) { + ArchInfo info = ArchInfo(config::SimInfo::getConfig()); + EXPECT_EQ(info.getPhysRegStruct(), 
physRegStruct); +} + +// Test for the getPhysRegQuantities() function +TEST_F(AArch64ArchInfoTest, getPhysRegQuantities) { + ArchInfo info = ArchInfo(config::SimInfo::getConfig()); + EXPECT_EQ(info.getPhysRegQuantities(), physRegQuants); +} + +} // namespace aarch64 +} // namespace arch +} // namespace simeng \ No newline at end of file diff --git a/test/unit/aarch64/ArchitectureTest.cc b/test/unit/aarch64/ArchitectureTest.cc new file mode 100644 index 0000000000..63b2805ce3 --- /dev/null +++ b/test/unit/aarch64/ArchitectureTest.cc @@ -0,0 +1,240 @@ +#include + +#include "../ConfigInit.hh" +#include "gtest/gtest.h" +#include "simeng/CoreInstance.hh" +#include "simeng/RegisterFileSet.hh" +#include "simeng/arch/aarch64/Architecture.hh" +#include "simeng/arch/riscv/Architecture.hh" +#include "simeng/span.hh" +#include "simeng/version.hh" + +namespace simeng { +namespace arch { +namespace aarch64 { + +// AArch64 Tests +class AArch64ArchitectureTest : public testing::Test { + public: + AArch64ArchitectureTest() + : kernel(config::SimInfo::getConfig()["CPU-Info"]["Special-File-Dir-Path"] + .as()) { + arch = std::make_unique(kernel); + kernel.createProcess(process); + } + + protected: + // Setting core model to complex OoO model to more verbosely test the + // Architecture class. 
+ ConfigInit configInit = ConfigInit(config::ISA::AArch64, R"YAML({ + Core: { + Simulation-Mode: outoforder, + Vector-Length: 512, + Streaming-Vector-Length: 128 + }, + Ports: { + '0': {Portname: Port 0, Instruction-Group-Support: [FP, SVE]}, + '1': {Portname: Port 1, Instruction-Group-Support: [PREDICATE]}, + '2': {Portname: Port 2, Instruction-Group-Support: [INT_SIMPLE, INT_MUL, STORE_DATA]}, + '3': {Portname: Port 3, Instruction-Group-Support: [FP_SIMPLE, FP_MUL, SVE_SIMPLE, SVE_MUL]}, + '4': {Portname: Port 4, Instruction-Group-Support: [INT_SIMPLE, INT_DIV_OR_SQRT]}, + '5': {Portname: Port 5, Instruction-Group-Support: [LOAD, STORE_ADDRESS, INT_SIMPLE_ARTH_NOSHIFT, INT_SIMPLE_LOGICAL_NOSHIFT, INT_SIMPLE_CMP]}, + '6': {Portname: Port 6, Instruction-Group-Support: [LOAD, STORE_ADDRESS, INT_SIMPLE_ARTH_NOSHIFT, INT_SIMPLE_LOGICAL_NOSHIFT, INT_SIMPLE_CMP]}, + '7': {Portname: Port 7, Instruction-Group-Support: [BRANCH]} + }, + Reservation-Stations: { + '0': {Size: 20, Dispatch-Rate: 2, Ports: [Port 0, Port 1, Port 2]}, + '1': {Size: 20, Dispatch-Rate: 2, Ports: [Port 3, Port 4]}, + '2': {Size: 10, Dispatch-Rate: 1, Ports: [Port 5]}, + '3': {Size: 10, Dispatch-Rate: 1, Ports: [Port 6]}, + '4': {Size: 19, Dispatch-Rate: 1, Ports: [Port 7]}, + }, + Execution-Units: { + '0': {Pipelined: True, Blocking-Groups: [INT_DIV_OR_SQRT, FP_DIV_OR_SQRT, SVE_DIV_OR_SQRT]}, + '1': {Pipelined: True, Blocking-Groups: [INT_DIV_OR_SQRT, FP_DIV_OR_SQRT, SVE_DIV_OR_SQRT]}, + '2': {Pipelined: True, Blocking-Groups: [INT_DIV_OR_SQRT, FP_DIV_OR_SQRT, SVE_DIV_OR_SQRT]}, + '3': {Pipelined: True, Blocking-Groups: [INT_DIV_OR_SQRT, FP_DIV_OR_SQRT, SVE_DIV_OR_SQRT]}, + '4': {Pipelined: True, Blocking-Groups: [INT_DIV_OR_SQRT, FP_DIV_OR_SQRT, SVE_DIV_OR_SQRT]}, + '5': {Pipelined: True, Blocking-Groups: [INT_DIV_OR_SQRT, FP_DIV_OR_SQRT, SVE_DIV_OR_SQRT]}, + '6': {Pipelined: True, Blocking-Groups: [INT_DIV_OR_SQRT, FP_DIV_OR_SQRT, SVE_DIV_OR_SQRT]}, + '7': {Pipelined: True, Blocking-Groups: 
[INT_DIV_OR_SQRT, FP_DIV_OR_SQRT, SVE_DIV_OR_SQRT]} + }, + Latencies: { + '0': {Instruction-Groups: [INT], Execution-Latency: 2, Execution-Throughput: 2}, + '1': {Instruction-Groups: [INT_SIMPLE_ARTH_NOSHIFT, INT_SIMPLE_LOGICAL_NOSHIFT, INT_SIMPLE_CVT], Execution-Latency: 1, Execution-Throughput: 1}, + '2': {Instruction-Groups: [INT_MUL], Execution-Latency: 5, Execution-Throughput: 1}, + '3': {Instruction-Groups: [INT_DIV_OR_SQRT], Execution-Latency: 41, Execution-Throughput: 41}, + '4': {Instruction-Groups: [SCALAR_SIMPLE, VECTOR_SIMPLE_LOGICAL, SVE_SIMPLE_LOGICAL, VECTOR_SIMPLE_CMP, SVE_SIMPLE_CMP], Execution-Latency: 4, Execution-Throughput: 1}, + '5': {Instruction-Groups: [FP_DIV_OR_SQRT], Execution-Latency: 29, Execution-Throughput: 29}, + '6': {Instruction-Groups: [VECTOR_SIMPLE, SVE_SIMPLE, SCALAR_SIMPLE_CVT, FP_MUL, SVE_MUL], Execution-Latency: 9, Execution-Throughput: 1}, + '7': {Instruction-Groups: [SVE_DIV_OR_SQRT], Execution-Latency: 98, Execution-Throughput: 98}, + '8': {Instruction-Groups: [PREDICATE], Execution-Latency: 3, Execution-Throughput: 1}, + '9': {Instruction-Groups: [LOAD_SCALAR, LOAD_VECTOR, STORE_ADDRESS_SCALAR, STORE_ADDRESS_VECTOR], Execution-Latency: 3, Execution-Throughput: 1}, + '10': {Instruction-Groups: [LOAD_SVE, STORE_ADDRESS_SVE], Execution-Latency: 6, Execution-Throughput: 1} + } + })YAML"); + + // fdivr z1.s, p0/m, z1.s, z0.s + std::array validInstrBytes = {0x01, 0x80, 0x8c, 0x65}; + std::array invalidInstrBytes = {0x20, 0x00, 0x02, 0x8c}; + + std::unique_ptr arch; + kernel::Linux kernel; + kernel::LinuxProcess process = kernel::LinuxProcess( + span((char*)validInstrBytes.data(), validInstrBytes.size())); +}; + +TEST_F(AArch64ArchitectureTest, predecode) { + // Test that mis-aligned instruction address results in error + MacroOp output; + uint8_t result = arch->predecode(validInstrBytes.data(), + validInstrBytes.size(), 0x7, output); + Instruction* aarch64Insn = reinterpret_cast(output[0].get()); + EXPECT_EQ(result, 1); + 
EXPECT_EQ(aarch64Insn->getInstructionAddress(), 0x7); + EXPECT_EQ(aarch64Insn->exceptionEncountered(), true); + EXPECT_EQ(aarch64Insn->getException(), InstructionException::MisalignedPC); + + // Test that an invalid instruction returns instruction with an exception + output = MacroOp(); + result = arch->predecode(invalidInstrBytes.data(), invalidInstrBytes.size(), + 0x8, output); + aarch64Insn = reinterpret_cast(output[0].get()); + EXPECT_EQ(result, 4); + EXPECT_EQ(aarch64Insn->getInstructionAddress(), 0x8); + EXPECT_EQ(aarch64Insn->exceptionEncountered(), true); + EXPECT_EQ(aarch64Insn->getException(), + InstructionException::EncodingUnallocated); + + // Test that an instruction can be properly decoded + output = MacroOp(); + result = arch->predecode(validInstrBytes.data(), validInstrBytes.size(), 0x4, + output); + EXPECT_EQ(result, 4); + EXPECT_EQ(output[0]->getInstructionAddress(), 0x4); + EXPECT_EQ(output[0]->exceptionEncountered(), false); +} + +TEST_F(AArch64ArchitectureTest, getSystemRegisterTag) { + // Test incorrect system register will fail + int32_t output = arch->getSystemRegisterTag(-1); + EXPECT_EQ(output, -1); + + // Test for correct behaviour + output = arch->getSystemRegisterTag(ARM64_SYSREG_DCZID_EL0); + EXPECT_EQ(output, 0); +} + +TEST_F(AArch64ArchitectureTest, handleException) { + // Get Instruction + MacroOp insn; + uint8_t bytes = arch->predecode(invalidInstrBytes.data(), + invalidInstrBytes.size(), 0x4, insn); + Instruction* aarch64Insn = reinterpret_cast(insn[0].get()); + EXPECT_EQ(bytes, 4); + EXPECT_EQ(aarch64Insn->getInstructionAddress(), 0x4); + EXPECT_EQ(aarch64Insn->exceptionEncountered(), true); + EXPECT_EQ(aarch64Insn->getException(), + InstructionException::EncodingUnallocated); + + // Get Core + std::string executablePath = ""; + std::vector executableArgs = {}; + std::unique_ptr coreInstance = + std::make_unique(executablePath, executableArgs); + const Core& core = *coreInstance->getCore(); + MemoryInterface& memInt = 
*coreInstance->getDataMemory(); + auto exceptionHandler = arch->handleException(insn[0], core, memInt); + + bool tickRes = exceptionHandler->tick(); + auto result = exceptionHandler->getResult(); + EXPECT_TRUE(tickRes); + EXPECT_TRUE(result.fatal); + // Instruction address for fatal exception is always 0. + EXPECT_EQ(result.instructionAddress, 0x0); +} + +TEST_F(AArch64ArchitectureTest, getInitialState) { + std::vector regs = { + {RegisterType::GENERAL, 31}, + {RegisterType::SYSTEM, + (uint16_t)arch->getSystemRegisterTag(ARM64_SYSREG_DCZID_EL0)}}; + std::vector regVals = {{kernel.getInitialStackPointer(), 8}, + {20, 8}}; + + arch::ProcessStateChange changes = arch->getInitialState(); + EXPECT_EQ(changes.type, arch::ChangeType::REPLACEMENT); + EXPECT_EQ(changes.modifiedRegisters, regs); + EXPECT_EQ(changes.modifiedRegisterValues, regVals); +} + +TEST_F(AArch64ArchitectureTest, getMaxInstructionSize) { + EXPECT_EQ(arch->getMaxInstructionSize(), 4); +} + +TEST_F(AArch64ArchitectureTest, getVectorLength) { + EXPECT_EQ(arch->getVectorLength(), 512); +} + +TEST_F(AArch64ArchitectureTest, getStreamingVectorLength) { + // Default SVL value is 128 + EXPECT_EQ(arch->getStreamingVectorLength(), 128); +} + +TEST_F(AArch64ArchitectureTest, updateSystemTimerRegisters) { + RegisterFileSet regFile = config::SimInfo::getArchRegStruct(); + + uint8_t vctCount = 0; + // In A64FX, Timer frequency = (2.5 * 1e9) / (100 * 1e6) = 18 + uint64_t vctModulo = + (config::SimInfo::getConfig()["Core"]["Clock-Frequency-GHz"].as() * + 1e9) / + (config::SimInfo::getConfig()["Core"]["Timer-Frequency-MHz"] + .as() * + 1e6); + for (int i = 0; i < 30; i++) { + vctCount += (i % vctModulo) == 0 ? 
1 : 0; + arch->updateSystemTimerRegisters(®File, i); + EXPECT_EQ( + regFile + .get({RegisterType::SYSTEM, (uint16_t)arch->getSystemRegisterTag( + ARM64_SYSREG_PMCCNTR_EL0)}) + .get(), + i); + EXPECT_EQ( + regFile + .get({RegisterType::SYSTEM, (uint16_t)arch->getSystemRegisterTag( + ARM64_SYSREG_CNTVCT_EL0)}) + .get(), + vctCount); + } +} + +TEST_F(AArch64ArchitectureTest, getExecutionInfo) { + MacroOp insn; + uint64_t bytes = arch->predecode(validInstrBytes.data(), + validInstrBytes.size(), 0x4, insn); + // Insn[0] = fdivr z1.s, p0/m, z1.s, z0.s + Instruction* aarch64Insn = reinterpret_cast(insn[0].get()); + EXPECT_EQ(bytes, 4); + EXPECT_EQ(aarch64Insn->getInstructionAddress(), 0x4); + EXPECT_EQ(aarch64Insn->exceptionEncountered(), false); + + ExecutionInfo info = arch->getExecutionInfo(*aarch64Insn); + + // Latencies and Port numbers from a64fx.yaml + EXPECT_EQ(info.latency, 98); + EXPECT_EQ(info.stallCycles, 98); + std::vector ports = {0}; + EXPECT_EQ(info.ports, ports); +} + +TEST_F(AArch64ArchitectureTest, get_set_SVCRVal) { + EXPECT_EQ(arch->getSVCRval(), 0); + arch->setSVCRval(3); + EXPECT_EQ(arch->getSVCRval(), 3); +} + +} // namespace aarch64 +} // namespace arch +} // namespace simeng diff --git a/test/unit/aarch64/AuxiliaryFunctionsTest.cc b/test/unit/aarch64/AuxiliaryFunctionsTest.cc new file mode 100644 index 0000000000..554280f944 --- /dev/null +++ b/test/unit/aarch64/AuxiliaryFunctionsTest.cc @@ -0,0 +1,632 @@ +#include "gtest/gtest.h" +#include "simeng/arch/aarch64/helpers/auxiliaryFunctions.hh" + +namespace simeng { +namespace arch { +namespace aarch64 { + +/** `nzcv` Tests */ +TEST(AArch64AuxiliaryFunctionTest, NzcvTest) { + EXPECT_EQ(nzcv(true, true, true, true), 0b00001111); + EXPECT_EQ(nzcv(false, false, false, false), 0b00000000); + EXPECT_EQ(nzcv(true, false, false, true), 0b00001001); + EXPECT_EQ(nzcv(false, true, false, false), 0b00000100); +} + +/** `addWithCarry` Tests */ +TEST(AArch64AuxiliaryFunctionTest, AddWithCarry) { + std::tuple 
u8Res = {111, 0b0010}; + EXPECT_EQ(addWithCarry(123, 244, false), u8Res); + + std::tuple u16Res = {0xFFFD, 0b1000}; + EXPECT_EQ(addWithCarry(0xFFF0, 0x000C, true), u16Res); + + std::tuple u32Res = {2147483649, 0b1001}; + EXPECT_EQ(addWithCarry(1, 2147483647, true), u32Res); + + std::tuple u64Res = {0, 0b0110}; + EXPECT_EQ(addWithCarry(0xFFFFFFFFFFFFFFFF, 1, false), u64Res); +} + +/** `bitfieldManipulate` Tests */ +TEST(AArch64AuxiliaryFunctionTest, BitfieldManipulate) { + // uint8 + EXPECT_EQ(bitfieldManipulate(0xFF, 12, 2, 1, false), 204); + EXPECT_EQ(bitfieldManipulate(16, 3, 0xFF, 24, false), 3); + EXPECT_EQ(bitfieldManipulate(0, 64, 4, 8, false), 64); + EXPECT_EQ(bitfieldManipulate(64, 0, 8, 4, false), 0); + + EXPECT_EQ(bitfieldManipulate(0xFF, 12, 2, 1, true), 204); + EXPECT_EQ(bitfieldManipulate(16, 3, 0xFF, 24, true), 3); + EXPECT_EQ(bitfieldManipulate(0, 64, 4, 8, true), 0); + EXPECT_EQ(bitfieldManipulate(64, 8, 8, 4, true), 0); + + // uint16 + EXPECT_EQ(bitfieldManipulate(0xFFFF, 12, 2, 1, false), 49164); + EXPECT_EQ(bitfieldManipulate(16, 3, 0xFF, 24, false), 3); + EXPECT_EQ(bitfieldManipulate(0, 64, 4, 8, false), 64); + EXPECT_EQ(bitfieldManipulate(64, 0, 8, 4, false), 0); + + EXPECT_EQ(bitfieldManipulate(0xFFFF, 12, 2, 1, true), 49164); + EXPECT_EQ(bitfieldManipulate(16, 3, 0xFF, 24, true), 3); + EXPECT_EQ(bitfieldManipulate(0, 64, 4, 8, true), 0); + EXPECT_EQ(bitfieldManipulate(64, 8, 8, 4, true), 8); + + // uint32 + EXPECT_EQ(bitfieldManipulate(0xFFFFFFFF, 12, 2, 1, false), + 3221225484); + EXPECT_EQ(bitfieldManipulate(16, 3, 0xFF, 24, false), 33); + EXPECT_EQ(bitfieldManipulate(0, 64, 4, 8, false), 64); + EXPECT_EQ(bitfieldManipulate(64, 0, 8, 4, false), 0); + + EXPECT_EQ(bitfieldManipulate(0xFFFFFFFF, 12, 2, 1, true), + 3221225484); + EXPECT_EQ(bitfieldManipulate(16, 3, 0xFF, 24, true), 33); + EXPECT_EQ(bitfieldManipulate(0, 64, 4, 8, true), 0); + EXPECT_EQ(bitfieldManipulate(64, 8, 8, 4, true), 8); + + // uint64 + 
EXPECT_EQ(bitfieldManipulate(0xFFFFFFFFFFFFFFFF, 12, 2, 1, false), + 13835058055282163724u); + EXPECT_EQ(bitfieldManipulate(16, 3, 0xFF, 24, false), 33); + EXPECT_EQ(bitfieldManipulate(0, 64, 4, 8, false), 64); + EXPECT_EQ(bitfieldManipulate(64, 0, 8, 4, false), 0); + + EXPECT_EQ(bitfieldManipulate(0xFFFFFFFFFFFFFFFF, 12, 2, 1, true), + 13835058055282163724u); + EXPECT_EQ(bitfieldManipulate(16, 3, 0xFF, 24, true), 33); + EXPECT_EQ(bitfieldManipulate(0, 64, 4, 8, true), 0); + EXPECT_EQ(bitfieldManipulate(64, 8, 8, 4, true), 8); +} + +/** `conditionHolds` Tests */ +TEST(AArch64AuxiliaryFunctionTest, ConditionHolds) { + // Run each condition at least twice, one which we expect to be true, one we + // expect to be false + + // Inverse False + // EQ/NE + EXPECT_TRUE(conditionHolds(0b0000, 0b0100)); + EXPECT_FALSE(conditionHolds(0b0000, 0b1011)); + + // CS/CC + EXPECT_TRUE(conditionHolds(0b0010, 0b0010)); + EXPECT_FALSE(conditionHolds(0b0010, 0b1101)); + + // MI/PL + EXPECT_TRUE(conditionHolds(0b0100, 0b1000)); + EXPECT_FALSE(conditionHolds(0b0100, 0b0111)); + + // VS/VC + EXPECT_TRUE(conditionHolds(0b0110, 0b0001)); + EXPECT_FALSE(conditionHolds(0b0110, 0b1110)); + + // HI/LS + EXPECT_TRUE(conditionHolds(0b1000, 0b1010)); + EXPECT_FALSE(conditionHolds(0b1000, 0b1111)); + EXPECT_FALSE(conditionHolds(0b1000, 0b1001)); + + // GE/LT + EXPECT_TRUE(conditionHolds(0b1010, 0b1001)); + EXPECT_TRUE(conditionHolds(0b1010, 0b0000)); + EXPECT_FALSE(conditionHolds(0b1010, 0b1000)); + + // GT/LE + EXPECT_TRUE(conditionHolds(0b1100, 0b1001)); + EXPECT_TRUE(conditionHolds(0b1100, 0b0000)); + EXPECT_FALSE(conditionHolds(0b1100, 0b0001)); + EXPECT_FALSE(conditionHolds(0b1100, 0b1000)); + EXPECT_FALSE(conditionHolds(0b1100, 0b1101)); + + // Condition of 0b111 always returns `true` + // AL + EXPECT_TRUE(conditionHolds(0b1110, 0b1111)); + EXPECT_TRUE(conditionHolds(0b1110, 0b0000)); + + // Inverse True + // EQ/NE + EXPECT_FALSE(conditionHolds(0b0001, 0b0100)); + 
EXPECT_TRUE(conditionHolds(0b0001, 0b1011)); + + // CS/CC + EXPECT_FALSE(conditionHolds(0b0011, 0b0010)); + EXPECT_TRUE(conditionHolds(0b0011, 0b1101)); + + // MI/PL + EXPECT_FALSE(conditionHolds(0b0101, 0b1000)); + EXPECT_TRUE(conditionHolds(0b0101, 0b0111)); + + // VS/VC + EXPECT_FALSE(conditionHolds(0b0111, 0b0001)); + EXPECT_TRUE(conditionHolds(0b0111, 0b1110)); + + // HI/LS + EXPECT_FALSE(conditionHolds(0b1001, 0b1010)); + EXPECT_TRUE(conditionHolds(0b1001, 0b1111)); + EXPECT_TRUE(conditionHolds(0b1001, 0b1001)); + + // GE/LT + EXPECT_FALSE(conditionHolds(0b1011, 0b1001)); + EXPECT_FALSE(conditionHolds(0b1011, 0b0000)); + EXPECT_TRUE(conditionHolds(0b1011, 0b1000)); + + // GT/LE + EXPECT_FALSE(conditionHolds(0b1101, 0b1001)); + EXPECT_FALSE(conditionHolds(0b1101, 0b0000)); + EXPECT_TRUE(conditionHolds(0b1101, 0b0001)); + EXPECT_TRUE(conditionHolds(0b1101, 0b1000)); + EXPECT_TRUE(conditionHolds(0b1101, 0b1101)); + + // AL + // Cond=0b111 and inverse of 1 always returns `true` + EXPECT_TRUE(conditionHolds(0b1111, 0b1111)); + EXPECT_TRUE(conditionHolds(0b1111, 0b0000)); +} + +/** `extendValue` Tests */ +TEST(AArch64AuxiliaryFunctionTest, ExtendValue) { + // Test special case + EXPECT_EQ(extendValue(123, ARM64_EXT_INVALID, 0), 123); + + // Results validated on XCI and A64FX hardware + EXPECT_EQ(extendValue(270, ARM64_EXT_UXTB, 3), 112); + EXPECT_EQ(extendValue(65560, ARM64_EXT_UXTH, 3), 192); + EXPECT_EQ(extendValue(0xFFFFFFFF, ARM64_EXT_UXTW, 3), 34359738360); + EXPECT_EQ(extendValue(0x0F0F0F0F0F0F0F01, ARM64_EXT_UXTX, 4), + 0xF0F0F0F0F0F0F010); + + EXPECT_EQ(extendValue(133, ARM64_EXT_SXTB, 3), -984); + EXPECT_EQ(extendValue(32768, ARM64_EXT_SXTH, 3), -262144); + EXPECT_EQ(extendValue(2147483648, ARM64_EXT_SXTW, 3), -17179869184); + EXPECT_EQ(extendValue(0x8000000000000000, ARM64_EXT_SXTX, 3), 0); +} + +/** `getNZCVfromPred` Tests */ +TEST(AArch64AuxiliaryFunctionTest, getNZCVfromPred) { + uint64_t vl = 128; + // VL 128 will only use array[0] + 
EXPECT_EQ(getNZCVfromPred( + {0, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF}, + vl, 1), + 0b0110); + EXPECT_EQ( + getNZCVfromPred({0xFFFFFFFFFFFFFFFF, 0, 0, 0x300000000000000C}, vl, 2), + 0b1000); + EXPECT_EQ(getNZCVfromPred( + {0xE000000000000000, 0xE000000000000000, 0xE000000000000000, 0}, + vl, 4), + 0b0010); + EXPECT_EQ(getNZCVfromPred({0, 0x8000000000000001, 0, 0}, vl, 8), 0b0110); + + vl = 256; + // VL 256 will only use array[0] + EXPECT_EQ(getNZCVfromPred( + {0, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF}, + vl, 1), + 0b0110); + EXPECT_EQ( + getNZCVfromPred({0xFFFFFFFFFFFFFFFF, 0, 0, 0x300000000000000C}, vl, 2), + 0b1000); + EXPECT_EQ(getNZCVfromPred( + {0xE000000000000000, 0xE000000000000000, 0xE000000000000000, 0}, + vl, 4), + 0b0010); + EXPECT_EQ(getNZCVfromPred({0, 0x8000000000000001, 0, 0}, vl, 8), 0b0110); + + vl = 512; + // VL 512 will only use array[0] + EXPECT_EQ(getNZCVfromPred( + {0, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF}, + vl, 1), + 0b0110); + EXPECT_EQ( + getNZCVfromPred({0xFFFFFFFFFFFFFFFF, 0, 0, 0x300000000000000C}, vl, 2), + 0b1000); + EXPECT_EQ(getNZCVfromPred( + {0xE000000000000000, 0xE000000000000000, 0xE000000000000000, 0}, + vl, 4), + 0b0010); + EXPECT_EQ(getNZCVfromPred({0, 0x8000000000000001, 0, 0}, vl, 8), 0b0110); + + vl = 1024; + // VL 1024 will only use array[0, 1] + EXPECT_EQ(getNZCVfromPred( + {0, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF}, + vl, 1), + 0b0000); + EXPECT_EQ( + getNZCVfromPred({0xFFFFFFFFFFFFFFFF, 0, 0, 0x300000000000000C}, vl, 2), + 0b1010); + EXPECT_EQ(getNZCVfromPred( + {0xE000000000000000, 0xE000000000000000, 0xE000000000000000, 0}, + vl, 4), + 0b0010); + EXPECT_EQ(getNZCVfromPred({0, 0x8000000000000000, 0, 0}, vl, 8), 0b0010); + + vl = 2048; + // VL 2048 will only use array[0, 1, 2, 3] + EXPECT_EQ(getNZCVfromPred( + {0, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF}, + vl, 1), + 0b0000); + EXPECT_EQ( + 
getNZCVfromPred({0xFFFFFFFFFFFFFFFF, 0, 0, 0x300000000000000C}, vl, 2), + 0b1010); + EXPECT_EQ(getNZCVfromPred( + {0xE000000000000000, 0xE000000000000000, 0xE000000000000000, 0}, + vl, 4), + 0b0010); + EXPECT_EQ(getNZCVfromPred({0, 0x8000000000000001, 0, 0}, vl, 8), 0b0010); +} + +/** `mulhi` Tests */ +TEST(AArch64AuxiliaryFunctionTest, Mulhi) { + EXPECT_EQ(mulhi(0xFFFFFFFFFFFFFFFF, 2), 1); + EXPECT_EQ(mulhi(1, 245), 0); + + EXPECT_EQ(mulhi(0xF000000000000000, 4), 3); + EXPECT_EQ(mulhi(0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF), 0xFFFFFFFFFFFFFFFE); + EXPECT_EQ(mulhi(0xFFFFFFFFFFFFFFFF, 0xFFFFFFFF00000000), 0xFFFFFFFEFFFFFFFF); + EXPECT_EQ(mulhi(0xFFFFFFFFFFFFFFFF, 0xFFFFFFFF), 0xFFFFFFFE); +} + +/** `sveGetPattern` Tests */ +TEST(AArch64AuxiliaryFunctionTest, sveGetPattern) { + uint16_t vl = 128; + EXPECT_EQ(sveGetPattern("", 64, vl), 2); + EXPECT_EQ(sveGetPattern("", 16, vl), 8); + EXPECT_EQ(sveGetPattern("all", 64, vl), 2); + EXPECT_EQ(sveGetPattern("all", 16, vl), 8); + EXPECT_EQ(sveGetPattern("notValid", 64, vl), 2); + EXPECT_EQ(sveGetPattern("notValid", 16, vl), 8); + + EXPECT_EQ(sveGetPattern("vl1", 64, vl), 1); + EXPECT_EQ(sveGetPattern("vl2", 64, vl), 2); + EXPECT_EQ(sveGetPattern("vl3", 64, vl), 0); + EXPECT_EQ(sveGetPattern("vl4", 64, vl), 0); + EXPECT_EQ(sveGetPattern("vl5", 64, vl), 0); + EXPECT_EQ(sveGetPattern("vl6", 64, vl), 0); + EXPECT_EQ(sveGetPattern("vl7", 64, vl), 0); + EXPECT_EQ(sveGetPattern("vl8", 64, vl), 0); + EXPECT_EQ(sveGetPattern("vl16", 64, vl), 0); + EXPECT_EQ(sveGetPattern("vl32", 64, vl), 0); + EXPECT_EQ(sveGetPattern("vl64", 64, vl), 0); + EXPECT_EQ(sveGetPattern("vl128", 64, vl), 0); + EXPECT_EQ(sveGetPattern("vl256", 64, vl), 0); + + EXPECT_EQ(sveGetPattern("mul4", 8, vl), 16); + EXPECT_EQ(sveGetPattern("mul3", 8, vl), 15); + + vl = 256; + EXPECT_EQ(sveGetPattern("", 64, vl), 4); + EXPECT_EQ(sveGetPattern("", 16, vl), 16); + EXPECT_EQ(sveGetPattern("all", 64, vl), 4); + EXPECT_EQ(sveGetPattern("all", 16, vl), 16); + 
EXPECT_EQ(sveGetPattern("notValid", 64, vl), 4); + EXPECT_EQ(sveGetPattern("notValid", 16, vl), 16); + + EXPECT_EQ(sveGetPattern("vl1", 64, vl), 1); + EXPECT_EQ(sveGetPattern("vl2", 64, vl), 2); + EXPECT_EQ(sveGetPattern("vl3", 64, vl), 3); + EXPECT_EQ(sveGetPattern("vl4", 64, vl), 4); + EXPECT_EQ(sveGetPattern("vl5", 64, vl), 0); + EXPECT_EQ(sveGetPattern("vl6", 64, vl), 0); + EXPECT_EQ(sveGetPattern("vl7", 64, vl), 0); + EXPECT_EQ(sveGetPattern("vl8", 64, vl), 0); + EXPECT_EQ(sveGetPattern("vl16", 64, vl), 0); + EXPECT_EQ(sveGetPattern("vl32", 64, vl), 0); + EXPECT_EQ(sveGetPattern("vl64", 64, vl), 0); + EXPECT_EQ(sveGetPattern("vl128", 64, vl), 0); + EXPECT_EQ(sveGetPattern("vl256", 64, vl), 0); + + EXPECT_EQ(sveGetPattern("mul4", 8, vl), 32); + EXPECT_EQ(sveGetPattern("mul3", 8, vl), 30); + + vl = 512; + EXPECT_EQ(sveGetPattern("", 64, vl), 8); + EXPECT_EQ(sveGetPattern("", 16, vl), 32); + EXPECT_EQ(sveGetPattern("all", 64, vl), 8); + EXPECT_EQ(sveGetPattern("all", 16, vl), 32); + EXPECT_EQ(sveGetPattern("notValid", 64, vl), 8); + EXPECT_EQ(sveGetPattern("notValid", 16, vl), 32); + + EXPECT_EQ(sveGetPattern("vl1", 64, vl), 1); + EXPECT_EQ(sveGetPattern("vl2", 64, vl), 2); + EXPECT_EQ(sveGetPattern("vl3", 64, vl), 3); + EXPECT_EQ(sveGetPattern("vl4", 64, vl), 4); + EXPECT_EQ(sveGetPattern("vl5", 64, vl), 5); + EXPECT_EQ(sveGetPattern("vl6", 64, vl), 6); + EXPECT_EQ(sveGetPattern("vl7", 64, vl), 7); + EXPECT_EQ(sveGetPattern("vl8", 64, vl), 8); + EXPECT_EQ(sveGetPattern("vl16", 64, vl), 0); + EXPECT_EQ(sveGetPattern("vl32", 64, vl), 0); + EXPECT_EQ(sveGetPattern("vl64", 64, vl), 0); + EXPECT_EQ(sveGetPattern("vl128", 64, vl), 0); + EXPECT_EQ(sveGetPattern("vl256", 64, vl), 0); + + EXPECT_EQ(sveGetPattern("mul4", 8, vl), 64); + EXPECT_EQ(sveGetPattern("mul3", 8, vl), 63); + + vl = 1024; + EXPECT_EQ(sveGetPattern("", 64, vl), 16); + EXPECT_EQ(sveGetPattern("", 16, vl), 64); + EXPECT_EQ(sveGetPattern("all", 64, vl), 16); + EXPECT_EQ(sveGetPattern("all", 16, vl), 
64); + EXPECT_EQ(sveGetPattern("notValid", 64, vl), 16); + EXPECT_EQ(sveGetPattern("notValid", 16, vl), 64); + + EXPECT_EQ(sveGetPattern("vl1", 64, vl), 1); + EXPECT_EQ(sveGetPattern("vl2", 64, vl), 2); + EXPECT_EQ(sveGetPattern("vl3", 64, vl), 3); + EXPECT_EQ(sveGetPattern("vl4", 64, vl), 4); + EXPECT_EQ(sveGetPattern("vl5", 64, vl), 5); + EXPECT_EQ(sveGetPattern("vl6", 64, vl), 6); + EXPECT_EQ(sveGetPattern("vl7", 64, vl), 7); + EXPECT_EQ(sveGetPattern("vl8", 64, vl), 8); + EXPECT_EQ(sveGetPattern("vl16", 64, vl), 16); + EXPECT_EQ(sveGetPattern("vl32", 64, vl), 0); + EXPECT_EQ(sveGetPattern("vl64", 64, vl), 0); + EXPECT_EQ(sveGetPattern("vl128", 64, vl), 0); + EXPECT_EQ(sveGetPattern("vl256", 64, vl), 0); + + EXPECT_EQ(sveGetPattern("mul4", 8, vl), 128); + EXPECT_EQ(sveGetPattern("mul3", 8, vl), 126); + + vl = 2048; + EXPECT_EQ(sveGetPattern("", 64, vl), 32); + EXPECT_EQ(sveGetPattern("", 16, vl), 128); + EXPECT_EQ(sveGetPattern("all", 64, vl), 32); + EXPECT_EQ(sveGetPattern("all", 16, vl), 128); + EXPECT_EQ(sveGetPattern("notValid", 64, vl), 32); + EXPECT_EQ(sveGetPattern("notValid", 16, vl), 128); + + EXPECT_EQ(sveGetPattern("vl1", 64, vl), 1); + EXPECT_EQ(sveGetPattern("vl2", 64, vl), 2); + EXPECT_EQ(sveGetPattern("vl3", 64, vl), 3); + EXPECT_EQ(sveGetPattern("vl4", 64, vl), 4); + EXPECT_EQ(sveGetPattern("vl5", 64, vl), 5); + EXPECT_EQ(sveGetPattern("vl6", 64, vl), 6); + EXPECT_EQ(sveGetPattern("vl7", 64, vl), 7); + EXPECT_EQ(sveGetPattern("vl8", 64, vl), 8); + EXPECT_EQ(sveGetPattern("vl16", 64, vl), 16); + EXPECT_EQ(sveGetPattern("vl32", 64, vl), 32); + EXPECT_EQ(sveGetPattern("vl64", 64, vl), 0); + EXPECT_EQ(sveGetPattern("vl128", 64, vl), 0); + EXPECT_EQ(sveGetPattern("vl256", 64, vl), 0); + + EXPECT_EQ(sveGetPattern("mul4", 8, vl), 256); + EXPECT_EQ(sveGetPattern("mul3", 8, vl), 255); +} + +/** `ShiftValue` Tests */ +TEST(AArch64AuxiliaryFunctionTest, ShiftValueTest_LSL) { + // 8-bit + const uint8_t a = 0x0F; + EXPECT_EQ(shiftValue(a, ARM64_SFT_LSL, 4), 
0xF0); + + const uint8_t b = 0xF0; + EXPECT_EQ(shiftValue(b, ARM64_SFT_LSL, 7), 0x00); + + EXPECT_EQ(shiftValue(b, ARM64_SFT_LSL, 0), b); + + // 16-bit + const uint16_t c = 0x00FF; + EXPECT_EQ(shiftValue(c, ARM64_SFT_LSL, 8), 0xFF00); + + const uint16_t d = 0xFF00; + EXPECT_EQ(shiftValue(d, ARM64_SFT_LSL, 15), 0x0000); + + EXPECT_EQ(shiftValue(d, ARM64_SFT_LSL, 0), d); + + // 32-bit + const uint32_t e = 0x0000FFFF; + EXPECT_EQ(shiftValue(e, ARM64_SFT_LSL, 16), 0xFFFF0000); + + const uint32_t f = 0xFFFF0000; + EXPECT_EQ(shiftValue(f, ARM64_SFT_LSL, 31), 0x00000000); + + EXPECT_EQ(shiftValue(f, ARM64_SFT_LSL, 0), f); + + // 64-bit + const uint64_t g = 0x00000000FFFFFFFF; + EXPECT_EQ(shiftValue(g, ARM64_SFT_LSL, 32), 0xFFFFFFFF00000000); + + const uint64_t h = 0xFFFFFFFF00000000; + EXPECT_EQ(shiftValue(h, ARM64_SFT_LSL, 63), 0x0000000000000000); + + EXPECT_EQ(shiftValue(h, ARM64_SFT_LSL, 0), h); +} + +TEST(AArch64AuxiliaryFunctionTest, ShiftValueTest_LSR) { + // 8-bit + const uint8_t a = 0x0F; + EXPECT_EQ(shiftValue(a, ARM64_SFT_LSR, 4), 0x00); + + const uint8_t b = 0xF0; + EXPECT_EQ(shiftValue(b, ARM64_SFT_LSR, 7), 0x01); + + EXPECT_EQ(shiftValue(b, ARM64_SFT_LSR, 0), b); + + // 16-bit + const uint16_t c = 0x00FF; + EXPECT_EQ(shiftValue(c, ARM64_SFT_LSR, 8), 0x0); + + const uint16_t d = 0xFF00; + EXPECT_EQ(shiftValue(d, ARM64_SFT_LSR, 15), 0x0001); + + EXPECT_EQ(shiftValue(d, ARM64_SFT_LSR, 0), d); + + // 32-bit + const uint32_t e = 0x0000FFFF; + EXPECT_EQ(shiftValue(e, ARM64_SFT_LSR, 16), 0x00000000); + + const uint32_t f = 0xFFFF0000; + EXPECT_EQ(shiftValue(f, ARM64_SFT_LSR, 31), 0x00000001); + + EXPECT_EQ(shiftValue(f, ARM64_SFT_LSR, 0), f); + + // 64-bit + const uint64_t g = 0x00000000FFFFFFFF; + EXPECT_EQ(shiftValue(g, ARM64_SFT_LSR, 32), 0x0000000000000000); + + const uint64_t h = 0xFFFFFFFF00000000; + EXPECT_EQ(shiftValue(h, ARM64_SFT_LSR, 63), 0x0000000000000001); + + EXPECT_EQ(shiftValue(h, ARM64_SFT_LSR, 0), h); +} + +TEST(AArch64AuxiliaryFunctionTest, 
ShiftValueTest_ASR) { + // 8-bit + const uint8_t a = 0x0F; + EXPECT_EQ(shiftValue(a, ARM64_SFT_ASR, 4), 0x00); + + const uint8_t b = 0xF0; + EXPECT_EQ(shiftValue(b, ARM64_SFT_ASR, 7), 0xFF); + + EXPECT_EQ(shiftValue(b, ARM64_SFT_ASR, 0), b); + + // 16-bit + const uint16_t c = 0x00FF; + EXPECT_EQ(shiftValue(c, ARM64_SFT_ASR, 8), 0x0000); + + const uint16_t d = 0xFF00; + EXPECT_EQ(shiftValue(d, ARM64_SFT_ASR, 15), 0xFFFF); + + EXPECT_EQ(shiftValue(d, ARM64_SFT_ASR, 0), d); + + // 32-bit + const uint32_t e = 0x0000FFFF; + EXPECT_EQ(shiftValue(e, ARM64_SFT_ASR, 16), 0x00000000); + + const uint32_t f = 0xFFFF0000; + EXPECT_EQ(shiftValue(f, ARM64_SFT_ASR, 31), 0xFFFFFFFF); + + EXPECT_EQ(shiftValue(f, ARM64_SFT_ASR, 0), f); + + // 64-bit + const uint64_t g = 0x00000000FFFFFFFF; + EXPECT_EQ(shiftValue(g, ARM64_SFT_ASR, 32), 0x0000000000000000); + + const uint64_t h = 0xFFFFFFFF00000000; + EXPECT_EQ(shiftValue(h, ARM64_SFT_ASR, 63), 0xFFFFFFFFFFFFFFFF); + + EXPECT_EQ(shiftValue(h, ARM64_SFT_ASR, 0), h); +} + +TEST(AArch64AuxiliaryFunctionTest, ShiftValueTest_ROR) { + // 8-bit + const uint8_t a = 0x0F; + EXPECT_EQ(shiftValue(a, ARM64_SFT_ROR, 4), 0xF0); + + const uint8_t b = 0xF0; + EXPECT_EQ(shiftValue(b, ARM64_SFT_ROR, 7), 0xE1); + + EXPECT_EQ(shiftValue(b, ARM64_SFT_ROR, 0), b); + + // 16-bit + const uint16_t c = 0x00FF; + EXPECT_EQ(shiftValue(c, ARM64_SFT_ROR, 8), 0xFF00); + + const uint16_t d = 0xFF00; + EXPECT_EQ(shiftValue(d, ARM64_SFT_ROR, 15), 0xFE01); + + EXPECT_EQ(shiftValue(d, ARM64_SFT_ROR, 0), d); + + // 32-bit + const uint32_t e = 0x0000FFFF; + EXPECT_EQ(shiftValue(e, ARM64_SFT_ROR, 16), 0xFFFF0000); + + const uint32_t f = 0xFFFF0000; + EXPECT_EQ(shiftValue(f, ARM64_SFT_ROR, 31), 0xFFFE0001); + + EXPECT_EQ(shiftValue(f, ARM64_SFT_ROR, 0), f); + + // 64-bit + const uint64_t g = 0x00000000FFFFFFFF; + EXPECT_EQ(shiftValue(g, ARM64_SFT_ROR, 32), 0xFFFFFFFF00000000); + + const uint64_t h = 0xFFFFFFFF00000000; + EXPECT_EQ(shiftValue(h, ARM64_SFT_ROR, 63), 
0xFFFFFFFE00000001); + + EXPECT_EQ(shiftValue(h, ARM64_SFT_ROR, 0), h); +} + +TEST(AArch64AuxiliaryFunctionTest, ShiftValueTest_MSL) { + // 8-bit + const uint8_t a = 0x0F; + EXPECT_EQ(shiftValue(a, ARM64_SFT_MSL, 4), 0xFF); + + const uint8_t b = 0xF0; + EXPECT_EQ(shiftValue(b, ARM64_SFT_MSL, 7), 0x7F); + + EXPECT_EQ(shiftValue(b, ARM64_SFT_MSL, 0), b); + + // 16-bit + const uint16_t c = 0x00FF; + EXPECT_EQ(shiftValue(c, ARM64_SFT_MSL, 8), 0xFFFF); + + const uint16_t d = 0xFF00; + EXPECT_EQ(shiftValue(d, ARM64_SFT_MSL, 15), 0x7FFF); + + EXPECT_EQ(shiftValue(d, ARM64_SFT_MSL, 0), d); + + // 32-bit + const uint32_t e = 0x0000FFFF; + EXPECT_EQ(shiftValue(e, ARM64_SFT_MSL, 16), 0xFFFFFFFF); + + const uint32_t f = 0xFFFF0000; + EXPECT_EQ(shiftValue(f, ARM64_SFT_MSL, 31), 0x7FFFFFFF); + + EXPECT_EQ(shiftValue(f, ARM64_SFT_MSL, 0), f); + + // 64-bit + const uint64_t g = 0x00000000FFFFFFFF; + EXPECT_EQ(shiftValue(g, ARM64_SFT_MSL, 32), 0xFFFFFFFFFFFFFFFF); + + const uint64_t h = 0xFFFFFFFF00000000; + EXPECT_EQ(shiftValue(h, ARM64_SFT_MSL, 63), 0x7FFFFFFFFFFFFFFF); + + EXPECT_EQ(shiftValue(h, ARM64_SFT_MSL, 0), h); +} + +TEST(AArch64AuxiliaryFunctionTest, ShiftValueTest_INVALID) { + // 8-bit + const uint8_t a = 0x0F; + EXPECT_EQ(shiftValue(a, ARM64_SFT_INVALID, 4), a); + + const uint8_t b = 0xF0; + EXPECT_EQ(shiftValue(b, ARM64_SFT_INVALID, 7), b); + + EXPECT_EQ(shiftValue(b, ARM64_SFT_INVALID, 0), b); + + // 16-bit + const uint16_t c = 0x00FF; + EXPECT_EQ(shiftValue(c, ARM64_SFT_INVALID, 8), c); + + const uint16_t d = 0xFF00; + EXPECT_EQ(shiftValue(d, ARM64_SFT_INVALID, 15), d); + + EXPECT_EQ(shiftValue(d, ARM64_SFT_INVALID, 0), d); + + // 32-bit + const uint32_t e = 0x0000FFFF; + EXPECT_EQ(shiftValue(e, ARM64_SFT_INVALID, 16), e); + + const uint32_t f = 0xFFFF0000; + EXPECT_EQ(shiftValue(f, ARM64_SFT_INVALID, 31), f); + + EXPECT_EQ(shiftValue(f, ARM64_SFT_INVALID, 0), f); + + // 64-bit + const uint64_t g = 0x00000000FFFFFFFF; + EXPECT_EQ(shiftValue(g, ARM64_SFT_INVALID, 
32), g); + + const uint64_t h = 0xFFFFFFFF00000000; + EXPECT_EQ(shiftValue(h, ARM64_SFT_INVALID, 63), h); + + EXPECT_EQ(shiftValue(h, ARM64_SFT_INVALID, 0), h); +} + +} // namespace aarch64 +} // namespace arch +} // namespace simeng \ No newline at end of file diff --git a/test/unit/aarch64/ExceptionHandlerTest.cc b/test/unit/aarch64/ExceptionHandlerTest.cc new file mode 100644 index 0000000000..72ce6f1a22 --- /dev/null +++ b/test/unit/aarch64/ExceptionHandlerTest.cc @@ -0,0 +1,708 @@ +#include "../ConfigInit.hh" +#include "../MockCore.hh" +#include "../MockInstruction.hh" +#include "../MockMemoryInterface.hh" +#include "gmock/gmock.h" +#include "simeng/ArchitecturalRegisterFileSet.hh" +#include "simeng/arch/aarch64/Architecture.hh" +#include "simeng/arch/aarch64/ExceptionHandler.hh" +#include "simeng/arch/aarch64/Instruction.hh" + +namespace simeng { +namespace arch { +namespace aarch64 { + +using ::testing::HasSubstr; +using ::testing::Return; +using ::testing::ReturnRef; + +class AArch64ExceptionHandlerTest : public ::testing::Test { + public: + AArch64ExceptionHandlerTest() + : kernel(config::SimInfo::getConfig()["CPU-Info"]["Special-File-Dir-Path"] + .as()), + arch(kernel), + physRegFileSet(config::SimInfo::getArchRegStruct()), + archRegFileSet(physRegFileSet) {} + + protected: + ConfigInit configInit = ConfigInit(config::ISA::AArch64, ""); + + MockCore core; + MockMemoryInterface memory; + kernel::Linux kernel; + Architecture arch; + + RegisterFileSet physRegFileSet; + ArchitecturalRegisterFileSet archRegFileSet; + + // fdivr z1.s, p0/m, z1.s, z0.s --- Just need a valid instruction to hijack + const std::array validInstrBytes = {0x01, 0x80, 0x8c, 0x65}; + + /** Helper constants for AArch64 general-purpose registers. 
*/ + static constexpr Register R0 = {RegisterType::GENERAL, 0}; + static constexpr Register R1 = {RegisterType::GENERAL, 1}; + static constexpr Register R2 = {RegisterType::GENERAL, 2}; + static constexpr Register R3 = {RegisterType::GENERAL, 3}; + static constexpr Register R4 = {RegisterType::GENERAL, 4}; + static constexpr Register R5 = {RegisterType::GENERAL, 5}; + static constexpr Register R8 = {RegisterType::GENERAL, 8}; +}; + +// The following exceptions are tested in /test/regression/aarch64/Exception.cc +// - InstructionException::StreamingModeUpdate, +// - InstructionException::ZAregisterStatusUpdate, +// - InstructionException::SMZAUpdate +// All system calls are tested in /test/regression/aarch64/Syscall.cc + +// Test that a syscall is processed sucessfully +TEST_F(AArch64ExceptionHandlerTest, testSyscall) { + // Create "syscall" instruction + uint64_t insnAddr = 0x4; + MacroOp uops; + arch.predecode(validInstrBytes.data(), validInstrBytes.size(), insnAddr, + uops); + InstructionException exception = InstructionException::SupervisorCall; + std::shared_ptr insn = std::make_shared( + arch, static_cast(uops[0].get())->getMetadata(), exception); + insn->setInstructionAddress(insnAddr); + + // Setup register file for `uname` syscall (chosen as minimal functionality) + archRegFileSet.set(R0, RegisterValue(1234, 8)); + archRegFileSet.set(R8, RegisterValue(160, 8)); + + // Create ExceptionHandler + ExceptionHandler handler(insn, core, memory, kernel); + + // Tick exceptionHandler + ON_CALL(core, getArchitecturalRegisterFileSet()) + .WillByDefault(ReturnRef(archRegFileSet)); + EXPECT_CALL(core, getArchitecturalRegisterFileSet()).Times(1); + bool retVal = handler.tick(); + ExceptionResult result = handler.getResult(); + + EXPECT_TRUE(retVal); + EXPECT_FALSE(result.fatal); + EXPECT_EQ(result.instructionAddress, insnAddr + 4); + EXPECT_EQ(result.stateChange.type, ChangeType::REPLACEMENT); + std::vector modRegs = {R0}; + 
EXPECT_EQ(result.stateChange.modifiedRegisters, modRegs); + std::vector modRegVals = {{0ull, 8}}; + EXPECT_EQ(result.stateChange.modifiedRegisterValues, modRegVals); + std::vector modMemTargets = {{1234, 6}, + {1234 + 65, 25}, + {1234 + (65 * 2), 7}, + {1234 + (65 * 3), 39}, + {1234 + (65 * 4), 8}}; + EXPECT_EQ(result.stateChange.memoryAddresses, modMemTargets); + std::vector modMemVals = { + RegisterValue("Linux"), RegisterValue("simeng.hpc.cs.bris.ac.uk"), + RegisterValue("4.14.0"), + RegisterValue("#1 SimEng Mon Apr 29 16:28:37 UTC 2019"), + RegisterValue("aarch64")}; + EXPECT_EQ(result.stateChange.memoryAddressValues, modMemVals); +} + +// Test that `readStringThen()` operates as expected +TEST_F(AArch64ExceptionHandlerTest, readStringThen) { + // Create new mock instruction and ExceptionHandler + std::shared_ptr uopPtr(new MockInstruction); + ExceptionHandler handler(uopPtr, core, memory, kernel); + + // Initialise variables + size_t retVal = 0; + char* buffer; + buffer = (char*)malloc(256); + for (int i = 0; i < 256; i++) { + buffer[i] = 'q'; + } + uint64_t addr = 1024; + int maxLen = kernel::Linux::LINUX_PATH_MAX; + + MemoryAccessTarget target1 = {addr, 1}; + MemoryReadResult res1 = {target1, RegisterValue(0xAB, 1), 1}; + span res1Span = span(&res1, 1); + + MemoryAccessTarget target2 = {addr + 1, 1}; + MemoryReadResult res2 = {target2, RegisterValue(static_cast('\0'), 1), + 1}; + span res2Span = span(&res2, 1); + + // On first call to readStringThen, expect return of false and retVal to still + // be 0, and buffer to be filled with `q` + MemoryAccessTarget tar = {addr, 1}; + EXPECT_CALL(memory, requestRead(tar, 0)).Times(1); + bool outcome = + handler.readStringThen(buffer, addr, maxLen, [&retVal](auto length) { + retVal = length; + return true; + }); + EXPECT_FALSE(outcome); + EXPECT_EQ(retVal, 0); + for (int i = 0; i < 256; i++) { + EXPECT_EQ(buffer[i], 'q'); + } + + // ResumeHandling (called on tick()) should now be set to `readStringThen()` + // so call 
this for our second pass. + ON_CALL(memory, getCompletedReads()) + .WillByDefault(Return(span())); + EXPECT_CALL(memory, getCompletedReads()).Times(1); + outcome = handler.tick(); + // No memory reads completed yet so again expect to return false and no change + // to `retval` or buffer + EXPECT_FALSE(outcome); + EXPECT_EQ(retVal, 0); + for (int i = 0; i < 256; i++) { + EXPECT_EQ(buffer[i], 'q'); + } + + // Call tick() again, but mimic a memory read completing + tar = {addr + 1, 1}; + ON_CALL(memory, getCompletedReads()).WillByDefault(Return(res1Span)); + EXPECT_CALL(memory, getCompletedReads()).Times(1); + EXPECT_CALL(memory, requestRead(tar, 0)).Times(1); + outcome = handler.tick(); + // Completed read but still not complete, so outcome should be false, retVal + // unchanged, but some data in the buffer + EXPECT_FALSE(outcome); + EXPECT_EQ(retVal, 0); + for (int i = 0; i < 256; i++) { + if (i == 0) { + EXPECT_EQ(buffer[i], (char)0xAB); + } else { + EXPECT_EQ(buffer[i], 'q'); + } + } + + // Call tick() for a final time, getting the final read result + ON_CALL(memory, getCompletedReads()).WillByDefault(Return(res2Span)); + EXPECT_CALL(memory, getCompletedReads()).Times(1); + outcome = handler.tick(); + // End of string '\0' found so expect `then()` to have been called, the + // outcome to be true, and the buffer again to have updated + EXPECT_TRUE(outcome); + EXPECT_EQ(retVal, 1); + for (int i = 0; i < 256; i++) { + if (i == 0) { + EXPECT_EQ(buffer[i], (char)0xAB); + } else if (i == 1) { + EXPECT_EQ(buffer[i], '\0'); + } else { + EXPECT_EQ(buffer[i], 'q'); + } + } +} + +// Test that in `readStringThen()` if max length is 0, then is called straight +// away +TEST_F(AArch64ExceptionHandlerTest, readStringThen_maxLen0) { + // Create new mock instruction and ExceptionHandler + std::shared_ptr uopPtr(new MockInstruction); + ExceptionHandler handler(uopPtr, core, memory, kernel); + size_t retVal = 100; + char* buffer; + buffer = (char*)malloc(256); + for (int i = 0; i < 
256; i++) { + buffer[i] = 'q'; + } + uint64_t addr = 1024; + int maxLen = 0; + + bool outcome = + handler.readStringThen(buffer, addr, maxLen, [&retVal](auto length) { + retVal = length; + return true; + }); + EXPECT_TRUE(outcome); + EXPECT_EQ(retVal, -1); + for (int i = 0; i < 256; i++) { + EXPECT_EQ(buffer[i], 'q'); + } +} + +// Test that in `readStringThen()` if max length has been met, then() is called +// and no more string is fetched +TEST_F(AArch64ExceptionHandlerTest, readStringThen_maxLenReached) { + // Create new mock instruction and ExceptionHandler + std::shared_ptr uopPtr(new MockInstruction); + ExceptionHandler handler(uopPtr, core, memory, kernel); + + // Initialise variables + size_t retVal = 100; + char* buffer; + buffer = (char*)malloc(256); + for (int i = 0; i < 256; i++) { + buffer[i] = 'q'; + } + uint64_t addr = 1024; + int maxLen = 1; + + MemoryAccessTarget target1 = {addr, 1}; + MemoryReadResult res1 = {target1, RegisterValue(0xAB, 1), 1}; + span res1Span = span(&res1, 1); + + // On first call to readStringThen, expect return of false and retVal to still + // be 0, and buffer to be filled with `q` + MemoryAccessTarget tar = {addr, 1}; + EXPECT_CALL(memory, requestRead(tar, 0)).Times(1); + bool outcome = + handler.readStringThen(buffer, addr, maxLen, [&retVal](auto length) { + retVal = length; + return true; + }); + EXPECT_FALSE(outcome); + EXPECT_EQ(retVal, 100); + for (int i = 0; i < 256; i++) { + EXPECT_EQ(buffer[i], 'q'); + } + + // ResumeHandling (called on tick()) should now be set to `readStringThen()` + // so call this for our second pass. 
+ ON_CALL(memory, getCompletedReads()) + .WillByDefault(Return(span())); + EXPECT_CALL(memory, getCompletedReads()).Times(1); + outcome = handler.tick(); + // No memory reads completed yet so again expect to return false and no change + // to `retval` or buffer + EXPECT_FALSE(outcome); + EXPECT_EQ(retVal, 100); + for (int i = 0; i < 256; i++) { + EXPECT_EQ(buffer[i], 'q'); + } + + // Call tick() again, but mimic a memory read completing + ON_CALL(memory, getCompletedReads()).WillByDefault(Return(res1Span)); + EXPECT_CALL(memory, getCompletedReads()).Times(1); + outcome = handler.tick(); + // Completed read and maxLength reached. Expect then() to have been called, + // the outcome to be true, and the buffer to have updated. RetVal should be + // maxLength + EXPECT_TRUE(outcome); + EXPECT_EQ(retVal, 1); + for (int i = 0; i < 256; i++) { + if (i == 0) { + EXPECT_EQ(buffer[i], (char)0xAB); + } else { + EXPECT_EQ(buffer[i], 'q'); + } + } +} + +// Test that `readBufferThen()` operates as expected +TEST_F(AArch64ExceptionHandlerTest, readBufferThen) { + // Create new mock instruction and ExceptionHandler + std::shared_ptr uopPtr(new MockInstruction); + uopPtr->setSequenceId(5); + ExceptionHandler handler(uopPtr, core, memory, kernel); + + // Initialise needed values for function + uint64_t retVal = 0; + uint64_t ptr = 0; + uint64_t length = 192; + + // Initialise data to "read" from MockMemory + std::vector dataVec(length, 'q'); + std::vector dataVec2(length, 'q'); + // Initialise the two required targets (128-bytes per read request in + // readBufferThen()) + MemoryAccessTarget tar1 = {ptr, 128}; + MemoryAccessTarget tar2 = {ptr + 128, static_cast(length - 128)}; + // Initialise "responses" from the MockMemory + MemoryReadResult res1 = {tar1, RegisterValue(dataVec.data() + ptr, 128), + uopPtr->getSequenceId()}; + MemoryReadResult res2 = { + tar2, RegisterValue(dataVec.data() + ptr + 128, length - 128), + uopPtr->getSequenceId()}; + + // Confirm that internal dataBuffer 
is empty + EXPECT_EQ(handler.dataBuffer.size(), 0); + + // Initial call to readBufferThen - expect resumeHandling to be updated to + // readBufferThen and a memory read request to have occurred + EXPECT_CALL(memory, requestRead(tar1, uopPtr->getSequenceId())).Times(1); + EXPECT_CALL(memory, getCompletedReads()).Times(1); + bool outcome = handler.readBufferThen(ptr, length, [&retVal]() { + retVal = 10; + return true; + }); + EXPECT_FALSE(outcome); + EXPECT_EQ(retVal, 0); + EXPECT_EQ(handler.dataBuffer.size(), 0); + + // Can now call tick() - on call, emulate no reads completed + ON_CALL(memory, getCompletedReads()) + .WillByDefault(Return(span())); + EXPECT_CALL(memory, getCompletedReads()).Times(1); + outcome = handler.tick(); + EXPECT_FALSE(outcome); + EXPECT_EQ(retVal, 0); + EXPECT_EQ(handler.dataBuffer.size(), 0); + + // Call tick() again, simulating completed read + new read requested as still + // data to fetch + ON_CALL(memory, getCompletedReads()) + .WillByDefault(Return(span(&res1, 1))); + // Make sure clearCompletedReads() alters functionality of getCompletedReads() + ON_CALL(memory, clearCompletedReads()) + .WillByDefault(::testing::InvokeWithoutArgs([&]() { + ON_CALL(memory, getCompletedReads()) + .WillByDefault(Return(span())); + })); + EXPECT_CALL(memory, getCompletedReads()).Times(2); + EXPECT_CALL(memory, clearCompletedReads()).Times(1); + EXPECT_CALL(memory, requestRead(tar2, uopPtr->getSequenceId())).Times(1); + outcome = handler.tick(); + EXPECT_FALSE(outcome); + EXPECT_EQ(retVal, 0); + EXPECT_EQ(handler.dataBuffer.size(), 128); + for (int i = 0; i < handler.dataBuffer.size(); i++) { + EXPECT_EQ(handler.dataBuffer[i], 'q'); + } + + // One final call to tick() to get last bits of data from memory and call + // then() + ON_CALL(memory, getCompletedReads()) + .WillByDefault(Return(span(&res2, 1))); + EXPECT_CALL(memory, getCompletedReads()).Times(1); + EXPECT_CALL(memory, clearCompletedReads()).Times(1); + outcome = handler.tick(); + 
EXPECT_TRUE(outcome); + EXPECT_EQ(retVal, 10); + EXPECT_EQ(handler.dataBuffer.size(), length); + for (int i = 0; i < length; i++) { + EXPECT_EQ(handler.dataBuffer[i], static_cast('q')); + } +} + +// Test that `readBufferThen()` calls then if length is 0 +TEST_F(AArch64ExceptionHandlerTest, readBufferThen_length0) { + // Create new mock instruction and ExceptionHandler + std::shared_ptr uopPtr(new MockInstruction); + ExceptionHandler handler(uopPtr, core, memory, kernel); + + const size_t expectedVal = 10; + uint64_t retVal = 0; + uint64_t ptr = 0; + uint64_t length = 0; + + bool outcome = handler.readBufferThen(ptr, length, [&retVal]() { + retVal = 10; + return true; + }); + EXPECT_TRUE(outcome); + EXPECT_EQ(retVal, expectedVal); +} + +// Test that all AArch64 exception types print as expected +TEST_F(AArch64ExceptionHandlerTest, printException) { + ON_CALL(core, getArchitecturalRegisterFileSet()) + .WillByDefault(ReturnRef(archRegFileSet)); + uint64_t insnAddr = 0x4; + MacroOp uops; + + // Create instruction for EncodingUnallocated + arch.predecode(validInstrBytes.data(), validInstrBytes.size(), insnAddr, + uops); + InstructionException exception = InstructionException::EncodingUnallocated; + std::shared_ptr insn = std::make_shared( + arch, static_cast(uops[0].get())->getMetadata(), exception); + // Create ExceptionHandler + ExceptionHandler handler_0(insn, core, memory, kernel); + // Capture std::cout and tick exceptionHandler + std::stringstream buffer; + std::streambuf* sbuf = std::cout.rdbuf(); // Save cout's buffer + std::cout.rdbuf(buffer.rdbuf()); // Redirect cout to buffer + handler_0.printException(*static_cast(insn.get())); + std::cout.rdbuf(sbuf); // Restore cout + EXPECT_THAT(buffer.str(), + HasSubstr("[SimEng:ExceptionHandler] Encountered illegal " + "instruction exception")); + buffer.str(std::string()); + uops.clear(); + + // Create instruction for ExecutionNotYetImplemented + arch.predecode(validInstrBytes.data(), validInstrBytes.size(), insnAddr, 
+ uops); + exception = InstructionException::ExecutionNotYetImplemented; + insn = std::make_shared( + arch, static_cast(uops[0].get())->getMetadata(), exception); + // Create ExceptionHandler + ExceptionHandler handler_1(insn, core, memory, kernel); + // Capture std::cout and tick exceptionHandler + sbuf = std::cout.rdbuf(); // Save cout's buffer + std::cout.rdbuf(buffer.rdbuf()); // Redirect cout to buffer + handler_1.printException(*static_cast(insn.get())); + std::cout.rdbuf(sbuf); // Restore cout + EXPECT_THAT(buffer.str(), + HasSubstr("[SimEng:ExceptionHandler] Encountered execution " + "not-yet-implemented exception")); + buffer.str(std::string()); + uops.clear(); + + // Create instruction for AliasNotYetImplemented + arch.predecode(validInstrBytes.data(), validInstrBytes.size(), insnAddr, + uops); + exception = InstructionException::AliasNotYetImplemented; + insn = std::make_shared( + arch, static_cast(uops[0].get())->getMetadata(), exception); + // Create ExceptionHandler + ExceptionHandler handler_2(insn, core, memory, kernel); + // Capture std::cout and tick exceptionHandler + sbuf = std::cout.rdbuf(); // Save cout's buffer + std::cout.rdbuf(buffer.rdbuf()); // Redirect cout to buffer + handler_2.printException(*static_cast(insn.get())); + std::cout.rdbuf(sbuf); // Restore cout + EXPECT_THAT(buffer.str(), HasSubstr("[SimEng:ExceptionHandler] Encountered " + "alias not-yet-implemented exception")); + buffer.str(std::string()); + uops.clear(); + + // Create instruction for MisalignedPC + arch.predecode(validInstrBytes.data(), validInstrBytes.size(), insnAddr, + uops); + exception = InstructionException::MisalignedPC; + insn = std::make_shared( + arch, static_cast(uops[0].get())->getMetadata(), exception); + // Create ExceptionHandler + ExceptionHandler handler_3(insn, core, memory, kernel); + // Capture std::cout and tick exceptionHandler + sbuf = std::cout.rdbuf(); // Save cout's buffer + std::cout.rdbuf(buffer.rdbuf()); // Redirect cout to buffer + 
handler_3.printException(*static_cast(insn.get())); + std::cout.rdbuf(sbuf); // Restore cout + EXPECT_THAT(buffer.str(), + HasSubstr("[SimEng:ExceptionHandler] Encountered misaligned " + "program counter exception")); + buffer.str(std::string()); + uops.clear(); + + // Create instruction for DataAbort + arch.predecode(validInstrBytes.data(), validInstrBytes.size(), insnAddr, + uops); + exception = InstructionException::DataAbort; + insn = std::make_shared( + arch, static_cast(uops[0].get())->getMetadata(), exception); + // Create ExceptionHandler + ExceptionHandler handler_4(insn, core, memory, kernel); + // Capture std::cout and tick exceptionHandler + sbuf = std::cout.rdbuf(); // Save cout's buffer + std::cout.rdbuf(buffer.rdbuf()); // Redirect cout to buffer + handler_4.printException(*static_cast(insn.get())); + std::cout.rdbuf(sbuf); // Restore cout + EXPECT_THAT( + buffer.str(), + HasSubstr("[SimEng:ExceptionHandler] Encountered data abort exception")); + buffer.str(std::string()); + uops.clear(); + + // Create instruction for SupervisorCall + arch.predecode(validInstrBytes.data(), validInstrBytes.size(), insnAddr, + uops); + exception = InstructionException::SupervisorCall; + insn = std::make_shared( + arch, static_cast(uops[0].get())->getMetadata(), exception); + // Create ExceptionHandler + ExceptionHandler handler_5(insn, core, memory, kernel); + // Capture std::cout and tick exceptionHandler + sbuf = std::cout.rdbuf(); // Save cout's buffer + std::cout.rdbuf(buffer.rdbuf()); // Redirect cout to buffer + handler_5.printException(*static_cast(insn.get())); + std::cout.rdbuf(sbuf); // Restore cout + EXPECT_THAT( + buffer.str(), + HasSubstr( + "[SimEng:ExceptionHandler] Encountered supervisor call exception")); + buffer.str(std::string()); + uops.clear(); + + // Create instruction for HypervisorCall + arch.predecode(validInstrBytes.data(), validInstrBytes.size(), insnAddr, + uops); + exception = InstructionException::HypervisorCall; + insn = 
std::make_shared( + arch, static_cast(uops[0].get())->getMetadata(), exception); + // Create ExceptionHandler + ExceptionHandler handler_6(insn, core, memory, kernel); + // Capture std::cout and tick exceptionHandler + sbuf = std::cout.rdbuf(); // Save cout's buffer + std::cout.rdbuf(buffer.rdbuf()); // Redirect cout to buffer + handler_6.printException(*static_cast(insn.get())); + std::cout.rdbuf(sbuf); // Restore cout + EXPECT_THAT( + buffer.str(), + HasSubstr( + "[SimEng:ExceptionHandler] Encountered hypervisor call exception")); + buffer.str(std::string()); + uops.clear(); + + // Create instruction for SecureMonitorCall + arch.predecode(validInstrBytes.data(), validInstrBytes.size(), insnAddr, + uops); + exception = InstructionException::SecureMonitorCall; + insn = std::make_shared( + arch, static_cast(uops[0].get())->getMetadata(), exception); + // Create ExceptionHandler + ExceptionHandler handler_7(insn, core, memory, kernel); + // Capture std::cout and tick exceptionHandler + sbuf = std::cout.rdbuf(); // Save cout's buffer + std::cout.rdbuf(buffer.rdbuf()); // Redirect cout to buffer + handler_7.printException(*static_cast(insn.get())); + std::cout.rdbuf(sbuf); // Restore cout + EXPECT_THAT(buffer.str(), HasSubstr("[SimEng:ExceptionHandler] Encountered " + "secure monitor call exception")); + buffer.str(std::string()); + uops.clear(); + + // Create instruction for NoAvailablePort + arch.predecode(validInstrBytes.data(), validInstrBytes.size(), insnAddr, + uops); + exception = InstructionException::NoAvailablePort; + insn = std::make_shared( + arch, static_cast(uops[0].get())->getMetadata(), exception); + // Create ExceptionHandler + ExceptionHandler handler_8(insn, core, memory, kernel); + // Capture std::cout and tick exceptionHandler + sbuf = std::cout.rdbuf(); // Save cout's buffer + std::cout.rdbuf(buffer.rdbuf()); // Redirect cout to buffer + handler_8.printException(*static_cast(insn.get())); + std::cout.rdbuf(sbuf); // Restore cout + 
EXPECT_THAT(buffer.str(), HasSubstr("[SimEng:ExceptionHandler] Encountered " + "unsupported execution port exception")); + buffer.str(std::string()); + uops.clear(); + + // Create instruction for UnmappedSysReg + arch.predecode(validInstrBytes.data(), validInstrBytes.size(), insnAddr, + uops); + exception = InstructionException::UnmappedSysReg; + insn = std::make_shared( + arch, static_cast(uops[0].get())->getMetadata(), exception); + // Create ExceptionHandler + ExceptionHandler handler_9(insn, core, memory, kernel); + // Capture std::cout and tick exceptionHandler + sbuf = std::cout.rdbuf(); // Save cout's buffer + std::cout.rdbuf(buffer.rdbuf()); // Redirect cout to buffer + handler_9.printException(*static_cast(insn.get())); + std::cout.rdbuf(sbuf); // Restore cout + EXPECT_THAT(buffer.str(), HasSubstr("[SimEng:ExceptionHandler] Encountered " + "unmapped system register exception")); + buffer.str(std::string()); + uops.clear(); + + // Create instruction for StreamingModeUpdate + arch.predecode(validInstrBytes.data(), validInstrBytes.size(), insnAddr, + uops); + exception = InstructionException::StreamingModeUpdate; + insn = std::make_shared( + arch, static_cast(uops[0].get())->getMetadata(), exception); + // Create ExceptionHandler + ExceptionHandler handler_10(insn, core, memory, kernel); + // Capture std::cout and tick exceptionHandler + sbuf = std::cout.rdbuf(); // Save cout's buffer + std::cout.rdbuf(buffer.rdbuf()); // Redirect cout to buffer + handler_10.printException(*static_cast(insn.get())); + std::cout.rdbuf(sbuf); // Restore cout + EXPECT_THAT(buffer.str(), HasSubstr("[SimEng:ExceptionHandler] Encountered " + "streaming mode update exception")); + buffer.str(std::string()); + uops.clear(); + + // Create instruction for ZAregisterStatusUpdate + arch.predecode(validInstrBytes.data(), validInstrBytes.size(), insnAddr, + uops); + exception = InstructionException::ZAregisterStatusUpdate; + insn = std::make_shared( + arch, 
static_cast(uops[0].get())->getMetadata(), exception); + // Create ExceptionHandler + ExceptionHandler handler_11(insn, core, memory, kernel); + // Capture std::cout and tick exceptionHandler + sbuf = std::cout.rdbuf(); // Save cout's buffer + std::cout.rdbuf(buffer.rdbuf()); // Redirect cout to buffer + handler_11.printException(*static_cast(insn.get())); + std::cout.rdbuf(sbuf); // Restore cout + EXPECT_THAT(buffer.str(), HasSubstr("[SimEng:ExceptionHandler] Encountered " + "ZA register status update exception")); + buffer.str(std::string()); + uops.clear(); + + // Create instruction for SMZAUpdate + arch.predecode(validInstrBytes.data(), validInstrBytes.size(), insnAddr, + uops); + exception = InstructionException::SMZAUpdate; + insn = std::make_shared( + arch, static_cast(uops[0].get())->getMetadata(), exception); + // Create ExceptionHandler + ExceptionHandler handler_12(insn, core, memory, kernel); + // Capture std::cout and tick exceptionHandler + sbuf = std::cout.rdbuf(); // Save cout's buffer + std::cout.rdbuf(buffer.rdbuf()); // Redirect cout to buffer + handler_12.printException(*static_cast(insn.get())); + std::cout.rdbuf(sbuf); // Restore cout + EXPECT_THAT(buffer.str(), + HasSubstr("[SimEng:ExceptionHandler] Encountered streaming mode " + "& ZA register status update exception")); + buffer.str(std::string()); + uops.clear(); + + // Create instruction for ZAdisabled + arch.predecode(validInstrBytes.data(), validInstrBytes.size(), insnAddr, + uops); + exception = InstructionException::ZAdisabled; + insn = std::make_shared( + arch, static_cast(uops[0].get())->getMetadata(), exception); + // Create ExceptionHandler + ExceptionHandler handler_13(insn, core, memory, kernel); + // Capture std::cout and tick exceptionHandler + sbuf = std::cout.rdbuf(); // Save cout's buffer + std::cout.rdbuf(buffer.rdbuf()); // Redirect cout to buffer + handler_13.printException(*static_cast(insn.get())); + std::cout.rdbuf(sbuf); // Restore cout + EXPECT_THAT(buffer.str(), + 
HasSubstr("[SimEng:ExceptionHandler] Encountered ZA register " + "access attempt when disabled exception")); + buffer.str(std::string()); + uops.clear(); + + // Create instruction for SMdisabled + arch.predecode(validInstrBytes.data(), validInstrBytes.size(), insnAddr, + uops); + exception = InstructionException::SMdisabled; + insn = std::make_shared( + arch, static_cast(uops[0].get())->getMetadata(), exception); + // Create ExceptionHandler + ExceptionHandler handler_14(insn, core, memory, kernel); + // Capture std::cout and tick exceptionHandler + sbuf = std::cout.rdbuf(); // Save cout's buffer + std::cout.rdbuf(buffer.rdbuf()); // Redirect cout to buffer + handler_14.printException(*static_cast(insn.get())); + std::cout.rdbuf(sbuf); // Restore cout + EXPECT_THAT(buffer.str(), + HasSubstr("[SimEng:ExceptionHandler] Encountered SME execution " + "attempt when streaming mode disabled exception")); + buffer.str(std::string()); + uops.clear(); + + // Create instruction for default case + arch.predecode(validInstrBytes.data(), validInstrBytes.size(), insnAddr, + uops); + exception = InstructionException::None; + insn = std::make_shared( + arch, static_cast(uops[0].get())->getMetadata(), exception); + // Create ExceptionHandler + ExceptionHandler handler_15(insn, core, memory, kernel); + // Capture std::cout and tick exceptionHandler + sbuf = std::cout.rdbuf(); // Save cout's buffer + std::cout.rdbuf(buffer.rdbuf()); // Redirect cout to buffer + handler_15.printException(*static_cast(insn.get())); + std::cout.rdbuf(sbuf); // Restore cout + EXPECT_THAT(buffer.str(), + HasSubstr("[SimEng:ExceptionHandler] Encountered unknown (id: " + "0) exception")); + buffer.str(std::string()); + uops.clear(); +} + +} // namespace aarch64 +} // namespace arch +} // namespace simeng \ No newline at end of file diff --git a/test/unit/aarch64/InstructionTest.cc b/test/unit/aarch64/InstructionTest.cc new file mode 100644 index 0000000000..91c2e6946c --- /dev/null +++ 
b/test/unit/aarch64/InstructionTest.cc @@ -0,0 +1,633 @@ +#include "../ConfigInit.hh" +#include "../MockArchitecture.hh" +#include "arch/aarch64/InstructionMetadata.hh" +#include "gmock/gmock.h" +#include "simeng/arch/aarch64/Instruction.hh" +#include "simeng/version.hh" + +namespace simeng { +namespace arch { +namespace aarch64 { + +// AArch64 Instruction Tests +class AArch64InstructionTest : public testing::Test { + public: + AArch64InstructionTest() + : os(config::SimInfo::getConfig()["CPU-Info"]["Special-File-Dir-Path"] + .as()), + arch(os) { + // Create InstructionMetadata objects + cs_open(CS_ARCH_ARM64, CS_MODE_ARM, &capstoneHandle); + cs_option(capstoneHandle, CS_OPT_DETAIL, CS_OPT_ON); + + // Create instructions which cover the 3 main types: Arithmetic, Memory, + // Branch. This allows for full testing of the Instruction class. + + // fdiv + cs_insn rawInsn_fdiv; + cs_detail rawDetail_fdiv; + rawInsn_fdiv.detail = &rawDetail_fdiv; + size_t size_fdiv = 4; + uint64_t address_fdiv = 0; + const uint8_t* encoding_fdiv = + reinterpret_cast(fdivInstrBytes.data()); + cs_disasm_iter(capstoneHandle, &encoding_fdiv, &size_fdiv, &address_fdiv, + &rawInsn_fdiv); + fdivMetadata = std::make_unique(rawInsn_fdiv); + + // ldp + cs_insn rawInsn_ldp; + cs_detail rawDetail_ldp; + rawInsn_ldp.detail = &rawDetail_ldp; + size_t size_ldp = 4; + uint64_t address_ldp = 0; + const uint8_t* encoding_ldp = + reinterpret_cast(ldpInstrBytes.data()); + cs_disasm_iter(capstoneHandle, &encoding_ldp, &size_ldp, &address_ldp, + &rawInsn_ldp); + ldpMetadata = std::make_unique(rawInsn_ldp); + + // cbz + cs_insn rawInsn_cbz; + cs_detail rawDetail_cbz; + rawInsn_cbz.detail = &rawDetail_cbz; + size_t size_cbz = 4; + uint64_t address_cbz = 0; + const uint8_t* encoding_cbz = + reinterpret_cast(cbzInstrBytes.data()); + cs_disasm_iter(capstoneHandle, &encoding_cbz, &size_cbz, &address_cbz, + &rawInsn_cbz); + cbzMetadata = std::make_unique(rawInsn_cbz); + + const uint8_t* badEncoding = + 
reinterpret_cast(invalidInstrBytes.data()); + invalidMetadata = std::make_unique(badEncoding); + } + + ~AArch64InstructionTest() { cs_close(&capstoneHandle); } + + protected: + ConfigInit configInit = ConfigInit(config::ISA::AArch64, ""); + + // fdivr z1.s, p0/m, z1.s, z0.s + std::array fdivInstrBytes = {0x01, 0x80, 0x8c, 0x65}; + // ldp x1, x2, [x3] + std::array ldpInstrBytes = {0x61, 0x08, 0x40, 0xA9}; + // cbz x2, #0x28 + std::array cbzInstrBytes = {0x42, 0x01, 0x00, 0xB4}; + std::array invalidInstrBytes = {0x20, 0x00, 0x02, 0x8c}; + + // A Capstone decoding library handle, for decoding instructions. + csh capstoneHandle; + + kernel::Linux os; + Architecture arch; + + std::unique_ptr fdivMetadata; + std::unique_ptr ldpMetadata; + std::unique_ptr cbzMetadata; + std::unique_ptr invalidMetadata; + std::unique_ptr uopInfo; + InstructionException exception; +}; + +// Test that a valid instruction is created correctly +TEST_F(AArch64InstructionTest, validInsn) { + // Insn is `fdivr z1.s, p0/m, z1.s, z0.s` + Instruction insn = Instruction(arch, *fdivMetadata.get(), MicroOpInfo()); + // Define instruction's registers + std::vector destRegs = {{RegisterType::VECTOR, 1}}; + std::vector srcRegs = {{RegisterType::PREDICATE, 0}, + {RegisterType::VECTOR, 1}, + {RegisterType::VECTOR, 0}}; + const std::vector ports = {1, 2, 3}; + insn.setExecutionInfo({3, 4, ports}); + insn.setInstructionAddress(0x48); + insn.setInstructionId(11); + insn.setSequenceId(12); + + // Ensure that all instruction values are as expected after creation + BranchPrediction pred = {false, 0}; + bool matchingPred = (insn.getBranchPrediction() == pred) ? 
true : false; + EXPECT_EQ(&insn.getArchitecture(), &arch); + EXPECT_EQ(insn.getBranchAddress(), 0); + EXPECT_TRUE(matchingPred); + EXPECT_EQ(insn.getBranchType(), BranchType::Unknown); + EXPECT_EQ(insn.getData().size(), 0); + EXPECT_EQ(insn.getDestinationRegisters().size(), destRegs.size()); + for (int i = 0; i < destRegs.size(); i++) { + EXPECT_EQ(insn.getDestinationRegisters()[i], destRegs[i]); + } + EXPECT_EQ(insn.getException(), InstructionException::None); + EXPECT_EQ(insn.getGeneratedAddresses().size(), 0); + EXPECT_EQ(insn.getGroup(), InstructionGroups::SVE_DIV_OR_SQRT); + EXPECT_EQ(insn.getInstructionAddress(), 0x48); + EXPECT_EQ(insn.getInstructionId(), 11); + EXPECT_EQ(insn.getKnownOffset(), 0); + EXPECT_EQ(insn.getLatency(), 3); + EXPECT_EQ(insn.getLSQLatency(), 1); + EXPECT_EQ(&insn.getMetadata(), fdivMetadata.get()); + EXPECT_EQ(insn.getMicroOpIndex(), 0); + // Results vector resized at decode + EXPECT_EQ(insn.getResults().size(), 1); + EXPECT_EQ(insn.getSequenceId(), 12); + // Operands vector resized at decode + EXPECT_EQ(insn.getSourceOperands().size(), 3); + EXPECT_EQ(insn.getSourceRegisters().size(), srcRegs.size()); + for (int i = 0; i < srcRegs.size(); i++) { + EXPECT_EQ(insn.getSourceRegisters()[i], srcRegs[i]); + EXPECT_FALSE(insn.isOperandReady(i)); + } + EXPECT_EQ(insn.getStallCycles(), 4); + EXPECT_EQ(insn.getSupportedPorts(), ports); + + EXPECT_FALSE(insn.canExecute()); + EXPECT_FALSE(insn.isStoreAddress()); + EXPECT_FALSE(insn.isStoreData()); + EXPECT_FALSE(insn.isLoad()); + EXPECT_FALSE(insn.isBranch()); + EXPECT_FALSE(insn.exceptionEncountered()); + EXPECT_FALSE(insn.hasExecuted()); + EXPECT_FALSE(insn.canCommit()); + EXPECT_TRUE(insn.hasAllData()); + EXPECT_FALSE(insn.wasBranchTaken()); + EXPECT_FALSE(insn.isFlushed()); + EXPECT_FALSE(insn.isMicroOp()); + EXPECT_TRUE(insn.isLastMicroOp()); + EXPECT_FALSE(insn.isWaitingCommit()); +} + +// Test that an invalid instruction can be created - invalid due to byte stream 
+TEST_F(AArch64InstructionTest, invalidInsn_1) { + Instruction insn = Instruction(arch, *invalidMetadata.get(), MicroOpInfo()); + // Define instruction's registers + std::vector destRegs = {}; + std::vector srcRegs = {}; + const std::vector ports = {}; + insn.setExecutionInfo({1, 1, ports}); + insn.setInstructionAddress(0x44); + insn.setInstructionId(13); + insn.setSequenceId(14); + + // Ensure that all instruction values are as expected after creation + BranchPrediction pred = {false, 0}; + bool matchingPred = (insn.getBranchPrediction() == pred) ? true : false; + EXPECT_EQ(&insn.getArchitecture(), &arch); + EXPECT_EQ(insn.getBranchAddress(), 0); + EXPECT_TRUE(matchingPred); + EXPECT_EQ(insn.getBranchType(), BranchType::Unknown); + EXPECT_EQ(insn.getData().size(), 0); + EXPECT_EQ(insn.getDestinationRegisters().size(), destRegs.size()); + for (int i = 0; i < destRegs.size(); i++) { + EXPECT_EQ(insn.getDestinationRegisters()[i], destRegs[i]); + } + EXPECT_EQ(insn.getException(), InstructionException::EncodingUnallocated); + EXPECT_EQ(insn.getGeneratedAddresses().size(), 0); + // Default Group + EXPECT_EQ(insn.getGroup(), InstructionGroups::INT_SIMPLE_ARTH_NOSHIFT); + EXPECT_EQ(insn.getInstructionAddress(), 0x44); + EXPECT_EQ(insn.getInstructionId(), 13); + EXPECT_EQ(insn.getKnownOffset(), 0); + EXPECT_EQ(insn.getLatency(), 1); + EXPECT_EQ(insn.getLSQLatency(), 1); + EXPECT_EQ(&insn.getMetadata(), invalidMetadata.get()); + EXPECT_EQ(insn.getMicroOpIndex(), 0); + // Results vector resized at decode + EXPECT_EQ(insn.getResults().size(), 0); + EXPECT_EQ(insn.getSequenceId(), 14); + // Operands vector resized at decode + EXPECT_EQ(insn.getSourceOperands().size(), 0); + EXPECT_EQ(insn.getSourceRegisters().size(), srcRegs.size()); + for (int i = 0; i < srcRegs.size(); i++) { + EXPECT_EQ(insn.getSourceRegisters()[i], srcRegs[i]); + EXPECT_FALSE(insn.isOperandReady(i)); + } + EXPECT_EQ(insn.getStallCycles(), 1); + EXPECT_EQ(insn.getSupportedPorts(), ports); + + 
EXPECT_TRUE(insn.canExecute()); + EXPECT_FALSE(insn.isStoreAddress()); + EXPECT_FALSE(insn.isStoreData()); + EXPECT_FALSE(insn.isLoad()); + EXPECT_FALSE(insn.isBranch()); + EXPECT_TRUE(insn.exceptionEncountered()); + EXPECT_FALSE(insn.hasExecuted()); + EXPECT_FALSE(insn.canCommit()); + EXPECT_TRUE(insn.hasAllData()); + EXPECT_FALSE(insn.wasBranchTaken()); + EXPECT_FALSE(insn.isFlushed()); + EXPECT_FALSE(insn.isMicroOp()); + EXPECT_TRUE(insn.isLastMicroOp()); + EXPECT_FALSE(insn.isWaitingCommit()); +} + +// Test that an invalid instruction can be created - invalid due to exception +// provided +TEST_F(AArch64InstructionTest, invalidInsn_2) { + Instruction insn = Instruction(arch, *invalidMetadata.get(), + InstructionException::HypervisorCall); + // Define instruction's registers + std::vector destRegs = {}; + std::vector srcRegs = {}; + const std::vector ports = {}; + insn.setExecutionInfo({1, 1, ports}); + insn.setInstructionAddress(0x43); + insn.setInstructionId(15); + insn.setSequenceId(16); + + // Ensure that all instruction values are as expected after creation + BranchPrediction pred = {false, 0}; + bool matchingPred = (insn.getBranchPrediction() == pred) ? 
true : false; + EXPECT_EQ(&insn.getArchitecture(), &arch); + EXPECT_EQ(insn.getBranchAddress(), 0); + EXPECT_TRUE(matchingPred); + EXPECT_EQ(insn.getBranchType(), BranchType::Unknown); + EXPECT_EQ(insn.getData().size(), 0); + EXPECT_EQ(insn.getDestinationRegisters().size(), destRegs.size()); + for (int i = 0; i < destRegs.size(); i++) { + EXPECT_EQ(insn.getDestinationRegisters()[i], destRegs[i]); + } + EXPECT_EQ(insn.getException(), InstructionException::HypervisorCall); + EXPECT_EQ(insn.getGeneratedAddresses().size(), 0); + // Default Group + EXPECT_EQ(insn.getGroup(), InstructionGroups::INT_SIMPLE_ARTH_NOSHIFT); + EXPECT_EQ(insn.getInstructionAddress(), 0x43); + EXPECT_EQ(insn.getInstructionId(), 15); + EXPECT_EQ(insn.getKnownOffset(), 0); + EXPECT_EQ(insn.getLatency(), 1); + EXPECT_EQ(insn.getLSQLatency(), 1); + EXPECT_EQ(&insn.getMetadata(), invalidMetadata.get()); + EXPECT_EQ(insn.getMicroOpIndex(), 0); + // Results vector resized at decode + EXPECT_EQ(insn.getResults().size(), 0); + EXPECT_EQ(insn.getSequenceId(), 16); + // Operands vector resized at decode + EXPECT_EQ(insn.getSourceOperands().size(), 0); + EXPECT_EQ(insn.getSourceRegisters().size(), srcRegs.size()); + for (int i = 0; i < srcRegs.size(); i++) { + EXPECT_EQ(insn.getSourceRegisters()[i], srcRegs[i]); + EXPECT_FALSE(insn.isOperandReady(i)); + } + EXPECT_EQ(insn.getStallCycles(), 1); + EXPECT_EQ(insn.getSupportedPorts(), ports); + + EXPECT_TRUE(insn.canExecute()); + EXPECT_FALSE(insn.isStoreAddress()); + EXPECT_FALSE(insn.isStoreData()); + EXPECT_FALSE(insn.isLoad()); + EXPECT_FALSE(insn.isBranch()); + EXPECT_TRUE(insn.exceptionEncountered()); + EXPECT_FALSE(insn.hasExecuted()); + EXPECT_FALSE(insn.canCommit()); + EXPECT_TRUE(insn.hasAllData()); + EXPECT_FALSE(insn.wasBranchTaken()); + EXPECT_FALSE(insn.isFlushed()); + EXPECT_FALSE(insn.isMicroOp()); + EXPECT_TRUE(insn.isLastMicroOp()); + EXPECT_FALSE(insn.isWaitingCommit()); +} + +// Test to ensure that source and operand registers can be 
renamed correctly +TEST_F(AArch64InstructionTest, renameRegs) { + // Insn is `fdivr z1.s, p0/m, z1.s, z0.s` + Instruction insn = Instruction(arch, *fdivMetadata.get(), MicroOpInfo()); + // Define instruction's registers + std::vector destRegs = {{RegisterType::VECTOR, 1}}; + std::vector srcRegs = {{RegisterType::PREDICATE, 0}, + {RegisterType::VECTOR, 1}, + {RegisterType::VECTOR, 0}}; + // Ensure registers decoded correctly + EXPECT_EQ(insn.getSourceRegisters().size(), srcRegs.size()); + for (int i = 0; i < srcRegs.size(); i++) { + EXPECT_EQ(insn.getSourceRegisters()[i], srcRegs[i]); + } + EXPECT_EQ(insn.getDestinationRegisters().size(), destRegs.size()); + for (int i = 0; i < destRegs.size(); i++) { + EXPECT_EQ(insn.getDestinationRegisters()[i], destRegs[i]); + } + + // Define renamed registers + std::vector destRegs_new = {{RegisterType::VECTOR, 24}}; + std::vector srcRegs_new = {{RegisterType::PREDICATE, 0}, + {RegisterType::VECTOR, 97}, + {RegisterType::VECTOR, 0}}; + insn.renameDestination(0, destRegs_new[0]); + insn.renameSource(1, srcRegs_new[1]); + // Ensure renaming functionality works as expected + EXPECT_EQ(insn.getSourceRegisters().size(), srcRegs_new.size()); + for (int i = 0; i < srcRegs_new.size(); i++) { + EXPECT_EQ(insn.getSourceRegisters()[i], srcRegs_new[i]); + } + EXPECT_EQ(insn.getDestinationRegisters().size(), destRegs_new.size()); + for (int i = 0; i < destRegs_new.size(); i++) { + EXPECT_EQ(insn.getDestinationRegisters()[i], destRegs_new[i]); + } +} + +// Test that operand values can be properly supplied and change the state of +// `canExecute` +TEST_F(AArch64InstructionTest, supplyOperand) { + // Insn is `fdivr z1.s, p0/m, z1.s, z0.s` + Instruction insn = Instruction(arch, *fdivMetadata.get(), MicroOpInfo()); + // Define instruction's registers + std::vector destRegs = {{RegisterType::VECTOR, 1}}; + std::vector srcRegs = {{RegisterType::PREDICATE, 0}, + {RegisterType::VECTOR, 1}, + {RegisterType::VECTOR, 0}}; + // Check initial state is as 
expected + EXPECT_FALSE(insn.canExecute()); + EXPECT_FALSE(insn.isOperandReady(0)); + EXPECT_FALSE(insn.isOperandReady(1)); + EXPECT_FALSE(insn.isOperandReady(2)); + + // Define mock register values for source registers + RegisterValue vec = {0xABBACAFE01234567, 256}; + uint64_t pred_vals[4] = {0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF}; + RegisterValue pred = {pred_vals, 32}; + // Supply values for all source registers + insn.supplyOperand(0, pred); + insn.supplyOperand(1, vec); + insn.supplyOperand(2, vec); + // Ensure Instruction state has updated as expected + EXPECT_TRUE(insn.canExecute()); + EXPECT_TRUE(insn.isOperandReady(0)); + EXPECT_TRUE(insn.isOperandReady(1)); + EXPECT_TRUE(insn.isOperandReady(2)); + auto sourceVals = insn.getSourceOperands(); + EXPECT_EQ(sourceVals.size(), 3); + EXPECT_EQ(sourceVals[0], pred); + EXPECT_EQ(sourceVals[1], vec); + EXPECT_EQ(sourceVals[2], vec); + + // Ensure instruction execute updates instruction state as expected, and + // produces the expected result. 
+ EXPECT_FALSE(insn.hasExecuted()); + insn.execute(); + EXPECT_TRUE(insn.hasExecuted()); + auto results = insn.getResults(); + uint64_t vals[2] = {0x3f8000003f800000, 0x7fc000007fc00000}; + RegisterValue refRes = {vals, 256}; + EXPECT_EQ(results.size(), 1); + EXPECT_EQ(results[0], refRes); +} + +// Test that data can be supplied successfully +TEST_F(AArch64InstructionTest, supplyData) { + // Insn is `ldp x1, x2, [x3]` + Instruction insn = Instruction(arch, *ldpMetadata.get(), MicroOpInfo()); + // Define instruction's registers + std::vector destRegs = {{RegisterType::GENERAL, 1}, + {RegisterType::GENERAL, 2}}; + std::vector srcRegs = {{RegisterType::GENERAL, 3}}; + + // Check instruction created correctly + EXPECT_FALSE(insn.exceptionEncountered()); + EXPECT_EQ(&insn.getMetadata(), ldpMetadata.get()); + EXPECT_EQ(insn.getGroup(), InstructionGroups::LOAD_INT); + + // Check source and destination registers extracted correctly + EXPECT_EQ(insn.getSourceRegisters().size(), srcRegs.size()); + for (int i = 0; i < srcRegs.size(); i++) { + EXPECT_EQ(insn.getSourceRegisters()[i], srcRegs[i]); + } + EXPECT_EQ(insn.getDestinationRegisters().size(), destRegs.size()); + for (int i = 0; i < destRegs.size(); i++) { + EXPECT_EQ(insn.getDestinationRegisters()[i], destRegs[i]); + } + + // Supply needed operands + EXPECT_FALSE(insn.isOperandReady(0)); + RegisterValue addr = {0x480, 8}; + insn.supplyOperand(0, addr); + EXPECT_TRUE(insn.isOperandReady(0)); + + // Generate memory addresses + EXPECT_EQ(insn.getGeneratedAddresses().size(), 0); + insn.generateAddresses(); + auto generatedAddresses = insn.getGeneratedAddresses(); + EXPECT_EQ(generatedAddresses.size(), 2); + for (int i = 0; i < generatedAddresses.size(); i++) { + EXPECT_EQ(generatedAddresses[i].address, 0x480 + (i * 0x8)); + EXPECT_EQ(generatedAddresses[i].size, 8); + } + + // Supply required data + EXPECT_FALSE(insn.hasAllData()); + std::vector data = {{123, 8}, {456, 8}}; + EXPECT_EQ(generatedAddresses.size(), 
data.size()); + for (int i = 0; i < generatedAddresses.size(); i++) { + insn.supplyData(generatedAddresses[i].address, data[i]); + } + // Ensure data was supplied correctly + auto retrievedData = insn.getData(); + for (int i = 0; i < retrievedData.size(); i++) { + EXPECT_EQ(retrievedData[i], data[i]); + } + EXPECT_TRUE(insn.hasAllData()); +} + +// Test DataAbort Exception is triggered correctly when supplying data +TEST_F(AArch64InstructionTest, supplyData_dataAbort) { + // Insn is `ldp x1, x2, [x3]` + Instruction insn = Instruction(arch, *ldpMetadata.get(), MicroOpInfo()); + // Define instruction's registers + std::vector destRegs = {{RegisterType::GENERAL, 1}, + {RegisterType::GENERAL, 2}}; + std::vector srcRegs = {{RegisterType::GENERAL, 3}}; + + // Check instruction created correctly + EXPECT_EQ(&insn.getMetadata(), ldpMetadata.get()); + EXPECT_EQ(insn.getGroup(), InstructionGroups::LOAD_INT); + + // Supply needed operands + EXPECT_FALSE(insn.isOperandReady(0)); + RegisterValue addr = {0x480, 8}; + insn.supplyOperand(0, addr); + EXPECT_TRUE(insn.isOperandReady(0)); + + // Generate memory addresses + EXPECT_EQ(insn.getGeneratedAddresses().size(), 0); + insn.generateAddresses(); + auto generatedAddresses = insn.getGeneratedAddresses(); + EXPECT_EQ(generatedAddresses.size(), 2); + for (int i = 0; i < generatedAddresses.size(); i++) { + EXPECT_EQ(generatedAddresses[i].address, 0x480 + (i * 0x8)); + EXPECT_EQ(generatedAddresses[i].size, 8); + } + + // Trigger data abort + EXPECT_FALSE(insn.exceptionEncountered()); + insn.supplyData(generatedAddresses[0].address, RegisterValue()); + EXPECT_TRUE(insn.exceptionEncountered()); + EXPECT_EQ(insn.getException(), InstructionException::DataAbort); +} + +// Test to check logic around early branch misprediction logic +TEST_F(AArch64InstructionTest, earlyBranchMisprediction) { + // Insn is `fdivr z1.s, p0/m, z1.s, z0.s` + Instruction insn = Instruction(arch, *fdivMetadata.get(), MicroOpInfo()); + insn.setInstructionAddress(64); 
+ + // Check initial state of an instruction's branch related options + BranchPrediction pred = {false, 0}; + bool matchingPred = (insn.getBranchPrediction() == pred); + EXPECT_TRUE(matchingPred); + EXPECT_FALSE(insn.wasBranchTaken()); + EXPECT_EQ(insn.getBranchAddress(), 0); + EXPECT_EQ(insn.getBranchType(), BranchType::Unknown); + EXPECT_FALSE(insn.isBranch()); + std::tuple tup = {false, insn.getInstructionAddress() + 4}; + EXPECT_EQ(insn.checkEarlyBranchMisprediction(), tup); + + // Set prediction and ensure expected state changes / outcomes are seen + pred = {true, 0x4848}; + insn.setBranchPrediction(pred); + matchingPred = (insn.getBranchPrediction() == pred); + EXPECT_TRUE(matchingPred); + EXPECT_FALSE(insn.wasBranchTaken()); + EXPECT_EQ(insn.getBranchAddress(), 0); + EXPECT_EQ(insn.getBranchType(), BranchType::Unknown); + // Check logic of `checkEarlyBranchMisprediction` which is different for + // non-branch instructions + EXPECT_FALSE(insn.isBranch()); + tup = {true, insn.getInstructionAddress() + 4}; + EXPECT_EQ(insn.checkEarlyBranchMisprediction(), tup); +} + +// Test that a correct prediction (branch taken) is handled correctly +TEST_F(AArch64InstructionTest, correctPred_taken) { + // insn is `cbz x2, #0x28` + Instruction insn = Instruction(arch, *cbzMetadata.get(), MicroOpInfo()); + insn.setInstructionAddress(80); + + // Check initial state of an instruction's branch related options + BranchPrediction pred = {false, 0}; + bool matchingPred = (insn.getBranchPrediction() == pred); + EXPECT_TRUE(matchingPred); + EXPECT_FALSE(insn.wasBranchTaken()); + EXPECT_EQ(insn.getBranchAddress(), 0); + EXPECT_EQ(insn.getBranchType(), BranchType::Conditional); + EXPECT_TRUE(insn.isBranch()); + std::tuple tup = {false, 0}; + EXPECT_EQ(insn.checkEarlyBranchMisprediction(), tup); + + // Test a correct prediction where branch is taken is handled correctly + pred = {true, 80 + 0x28}; + insn.setBranchPrediction(pred); + matchingPred = (insn.getBranchPrediction() == pred); + 
insn.supplyOperand(0, RegisterValue(0, 8)); + insn.execute(); + EXPECT_TRUE(matchingPred); + EXPECT_TRUE(insn.wasBranchTaken()); + EXPECT_FALSE(insn.wasBranchMispredicted()); + EXPECT_EQ(insn.getBranchAddress(), pred.target); +} + +// Test that a correct prediction (branch not taken) is handled correctly +TEST_F(AArch64InstructionTest, correctPred_notTaken) { + // insn is `cbz x2, #0x28` + Instruction insn = Instruction(arch, *cbzMetadata.get(), MicroOpInfo()); + insn.setInstructionAddress(80); + + // Check initial state of an instruction's branch related options + BranchPrediction pred = {false, 0}; + bool matchingPred = (insn.getBranchPrediction() == pred); + EXPECT_TRUE(matchingPred); + EXPECT_FALSE(insn.wasBranchTaken()); + EXPECT_EQ(insn.getBranchAddress(), 0); + EXPECT_EQ(insn.getBranchType(), BranchType::Conditional); + EXPECT_TRUE(insn.isBranch()); + std::tuple tup = {false, 0}; + EXPECT_EQ(insn.checkEarlyBranchMisprediction(), tup); + + // Test a correct prediction where a branch isn't taken is handled correctly + pred = {false, 80 + 4}; + insn.setBranchPrediction(pred); + matchingPred = (insn.getBranchPrediction() == pred); + insn.supplyOperand(0, RegisterValue(1, 8)); + insn.execute(); + EXPECT_TRUE(matchingPred); + EXPECT_FALSE(insn.wasBranchTaken()); + EXPECT_FALSE(insn.wasBranchMispredicted()); + EXPECT_EQ(insn.getBranchAddress(), pred.target); +} + +// Test that an incorrect prediction (wrong target) is handled correctly +TEST_F(AArch64InstructionTest, incorrectPred_target) { + // insn is `cbz x2, #0x28` + Instruction insn = Instruction(arch, *cbzMetadata.get(), MicroOpInfo()); + insn.setInstructionAddress(100); + + // Check initial state of an instruction's branch related options + BranchPrediction pred = {false, 0}; + bool matchingPred = (insn.getBranchPrediction() == pred); + EXPECT_TRUE(matchingPred); + EXPECT_FALSE(insn.wasBranchTaken()); + EXPECT_EQ(insn.getBranchAddress(), 0); + EXPECT_EQ(insn.getBranchType(), BranchType::Conditional); + 
EXPECT_TRUE(insn.isBranch()); + std::tuple tup = {false, 0}; + EXPECT_EQ(insn.checkEarlyBranchMisprediction(), tup); + + // Test an incorrect prediction is handled correctly - target is wrong + pred = {true, 80 + 0x28}; + insn.setBranchPrediction(pred); + matchingPred = (insn.getBranchPrediction() == pred); + insn.supplyOperand(0, RegisterValue(0, 8)); + insn.execute(); + EXPECT_TRUE(matchingPred); + EXPECT_TRUE(insn.wasBranchTaken()); + EXPECT_TRUE(insn.wasBranchMispredicted()); + EXPECT_EQ(insn.getBranchAddress(), 100 + 0x28); +} + +// Test that an incorrect prediction (wrong taken) is handled correctly +TEST_F(AArch64InstructionTest, incorrectPred_taken) { + // insn is `cbz x2, #0x28` + Instruction insn = Instruction(arch, *cbzMetadata.get(), MicroOpInfo()); + insn.setInstructionAddress(100); + + // Check initial state of an instruction's branch related options + BranchPrediction pred = {false, 0}; + bool matchingPred = (insn.getBranchPrediction() == pred); + EXPECT_TRUE(matchingPred); + EXPECT_FALSE(insn.wasBranchTaken()); + EXPECT_EQ(insn.getBranchAddress(), 0); + EXPECT_EQ(insn.getBranchType(), BranchType::Conditional); + EXPECT_TRUE(insn.isBranch()); + std::tuple tup = {false, 0}; + EXPECT_EQ(insn.checkEarlyBranchMisprediction(), tup); + + // Test an incorrect prediction is handled correctly - taken is wrong + pred = {true, 100 + 0x28}; + insn.setBranchPrediction(pred); + matchingPred = (insn.getBranchPrediction() == pred); + insn.supplyOperand(0, RegisterValue(1, 8)); + insn.execute(); + EXPECT_TRUE(matchingPred); + EXPECT_FALSE(insn.wasBranchTaken()); + EXPECT_TRUE(insn.wasBranchMispredicted()); + EXPECT_EQ(insn.getBranchAddress(), 100 + 4); +} + +// Test commit and flush setters such as `setFlushed`, `setCommitReady`, etc. 
+TEST_F(AArch64InstructionTest, setters) { + // Insn is `fdivr z1.s, p0/m, z1.s, z0.s` + Instruction insn = Instruction(arch, *fdivMetadata.get(), MicroOpInfo()); + + EXPECT_FALSE(insn.canCommit()); + insn.setCommitReady(); + EXPECT_TRUE(insn.canCommit()); + + EXPECT_FALSE(insn.isFlushed()); + insn.setFlushed(); + EXPECT_TRUE(insn.isFlushed()); + + EXPECT_FALSE(insn.isWaitingCommit()); + insn.setWaitingCommit(); + EXPECT_TRUE(insn.isWaitingCommit()); +} + +} // namespace aarch64 +} // namespace arch +} // namespace simeng \ No newline at end of file diff --git a/test/unit/data/stream-aarch64.elf b/test/unit/data/stream-aarch64.elf new file mode 100755 index 0000000000..881a5150a0 Binary files /dev/null and b/test/unit/data/stream-aarch64.elf differ diff --git a/test/unit/data/stream.rv32ima.elf b/test/unit/data/stream.rv32ima.elf new file mode 100644 index 0000000000..ded6502b12 Binary files /dev/null and b/test/unit/data/stream.rv32ima.elf differ diff --git a/test/unit/pipeline/A64FXPortAllocatorTest.cc b/test/unit/pipeline/A64FXPortAllocatorTest.cc index 2e7b5df70b..f593788684 100644 --- a/test/unit/pipeline/A64FXPortAllocatorTest.cc +++ b/test/unit/pipeline/A64FXPortAllocatorTest.cc @@ -8,19 +8,29 @@ namespace simeng { namespace pipeline { -std::vector rsFreeEntries = {20, 20, 10, 10, 19}; +class A64FXPortAllocatorTest : public testing::Test { + public: + A64FXPortAllocatorTest() : portAllocator(portArrangement) { + portAllocator.setRSSizeGetter( + [this](std::vector& sizeVec) { rsSizes(sizeVec); }); + } -void rsSizes(std::vector& sizeVec) { sizeVec = rsFreeEntries; } + void rsSizes(std::vector& sizeVec) const { + sizeVec = rsFreeEntries; + } -// Representation of the A64FX port layout -const std::vector> portArrangement = {{0}, {1}, {2}, {3}, - {4}, {5}, {6}, {7}}; + protected: + // Representation of the A64FX reservation station layout + std::vector rsFreeEntries = {20, 20, 10, 10, 19}; + // Representation of the A64FX port layout + const std::vector> 
portArrangement = { + {0}, {1}, {2}, {3}, {4}, {5}, {6}, {7}}; + + A64FXPortAllocator portAllocator; +}; // Tests correct allocation for RSE0/RSE1/BR attribute groups -TEST(A64FXPortAllocatorTest, singlePortAllocation) { - auto portAllocator = A64FXPortAllocator(portArrangement); - portAllocator.setRSSizeGetter( - [](std::vector& sizeVec) { rsSizes(sizeVec); }); +TEST_F(A64FXPortAllocatorTest, singlePortAllocation) { // Allocate in blocks of 4 to simulate dispatch width of 4 and test dispatch // slot logic @@ -57,10 +67,7 @@ TEST(A64FXPortAllocatorTest, singlePortAllocation) { } // Tests correct allocation when for RSX -TEST(A64FXPortAllocatorTest, RSX) { - auto portAllocator = A64FXPortAllocator(portArrangement); - portAllocator.setRSSizeGetter( - [](std::vector& sizeVec) { rsSizes(sizeVec); }); +TEST_F(A64FXPortAllocatorTest, RSX) { rsFreeEntries = {10, 10, 10, 10, 19}; portAllocator.tick(); EXPECT_EQ(portAllocator.allocate({2, 4, 5, 6}), 2); @@ -83,10 +90,7 @@ TEST(A64FXPortAllocatorTest, RSX) { } // Tests correct allocation when for RSE/RSA -TEST(A64FXPortAllocatorTest, RSEA) { - auto portAllocator = A64FXPortAllocator(portArrangement); - portAllocator.setRSSizeGetter( - [](std::vector& sizeVec) { rsSizes(sizeVec); }); +TEST_F(A64FXPortAllocatorTest, RSEA) { rsFreeEntries = {20, 20, 10, 10, 19}; // RSE portAllocator.tick(); @@ -120,10 +124,7 @@ TEST(A64FXPortAllocatorTest, RSEA) { } // Test correct allocation for Table 1 condition -TEST(A64FXPortAllocator, table1) { - auto portAllocator = A64FXPortAllocator(portArrangement); - portAllocator.setRSSizeGetter( - [](std::vector& sizeVec) { rsSizes(sizeVec); }); +TEST_F(A64FXPortAllocatorTest, table1) { rsFreeEntries = {20, 0, 0, 0, 19}; portAllocator.tick(); EXPECT_EQ(portAllocator.allocate({2, 4, 5, 6}), 2); @@ -146,10 +147,7 @@ TEST(A64FXPortAllocator, table1) { } // Test correct allocation for Table 2 condition -TEST(A64FXPortAllocator, table2) { - auto portAllocator = A64FXPortAllocator(portArrangement); - 
portAllocator.setRSSizeGetter( - [](std::vector& sizeVec) { rsSizes(sizeVec); }); +TEST_F(A64FXPortAllocatorTest, table2) { rsFreeEntries = {20, 20, 0, 0, 19}; portAllocator.tick(); EXPECT_EQ(portAllocator.allocate({2, 4, 5, 6}), 2); @@ -172,10 +170,7 @@ TEST(A64FXPortAllocator, table2) { } // Test correct allocation for Table 3 condition -TEST(A64FXPortAllocator, table3) { - auto portAllocator = A64FXPortAllocator(portArrangement); - portAllocator.setRSSizeGetter( - [](std::vector& sizeVec) { rsSizes(sizeVec); }); +TEST_F(A64FXPortAllocatorTest, table3) { rsFreeEntries = {0, 0, 10, 10, 19}; portAllocator.tick(); EXPECT_EQ(portAllocator.allocate({2, 4, 5, 6}), 5); @@ -198,10 +193,7 @@ TEST(A64FXPortAllocator, table3) { } // Test correct allocation for Table 5 condition -TEST(A64FXPortAllocator, table5) { - auto portAllocator = A64FXPortAllocator(portArrangement); - portAllocator.setRSSizeGetter( - [](std::vector& sizeVec) { rsSizes(sizeVec); }); +TEST_F(A64FXPortAllocatorTest, table5) { rsFreeEntries = {9, 9, 10, 9, 19}; portAllocator.tick(); EXPECT_EQ(portAllocator.allocate({2, 4, 5, 6}), 5); @@ -223,11 +215,8 @@ TEST(A64FXPortAllocator, table5) { rsFreeEntries[1]--; } -// Test correct allocation for Table 6 condition -TEST(A64FXPortAllocator, table6) { - auto portAllocator = A64FXPortAllocator(portArrangement); - portAllocator.setRSSizeGetter( - [](std::vector& sizeVec) { rsSizes(sizeVec); }); +// Test correct allocation for Table 6 condition +TEST_F(A64FXPortAllocatorTest, table6) { rsFreeEntries = {20, 0, 10, 0, 19}; portAllocator.tick(); EXPECT_EQ(portAllocator.allocate({2, 4}), 2); @@ -250,10 +239,7 @@ TEST(A64FXPortAllocator, table6) { } // Test adherence to the dispatch slot logic -TEST(A64FXPortAllocator, dispatchSlots) { - auto portAllocator = A64FXPortAllocator(portArrangement); - portAllocator.setRSSizeGetter( - [](std::vector& sizeVec) { rsSizes(sizeVec); }); +TEST_F(A64FXPortAllocatorTest, dispatchSlots) { rsFreeEntries = {10, 10, 10, 10, 19}; // With 
less than 4 instructions dispatched in a cycle, the next cycle should diff --git a/test/unit/pipeline/DecodeUnitTest.cc b/test/unit/pipeline/DecodeUnitTest.cc index 71062f35bc..f86dbc0caf 100644 --- a/test/unit/pipeline/DecodeUnitTest.cc +++ b/test/unit/pipeline/DecodeUnitTest.cc @@ -22,6 +22,8 @@ class PipelineDecodeUnitTest : public testing::Test { decodeUnit(input, output, predictor), uop(new MockInstruction), uopPtr(uop), + uop2(new MockInstruction), + uop2Ptr(uop2), sourceRegisters({{0, 0}}) {} protected: @@ -33,6 +35,8 @@ class PipelineDecodeUnitTest : public testing::Test { MockInstruction* uop; std::shared_ptr uopPtr; + MockInstruction* uop2; + std::shared_ptr uop2Ptr; std::vector sourceRegisters; }; @@ -60,6 +64,7 @@ TEST_F(PipelineDecodeUnitTest, Tick) { // Check no flush was requested EXPECT_EQ(decodeUnit.shouldFlush(), false); + EXPECT_EQ(decodeUnit.getEarlyFlushes(), 0); } // Tests that the decode unit requests a flush when a non-branch is mispredicted @@ -84,6 +89,26 @@ TEST_F(PipelineDecodeUnitTest, Flush) { // Check that a flush was correctly requested EXPECT_EQ(decodeUnit.shouldFlush(), true); EXPECT_EQ(decodeUnit.getFlushAddress(), 1); + EXPECT_EQ(decodeUnit.getEarlyFlushes(), 1); +} + +// Tests that PurgeFlushed empties the microOps queue +TEST_F(PipelineDecodeUnitTest, purgeFlushed) { + input.getHeadSlots()[0] = {uopPtr, uop2Ptr}; + + decodeUnit.tick(); + EXPECT_EQ(output.getTailSlots()[0].get(), uop); + EXPECT_EQ(input.getHeadSlots()[0].size(), 0); + + // Clear micro-ops queue + decodeUnit.purgeFlushed(); + // Swap output head and tail + output.tick(); + + decodeUnit.tick(); + EXPECT_EQ(output.getTailSlots()[0], nullptr); + EXPECT_EQ(output.getHeadSlots()[0].get(), uop); + EXPECT_EQ(input.getHeadSlots()[0].size(), 0); } } // namespace pipeline diff --git a/test/unit/pipeline/DispatchIssueUnitTest.cc b/test/unit/pipeline/DispatchIssueUnitTest.cc new file mode 100644 index 0000000000..9bd953f567 --- /dev/null +++ 
b/test/unit/pipeline/DispatchIssueUnitTest.cc @@ -0,0 +1,573 @@ +#include "../ConfigInit.hh" +#include "../MockInstruction.hh" +#include "../MockPortAllocator.hh" +#include "gmock/gmock.h" +#include "gtest/gtest.h" +#include "simeng/pipeline/DispatchIssueUnit.hh" +#include "simeng/version.hh" + +namespace simeng { +namespace pipeline { + +using ::testing::Return; +using ::testing::ReturnRef; + +class PipelineDispatchIssueUnitTest : public testing::Test { + public: + PipelineDispatchIssueUnitTest() + : regFile(physRegStruct), + input(1, nullptr), + output(config::SimInfo::getConfig()["Execution-Units"].num_children(), + {1, nullptr}), + diUnit(input, output, regFile, portAlloc, physRegQuants), + uop(new MockInstruction), + uopPtr(uop), + uop2(new MockInstruction), + uop2Ptr(uop2) {} + + protected: + // More complex model used to enable better testing of the DispatchIssueUnit + // class. + ConfigInit configInit = ConfigInit(config::ISA::AArch64, R"YAML({ + Ports: { + '0': {Portname: Port 0, Instruction-Group-Support: [FP, SVE]}, + '1': {Portname: Port 1, Instruction-Group-Support: [PREDICATE]}, + '2': {Portname: Port 2, Instruction-Group-Support: [INT_SIMPLE, INT_MUL, STORE_DATA]}, + '3': {Portname: Port 3, Instruction-Group-Support: [FP_SIMPLE, FP_MUL, SVE_SIMPLE, SVE_MUL]}, + '4': {Portname: Port 4, Instruction-Group-Support: [INT_SIMPLE, INT_DIV_OR_SQRT]}, + '5': {Portname: Port 5, Instruction-Group-Support: [LOAD, STORE_ADDRESS, INT_SIMPLE_ARTH_NOSHIFT, INT_SIMPLE_LOGICAL_NOSHIFT, INT_SIMPLE_CMP]}, + '6': {Portname: Port 6, Instruction-Group-Support: [LOAD, STORE_ADDRESS, INT_SIMPLE_ARTH_NOSHIFT, INT_SIMPLE_LOGICAL_NOSHIFT, INT_SIMPLE_CMP]}, + '7': {Portname: Port 7, Instruction-Group-Support: [BRANCH]} + }, + Reservation-Stations: { + '0': {Size: 20, Dispatch-Rate: 2, Ports: [Port 0, Port 1, Port 2]}, + '1': {Size: 20, Dispatch-Rate: 2, Ports: [Port 3, Port 4]}, + '2': {Size: 10, Dispatch-Rate: 1, Ports: [Port 5]}, + '3': {Size: 10, Dispatch-Rate: 1, Ports: 
[Port 6]}, + '4': {Size: 19, Dispatch-Rate: 1, Ports: [Port 7]} + }, + Execution-Units: { + '0': {Pipelined: True, Blocking-Groups: [INT_DIV_OR_SQRT, FP_DIV_OR_SQRT, SVE_DIV_OR_SQRT]}, + '1': {Pipelined: True, Blocking-Groups: [INT_DIV_OR_SQRT, FP_DIV_OR_SQRT, SVE_DIV_OR_SQRT]}, + '2': {Pipelined: True, Blocking-Groups: [INT_DIV_OR_SQRT, FP_DIV_OR_SQRT, SVE_DIV_OR_SQRT]}, + '3': {Pipelined: True, Blocking-Groups: [INT_DIV_OR_SQRT, FP_DIV_OR_SQRT, SVE_DIV_OR_SQRT]}, + '4': {Pipelined: True, Blocking-Groups: [INT_DIV_OR_SQRT, FP_DIV_OR_SQRT, SVE_DIV_OR_SQRT]}, + '5': {Pipelined: True, Blocking-Groups: [INT_DIV_OR_SQRT, FP_DIV_OR_SQRT, SVE_DIV_OR_SQRT]}, + '6': {Pipelined: True, Blocking-Groups: [INT_DIV_OR_SQRT, FP_DIV_OR_SQRT, SVE_DIV_OR_SQRT]}, + '7': {Pipelined: True, Blocking-Groups: [INT_DIV_OR_SQRT, FP_DIV_OR_SQRT, SVE_DIV_OR_SQRT]} + } + })YAML"); + + // Using AArch64 as basis: {GP, FP/SVE, PRED, COND, SYS, SME} + const std::vector physRegQuants = {96, 128, 48, 128, 64, 64}; + const std::vector physRegStruct = { + {8, physRegQuants[0]}, {256, physRegQuants[1]}, {32, physRegQuants[2]}, + {1, physRegQuants[3]}, {8, physRegQuants[4]}, {256, physRegQuants[5]}}; + RegisterFileSet regFile; + + PipelineBuffer> input; + std::vector>> output; + + MockPortAllocator portAlloc; + + simeng::pipeline::DispatchIssueUnit diUnit; + + MockInstruction* uop; + std::shared_ptr uopPtr; + MockInstruction* uop2; + std::shared_ptr uop2Ptr; + + // As per a64fx.yaml + const uint16_t EAGA = 5; // Maps to RS index 2 + const uint8_t RS_EAGA = 2; // RS associated with EAGA in A64FX + const std::vector refRsSizes = {20, 20, 10, 10, 19}; + + const Register r0 = {0, 0}; + const Register r1 = {0, 1}; + const Register r2 = {0, 2}; +}; + +// No instruction issued due to empty input buffer +TEST_F(PipelineDispatchIssueUnitTest, emptyTick) { + // Ensure empty Reservation stations pre tick() + std::vector rsSizes; + diUnit.getRSSizes(rsSizes); + EXPECT_EQ(rsSizes, refRsSizes); + + diUnit.tick(); + 
// Post tick(), ensure RS sizes are still the same + no RS stalls + rsSizes.clear(); + diUnit.getRSSizes(rsSizes); + EXPECT_EQ(rsSizes, refRsSizes); + EXPECT_EQ(diUnit.getRSStalls(), 0); + + diUnit.issue(); + // Post issue(), ensure Reservation stations are empty + rsSizes.clear(); + diUnit.getRSSizes(rsSizes); + EXPECT_EQ(rsSizes, refRsSizes); + // Post issue(), ensure output buffers are empty + for (size_t i = 0; i < output.size(); i++) { + EXPECT_EQ(output[i].getTailSlots()[0], nullptr); + } + // Post issue(), ensure only front-end stall recorded + EXPECT_EQ(diUnit.getFrontendStalls(), 1); + EXPECT_EQ(diUnit.getBackendStalls(), 0); + EXPECT_EQ(diUnit.getPortBusyStalls(), 0); +} + +// Single instruction has no exception, 2 source operands (both ready), 1 +// destination operand +TEST_F(PipelineDispatchIssueUnitTest, singleInstr) { + // Set-up source & destination registers and ports for this instruction + std::array srcRegs = {r1, r2}; + std::array destRegs = {r0}; + const std::vector suppPorts = {EAGA}; + + // All expected calls to instruction during tick() + EXPECT_CALL(*uop, getSupportedPorts()).WillOnce(ReturnRef(suppPorts)); + uop->setExceptionEncountered(false); + EXPECT_CALL(*uop, getSourceRegisters()) + .WillOnce(Return(span(srcRegs))); + EXPECT_CALL(*uop, isOperandReady(0)).WillOnce(Return(false)); + EXPECT_CALL(*uop, supplyOperand(0, RegisterValue(0, 8))); + EXPECT_CALL(*uop, isOperandReady(1)).WillOnce(Return(false)); + EXPECT_CALL(*uop, supplyOperand(1, RegisterValue(0, 8))); + EXPECT_CALL(*uop, getDestinationRegisters()) + .WillOnce(Return(span(destRegs))); + + // Expected call to port allocator during tick() + EXPECT_CALL(portAlloc, allocate(suppPorts)).WillOnce(Return(EAGA)); + + // Ensure empty reservation stations pre tick() + std::vector rsSizes; + diUnit.getRSSizes(rsSizes); + EXPECT_EQ(rsSizes, refRsSizes); + + input.getHeadSlots()[0] = uopPtr; + diUnit.tick(); + // Ensure post tick that EAGA's reservation station size has decreased by 1 + 
rsSizes.clear(); + diUnit.getRSSizes(rsSizes); + EXPECT_EQ(rsSizes.size(), refRsSizes.size()); + EXPECT_EQ(rsSizes[RS_EAGA], refRsSizes[RS_EAGA] - 1); + // Ensure no stalls recorded in tick() + EXPECT_EQ(diUnit.getFrontendStalls(), 0); + EXPECT_EQ(diUnit.getBackendStalls(), 0); + EXPECT_EQ(diUnit.getPortBusyStalls(), 0); + EXPECT_EQ(diUnit.getRSStalls(), 0); + // Ensure empty output buffers post tick() + for (size_t i = 0; i < output.size(); i++) { + EXPECT_EQ(output[i].getTailSlots()[0], nullptr); + } + + // Detail expected call to port allocator during tick() + EXPECT_CALL(portAlloc, issued(EAGA)); + + diUnit.issue(); + // Ensure all reservation stations empty again post issue() + rsSizes.clear(); + diUnit.getRSSizes(rsSizes); + EXPECT_EQ(rsSizes, refRsSizes); + // Ensure no stalls recorded during issue() + EXPECT_EQ(diUnit.getFrontendStalls(), 0); + EXPECT_EQ(diUnit.getBackendStalls(), 0); + EXPECT_EQ(diUnit.getPortBusyStalls(), 0); + EXPECT_EQ(diUnit.getRSStalls(), 0); + // Ensure all output buffers are empty, except the one associated with EAGA + // port which contains the uop + for (size_t i = 0; i < output.size(); i++) { + if (i != EAGA) + EXPECT_EQ(output[i].getTailSlots()[0], nullptr); + else + EXPECT_EQ(output[i].getTailSlots()[0].get(), uop); + } +} + +// Single instruction with exception +TEST_F(PipelineDispatchIssueUnitTest, singleInstr_exception) { + // Setup supported port instruction can use + const std::vector suppPorts = {EAGA}; + + // All expected calls to instruction during tick() + EXPECT_CALL(*uop, getSupportedPorts()).WillOnce(ReturnRef(suppPorts)); + uop->setExceptionEncountered(true); + + input.getHeadSlots()[0] = uopPtr; + diUnit.tick(); + // Check that instruction has encountered an exception and that it is ready to + // commit + EXPECT_TRUE(uop->canCommit()); + EXPECT_TRUE(uop->exceptionEncountered()); + // Ensure all reservation stations are empty post tick() + std::vector rsSizes; + diUnit.getRSSizes(rsSizes); + EXPECT_EQ(rsSizes, 
refRsSizes); + // Ensure input buffer has been emptied + EXPECT_EQ(input.getHeadSlots()[0], nullptr); + + // Perform issue() + diUnit.issue(); + // Ensure RS still empty post issue() + rsSizes.clear(); + diUnit.getRSSizes(rsSizes); + EXPECT_EQ(rsSizes, refRsSizes); + // Ensure all output ports are empty + for (int i = 0; i < output.size(); i++) { + EXPECT_EQ(output[i].getTailSlots()[0], nullptr); + } + // Ensure frontend stall recorded + EXPECT_EQ(diUnit.getFrontendStalls(), 1); + EXPECT_EQ(diUnit.getBackendStalls(), 0); + EXPECT_EQ(diUnit.getPortBusyStalls(), 0); + EXPECT_EQ(diUnit.getRSStalls(), 0); +} + +// Single instruction that can't be issued in 1 cycle as RS is full +TEST_F(PipelineDispatchIssueUnitTest, singleInstr_rsFull) { + // Setup supported port instructions can use + const std::vector suppPorts = {EAGA}; + + // Artificially fill Reservation station with index 2 + std::vector> insns(refRsSizes[RS_EAGA]); + for (int i = 0; i < insns.size(); i++) { + // Initialise instruction + insns[i] = std::make_shared(); + // All expected calls to instruction during tick() + EXPECT_CALL(*insns[i].get(), getSupportedPorts()) + .WillOnce(ReturnRef(suppPorts)); + EXPECT_CALL(*insns[i].get(), getSourceRegisters()) + .WillOnce(Return(span())); + EXPECT_CALL(*insns[i].get(), getDestinationRegisters()) + .WillOnce(Return(span())); + // Expected call to port allocator during tick() + EXPECT_CALL(portAlloc, allocate(suppPorts)).WillOnce(Return(EAGA)); + + input.getHeadSlots()[0] = insns[i]; + diUnit.tick(); + } + // Ensure Reservation station index 2 is full post tick, and all others are + // empty + std::vector rsSizes; + diUnit.getRSSizes(rsSizes); + EXPECT_EQ(rsSizes.size(), refRsSizes.size()); + for (int i = 0; i < refRsSizes.size(); i++) { + if (i != RS_EAGA) { + EXPECT_EQ(rsSizes[i], refRsSizes[i]); + } else { + EXPECT_EQ(rsSizes[i], 0); + EXPECT_NE(rsSizes[i], refRsSizes[i]); + } + } + // Ensure no stalls recorded in tick() + EXPECT_EQ(diUnit.getFrontendStalls(), 0); 
+ EXPECT_EQ(diUnit.getBackendStalls(), 0); + EXPECT_EQ(diUnit.getPortBusyStalls(), 0); + EXPECT_EQ(diUnit.getRSStalls(), 0); + + // Submit new instruction to same port + // All expected calls to instruction during tick() + EXPECT_CALL(*uop, getSupportedPorts()).WillOnce(ReturnRef(suppPorts)); + // All expected calls to portAllocator during tick() + EXPECT_CALL(portAlloc, allocate(suppPorts)).WillOnce(Return(EAGA)); + EXPECT_CALL(portAlloc, deallocate(EAGA)); + input.getHeadSlots()[0] = uopPtr; + diUnit.tick(); + // Ensure Reservation station sizes have stayed the same + rsSizes.clear(); + diUnit.getRSSizes(rsSizes); + EXPECT_EQ(rsSizes.size(), refRsSizes.size()); + for (int i = 0; i < refRsSizes.size(); i++) { + if (i != RS_EAGA) { + EXPECT_EQ(rsSizes[i], refRsSizes[i]); + } else { + EXPECT_EQ(rsSizes[i], 0); + EXPECT_NE(rsSizes[i], refRsSizes[i]); + } + } + // Check input pipelineBuffer stalled + EXPECT_TRUE(input.isStalled()); + // Ensure one rsStall recorded in tick() + EXPECT_EQ(diUnit.getFrontendStalls(), 0); + EXPECT_EQ(diUnit.getBackendStalls(), 0); + EXPECT_EQ(diUnit.getPortBusyStalls(), 0); + EXPECT_EQ(diUnit.getRSStalls(), 1); +} + +// Single instruction not issued in 1 cycle as port is stalled +TEST_F(PipelineDispatchIssueUnitTest, singleInstr_portStall) { + // Setup supported port instructions can use + const std::vector suppPorts = {EAGA}; + + // Submit new instruction to a port + // All expected calls to instruction during tick() + EXPECT_CALL(*uop, getSupportedPorts()).WillOnce(ReturnRef(suppPorts)); + uop->setExceptionEncountered(false); + EXPECT_CALL(*uop, getSourceRegisters()).WillOnce(Return(span())); + EXPECT_CALL(*uop, getDestinationRegisters()) + .WillOnce(Return(span())); + // Expected call to portAllocator during tick() + EXPECT_CALL(portAlloc, allocate(suppPorts)).WillOnce(Return(EAGA)); + + input.getHeadSlots()[0] = uopPtr; + diUnit.tick(); + + // Ensure correct RS sizes post tick() + std::vector rsSizes; + diUnit.getRSSizes(rsSizes); + 
EXPECT_EQ(rsSizes.size(), refRsSizes.size()); + for (int i = 0; i < refRsSizes.size(); i++) { + if (i != RS_EAGA) { + EXPECT_EQ(rsSizes[i], refRsSizes[i]); + } else { + EXPECT_EQ(rsSizes[i], refRsSizes[i] - 1); + } + } + // Ensure no stalls recorded in tick() + EXPECT_EQ(diUnit.getFrontendStalls(), 0); + EXPECT_EQ(diUnit.getBackendStalls(), 0); + EXPECT_EQ(diUnit.getPortBusyStalls(), 0); + EXPECT_EQ(diUnit.getRSStalls(), 0); + + // Stall issue port + output[EAGA].stall(true); + + // Perform issue() + diUnit.issue(); + // Ensure correct RS sizes post issue() + rsSizes.clear(); + diUnit.getRSSizes(rsSizes); + EXPECT_EQ(rsSizes.size(), refRsSizes.size()); + for (int i = 0; i < refRsSizes.size(); i++) { + if (i != RS_EAGA) { + EXPECT_EQ(rsSizes[i], refRsSizes[i]); + } else { + EXPECT_EQ(rsSizes[i], refRsSizes[i] - 1); + } + } + // Ensure all output ports are empty + for (int i = 0; i < output.size(); i++) { + EXPECT_EQ(output[i].getTailSlots()[0], nullptr); + } + // Ensure portBusyStall and backend stall recorded in issue() + EXPECT_EQ(diUnit.getFrontendStalls(), 0); + EXPECT_EQ(diUnit.getBackendStalls(), 1); + EXPECT_EQ(diUnit.getPortBusyStalls(), 1); + EXPECT_EQ(diUnit.getRSStalls(), 0); +} + +// Try dispatch two instructions with RAW hazard after renaming, second should +// not be issued as it is dependant on first. Use forwardOperand() to resolve +// dependency. 
+TEST_F(PipelineDispatchIssueUnitTest, createdependency_raw) { + // Set-up source & destination registers and ports for the instructions + std::array srcRegs_1 = {}; + std::array destRegs_1 = {r0}; + std::array srcRegs_2 = {r0}; + std::array destRegs_2 = {r1}; + const std::vector suppPorts = {EAGA}; + + // All expected calls to instruction 1 during tick() + EXPECT_CALL(*uop, getSupportedPorts()).WillOnce(ReturnRef(suppPorts)); + uop->setExceptionEncountered(false); + EXPECT_CALL(*uop, getSourceRegisters()) + .WillOnce(Return(span(srcRegs_1))); + EXPECT_CALL(*uop, isOperandReady(0)).WillOnce(Return(false)); + EXPECT_CALL(*uop, supplyOperand(0, RegisterValue(0, 8))); + EXPECT_CALL(*uop, getDestinationRegisters()) + .WillOnce(Return(span(destRegs_1))); + // Expected call to port allocator during tick() + EXPECT_CALL(portAlloc, allocate(suppPorts)).WillOnce(Return(EAGA)); + EXPECT_CALL(portAlloc, issued(EAGA)); + + // Process instruction 1 + input.getHeadSlots()[0] = uopPtr; + diUnit.tick(); + diUnit.issue(); + EXPECT_EQ(output[EAGA].getTailSlots()[0], uopPtr); + output[EAGA].tick(); + + // All expected calls to instruction 2 during tick() + EXPECT_CALL(*uop2, getSupportedPorts()).WillOnce(ReturnRef(suppPorts)); + uop->setExceptionEncountered(false); + EXPECT_CALL(*uop2, getSourceRegisters()) + .WillOnce(Return(span(srcRegs_2))); + EXPECT_CALL(*uop2, isOperandReady(0)).WillOnce(Return(false)); + EXPECT_CALL(*uop2, getDestinationRegisters()) + .WillOnce(Return(span(destRegs_2))); + // Expected call to port allocator during tick() + EXPECT_CALL(portAlloc, allocate(suppPorts)).WillOnce(Return(EAGA)); + + // Process instruction 2 + input.getHeadSlots()[0] = uop2Ptr; + diUnit.tick(); + diUnit.issue(); + // Ensure correct RS sizes post tick() & issue() + std::vector rsSizes; + diUnit.getRSSizes(rsSizes); + EXPECT_EQ(rsSizes.size(), refRsSizes.size()); + for (int i = 0; i < refRsSizes.size(); i++) { + if (i != RS_EAGA) { + EXPECT_EQ(rsSizes[i], refRsSizes[i]); + } else { + 
EXPECT_EQ(rsSizes[i], refRsSizes[i] - 1); + } + } + // Ensure all output ports are empty + for (int i = 0; i < output.size(); i++) { + EXPECT_EQ(output[i].getTailSlots()[0], nullptr); + } + // Ensure backend stall recorded in issue() + EXPECT_EQ(diUnit.getFrontendStalls(), 0); + EXPECT_EQ(diUnit.getBackendStalls(), 1); + EXPECT_EQ(diUnit.getPortBusyStalls(), 0); + EXPECT_EQ(diUnit.getRSStalls(), 0); + + // Forward operand for register r0 + std::array vals = {RegisterValue(6)}; + EXPECT_CALL(*uop2, supplyOperand(0, vals[0])); + EXPECT_CALL(*uop2, canExecute()).WillOnce(Return(true)); + diUnit.forwardOperands(span(srcRegs_2), vals); + + // Try issue again for instruction 2 + EXPECT_CALL(portAlloc, issued(EAGA)); + diUnit.issue(); + // Ensure correct RS sizes post issue() + rsSizes.clear(); + diUnit.getRSSizes(rsSizes); + EXPECT_EQ(rsSizes, refRsSizes); + // Ensure all output ports are empty except EAGA + for (int i = 0; i < output.size(); i++) { + if (i != EAGA) + EXPECT_EQ(output[i].getTailSlots()[0], nullptr); + else + EXPECT_EQ(output[i].getTailSlots()[0], uop2Ptr); + } + // Ensure no further stalls recorded in issue() + EXPECT_EQ(diUnit.getFrontendStalls(), 0); + EXPECT_EQ(diUnit.getBackendStalls(), 1); + EXPECT_EQ(diUnit.getPortBusyStalls(), 0); + EXPECT_EQ(diUnit.getRSStalls(), 0); +} + +// Ensure correct instructions are flushed from reservation stations and the +// dependency matrix +TEST_F(PipelineDispatchIssueUnitTest, purgeFlushed) { + // Set-up source & destination registers and ports for the instructions; + // creating a dependency + std::array srcRegs_1 = {}; + std::array destRegs_1 = {r0}; + std::array srcRegs_2 = {r0}; + std::array destRegs_2 = {r1}; + const std::vector suppPorts = {EAGA}; + + // All expected calls to instruction 1 during tick() + EXPECT_CALL(*uop, getSupportedPorts()).WillOnce(ReturnRef(suppPorts)); + uop->setExceptionEncountered(false); + EXPECT_CALL(*uop, getSourceRegisters()) + .WillOnce(Return(span(srcRegs_1))); + 
EXPECT_CALL(*uop, isOperandReady(0)).WillOnce(Return(false)); + EXPECT_CALL(*uop, supplyOperand(0, RegisterValue(0, 8))); + EXPECT_CALL(*uop, getDestinationRegisters()) + .WillOnce(Return(span(destRegs_1))); + // Expected call to port allocator during tick() + EXPECT_CALL(portAlloc, allocate(suppPorts)).WillOnce(Return(EAGA)); + + // Process instruction 1 + input.getHeadSlots()[0] = uopPtr; + diUnit.tick(); + EXPECT_EQ(input.getHeadSlots()[0], nullptr); + + // All expected calls to instruction 2 during tick() + EXPECT_CALL(*uop2, getSupportedPorts()).WillOnce(ReturnRef(suppPorts)); + uop->setExceptionEncountered(false); + EXPECT_CALL(*uop2, getSourceRegisters()) + .WillOnce(Return(span(srcRegs_2))); + EXPECT_CALL(*uop2, isOperandReady(0)).WillOnce(Return(false)); + EXPECT_CALL(*uop2, getDestinationRegisters()) + .WillOnce(Return(span(destRegs_2))); + // Expected call to port allocator during tick() + EXPECT_CALL(portAlloc, allocate(suppPorts)).WillOnce(Return(EAGA)); + + // Process instruction 2 + input.getHeadSlots()[0] = uop2Ptr; + diUnit.tick(); + EXPECT_EQ(input.getHeadSlots()[0], nullptr); + + // Ensure correct RS sizes post tick() + std::vector rsSizes; + diUnit.getRSSizes(rsSizes); + EXPECT_EQ(rsSizes.size(), refRsSizes.size()); + for (int i = 0; i < refRsSizes.size(); i++) { + if (i != RS_EAGA) { + EXPECT_EQ(rsSizes[i], refRsSizes[i]); + } else { + EXPECT_EQ(rsSizes[i], refRsSizes[i] - 2); + } + } + // Ensure all output ports are empty + for (int i = 0; i < output.size(); i++) { + EXPECT_EQ(output[i].getTailSlots()[0], nullptr); + } + // Ensure no stalls recorded + EXPECT_EQ(diUnit.getFrontendStalls(), 0); + EXPECT_EQ(diUnit.getBackendStalls(), 0); + EXPECT_EQ(diUnit.getPortBusyStalls(), 0); + EXPECT_EQ(diUnit.getRSStalls(), 0); + + // Remove flushed uops + EXPECT_CALL(portAlloc, deallocate(EAGA)).Times(2); + uopPtr->setFlushed(); + uop2Ptr->setFlushed(); + diUnit.purgeFlushed(); + + // Check reservation station sizes + rsSizes.clear(); + 
diUnit.getRSSizes(rsSizes); + EXPECT_EQ(rsSizes, refRsSizes); + + // Perform issue to see if `uop` is still present + diUnit.issue(); + // Ensure all output ports are empty + for (int i = 0; i < output.size(); i++) { + EXPECT_EQ(output[i].getTailSlots()[0], nullptr); + } + // Ensure frontend stall recorded in issue() + EXPECT_EQ(diUnit.getFrontendStalls(), 1); + EXPECT_EQ(diUnit.getBackendStalls(), 0); + EXPECT_EQ(diUnit.getPortBusyStalls(), 0); + EXPECT_EQ(diUnit.getRSStalls(), 0); + + // Call forwardOperand() and issue() to release `uop2` (if it were still + // present) + std::array vals = {RegisterValue(6)}; + diUnit.forwardOperands(span(srcRegs_2), vals); + // Check reservation station sizes + rsSizes.clear(); + diUnit.getRSSizes(rsSizes); + EXPECT_EQ(rsSizes, refRsSizes); + + diUnit.issue(); + // Ensure all output ports are empty + for (int i = 0; i < output.size(); i++) { + EXPECT_EQ(output[i].getTailSlots()[0], nullptr); + } + // Ensure frontend stall recorded in issue() + EXPECT_EQ(diUnit.getFrontendStalls(), 2); + EXPECT_EQ(diUnit.getBackendStalls(), 0); + EXPECT_EQ(diUnit.getPortBusyStalls(), 0); + EXPECT_EQ(diUnit.getRSStalls(), 0); +} + +// Test based on a64fx config file reservation staion configuration +TEST_F(PipelineDispatchIssueUnitTest, getRSSizes) { + std::vector rsSizes; + diUnit.getRSSizes(rsSizes); + EXPECT_EQ(rsSizes, refRsSizes); +} + +} // namespace pipeline +} // namespace simeng + +// tick +// issue \ No newline at end of file diff --git a/test/unit/pipeline/ExecuteUnitTest.cc b/test/unit/pipeline/ExecuteUnitTest.cc index eb130f53ad..0f82593ff6 100644 --- a/test/unit/pipeline/ExecuteUnitTest.cc +++ b/test/unit/pipeline/ExecuteUnitTest.cc @@ -56,17 +56,35 @@ class PipelineExecuteUnitTest : public testing::Test { MockInstruction* thirdUop; std::shared_ptr uopPtr; - std::shared_ptr secondUopPtr; - std::shared_ptr thirdUopPtr; + std::shared_ptr secondUopPtr; + std::shared_ptr thirdUopPtr; }; // Tests that the execution unit processes nothing 
if no instruction is present TEST_F(PipelineExecuteUnitTest, TickEmpty) { + EXPECT_TRUE(executeUnit.isEmpty()); executeUnit.tick(); + EXPECT_TRUE(executeUnit.isEmpty()); EXPECT_EQ(output.getTailSlots()[0], nullptr); } +// Tests that a flushed instruction is removed from the input buffer and not +// processed through the EU +TEST_F(PipelineExecuteUnitTest, flushedInputInsn) { + input.getHeadSlots()[0] = uopPtr; + + // Setup instruction + uopPtr->setFlushed(); + ON_CALL(*uop, canExecute()).WillByDefault(Return(true)); + + executeUnit.tick(); + + EXPECT_EQ(input.getHeadSlots()[0], nullptr); + EXPECT_EQ(output.getTailSlots()[0], nullptr); + EXPECT_EQ(executeUnit.getCycles(), 0); +} + // Tests that the execution unit executes an instruction and forwards the // results TEST_F(PipelineExecuteUnitTest, Execute) { @@ -119,7 +137,8 @@ TEST_F(PipelineExecuteUnitTest, ExecuteBranch) { // Check that the branch predictor was updated with the results EXPECT_CALL(*uop, getBranchType()).Times(1); - EXPECT_CALL(predictor, update(2, taken, pc, BranchType::Unconditional)) + EXPECT_CALL(predictor, + update(insnAddress, taken, pc, BranchType::Unconditional)) .Times(1); // Check that empty forwarding call is made @@ -128,8 +147,13 @@ TEST_F(PipelineExecuteUnitTest, ExecuteBranch) { executeUnit.tick(); + EXPECT_EQ(uopPtr->wasBranchMispredicted(), false); + EXPECT_EQ(uopPtr->wasBranchTaken(), taken); + EXPECT_EQ(executeUnit.shouldFlush(), false); EXPECT_EQ(output.getTailSlots()[0].get(), uop); + EXPECT_EQ(executeUnit.getBranchExecutedCount(), 1); + EXPECT_EQ(executeUnit.getBranchMispredictedCount(), 0); } // Test that an instruction that already encountered an exception will raise it @@ -192,7 +216,7 @@ TEST_F(PipelineExecuteUnitTest, PipelineStall) { EXPECT_EQ(input.getHeadSlots()[0].get(), secondUop); EXPECT_EQ(output.getTailSlots()[0], nullptr); executeUnit.tick(); - EXPECT_EQ(input.getHeadSlots()[0].get(), nullptr); + EXPECT_EQ(input.getHeadSlots()[0], nullptr); 
EXPECT_EQ(output.getTailSlots()[0].get(), uop); } @@ -204,13 +228,14 @@ TEST_F(PipelineExecuteUnitTest, OperationStall) { uop->setLatency(5); uop->setStallCycles(5); ON_CALL(*uop, getGroup()).WillByDefault(Return(3)); + ON_CALL(*uop, canExecute()).WillByDefault(Return(true)); ON_CALL(*secondUop, getGroup()).WillByDefault(Return(4)); + ON_CALL(*secondUop, canExecute()).WillByDefault(Return(true)); ON_CALL(*thirdUop, getGroup()).WillByDefault(Return(2)); - - ON_CALL(*uop, canExecute()).WillByDefault(Return(true)); ON_CALL(*thirdUop, canExecute()).WillByDefault(Return(true)); + EXPECT_CALL(*uop, execute()).Times(1); - EXPECT_CALL(*secondUop, execute()).Times(0); + EXPECT_CALL(*secondUop, execute()).Times(1); EXPECT_CALL(*thirdUop, execute()).Times(1); executeUnit.tick(); @@ -218,21 +243,116 @@ TEST_F(PipelineExecuteUnitTest, OperationStall) { EXPECT_EQ(output.getTailSlots()[0], nullptr); input.getHeadSlots()[0] = secondUopPtr; executeUnit.tick(); - EXPECT_EQ(input.getHeadSlots()[0].get(), nullptr); + EXPECT_EQ(input.getHeadSlots()[0], nullptr); EXPECT_EQ(output.getTailSlots()[0], nullptr); input.getHeadSlots()[0] = thirdUopPtr; executeUnit.tick(); - EXPECT_EQ(input.getHeadSlots()[0].get(), nullptr); + EXPECT_EQ(input.getHeadSlots()[0], nullptr); EXPECT_EQ(output.getTailSlots()[0], nullptr); executeUnit.tick(); - EXPECT_EQ(input.getHeadSlots()[0].get(), nullptr); + EXPECT_EQ(input.getHeadSlots()[0], nullptr); EXPECT_EQ(output.getTailSlots()[0], nullptr); executeUnit.tick(); - EXPECT_EQ(input.getHeadSlots()[0].get(), nullptr); + EXPECT_EQ(input.getHeadSlots()[0], nullptr); EXPECT_EQ(output.getTailSlots()[0].get(), uop); executeUnit.tick(); - EXPECT_EQ(input.getHeadSlots()[0].get(), nullptr); + EXPECT_EQ(input.getHeadSlots()[0], nullptr); + EXPECT_EQ(output.getTailSlots()[0].get(), thirdUop); + executeUnit.tick(); + EXPECT_EQ(input.getHeadSlots()[0], nullptr); + EXPECT_EQ(output.getTailSlots()[0].get(), secondUop); +} + +// Test that a mispredicted branch instruction is 
properly handled +TEST_F(PipelineExecuteUnitTest, mispredictedBranch) { + input.getHeadSlots()[0] = uopPtr; + + ON_CALL(*uop, canExecute()).WillByDefault(Return(true)); + // Anticipate testing instruction type; return true for branch + ON_CALL(*uop, isBranch()).WillByDefault(Return(true)); + // Return branch type as unconditional by default + ON_CALL(*uop, getBranchType()).WillByDefault(Return(BranchType::Conditional)); + + const bool takenPred = false; + const bool taken = true; + const uint64_t pc = 4; + const uint64_t insnAddress = 16; + const uint64_t insnID = 5; + + uop->setInstructionAddress(insnAddress); + uop->setInstructionId(insnID); + uop->setBranchPrediction({takenPred, insnAddress + 4}); + + EXPECT_CALL(*uop, execute()).WillOnce(Invoke([&]() { + uop->setExecuted(true); + uop->setBranchResults(taken, pc); + })); + + // Check that the branch predictor was updated with the results + EXPECT_CALL(*uop, getBranchType()).Times(1); + + EXPECT_CALL(predictor, + update(insnAddress, taken, pc, BranchType::Conditional)) + .Times(1); + + // Check that empty forwarding call is made + EXPECT_CALL(executionHandlers, forwardOperands(IsEmpty(), IsEmpty())) + .Times(1); + + executeUnit.tick(); + + EXPECT_EQ(uopPtr->wasBranchMispredicted(), true); + EXPECT_EQ(uopPtr->wasBranchTaken(), taken); + + EXPECT_EQ(executeUnit.shouldFlush(), true); + EXPECT_EQ(output.getTailSlots()[0].get(), uop); + EXPECT_EQ(executeUnit.getBranchExecutedCount(), 1); + EXPECT_EQ(executeUnit.getBranchMispredictedCount(), 1); + EXPECT_EQ(executeUnit.getFlushAddress(), pc); + EXPECT_EQ(executeUnit.getFlushInsnId(), insnID); +} + +// Test that the flushing mechansim works correctly via purgeFlushed() +TEST_F(PipelineExecuteUnitTest, purgeFlushed) { + input.getHeadSlots()[0] = uopPtr; + + uop->setLatency(5); + uop->setStallCycles(5); + // Set up instructions so that only one is in the EU pipeline at a time + ON_CALL(*uop, getGroup()).WillByDefault(Return(3)); + ON_CALL(*uop, 
canExecute()).WillByDefault(Return(true)); + ON_CALL(*secondUop, getGroup()).WillByDefault(Return(4)); + ON_CALL(*secondUop, canExecute()).WillByDefault(Return(true)); + ON_CALL(*thirdUop, getGroup()).WillByDefault(Return(5)); + ON_CALL(*thirdUop, canExecute()).WillByDefault(Return(true)); + + EXPECT_CALL(*uop, execute()).Times(0); + EXPECT_CALL(*secondUop, execute()).Times(0); + EXPECT_CALL(*thirdUop, execute()).Times(1); + + // Stage all three instructions in EU pipeline + executeUnit.tick(); + EXPECT_EQ(input.getHeadSlots()[0], nullptr); + EXPECT_EQ(output.getTailSlots()[0], nullptr); + input.getHeadSlots()[0] = secondUopPtr; + executeUnit.tick(); + EXPECT_EQ(input.getHeadSlots()[0], nullptr); + EXPECT_EQ(output.getTailSlots()[0], nullptr); + input.getHeadSlots()[0] = thirdUopPtr; + executeUnit.tick(); + EXPECT_EQ(input.getHeadSlots()[0], nullptr); + EXPECT_EQ(output.getTailSlots()[0], nullptr); + + // Flush first two instructions + uopPtr->setFlushed(); + secondUopPtr->setFlushed(); + executeUnit.purgeFlushed(); + + // Ensure non-flushed instruction progresses through the pipeline + executeUnit.tick(); + EXPECT_EQ(input.getHeadSlots()[0], nullptr); EXPECT_EQ(output.getTailSlots()[0].get(), thirdUop); + EXPECT_TRUE(executeUnit.isEmpty()); } } // namespace pipeline diff --git a/test/unit/pipeline/FetchUnitTest.cc b/test/unit/pipeline/FetchUnitTest.cc index fef76a9f61..f404029640 100644 --- a/test/unit/pipeline/FetchUnitTest.cc +++ b/test/unit/pipeline/FetchUnitTest.cc @@ -10,8 +10,15 @@ #include "simeng/pipeline/PipelineBuffer.hh" using ::testing::_; +using ::testing::AllOf; +using ::testing::AnyNumber; +using ::testing::AnyOf; +using ::testing::AtLeast; using ::testing::DoAll; using ::testing::Field; +using ::testing::Gt; +using ::testing::Lt; +using ::testing::Ne; using ::testing::Return; using ::testing::SetArgReferee; @@ -24,13 +31,18 @@ class PipelineFetchUnitTest : public testing::Test { : output(1, {}), fetchBuffer({{0, 16}, 0, 0}), 
completedReads(&fetchBuffer, 1), - fetchUnit(output, memory, 1024, 0, 16, isa, predictor), + fetchUnit(output, memory, 1024, 0, blockSize, isa, predictor), uop(new MockInstruction), - uopPtr(uop) { + uopPtr(uop), + uop2(new MockInstruction), + uopPtr2(uop2) { uopPtr->setInstructionAddress(0); } protected: + const uint8_t insnMaxSizeBytes = 4; + const uint8_t blockSize = 16; + PipelineBuffer output; MockMemoryInterface memory; MockArchitecture isa; @@ -43,6 +55,8 @@ class PipelineFetchUnitTest : public testing::Test { MockInstruction* uop; std::shared_ptr uopPtr; + MockInstruction* uop2; + std::shared_ptr uopPtr2; }; // Tests that ticking a fetch unit attempts to predecode from the correct @@ -52,7 +66,7 @@ TEST_F(PipelineFetchUnitTest, Tick) { ON_CALL(memory, getCompletedReads()).WillByDefault(Return(completedReads)); - ON_CALL(isa, getMaxInstructionSize()).WillByDefault(Return(4)); + ON_CALL(isa, getMaxInstructionSize()).WillByDefault(Return(insnMaxSizeBytes)); // Set the output parameter to a 1-wide macro-op EXPECT_CALL(isa, predecode(_, _, 0, _)) @@ -84,8 +98,8 @@ TEST_F(PipelineFetchUnitTest, TickStalled) { // Tests that the fetch unit will handle instructions that straddle fetch block // boundaries by automatically requesting the next block of data. 
TEST_F(PipelineFetchUnitTest, FetchUnaligned) { - MacroOp macroOp = {uopPtr}; - ON_CALL(isa, getMaxInstructionSize()).WillByDefault(Return(4)); + MacroOp mOp = {uopPtr}; + ON_CALL(isa, getMaxInstructionSize()).WillByDefault(Return(insnMaxSizeBytes)); ON_CALL(memory, getCompletedReads()).WillByDefault(Return(completedReads)); // Set PC to 14, so there will not be enough data to start decoding @@ -99,12 +113,328 @@ TEST_F(PipelineFetchUnitTest, FetchUnaligned) { fetchUnit.requestFromPC(); // Tick again, expecting that decoding will now resume - MemoryReadResult nextBlockValue = {{16, 16}, 0, 1}; + MemoryReadResult nextBlockValue = {{16, blockSize}, 0, 1}; span nextBlock = {&nextBlockValue, 1}; - EXPECT_CALL(memory, getCompletedReads()).WillOnce(Return(nextBlock)); - EXPECT_CALL(isa, predecode(_, _, _, _)) - .WillOnce(DoAll(SetArgReferee<3>(macroOp), Return(4))); + ON_CALL(memory, getCompletedReads()).WillByDefault(Return(nextBlock)); + ON_CALL(isa, predecode(_, _, _, _)) + .WillByDefault(DoAll(SetArgReferee<3>(mOp), Return(4))); + EXPECT_CALL(memory, getCompletedReads()).Times(1); + EXPECT_CALL(memory, clearCompletedReads()).Times(4); + EXPECT_CALL(isa, getMaxInstructionSize()).Times(8); + EXPECT_CALL(isa, predecode(_, _, _, _)).Times(4); + + // Tick 4 times to process all 16 bytes of fetched data + for (int i = 0; i < 4; i++) { + fetchUnit.tick(); + } + // Tick a 5th time to ensure all buffered bytes have been used + EXPECT_CALL(memory, getCompletedReads()).Times(1); + EXPECT_CALL(isa, getMaxInstructionSize()).Times(1); + EXPECT_CALL(isa, predecode(_, _, _, _)).Times(0); + fetchUnit.tick(); +} + +// Tests that a properly aligned PC (to the fetch block boundary) is correctly +// fetched +TEST_F(PipelineFetchUnitTest, fetchAligned) { + const uint8_t pc = 16; + + ON_CALL(isa, getMaxInstructionSize()).WillByDefault(Return(insnMaxSizeBytes)); + + MemoryAccessTarget target = {pc, blockSize}; + EXPECT_CALL(isa, getMaxInstructionSize()).Times(1); + EXPECT_CALL(memory, 
requestRead(target, _)).Times(1); + + // Request block from Memory + fetchUnit.updatePC(pc); + fetchUnit.requestFromPC(); + + MacroOp mOp = {uopPtr}; + MemoryReadResult memReadResult = {target, RegisterValue(0xFFFF, blockSize), + 1}; + span nextBlock = {&memReadResult, 1}; + ON_CALL(memory, getCompletedReads()).WillByDefault(Return(nextBlock)); + ON_CALL(isa, predecode(_, _, _, _)) + .WillByDefault(DoAll(SetArgReferee<3>(mOp), Return(4))); + EXPECT_CALL(memory, getCompletedReads()).Times(1); + EXPECT_CALL(memory, clearCompletedReads()).Times(4); + EXPECT_CALL(isa, getMaxInstructionSize()).Times(8); + EXPECT_CALL(isa, predecode(_, _, _, _)).Times(4); + + // Tick 4 times to process all 16 bytes of fetched data + for (int i = 0; i < 4; i++) { + fetchUnit.tick(); + } + // Tick a 5th time to ensure all buffered bytes have been used + EXPECT_CALL(memory, getCompletedReads()).Times(1); + EXPECT_CALL(memory, clearCompletedReads()).Times(0); + EXPECT_CALL(isa, getMaxInstructionSize()).Times(1); + EXPECT_CALL(isa, predecode(_, _, _, _)).Times(0); + fetchUnit.tick(); +} + +// Tests that halting functionality triggers correctly +TEST_F(PipelineFetchUnitTest, halted) { + ON_CALL(isa, getMaxInstructionSize()).WillByDefault(Return(insnMaxSizeBytes)); + EXPECT_FALSE(fetchUnit.hasHalted()); + fetchUnit.tick(); + EXPECT_FALSE(fetchUnit.hasHalted()); + + // Test PC >= programByteLength triggers halting + fetchUnit.updatePC(1024); + EXPECT_TRUE(fetchUnit.hasHalted()); + + // Test PC being incremented to >= programByteLength triggers halting + fetchUnit.updatePC(1008); + EXPECT_FALSE(fetchUnit.hasHalted()); + + MemoryAccessTarget target = {1008, blockSize}; + EXPECT_CALL(isa, getMaxInstructionSize()).Times(1); + EXPECT_CALL(memory, requestRead(target, _)).Times(1); + fetchUnit.requestFromPC(); + + MacroOp mOp = {uopPtr}; + MemoryReadResult memReadResult = {target, RegisterValue(0xFFFF, blockSize), + 1}; + span nextBlock = {&memReadResult, 1}; + ON_CALL(memory, 
getCompletedReads()).WillByDefault(Return(nextBlock)); + ON_CALL(isa, predecode(_, _, _, _)) + .WillByDefault(DoAll(SetArgReferee<3>(mOp), Return(4))); + EXPECT_CALL(memory, getCompletedReads()).Times(1); + EXPECT_CALL(memory, clearCompletedReads()).Times(4); + EXPECT_CALL(isa, getMaxInstructionSize()).Times(8); + EXPECT_CALL(isa, predecode(_, _, _, _)).Times(4); + // Tick 4 times to process all 16 bytes of fetched data + for (int i = 0; i < 4; i++) { + fetchUnit.tick(); + } + EXPECT_TRUE(fetchUnit.hasHalted()); +} + +// Tests that fetching a branch instruction (predicted taken) mid block causes a +// branch stall + discards the remaining fetched instructions +TEST_F(PipelineFetchUnitTest, fetchTakenBranchMidBlock) { + const uint8_t pc = 16; + + ON_CALL(isa, getMaxInstructionSize()).WillByDefault(Return(insnMaxSizeBytes)); + + MemoryAccessTarget target = {pc, blockSize}; + EXPECT_CALL(isa, getMaxInstructionSize()).Times(1); + EXPECT_CALL(memory, requestRead(target, _)).Times(1); + + // Request block from memory + fetchUnit.updatePC(pc); + fetchUnit.requestFromPC(); + + MacroOp mOp = {uopPtr}; + MemoryReadResult memReadResult = {target, RegisterValue(0xFFFF, blockSize), + 1}; + span nextBlock = {&memReadResult, 1}; + ON_CALL(memory, getCompletedReads()).WillByDefault(Return(nextBlock)); + ON_CALL(isa, predecode(_, _, _, _)) + .WillByDefault(DoAll(SetArgReferee<3>(mOp), Return(4))); + EXPECT_CALL(memory, getCompletedReads()).Times(1); + + // For first tick, process instruction as non-branch + EXPECT_CALL(memory, clearCompletedReads()).Times(1); + EXPECT_CALL(isa, getMaxInstructionSize()).Times(2); + EXPECT_CALL(isa, predecode(_, _, _, _)).Times(1); + EXPECT_CALL(*uop, isBranch()).WillOnce(Return(false)); + fetchUnit.tick(); + + // For second tick, process a taken branch meaning rest of block is discarded + // & a new memory block is requested + EXPECT_CALL(memory, getCompletedReads()).Times(0); + EXPECT_CALL(memory, clearCompletedReads()).Times(1); + EXPECT_CALL(isa, 
getMaxInstructionSize()).Times(2); + EXPECT_CALL(isa, predecode(_, _, _, _)).Times(1); + EXPECT_CALL(*uop, isBranch()).WillOnce(Return(true)); + BranchType bType = BranchType::Unconditional; + uint64_t knownOff = 304; + EXPECT_CALL(*uop, getBranchType()).WillOnce(Return(bType)); + EXPECT_CALL(*uop, getKnownOffset()).WillOnce(Return(knownOff)); + BranchPrediction pred = {true, pc + knownOff}; + EXPECT_CALL(predictor, predict(20, bType, knownOff)).WillOnce(Return(pred)); + fetchUnit.tick(); + + // Ensure on next tick, predecode is not called + EXPECT_CALL(memory, getCompletedReads()).Times(1); + EXPECT_CALL(memory, clearCompletedReads()).Times(0); + EXPECT_CALL(isa, getMaxInstructionSize()).Times(1); + EXPECT_CALL(isa, predecode(_, _, _, _)).Times(0); + fetchUnit.tick(); + + // Make sure on next call to `requestFromPC`, target is address 320 + // (pred.target) + target = {pred.target, blockSize}; + EXPECT_CALL(isa, getMaxInstructionSize()).Times(1); + EXPECT_CALL(memory, requestRead(target, _)).Times(1); + fetchUnit.requestFromPC(); +} + +// Tests the functionality of the supplying from the Loop Buffer +TEST_F(PipelineFetchUnitTest, supplyFromLoopBuffer) { + // Set instructions to be fetched from memory + MemoryReadResult memReadResult = { + {0x0, blockSize}, RegisterValue(0xFFFF, blockSize), 1}; + span nextBlock = {&memReadResult, 1}; + ON_CALL(memory, getCompletedReads()).WillByDefault(Return(nextBlock)); + + ON_CALL(isa, getMaxInstructionSize()).WillByDefault(Return(insnMaxSizeBytes)); + + // Register loop boundary + fetchUnit.registerLoopBoundary(0xC); + + // Set the instructions, within the loop body, to be returned from predecode + MacroOp mOp2 = {uopPtr2}; + ON_CALL(isa, predecode(_, _, 0xC, _)) + .WillByDefault(DoAll(SetArgReferee<3>(mOp2), Return(4))); + ON_CALL(*uop2, isBranch()).WillByDefault(Return(true)); + + MacroOp mOp = {uopPtr}; + ON_CALL(isa, predecode(_, _, Ne(0xC), _)) + .WillByDefault(DoAll(SetArgReferee<3>(mOp), Return(4))); + ON_CALL(*uop, 
isBranch()).WillByDefault(Return(false)); + + // Set the expectation from the predictor to be true so a loop body will + // be detected + ON_CALL(predictor, predict(_, _, _)) + .WillByDefault(Return(BranchPrediction({true, 0x0}))); + + // Set Loop Buffer state to be LoopBufferState::FILLING + // Tick 4 times to process all 16 bytes of fetched data + for (int i = 0; i < 4; i++) { + fetchUnit.tick(); + } + + // Fetch the next block of instructions from memory + fetchUnit.requestFromPC(); + + // Fill Loop Buffer and set its state to be LoopBufferState::SUPPLYING + // Tick 4 times to process all 16 bytes of fetched data + for (int i = 0; i < 4; i++) { + fetchUnit.tick(); + } + + // Whilst the Loop Buffer state is LoopBufferState::SUPPLYING, the request + // read should never be called + EXPECT_CALL(memory, requestRead(_, _)).Times(0); + EXPECT_CALL(isa, getMaxInstructionSize()).Times(0); + EXPECT_CALL(memory, getCompletedReads()).Times(0); + fetchUnit.requestFromPC(); + + // Empty output buffer and ensure the correct instructions are supplied from + // the Loop Buffer + output.fill({}); + fetchUnit.tick(); + EXPECT_EQ(output.getTailSlots()[0], mOp); + output.fill({}); + fetchUnit.tick(); + EXPECT_EQ(output.getTailSlots()[0], mOp); + output.fill({}); + fetchUnit.tick(); + EXPECT_EQ(output.getTailSlots()[0], mOp); + output.fill({}); + fetchUnit.tick(); + EXPECT_EQ(output.getTailSlots()[0], mOp2); + + // Flush the Loop Buffer and ensure correct instructions are fetched from + // memory + fetchUnit.flushLoopBuffer(); + fetchUnit.updatePC(0x0); + EXPECT_CALL(memory, requestRead(_, _)).Times(AtLeast(1)); + EXPECT_CALL(isa, getMaxInstructionSize()).Times(AtLeast(1)); + EXPECT_CALL(memory, getCompletedReads()).Times(AtLeast(1)); + fetchUnit.requestFromPC(); + output.fill({}); + fetchUnit.tick(); + EXPECT_EQ(output.getTailSlots()[0], mOp); + output.fill({}); + fetchUnit.tick(); + EXPECT_EQ(output.getTailSlots()[0], mOp); + output.fill({}); + fetchUnit.tick(); + 
EXPECT_EQ(output.getTailSlots()[0], mOp); + output.fill({}); + fetchUnit.tick(); + EXPECT_EQ(output.getTailSlots()[0], mOp2); +} + +// Tests the functionality of idling the supply to the Loop Buffer one of not +// taken branch at the loopBoundaryAddress_ +TEST_F(PipelineFetchUnitTest, idleLoopBufferDueToNotTakenBoundary) { + // Set instructions to be fetched from memory + MemoryReadResult memReadResultA = { + {0x0, blockSize}, RegisterValue(0xFFFF, blockSize), 1}; + span nextBlockA = {&memReadResultA, 1}; + MemoryReadResult memReadResultB = { + {0x10, blockSize}, RegisterValue(0xFFFF, blockSize), 1}; + span nextBlockB = {&memReadResultB, 1}; + EXPECT_CALL(memory, getCompletedReads()).WillRepeatedly(Return(nextBlockA)); + + ON_CALL(isa, getMaxInstructionSize()).WillByDefault(Return(insnMaxSizeBytes)); + + // Register loop boundary + fetchUnit.registerLoopBoundary(0xC); + + // Set the instructions, within the loop body, to be returned from predecode + MacroOp mOp2 = {uopPtr2}; + ON_CALL(isa, predecode(_, _, Gt(0x8), _)) + .WillByDefault(DoAll(SetArgReferee<3>(mOp2), Return(4))); + ON_CALL(*uop2, isBranch()).WillByDefault(Return(true)); + + MacroOp mOp = {uopPtr}; + ON_CALL(isa, predecode(_, _, Lt(0xC), _)) + .WillByDefault(DoAll(SetArgReferee<3>(mOp), Return(4))); + ON_CALL(*uop, isBranch()).WillByDefault(Return(false)); + + // Set the first expectation from the predictor to be true so a loop body will + // be detected + EXPECT_CALL(predictor, predict(_, _, _)) + .WillOnce(Return(BranchPrediction({true, 0x0}))); + + // Set Loop Buffer state to be LoopBufferState::FILLING + // Tick 4 times to process all 16 bytes of fetched data + for (int i = 0; i < 4; i++) { + fetchUnit.tick(); + } + + // Fetch the next block of instructions from memory and change the expected + // outcome of the branch predictor + fetchUnit.requestFromPC(); + EXPECT_CALL(predictor, predict(_, _, _)) + .WillRepeatedly(Return(BranchPrediction({false, 0x0}))); + + // Attempt to fill Loop Buffer but 
prevent it on a not taken outcome at the + // loopBoundaryAddress_ branch + // Tick 4 times to process all 16 bytes of fetched data + for (int i = 0; i < 4; i++) { + fetchUnit.tick(); + } + + // Set the expectation for the next block to be fetched after the Loop Buffer + // state has been reset + const MemoryAccessTarget target = {0x10, blockSize}; + EXPECT_CALL(memory, getCompletedReads()).WillRepeatedly(Return(nextBlockB)); + EXPECT_CALL(memory, requestRead(target, _)).Times(1); + + // Fetch the next block of instructions from memory + fetchUnit.requestFromPC(); + + // Empty output buffer and ensure the correct instructions are fetched from + // memory + output.fill({}); + fetchUnit.tick(); + EXPECT_EQ(output.getTailSlots()[0], mOp2); + output.fill({}); + fetchUnit.tick(); + EXPECT_EQ(output.getTailSlots()[0], mOp2); + output.fill({}); + fetchUnit.tick(); + EXPECT_EQ(output.getTailSlots()[0], mOp2); + output.fill({}); fetchUnit.tick(); + EXPECT_EQ(output.getTailSlots()[0], mOp2); } } // namespace pipeline diff --git a/test/unit/pipeline/LoadStoreQueueTest.cc b/test/unit/pipeline/LoadStoreQueueTest.cc index 414363352c..5ac8e36b04 100644 --- a/test/unit/pipeline/LoadStoreQueueTest.cc +++ b/test/unit/pipeline/LoadStoreQueueTest.cc @@ -17,6 +17,10 @@ const uint8_t MAX_LOADS = 32; const uint8_t MAX_STORES = 32; const uint8_t MAX_COMBINED = 64; +// TODO: When the associated requestWrite(...) gets moved into the LSQ's tick() +// functionality, we need to check the state of requestStoreQueue_ and calling +// of requestWrite(...) 
in a vareity of tests + class MockForwardOperandsHandler { public: MOCK_METHOD2(forwardOperands, @@ -36,10 +40,12 @@ class LoadStoreQueueTest : public ::testing::TestWithParam { loadUop2(new MockInstruction), storeUop(new MockInstruction), storeUop2(new MockInstruction), + loadStoreUop(new MockInstruction), loadUopPtr(loadUop), loadUopPtr2(loadUop2), storeUopPtr(storeUop), - storeUopPtr2(storeUop2) { + storeUopPtr2(storeUop2), + loadStoreUopPtr(loadStoreUop) { // Set up sensible return values for the load uop ON_CALL(*loadUop, isLoad()).WillByDefault(Return(true)); ON_CALL(*loadUop, getGeneratedAddresses()) @@ -54,7 +60,12 @@ class LoadStoreQueueTest : public ::testing::TestWithParam { } protected: - LoadStoreQueue getQueue() { + LoadStoreQueue getQueue(bool exclusive = false, + uint16_t loadBandwidth = UINT16_MAX, + uint16_t storeBandwidth = UINT16_MAX, + uint16_t permittedRequests = UINT16_MAX, + uint16_t permittedLoads = UINT16_MAX, + uint16_t permittedStores = UINT16_MAX) { if (GetParam()) { // Combined queue return LoadStoreQueue( @@ -63,7 +74,8 @@ class LoadStoreQueueTest : public ::testing::TestWithParam { [this](auto registers, auto values) { forwardOperandsHandler.forwardOperands(registers, values); }, - [](auto uop) {}); + [](auto uop) {}, exclusive, loadBandwidth, storeBandwidth, + permittedRequests, permittedLoads, permittedStores); } else { // Split queue return LoadStoreQueue( @@ -72,7 +84,8 @@ class LoadStoreQueueTest : public ::testing::TestWithParam { [this](auto registers, auto values) { forwardOperandsHandler.forwardOperands(registers, values); }, - [](auto uop) {}); + [](auto uop) {}, exclusive, loadBandwidth, storeBandwidth, + permittedRequests, permittedLoads, permittedStores); } } @@ -122,11 +135,13 @@ class LoadStoreQueueTest : public ::testing::TestWithParam { MockInstruction* loadUop2; MockInstruction* storeUop; MockInstruction* storeUop2; + MockInstruction* loadStoreUop; std::shared_ptr loadUopPtr; std::shared_ptr loadUopPtr2; 
std::shared_ptr storeUopPtr; std::shared_ptr storeUopPtr2; + std::shared_ptr loadStoreUopPtr; MockForwardOperandsHandler forwardOperandsHandler; @@ -203,11 +218,49 @@ TEST_P(LoadStoreQueueTest, AddStore) { TEST_P(LoadStoreQueueTest, PurgeFlushedLoad) { auto queue = getQueue(); auto initialLoadSpace = queue.getLoadQueueSpace(); + MemoryReadResult completedRead = {addresses[0], data[0], 1}; + span completedReads = {&completedRead, 1}; + + // Set load instruction attributes + loadUop->setSequenceId(0); + loadUop->setInstructionId(0); + loadUop2->setSequenceId(1); + loadUop2->setInstructionId(1); + + EXPECT_CALL(*loadUop, getGeneratedAddresses()) + .Times(AtLeast(1)) + .WillRepeatedly(Return(addressesSpan)); + EXPECT_CALL(*loadUop2, getGeneratedAddresses()) + .Times(AtLeast(1)) + .WillRepeatedly(Return(addressesSpan)); + + // Add loads to LSQ queue.addLoad(loadUopPtr); + queue.addLoad(loadUopPtr2); + + // Start the first load so that its accesses can be added to + // requestLoadQueue_/requestedLoads_ and expect a memory access to be + // performed + queue.startLoad(loadUopPtr); + EXPECT_CALL(dataMemory, requestRead(addresses[0], 0)).Times(1); + queue.tick(); + // Start the second load so that its accesses can be added to + // requestLoadQueue_/requestedLoads_ but flush it before it can perform a + // memory access + queue.startLoad(loadUopPtr2); loadUop->setFlushed(); + loadUop2->setFlushed(); queue.purgeFlushed(); + // Expect no activity regarding memory accesses or the passing of the load + // instruction to the output buffer + EXPECT_CALL(dataMemory, requestRead(_, _)).Times(0); + EXPECT_CALL(dataMemory, getCompletedReads()) + .WillRepeatedly(Return(completedReads)); + queue.tick(); + + EXPECT_EQ(completionSlots[0].getTailSlots()[0], nullptr); EXPECT_EQ(queue.getLoadQueueSpace(), initialLoadSpace); } @@ -231,11 +284,21 @@ TEST_P(LoadStoreQueueTest, Load) { MemoryReadResult completedRead = {addresses[0], data[0], 1}; span completedReads = {&completedRead, 1}; - 
EXPECT_CALL(*loadUop, getGeneratedAddresses()).Times(AtLeast(1)); - - loadUop->setDataPending(addresses.size()); + // Set load instruction attributes + EXPECT_CALL(*loadUop, getGeneratedAddresses()) + .Times(AtLeast(1)) + .WillRepeatedly(Return(addressesSpan)); + loadUop->setLSQLatency(3); + // Begin load in LSQ queue.addLoad(loadUopPtr); + queue.startLoad(loadUopPtr); + + // Given 3 cycle latency, no requests should occur in the first two ticks of + // the LSQ + EXPECT_CALL(dataMemory, requestRead(_, _)).Times(0); + queue.tick(); + queue.tick(); // Check that a read request is made to the memory interface EXPECT_CALL(dataMemory, requestRead(addresses[0], _)).Times(1); @@ -245,15 +308,40 @@ TEST_P(LoadStoreQueueTest, Load) { .WillRepeatedly(Return(completedReads)); // Check that the LSQ supplies the right data to the instruction - // TODO: Replace with check for call over memory interface in future? EXPECT_CALL(*loadUop, - supplyData(0, Property(&RegisterValue::get, data[0]))) + supplyData(addresses[0].address, + Property(&RegisterValue::get, data[0]))) .Times(1); + // Tick the queue to complete the load + queue.tick(); + + EXPECT_EQ(completionSlots[0].getTailSlots()[0].get(), loadUop); +} + +// Tests that a queue can perform a load with no addresses +TEST_P(LoadStoreQueueTest, LoadWithNoAddresses) { + loadUop->setSequenceId(1); + auto queue = getQueue(); + + span emptyAddressesSpan = {}; + + EXPECT_CALL(*loadUop, getGeneratedAddresses()) + .Times(AtLeast(1)) + .WillRepeatedly(Return(emptyAddressesSpan)); + + // Check that a read request isn't made to the memory interface but the load + // completes in the LSQ + EXPECT_CALL(dataMemory, requestRead(_, _)).Times(0); + EXPECT_CALL(*loadUop, execute()).Times(1); + + queue.addLoad(loadUopPtr); queue.startLoad(loadUopPtr); // Tick the queue to complete the load queue.tick(); + + EXPECT_EQ(completionSlots[0].getTailSlots()[0].get(), loadUop); } // Tests that a queue can commit a load @@ -275,14 +363,18 @@ 
TEST_P(LoadStoreQueueTest, Store) { auto queue = getQueue(); auto initialStoreSpace = queue.getStoreQueueSpace(); - EXPECT_CALL(*storeUop, getGeneratedAddresses()).Times(AtLeast(1)); - EXPECT_CALL(*storeUop, getData()).Times(AtLeast(1)); - + // Set store instruction attributes storeUop->setSequenceId(1); storeUop->setInstructionId(1); + EXPECT_CALL(*storeUop, getGeneratedAddresses()) + .Times(AtLeast(1)) + .WillRepeatedly(Return(addressesSpan)); + EXPECT_CALL(*storeUop, getData()) + .Times(AtLeast(1)) + .WillRepeatedly(Return(dataSpan)); + queue.addStore(storeUopPtr); - storeUopPtr->setCommitReady(); queue.supplyStoreData(storeUopPtr); // Check that a write request is sent to the memory interface @@ -299,6 +391,298 @@ TEST_P(LoadStoreQueueTest, Store) { EXPECT_EQ(queue.getStoreQueueSpace(), initialStoreSpace); } +// Tests that a queue can perform a load-store operation +TEST_P(LoadStoreQueueTest, LoadStore) { + auto queue = getQueue(); + auto initialLoadSpace = queue.getLoadQueueSpace(); + auto initialStoreSpace = queue.getStoreQueueSpace(); + + MemoryReadResult completedRead = {addresses[0], data[0], 1}; + span completedReads = {&completedRead, 1}; + + // Set load-store instruction attributes + loadStoreUop->setSequenceId(1); + loadStoreUop->setInstructionId(1); + + EXPECT_CALL(*loadStoreUop, isLoad()) + .Times(AtLeast(1)) + .WillRepeatedly(Return(true)); + EXPECT_CALL(*loadStoreUop, isStoreData()) + .Times(AtLeast(1)) + .WillRepeatedly(Return(true)); + + EXPECT_CALL(*loadStoreUop, getGeneratedAddresses()) + .Times(AtLeast(1)) + .WillRepeatedly(Return(addressesSpan)); + EXPECT_CALL(*loadStoreUop, getData()) + .Times(AtLeast(1)) + .WillRepeatedly(Return(dataSpan)); + + // Register load-store operation and start load portion + queue.addLoad(loadStoreUopPtr); + queue.addStore(loadStoreUopPtr); + queue.startLoad(loadStoreUopPtr); + + // Check that a read request is made to the memory interface + EXPECT_CALL(dataMemory, requestRead(addresses[0], _)).Times(1); + + // 
Expect a check against finished reads and return the result + EXPECT_CALL(dataMemory, getCompletedReads()) + .WillRepeatedly(Return(completedReads)); + + // Check that the LSQ supplies the right data to the instruction + EXPECT_CALL(*loadStoreUop, + supplyData(addresses[0].address, + Property(&RegisterValue::get, data[0]))) + .Times(1); + + // Tick the queue to complete the load portion of the load-store + queue.tick(); + EXPECT_EQ(completionSlots[0].getTailSlots()[0].get(), loadStoreUop); + + // Check that a write request is sent to the memory interface + EXPECT_CALL(dataMemory, + requestWrite(addresses[0], + Property(&RegisterValue::get, data[0]))) + .Times(1); + + // Commit both potions of the load-store + queue.commitLoad(loadStoreUopPtr); + queue.commitStore(loadStoreUopPtr); + + // Check the load-store was removed + EXPECT_EQ(queue.getLoadQueueSpace(), initialLoadSpace); + EXPECT_EQ(queue.getStoreQueueSpace(), initialStoreSpace); +} + +// Tests that bandwidth restrictions are adhered to in a non-exclusive LSQ +TEST_P(LoadStoreQueueTest, NonExclusiveBandwidthRestriction) { + auto queue = getQueue(false, 3, 3); + + // Set instruction attributes + loadUop->setSequenceId(0); + loadUop->setInstructionId(0); + storeUop->setSequenceId(1); + storeUop->setInstructionId(1); + loadUop2->setSequenceId(2); + loadUop2->setInstructionId(2); + + std::vector multipleAddresses = {{1, 2}, {2, 2}}; + span multipleAddressesSpan = { + multipleAddresses.data(), multipleAddresses.size()}; + std::vector storeData = {static_cast(0x01), + static_cast(0x10)}; + span storeDataSpan = {storeData.data(), + storeData.size()}; + + EXPECT_CALL(*loadUop, getGeneratedAddresses()) + .Times(AtLeast(1)) + .WillRepeatedly(Return(multipleAddressesSpan)); + EXPECT_CALL(*storeUop, getGeneratedAddresses()) + .Times(AtLeast(1)) + .WillRepeatedly(Return(multipleAddressesSpan)); + EXPECT_CALL(*loadUop2, getGeneratedAddresses()) + .Times(AtLeast(1)) + .WillRepeatedly(Return(multipleAddressesSpan)); + 
EXPECT_CALL(*storeUop, getData()) + .Times(AtLeast(1)) + .WillRepeatedly(Return(storeDataSpan)); + + // Add instructions to LSQ and register their accesses to be processed in the + // tick() function + queue.addLoad(loadUopPtr); + queue.addLoad(loadUopPtr2); + queue.startLoad(loadUopPtr); + queue.startLoad(loadUopPtr2); + queue.addStore(storeUopPtr); + queue.supplyStoreData(storeUopPtr); + queue.commitStore(storeUopPtr); + + // Set expectations for tick logic based on set restrictions. Only 2 bytes of + // read and 2 bytes of write accesses should be processed per cycle (in this + // case that translates to one of the two addresses each uop has to handle). + EXPECT_CALL(dataMemory, requestRead(_, 0)).Times(1); + queue.tick(); + EXPECT_CALL(dataMemory, requestRead(_, 0)).Times(1); + queue.tick(); + EXPECT_CALL(dataMemory, requestRead(_, 2)).Times(1); + queue.tick(); + EXPECT_CALL(dataMemory, requestRead(_, 2)).Times(1); + queue.tick(); +} + +// Tests that bandwidth restrictions are adhered to in an exclusive LSQ +TEST_P(LoadStoreQueueTest, ExclusiveBandwidthRestriction) { + auto queue = getQueue(true, 3, 3); + + // Set instruction attributes + loadUop->setSequenceId(0); + loadUop->setInstructionId(0); + storeUop->setSequenceId(1); + storeUop->setInstructionId(1); + loadUop2->setSequenceId(2); + loadUop2->setInstructionId(2); + + std::vector multipleAddresses = {{1, 2}, {2, 2}}; + span multipleAddressesSpan = { + multipleAddresses.data(), multipleAddresses.size()}; + std::vector storeData = {static_cast(0x01), + static_cast(0x10)}; + span storeDataSpan = {storeData.data(), + storeData.size()}; + + EXPECT_CALL(*loadUop, getGeneratedAddresses()) + .Times(AtLeast(1)) + .WillRepeatedly(Return(multipleAddressesSpan)); + EXPECT_CALL(*storeUop, getGeneratedAddresses()) + .Times(AtLeast(1)) + .WillRepeatedly(Return(multipleAddressesSpan)); + EXPECT_CALL(*loadUop2, getGeneratedAddresses()) + .Times(AtLeast(1)) + .WillRepeatedly(Return(multipleAddressesSpan)); + 
EXPECT_CALL(*storeUop, getData()) + .Times(AtLeast(1)) + .WillRepeatedly(Return(storeDataSpan)); + + // Add instructions to LSQ and register their accesses to be processed in the + // tick() function + queue.addLoad(loadUopPtr); + queue.addLoad(loadUopPtr2); + queue.startLoad(loadUopPtr); + queue.startLoad(loadUopPtr2); + queue.addStore(storeUopPtr); + queue.supplyStoreData(storeUopPtr); + queue.commitStore(storeUopPtr); + + // Set expectations for tick logic based on set restrictions. Only 2 bytes of + // read and 2 bytes of write accesses should be processed per cycle (in this + // case that translates to one of the two addresses each uop has to handle). + // However, there cannot be an overlap between load and store bandwidth usage + // per cycle due to the LSQ being exclusive + EXPECT_CALL(dataMemory, requestRead(_, _)).Times(0); + queue.tick(); + EXPECT_CALL(dataMemory, requestRead(_, _)).Times(0); + queue.tick(); + EXPECT_CALL(dataMemory, requestRead(_, 0)).Times(1); + queue.tick(); + EXPECT_CALL(dataMemory, requestRead(_, 0)).Times(1); + queue.tick(); + EXPECT_CALL(dataMemory, requestRead(_, 2)).Times(1); + queue.tick(); + EXPECT_CALL(dataMemory, requestRead(_, 2)).Times(1); + queue.tick(); +} + +// Tests that request restrictions are adhered to in a non-exclusive LSQ +TEST_P(LoadStoreQueueTest, NonExclusiveRequestsRestriction) { + auto queue = getQueue(false, UINT16_MAX, UINT16_MAX, 2, 2, 1); + + // Set instruction attributes + loadUop->setSequenceId(0); + loadUop->setInstructionId(0); + storeUop->setSequenceId(1); + storeUop->setInstructionId(1); + loadUop2->setSequenceId(2); + loadUop2->setInstructionId(2); + + std::vector multipleAddresses = {{1, 2}, {2, 2}}; + span multipleAddressesSpan = { + multipleAddresses.data(), multipleAddresses.size()}; + std::vector storeData = {static_cast(0x01), + static_cast(0x10)}; + span storeDataSpan = {storeData.data(), + storeData.size()}; + + EXPECT_CALL(*loadUop, getGeneratedAddresses()) + .Times(AtLeast(1)) + 
.WillRepeatedly(Return(multipleAddressesSpan)); + EXPECT_CALL(*storeUop, getGeneratedAddresses()) + .Times(AtLeast(1)) + .WillRepeatedly(Return(multipleAddressesSpan)); + EXPECT_CALL(*loadUop2, getGeneratedAddresses()) + .Times(AtLeast(1)) + .WillRepeatedly(Return(multipleAddressesSpan)); + EXPECT_CALL(*storeUop, getData()) + .Times(AtLeast(1)) + .WillRepeatedly(Return(storeDataSpan)); + + // Add instructions to LSQ and register their accesses to be processed in the + // tick() function + queue.addLoad(loadUopPtr); + queue.addLoad(loadUopPtr2); + queue.startLoad(loadUopPtr); + queue.startLoad(loadUopPtr2); + queue.addStore(storeUopPtr); + queue.supplyStoreData(storeUopPtr); + queue.commitStore(storeUopPtr); + + // Set expectations for tick logic based on set restrictions. Either 2 reads + // or 1 read and 1 write should be processed per cycle + EXPECT_CALL(dataMemory, requestRead(_, 0)).Times(1); + queue.tick(); + EXPECT_CALL(dataMemory, requestRead(_, 0)).Times(1); + queue.tick(); + EXPECT_CALL(dataMemory, requestRead(_, 2)).Times(2); + queue.tick(); +} + +// Tests that request restrictions are adhered to in an exclusive LSQ +TEST_P(LoadStoreQueueTest, ExclusiveRequestsRestriction) { + auto queue = getQueue(true, UINT16_MAX, UINT16_MAX, 3, 2, 1); + + // Set instruction attributes + loadUop->setSequenceId(0); + loadUop->setInstructionId(0); + storeUop->setSequenceId(1); + storeUop->setInstructionId(1); + loadUop2->setSequenceId(2); + loadUop2->setInstructionId(2); + + std::vector multipleAddresses = {{1, 2}, {2, 2}}; + span multipleAddressesSpan = { + multipleAddresses.data(), multipleAddresses.size()}; + std::vector storeData = {static_cast(0x01), + static_cast(0x10)}; + span storeDataSpan = {storeData.data(), + storeData.size()}; + + EXPECT_CALL(*loadUop, getGeneratedAddresses()) + .Times(AtLeast(1)) + .WillRepeatedly(Return(multipleAddressesSpan)); + EXPECT_CALL(*storeUop, getGeneratedAddresses()) + .Times(AtLeast(1)) + 
.WillRepeatedly(Return(multipleAddressesSpan)); + EXPECT_CALL(*loadUop2, getGeneratedAddresses()) + .Times(AtLeast(1)) + .WillRepeatedly(Return(multipleAddressesSpan)); + EXPECT_CALL(*storeUop, getData()) + .Times(AtLeast(1)) + .WillRepeatedly(Return(storeDataSpan)); + + // Add instructions to LSQ and register their accesses to be processed in the + // tick() function + queue.addLoad(loadUopPtr); + queue.addLoad(loadUopPtr2); + queue.startLoad(loadUopPtr); + queue.startLoad(loadUopPtr2); + queue.addStore(storeUopPtr); + queue.supplyStoreData(storeUopPtr); + queue.commitStore(storeUopPtr); + + // Set expectations for tick logic based on set restrictions. Only 2 reads and + // 1 write should be processed per cycle. However, there cannot be an overlap + // between load and store requests being processed in a single cycle due to + // the LSQ being exclusive. + EXPECT_CALL(dataMemory, requestRead(_, _)).Times(0); + queue.tick(); + EXPECT_CALL(dataMemory, requestRead(_, _)).Times(0); + queue.tick(); + EXPECT_CALL(dataMemory, requestRead(_, 0)).Times(2); + queue.tick(); + EXPECT_CALL(dataMemory, requestRead(_, 2)).Times(2); + queue.tick(); +} + // Tests that committing a store will correctly detect a direct memory order // violation TEST_P(LoadStoreQueueTest, Violation) { @@ -456,6 +840,66 @@ TEST_P(LoadStoreQueueTest, FlushDuringConfliction) { queue.tick(); } +// Test that a load access exactly conflicting on a store access (matching +// address and access size no larger) gets its data supplied when the store +// commits +TEST_P(LoadStoreQueueTest, SupplyDataToConfliction) { + auto queue = getQueue(); + + // Set instruction attributes + storeUop->setSequenceId(0); + storeUop->setInstructionId(0); + loadUop->setSequenceId(1); + loadUop->setInstructionId(1); + + std::vector storeAddresses = {{1, 1}, {2, 1}}; + span storeAddressesSpan = {storeAddresses.data(), + storeAddresses.size()}; + std::vector storeData = {static_cast(0x01), + static_cast(0x10)}; + span storeDataSpan 
= {storeData.data(), + storeData.size()}; + EXPECT_CALL(*storeUop, getGeneratedAddresses()) + .Times(AtLeast(1)) + .WillRepeatedly(Return(storeAddressesSpan)); + EXPECT_CALL(*storeUop, getData()) + .Times(AtLeast(1)) + .WillRepeatedly(Return(storeDataSpan)); + + // Set load addresses which exactly and partially overlaps on first and second + // store addresses respectively + std::vector loadAddresses = {{1, 1}, {2, 2}, {3, 1}}; + span loadAddressesSpan = {loadAddresses.data(), + loadAddresses.size()}; + EXPECT_CALL(*loadUop, getGeneratedAddresses()) + .Times(AtLeast(1)) + .WillRepeatedly(Return(loadAddressesSpan)); + + // Add instructions to LSQ + queue.addStore(storeUopPtr); + queue.addLoad(loadUopPtr); + + // Supply store data so the store can commit + queue.supplyStoreData(storeUopPtr); + + // Start the load so the confliction can be registered + queue.startLoad(loadUopPtr); + + // Two of the accesses don't exactly conflict so they should generate memory + // accesses + EXPECT_CALL(dataMemory, requestRead(loadAddresses[1], 1)).Times(1); + EXPECT_CALL(dataMemory, requestRead(loadAddresses[2], 1)).Times(1); + queue.tick(); + + // The one access which does exactly conflict with a store access should get + // its data supplied on the store's commitment + EXPECT_CALL(*loadUop, + supplyData(loadAddresses[0].address, + Property(&RegisterValue::get, storeData[0]))) + .Times(1); + queue.commitStore(storeUopPtr); +} + INSTANTIATE_TEST_SUITE_P(LoadStoreQueueTests, LoadStoreQueueTest, ::testing::Values(false, true)); diff --git a/test/unit/pipeline/M1PortAllocatorTest.cc b/test/unit/pipeline/M1PortAllocatorTest.cc new file mode 100644 index 0000000000..bdfe4c6cd7 --- /dev/null +++ b/test/unit/pipeline/M1PortAllocatorTest.cc @@ -0,0 +1,157 @@ +#include "gtest/gtest.h" +#include "simeng/pipeline/M1PortAllocator.hh" + +namespace simeng { +namespace pipeline { + +class M1PortAllocatorTest : public testing::Test { + public: + M1PortAllocatorTest() : 
portAllocator(portArrangement, rsArrangement) { + portAllocator.setRSSizeGetter( + [this](std::vector& sizeVec) { rsSizes(sizeVec); }); + } + + void rsSizes(std::vector& sizeVec) const { + sizeVec = rsFreeEntries; + } + + protected: + // Representation of the M1 Firestorm reservation station layout + std::vector rsFreeEntries = {24, 26, 16, 12, 28, 28, 12, + 12, 12, 12, 36, 36, 36, 36}; + // Representation of the M1 Firestorm port layout + const std::vector> portArrangement = { + {0}, {1}, {2}, {3}, {4}, {5}, {6}, {7}, {8}, {9}, {10}, {11}, {12}, {13}}; + // Representation of the M1 Firestorm Reservation Station Arrangement + // std::pair = + std::vector> rsArrangement = { + {0, 24}, {1, 26}, {2, 16}, {3, 12}, {4, 28}, {5, 28}, {6, 12}, + {7, 12}, {8, 12}, {9, 12}, {10, 36}, {11, 36}, {12, 36}, {13, 36}}; + + M1PortAllocator portAllocator; +}; + +// Tests correct allocation for single port groups (i.e. INT_DIV_OR_SQRT) +TEST_F(M1PortAllocatorTest, singlePortAllocation) { + std::vector ports = {4}; + EXPECT_EQ(portAllocator.allocate(ports), 4); +} + +// Tests correct allocation of multiple INT_SIMPLE instructions +TEST_F(M1PortAllocatorTest, allocationIntSimple) { + std::vector ports = {0, 1, 2, 3, 4, 5}; + EXPECT_EQ(portAllocator.allocate(ports), 0); + rsFreeEntries[0]--; + EXPECT_EQ(portAllocator.allocate(ports), 1); + rsFreeEntries[1]--; + EXPECT_EQ(portAllocator.allocate(ports), 2); + rsFreeEntries[2]--; + EXPECT_EQ(portAllocator.allocate(ports), 3); + rsFreeEntries[3]--; + EXPECT_EQ(portAllocator.allocate(ports), 4); + rsFreeEntries[4]--; + EXPECT_EQ(portAllocator.allocate(ports), 5); + rsFreeEntries[5]--; + EXPECT_EQ(portAllocator.allocate(ports), 0); + rsFreeEntries[0]--; + + // Ensure `issued()` logic works as expected + portAllocator.issued(3); + rsFreeEntries[3]++; + EXPECT_EQ(portAllocator.allocate(ports), 3); + rsFreeEntries[3]--; +} + +// Tests correct allocation of multiple BRANCH instructions +TEST_F(M1PortAllocatorTest, allocationBranch) { + 
std::vector ports = {0, 1}; + EXPECT_EQ(portAllocator.allocate(ports), 0); + rsFreeEntries[0]--; + EXPECT_EQ(portAllocator.allocate(ports), 1); + rsFreeEntries[1]--; + EXPECT_EQ(portAllocator.allocate(ports), 0); + rsFreeEntries[0]--; + + // Ensure `issued()` logic works as expected + portAllocator.issued(0); + rsFreeEntries[0]++; + EXPECT_EQ(portAllocator.allocate(ports), 0); + rsFreeEntries[0]--; +} + +// Tests correct allocation of multiple INT_MUL instructions +TEST_F(M1PortAllocatorTest, allocationIntMul) { + std::vector ports = {4, 5}; + EXPECT_EQ(portAllocator.allocate(ports), 4); + rsFreeEntries[4]--; + EXPECT_EQ(portAllocator.allocate(ports), 5); + rsFreeEntries[5]--; + EXPECT_EQ(portAllocator.allocate(ports), 4); + rsFreeEntries[4]--; + + // Ensure `issued()` logic works as expected + portAllocator.issued(4); + rsFreeEntries[4]++; + EXPECT_EQ(portAllocator.allocate(ports), 4); + rsFreeEntries[4]--; +} + +// Tests correct allocation of multiple LOAD instructions +TEST_F(M1PortAllocatorTest, allocationLoad) { + std::vector ports = {7, 8, 9}; + EXPECT_EQ(portAllocator.allocate(ports), 7); + rsFreeEntries[7]--; + EXPECT_EQ(portAllocator.allocate(ports), 8); + rsFreeEntries[8]--; + EXPECT_EQ(portAllocator.allocate(ports), 9); + rsFreeEntries[9]--; + EXPECT_EQ(portAllocator.allocate(ports), 7); + rsFreeEntries[7]--; + + // Ensure `issued()` logic works as expected + portAllocator.issued(9); + rsFreeEntries[9]++; + EXPECT_EQ(portAllocator.allocate(ports), 9); + rsFreeEntries[9]--; +} + +// Tests correct allocation of multiple STORE instructions +TEST_F(M1PortAllocatorTest, allocationStore) { + std::vector ports = {6, 7}; + EXPECT_EQ(portAllocator.allocate(ports), 6); + rsFreeEntries[6]--; + EXPECT_EQ(portAllocator.allocate(ports), 7); + rsFreeEntries[7]--; + EXPECT_EQ(portAllocator.allocate(ports), 6); + rsFreeEntries[6]--; + + // Ensure `issued()` logic works as expected + portAllocator.issued(6); + rsFreeEntries[6]++; + EXPECT_EQ(portAllocator.allocate(ports), 
6); + rsFreeEntries[6]--; +} + +// Tests correct allocation of multiple FP / VECTOR instructions +TEST_F(M1PortAllocatorTest, allocationFpVec) { + std::vector ports = {10, 11, 12, 13}; + EXPECT_EQ(portAllocator.allocate(ports), 10); + rsFreeEntries[10]--; + EXPECT_EQ(portAllocator.allocate(ports), 11); + rsFreeEntries[11]--; + EXPECT_EQ(portAllocator.allocate(ports), 12); + rsFreeEntries[12]--; + EXPECT_EQ(portAllocator.allocate(ports), 13); + rsFreeEntries[13]--; + EXPECT_EQ(portAllocator.allocate(ports), 10); + rsFreeEntries[10]--; + + // Ensure `issued()` logic works as expected + portAllocator.issued(12); + rsFreeEntries[12]++; + EXPECT_EQ(portAllocator.allocate(ports), 12); + rsFreeEntries[12]--; +} + +} // namespace pipeline +} // namespace simeng \ No newline at end of file diff --git a/test/unit/pipeline/MappedRegisterFileSetTest.cc b/test/unit/pipeline/MappedRegisterFileSetTest.cc new file mode 100644 index 0000000000..fc63657779 --- /dev/null +++ b/test/unit/pipeline/MappedRegisterFileSetTest.cc @@ -0,0 +1,56 @@ +#include "gtest/gtest.h" +#include "simeng/pipeline/MappedRegisterFileSet.hh" + +namespace simeng { +namespace pipeline { + +class MappedRegisterFileSetTest : public ::testing::Test { + public: + MappedRegisterFileSetTest() + : regFileSet(physRegFileStruct), + rat(archRegFileStruct, physRegCounts), + mappedRegFile(regFileSet, rat) {} + + protected: + const std::vector archRegFileStruct = { + {8, 10}, {24, 15}, {256, 31}}; + const std::vector physRegFileStruct = { + {8, 20}, {24, 30}, {256, 62}}; + const std::vector physRegCounts = {20, 30, 62}; + + RegisterFileSet regFileSet; + RegisterAliasTable rat; + + MappedRegisterFileSet mappedRegFile; +}; + +// Ensure that with continually changing physical-architectural register mapping +// changes, the correct register is being updated with set(). 
+TEST_F(MappedRegisterFileSetTest, getSet) { + // Loop through all register types + for (uint8_t i = 0; i < archRegFileStruct.size(); i++) { + // Keep allocating the same register to a) keep past values and b) more + // easily verify correct functionality + const uint16_t maxRegTag = archRegFileStruct[i].quantity - 1; + const uint16_t regSize = archRegFileStruct[i].bytes; + const Register rMax = {i, maxRegTag}; + + std::vector physRegs; + for (int j = 2; j < 12; j++) { + physRegs.push_back(rat.allocate(rMax)); + RegisterValue regVal = RegisterValue(j, regSize); + mappedRegFile.set(rMax, regVal); + EXPECT_EQ(mappedRegFile.get(rMax), regVal); + } + + for (int k = 0; k < 10; k++) { + // RAT constructed where Arch-Phys mapping is 1:1. So, first re-mapped + // value will be to maxArchRegTag + 1 + EXPECT_EQ(physRegs[k].tag, maxRegTag + k + 1); + EXPECT_EQ(physRegs[k].type, i); + EXPECT_EQ(regFileSet.get(physRegs[k]), RegisterValue(k + 2, regSize)); + } + } +} +} // namespace pipeline +} // namespace simeng \ No newline at end of file diff --git a/test/unit/pipeline/RegisterAliasTableTest.cc b/test/unit/pipeline/RegisterAliasTableTest.cc index 99b3daf059..6b6f1d9985 100644 --- a/test/unit/pipeline/RegisterAliasTableTest.cc +++ b/test/unit/pipeline/RegisterAliasTableTest.cc @@ -62,11 +62,15 @@ TEST_F(RegisterAliasTableTest, AllocateIndependent) { auto multiRAT = RegisterAliasTable({{8, architecturalCount}, {8, architecturalCount}}, {physicalCount, physicalCount}); + auto initialFreeRegisters0 = multiRAT.freeRegistersAvailable(0); auto initialFreeRegisters1 = multiRAT.freeRegistersAvailable(1); multiRAT.allocate(reg); - // Check that the same number of physical registers are still available + // Check 1 fewer physical registers are now available for regFile 0 + EXPECT_EQ(multiRAT.freeRegistersAvailable(0), initialFreeRegisters0 - 1); + // Check that the same number of physical registers are still available for + // regFile 1 EXPECT_EQ(multiRAT.freeRegistersAvailable(1), 
initialFreeRegisters1); } diff --git a/test/unit/pipeline/RenameUnitTest.cc b/test/unit/pipeline/RenameUnitTest.cc new file mode 100644 index 0000000000..23a1e3dd71 --- /dev/null +++ b/test/unit/pipeline/RenameUnitTest.cc @@ -0,0 +1,461 @@ +#include "../MockBranchPredictor.hh" +#include "../MockInstruction.hh" +#include "../MockMemoryInterface.hh" +#include "gtest/gtest.h" +#include "simeng/pipeline/RenameUnit.hh" + +namespace simeng { + +namespace pipeline { + +using ::testing::_; +using ::testing::Return; + +class RenameUnitTest : public testing::Test { + public: + RenameUnitTest() + : input(1, nullptr), + output(1, nullptr), + rat(archRegFileStruct, physRegCounts), + lsq( + lsqQueueSize, lsqQueueSize, memory, completionSlots, + [](auto registers, auto values) {}, [](auto insn) {}), + rob( + robSize, rat, lsq, [](auto insn) {}, [](auto branchAddr) {}, + predictor, 16, 4), + renameUnit(input, output, rob, rat, lsq, physRegCounts.size()), + uop(new MockInstruction), + uop2(new MockInstruction), + uop3(new MockInstruction), + uopPtr(uop), + uop2Ptr(uop2), + uop3Ptr(uop3) {} + + protected: + // 3rd register type has same arch & physical counts meaning renaming is not + // permitted. 
+ const std::vector archRegFileStruct = { + {8, 10}, {24, 15}, {256, 31}}; + const std::vector physRegFileStruct = { + {8, 20}, {24, 30}, {256, 31}}; + const std::vector physRegCounts = {20, 30, 31}; + + const Register r0 = {0, 0}; + const Register r1 = {1, 2}; + const Register r2 = {2, 4}; + + const unsigned int robSize = 8; + const unsigned int lsqQueueSize = 10; + + PipelineBuffer> input; + PipelineBuffer> output; + + MockMemoryInterface memory; + MockBranchPredictor predictor; + span>> completionSlots; + + RegisterAliasTable rat; + LoadStoreQueue lsq; + ReorderBuffer rob; + + RenameUnit renameUnit; + + MockInstruction* uop; + MockInstruction* uop2; + MockInstruction* uop3; + + std::shared_ptr uopPtr; + std::shared_ptr uop2Ptr; + std::shared_ptr uop3Ptr; +}; + +// Test the correct functionality when input buffer and unit is empty +TEST_F(RenameUnitTest, emptyTick) { + EXPECT_EQ(input.getHeadSlots()[0], nullptr); + EXPECT_EQ(output.getTailSlots()[0], nullptr); + + renameUnit.tick(); + + // Check output buffers and statistics are as expected + EXPECT_EQ(input.getHeadSlots()[0], nullptr); + EXPECT_EQ(output.getTailSlots()[0], nullptr); + EXPECT_EQ(renameUnit.getAllocationStalls(), 0); + EXPECT_EQ(renameUnit.getROBStalls(), 0); + EXPECT_EQ(renameUnit.getLoadQueueStalls(), 0); + EXPECT_EQ(renameUnit.getStoreQueueStalls(), 0); +} + +// Test the normal functionality of an instruction passing through the unit +TEST_F(RenameUnitTest, tick) { + input.getHeadSlots()[0] = uopPtr; + + std::array destRegs = {r0}; + std::array srcRegs = {r0, r1}; + ON_CALL(*uop, getDestinationRegisters()) + .WillByDefault(Return(span(destRegs))); + ON_CALL(*uop, getSourceRegisters()) + .WillByDefault(Return(span(srcRegs))); + ON_CALL(*uop, isOperandReady(_)).WillByDefault(Return(false)); + ON_CALL(*uop, isLoad()).WillByDefault(Return(false)); + ON_CALL(*uop, isStoreAddress()).WillByDefault(Return(false)); + + // Setup expected calls to MockInstruction + EXPECT_CALL(*uop, isLoad()).Times(1); + 
EXPECT_CALL(*uop, isStoreAddress()).Times(1); + EXPECT_CALL(*uop, getDestinationRegisters()).Times(1); + EXPECT_CALL(*uop, getSourceRegisters()).Times(1); + EXPECT_CALL(*uop, isOperandReady(_)).Times(2); + EXPECT_CALL(*uop, renameSource(_, _)).Times(2); + EXPECT_CALL(*uop, renameDestination(0, _)).Times(1); + renameUnit.tick(); + + // Check output buffers and statistics are as expected + EXPECT_EQ(input.getHeadSlots()[0], nullptr); + EXPECT_EQ(output.getTailSlots()[0].get(), uop); + EXPECT_EQ(renameUnit.getAllocationStalls(), 0); + EXPECT_EQ(renameUnit.getROBStalls(), 0); + EXPECT_EQ(renameUnit.getLoadQueueStalls(), 0); + EXPECT_EQ(renameUnit.getStoreQueueStalls(), 0); + + // Check ROB, LSQ, and RAT mappings have been changed accordingly + EXPECT_EQ(rob.size(), 1); + EXPECT_EQ(rob.getFreeSpace(), robSize - 1); + EXPECT_EQ(lsq.getTotalSpace(), lsqQueueSize * 2); + const Register mappedReg = {0, archRegFileStruct[0].quantity}; + EXPECT_EQ(rat.getMapping(r0), mappedReg); + EXPECT_EQ(rat.getMapping(r1), r1); +} + +// Ensure input buffer is stalled when output buffer is stalled +TEST_F(RenameUnitTest, outputStall) { + output.stall(true); + renameUnit.tick(); + EXPECT_TRUE(input.isStalled()); +} + +// Test that an instruction exception is properly dealt with +TEST_F(RenameUnitTest, uopException) { + input.getHeadSlots()[0] = uopPtr; + uop->setExceptionEncountered(true); + + renameUnit.tick(); + + EXPECT_TRUE(uopPtr->canCommit()); + + EXPECT_EQ(rob.size(), 1); + EXPECT_EQ(rob.getFreeSpace(), robSize - 1); + EXPECT_EQ(lsq.getTotalSpace(), lsqQueueSize * 2); + EXPECT_EQ(input.getHeadSlots()[0], nullptr); + EXPECT_EQ(output.getTailSlots()[0], nullptr); + + EXPECT_EQ(renameUnit.getAllocationStalls(), 0); + EXPECT_EQ(renameUnit.getROBStalls(), 0); + EXPECT_EQ(renameUnit.getLoadQueueStalls(), 0); + EXPECT_EQ(renameUnit.getStoreQueueStalls(), 0); +} + +// Test for when no physical registers are available +TEST_F(RenameUnitTest, noFreeRegs) { + // Take up all type-0 physical 
registers + // All arch regs originally mapped to phys reg, meaning remaining + regs = physCount - archCount + for (int i = 0; i < physRegCounts[0] - archRegFileStruct[0].quantity; i++) { + rat.allocate(r0); + } + EXPECT_EQ(rat.freeRegistersAvailable(0), 0); + + input.getHeadSlots()[0] = uopPtr; + + std::array destRegs = {r0}; + ON_CALL(*uop, getDestinationRegisters()) + .WillByDefault(Return(span(destRegs))); + ON_CALL(*uop, isOperandReady(_)).WillByDefault(Return(false)); + ON_CALL(*uop, isLoad()).WillByDefault(Return(false)); + ON_CALL(*uop, isStoreAddress()).WillByDefault(Return(false)); + + // Setup expected calls to MockInstruction + EXPECT_CALL(*uop, isLoad()).Times(1); + EXPECT_CALL(*uop, isStoreAddress()).Times(1); + EXPECT_CALL(*uop, getDestinationRegisters()).Times(1); + renameUnit.tick(); + + EXPECT_TRUE(input.isStalled()); + + EXPECT_EQ(rob.size(), 0); + EXPECT_EQ(rob.getFreeSpace(), robSize); + EXPECT_EQ(lsq.getTotalSpace(), lsqQueueSize * 2); + EXPECT_EQ(input.getHeadSlots()[0], uopPtr); + EXPECT_EQ(output.getTailSlots()[0], nullptr); + + EXPECT_EQ(renameUnit.getAllocationStalls(), 1); + EXPECT_EQ(renameUnit.getROBStalls(), 0); + EXPECT_EQ(renameUnit.getLoadQueueStalls(), 0); + EXPECT_EQ(renameUnit.getStoreQueueStalls(), 0); +} + +// Tests that when ROB is full, no renaming occurs +TEST_F(RenameUnitTest, fullROB) { + // Pre-fill ROB + for (int i = 0; i < robSize; i++) { + rob.reserve(uopPtr); + } + EXPECT_EQ(rob.getFreeSpace(), 0); + + input.getHeadSlots()[0] = uopPtr; + renameUnit.tick(); + + EXPECT_TRUE(input.isStalled()); + + EXPECT_EQ(rob.size(), robSize); + EXPECT_EQ(rob.getFreeSpace(), 0); + EXPECT_EQ(lsq.getTotalSpace(), lsqQueueSize * 2); + EXPECT_EQ(input.getHeadSlots()[0], uopPtr); + EXPECT_EQ(output.getTailSlots()[0], nullptr); + + EXPECT_EQ(renameUnit.getAllocationStalls(), 0); + EXPECT_EQ(renameUnit.getROBStalls(), 1); + EXPECT_EQ(renameUnit.getLoadQueueStalls(), 0); + EXPECT_EQ(renameUnit.getStoreQueueStalls(), 0); +} + +// Test a LOAD 
instruction is handled correctly +TEST_F(RenameUnitTest, loadUop) { + input.getHeadSlots()[0] = uopPtr; + + std::array destRegs = {r0}; + std::array srcRegs = {r0, r1}; + ON_CALL(*uop, getDestinationRegisters()) + .WillByDefault(Return(span(destRegs))); + ON_CALL(*uop, getSourceRegisters()) + .WillByDefault(Return(span(srcRegs))); + ON_CALL(*uop, isOperandReady(_)).WillByDefault(Return(false)); + ON_CALL(*uop, isLoad()).WillByDefault(Return(true)); + ON_CALL(*uop, isStoreAddress()).WillByDefault(Return(false)); + + // Setup expected calls to MockInstruction + EXPECT_CALL(*uop, isLoad()).Times(1); + EXPECT_CALL(*uop, isStoreAddress()).Times(1); + EXPECT_CALL(*uop, getDestinationRegisters()).Times(1); + EXPECT_CALL(*uop, getSourceRegisters()).Times(1); + EXPECT_CALL(*uop, isOperandReady(_)).Times(2); + EXPECT_CALL(*uop, renameSource(_, _)).Times(2); + EXPECT_CALL(*uop, renameDestination(0, _)).Times(1); + renameUnit.tick(); + + // Check output buffers and statistics are as expected + EXPECT_EQ(input.getHeadSlots()[0], nullptr); + EXPECT_EQ(output.getTailSlots()[0].get(), uop); + EXPECT_EQ(renameUnit.getAllocationStalls(), 0); + EXPECT_EQ(renameUnit.getROBStalls(), 0); + EXPECT_EQ(renameUnit.getLoadQueueStalls(), 0); + EXPECT_EQ(renameUnit.getStoreQueueStalls(), 0); + + // Check ROB, LSQ, and RAT mappings have been changed accordingly + EXPECT_EQ(rob.size(), 1); + EXPECT_EQ(rob.getFreeSpace(), robSize - 1); + EXPECT_EQ(lsq.getLoadQueueSpace(), lsqQueueSize - 1); + EXPECT_EQ(lsq.getStoreQueueSpace(), lsqQueueSize); + EXPECT_EQ(lsq.getTotalSpace(), (lsqQueueSize * 2) - 1); + const Register mappedReg = {0, archRegFileStruct[0].quantity}; + EXPECT_EQ(rat.getMapping(r0), mappedReg); + EXPECT_EQ(rat.getMapping(r1), r1); +} + +// Test a LOAD instruction is handled correctly when Load queue is full +TEST_F(RenameUnitTest, loadUopQueueFull) { + // pre-fill Load Queue + for (int i = 0; i < lsqQueueSize; i++) { + lsq.addLoad(uopPtr); + } + EXPECT_EQ(lsq.getLoadQueueSpace(), 0); 
+ + input.getHeadSlots()[0] = uopPtr; + + std::array destRegs = {r0}; + std::array srcRegs = {r0, r1}; + ON_CALL(*uop, getDestinationRegisters()) + .WillByDefault(Return(span(destRegs))); + ON_CALL(*uop, getSourceRegisters()) + .WillByDefault(Return(span(srcRegs))); + ON_CALL(*uop, isOperandReady(_)).WillByDefault(Return(false)); + ON_CALL(*uop, isLoad()).WillByDefault(Return(true)); + ON_CALL(*uop, isStoreAddress()).WillByDefault(Return(false)); + + // Setup expected calls to MockInstruction + EXPECT_CALL(*uop, isLoad()).Times(1); + EXPECT_CALL(*uop, isStoreAddress()).Times(1); + renameUnit.tick(); + + EXPECT_TRUE(input.isStalled()); + + // Check output buffers and statistics are as expected + EXPECT_EQ(input.getHeadSlots()[0], uopPtr); + EXPECT_EQ(output.getTailSlots()[0], nullptr); + EXPECT_EQ(renameUnit.getAllocationStalls(), 0); + EXPECT_EQ(renameUnit.getROBStalls(), 0); + EXPECT_EQ(renameUnit.getLoadQueueStalls(), 1); + EXPECT_EQ(renameUnit.getStoreQueueStalls(), 0); + + // Check ROB, LSQ, and RAT mappings have been changed accordingly + EXPECT_EQ(rob.size(), 0); + EXPECT_EQ(rob.getFreeSpace(), robSize); + EXPECT_EQ(lsq.getLoadQueueSpace(), 0); + EXPECT_EQ(lsq.getStoreQueueSpace(), lsqQueueSize); + EXPECT_EQ(lsq.getTotalSpace(), lsqQueueSize); +} + +// Test a STORE instruction is handled correctly +TEST_F(RenameUnitTest, storeUop) { + input.getHeadSlots()[0] = uopPtr; + + std::array destRegs = {r0}; + std::array srcRegs = {r0, r1}; + ON_CALL(*uop, getDestinationRegisters()) + .WillByDefault(Return(span(destRegs))); + ON_CALL(*uop, getSourceRegisters()) + .WillByDefault(Return(span(srcRegs))); + ON_CALL(*uop, isOperandReady(_)).WillByDefault(Return(false)); + ON_CALL(*uop, isLoad()).WillByDefault(Return(false)); + ON_CALL(*uop, isStoreAddress()).WillByDefault(Return(true)); + + // Setup expected calls to MockInstruction + EXPECT_CALL(*uop, isLoad()).Times(1); + EXPECT_CALL(*uop, isStoreAddress()).Times(1); + EXPECT_CALL(*uop, 
getDestinationRegisters()).Times(1); + EXPECT_CALL(*uop, getSourceRegisters()).Times(1); + EXPECT_CALL(*uop, isOperandReady(_)).Times(2); + EXPECT_CALL(*uop, renameSource(_, _)).Times(2); + EXPECT_CALL(*uop, renameDestination(0, _)).Times(1); + renameUnit.tick(); + + // Check output buffers and statistics are as expected + EXPECT_EQ(input.getHeadSlots()[0], nullptr); + EXPECT_EQ(output.getTailSlots()[0].get(), uop); + EXPECT_EQ(renameUnit.getAllocationStalls(), 0); + EXPECT_EQ(renameUnit.getROBStalls(), 0); + EXPECT_EQ(renameUnit.getLoadQueueStalls(), 0); + EXPECT_EQ(renameUnit.getStoreQueueStalls(), 0); + + // Check ROB, LSQ, and RAT mappings have been changed accordingly + EXPECT_EQ(rob.size(), 1); + EXPECT_EQ(rob.getFreeSpace(), robSize - 1); + EXPECT_EQ(lsq.getLoadQueueSpace(), lsqQueueSize); + EXPECT_EQ(lsq.getStoreQueueSpace(), lsqQueueSize - 1); + EXPECT_EQ(lsq.getTotalSpace(), (lsqQueueSize * 2) - 1); + const Register mappedReg = {0, archRegFileStruct[0].quantity}; + EXPECT_EQ(rat.getMapping(r0), mappedReg); + EXPECT_EQ(rat.getMapping(r1), r1); +} + +// Test a STORE instruction is handled correctly when Store queue is full +TEST_F(RenameUnitTest, storeUopQueueFull) { + // pre-fill Store Queue + for (int i = 0; i < lsqQueueSize; i++) { + lsq.addStore(uopPtr); + } + EXPECT_EQ(lsq.getStoreQueueSpace(), 0); + + input.getHeadSlots()[0] = uopPtr; + + std::array destRegs = {r0}; + std::array srcRegs = {r0, r1}; + ON_CALL(*uop, getDestinationRegisters()) + .WillByDefault(Return(span(destRegs))); + ON_CALL(*uop, getSourceRegisters()) + .WillByDefault(Return(span(srcRegs))); + ON_CALL(*uop, isOperandReady(_)).WillByDefault(Return(false)); + ON_CALL(*uop, isLoad()).WillByDefault(Return(false)); + ON_CALL(*uop, isStoreAddress()).WillByDefault(Return(true)); + + // Setup expected calls to MockInstruction + EXPECT_CALL(*uop, isLoad()).Times(1); + EXPECT_CALL(*uop, isStoreAddress()).Times(1); + renameUnit.tick(); + + EXPECT_TRUE(input.isStalled()); + + // Check output 
buffers and statistics are as expected + EXPECT_EQ(input.getHeadSlots()[0], uopPtr); + EXPECT_EQ(output.getTailSlots()[0], nullptr); + EXPECT_EQ(renameUnit.getAllocationStalls(), 0); + EXPECT_EQ(renameUnit.getROBStalls(), 0); + EXPECT_EQ(renameUnit.getLoadQueueStalls(), 0); + EXPECT_EQ(renameUnit.getStoreQueueStalls(), 1); + + // Check ROB, LSQ, and RAT mappings have been changed accordingly + EXPECT_EQ(rob.size(), 0); + EXPECT_EQ(rob.getFreeSpace(), robSize); + EXPECT_EQ(lsq.getLoadQueueSpace(), lsqQueueSize); + EXPECT_EQ(lsq.getStoreQueueSpace(), 0); + EXPECT_EQ(lsq.getTotalSpace(), lsqQueueSize); +} + +// Test to ensure Serialized destinations work correctly +TEST_F(RenameUnitTest, serializedDest) { + // A serialized uop can only proceed when the ROB is empty. Pre-add an + // instruction to ensure uop stalls correctly in renameUnit. Pre-fill ROB + rob.reserve(uop2Ptr); + EXPECT_EQ(rob.size(), 1); + + // A serialized uop is caused when the destination register cannot be renamed + // - i.e. 
the number of archRegs is the same as physRegs + input.getHeadSlots()[0] = uopPtr; + std::array destRegs = {r2}; + std::array srcRegs = {r0, r1}; + ON_CALL(*uop, getDestinationRegisters()) + .WillByDefault(Return(span(destRegs))); + ON_CALL(*uop, getSourceRegisters()) + .WillByDefault(Return(span(srcRegs))); + ON_CALL(*uop, isOperandReady(_)).WillByDefault(Return(false)); + ON_CALL(*uop, isLoad()).WillByDefault(Return(false)); + ON_CALL(*uop, isStoreAddress()).WillByDefault(Return(false)); + + // On first tick, input should stall and uop should not proceed through + // renameUnit + EXPECT_CALL(*uop, isLoad()).Times(1); + EXPECT_CALL(*uop, isStoreAddress()).Times(1); + EXPECT_CALL(*uop, getDestinationRegisters()).Times(1); + renameUnit.tick(); + + EXPECT_TRUE(input.isStalled()); + EXPECT_EQ(input.getHeadSlots()[0], uopPtr); + EXPECT_EQ(output.getTailSlots()[0], nullptr); + + // Empty item in ROB + EXPECT_EQ(rob.size(), 1); + uop2Ptr->setCommitReady(); + EXPECT_CALL(*uop2, getDestinationRegisters()).Times(1); + EXPECT_CALL(*uop2, isLoad()).WillOnce(Return(false)); + EXPECT_CALL(*uop2, isStoreAddress()).WillOnce(Return(false)); + EXPECT_CALL(*uop2, isBranch()).WillOnce(Return(false)); + rob.commit(1); + EXPECT_EQ(rob.size(), 0); + + // Try tick again + EXPECT_CALL(*uop, isLoad()).Times(1); + EXPECT_CALL(*uop, isStoreAddress()).Times(1); + EXPECT_CALL(*uop, getDestinationRegisters()).Times(1); + EXPECT_CALL(*uop, getSourceRegisters()).Times(1); + EXPECT_CALL(*uop, isOperandReady(_)).Times(2); + EXPECT_CALL(*uop, renameSource(_, _)).Times(2); + renameUnit.tick(); + + // Check output buffers and statistics are as expected + EXPECT_EQ(input.getHeadSlots()[0], nullptr); + EXPECT_EQ(output.getTailSlots()[0].get(), uop); + EXPECT_EQ(renameUnit.getAllocationStalls(), 0); + EXPECT_EQ(renameUnit.getROBStalls(), 0); + EXPECT_EQ(renameUnit.getLoadQueueStalls(), 0); + EXPECT_EQ(renameUnit.getStoreQueueStalls(), 0); + + // Check ROB, LSQ, and RAT mappings have been changed 
accordingly + EXPECT_EQ(rob.size(), 1); + EXPECT_EQ(rob.getFreeSpace(), robSize - 1); + EXPECT_EQ(lsq.getTotalSpace(), lsqQueueSize * 2); + EXPECT_EQ(rat.getMapping(r0), r0); + EXPECT_EQ(rat.getMapping(r1), r1); + EXPECT_EQ(rat.getMapping(r2), r2); +} + +} // namespace pipeline +} // namespace simeng \ No newline at end of file diff --git a/test/unit/pipeline/ReorderBufferTest.cc b/test/unit/pipeline/ReorderBufferTest.cc index df217968e3..89039439e9 100644 --- a/test/unit/pipeline/ReorderBufferTest.cc +++ b/test/unit/pipeline/ReorderBufferTest.cc @@ -1,11 +1,9 @@ #include "../MockBranchPredictor.hh" #include "../MockInstruction.hh" #include "../MockMemoryInterface.hh" -#include "gmock/gmock.h" #include "gtest/gtest.h" #include "simeng/Instruction.hh" #include "simeng/pipeline/LoadStoreQueue.hh" -#include "simeng/pipeline/RegisterAliasTable.hh" #include "simeng/pipeline/ReorderBuffer.hh" using ::testing::_; @@ -31,12 +29,15 @@ class ReorderBufferTest : public testing::Test { [](auto registers, auto values) {}, [](auto uop) {}), uop(new MockInstruction), uop2(new MockInstruction), + uop3(new MockInstruction), uopPtr(uop), uopPtr2(uop2), + uopPtr3(uop3), reorderBuffer( maxROBSize, rat, lsq, [this](auto insn) { exceptionHandler.raiseException(insn); }, - [](auto branchAddress) {}, predictor, 0, 0) {} + [this](auto branchAddress) { loopBoundaryAddr = branchAddress; }, + predictor, 4, 2) {} protected: const uint8_t maxLSQLoads = 32; @@ -52,13 +53,17 @@ class ReorderBufferTest : public testing::Test { MockInstruction* uop; MockInstruction* uop2; + MockInstruction* uop3; std::shared_ptr uopPtr; - std::shared_ptr uopPtr2; + std::shared_ptr uopPtr2; + std::shared_ptr uopPtr3; MockMemoryInterface dataMemory; ReorderBuffer reorderBuffer; + + uint64_t loopBoundaryAddr = 0; }; // Tests that an instruction can have a slot reserved in the ROB and be @@ -100,6 +105,7 @@ TEST_F(ReorderBufferTest, Commit) { EXPECT_EQ(committed, 1); EXPECT_EQ(reorderBuffer.size(), 0); + 
EXPECT_EQ(reorderBuffer.getInstructionsCommittedCount(), 1); } // Tests that the reorder buffer won't commit an instruction if it's not ready @@ -110,6 +116,7 @@ TEST_F(ReorderBufferTest, CommitNotReady) { EXPECT_EQ(committed, 0); EXPECT_EQ(reorderBuffer.size(), 1); + EXPECT_EQ(reorderBuffer.getInstructionsCommittedCount(), 0); } // Tests that the reorder buffer won't commit a ready instruction if it's not at @@ -124,6 +131,7 @@ TEST_F(ReorderBufferTest, CommitHeadNotReady) { EXPECT_EQ(committed, 0); EXPECT_EQ(reorderBuffer.size(), 2); + EXPECT_EQ(reorderBuffer.getInstructionsCommittedCount(), 0); } // Tests that the reorder buffer can commit multiple ready instructions @@ -138,6 +146,7 @@ TEST_F(ReorderBufferTest, CommitMultiple) { EXPECT_EQ(committed, 2); EXPECT_EQ(reorderBuffer.size(), 0); + EXPECT_EQ(reorderBuffer.getInstructionsCommittedCount(), 2); } // Tests that the reorder buffer correctly informs the LSQ when committing a @@ -153,6 +162,7 @@ TEST_F(ReorderBufferTest, CommitLoad) { // Check that the load was removed from the LSQ EXPECT_EQ(lsq.getLoadQueueSpace(), maxLSQLoads); + EXPECT_EQ(reorderBuffer.getInstructionsCommittedCount(), 1); } // Tests that the reorder buffer correctly triggers a store upon commit @@ -190,6 +200,7 @@ TEST_F(ReorderBufferTest, CommitStore) { // Check that the store was committed and removed from the LSQ EXPECT_EQ(lsq.getStoreQueueSpace(), maxLSQStores); + EXPECT_EQ(reorderBuffer.getInstructionsCommittedCount(), 1); // Tick lsq to complete store lsq.tick(); @@ -221,6 +232,231 @@ TEST_F(ReorderBufferTest, Exception) { auto committed = reorderBuffer.commit(1); EXPECT_EQ(committed, 1); + EXPECT_EQ(reorderBuffer.getInstructionsCommittedCount(), 1); +} + +// Test the reorder buffer correctly sets a macro-op to commitReady when all of +// its associated micro-ops have been +TEST_F(ReorderBufferTest, commitMicroOps) { + // Reserve all microOps + uop->setIsMicroOp(true); + uop->setIsLastMicroOp(false); + uop2->setIsMicroOp(true); + 
uop2->setIsLastMicroOp(false); + uop3->setIsMicroOp(true); + uop3->setIsLastMicroOp(true); + reorderBuffer.reserve(uopPtr); + reorderBuffer.reserve(uopPtr2); + reorderBuffer.reserve(uopPtr3); + EXPECT_EQ(reorderBuffer.size(), 3); + + EXPECT_EQ(uopPtr->getInstructionId(), 0); + EXPECT_EQ(uopPtr2->getInstructionId(), 0); + EXPECT_EQ(uopPtr3->getInstructionId(), 0); + + // No micro-ops are waiting commit. Make sure they're not commit ready after + // call to `commitMicroOps` + reorderBuffer.commitMicroOps(0); + EXPECT_FALSE(uopPtr->canCommit()); + EXPECT_FALSE(uopPtr2->canCommit()); + EXPECT_FALSE(uopPtr3->canCommit()); + + // Set first instruction as waitingCommit - ensure still not set commit ready + uop->setWaitingCommit(); + reorderBuffer.commitMicroOps(0); + EXPECT_FALSE(uopPtr->canCommit()); + EXPECT_FALSE(uopPtr2->canCommit()); + EXPECT_FALSE(uopPtr3->canCommit()); + + // Set last instruction as waitingCommit - ensure still not set commit ready + uop3->setWaitingCommit(); + reorderBuffer.commitMicroOps(0); + EXPECT_FALSE(uopPtr->canCommit()); + EXPECT_FALSE(uopPtr2->canCommit()); + EXPECT_FALSE(uopPtr3->canCommit()); + + // Set middle instruction as waitingCommit - ensure they are now set commit + // ready as all micro-ops are done + uop2->setWaitingCommit(); + reorderBuffer.commitMicroOps(0); + EXPECT_TRUE(uopPtr->canCommit()); + EXPECT_TRUE(uopPtr2->canCommit()); + EXPECT_TRUE(uopPtr3->canCommit()); + + // Now call commit in ROB and make sure micro-ops are committed properly + unsigned int committed = reorderBuffer.commit(3); + EXPECT_EQ(committed, 3); + EXPECT_EQ(reorderBuffer.getInstructionsCommittedCount(), 1); + EXPECT_EQ(reorderBuffer.size(), 0); +} + +// Test that a detected violating load in the lsq leads to a flush +TEST_F(ReorderBufferTest, violatingLoad) { + const uint64_t strAddr = 16; + const uint64_t strSize = 4; + const uint64_t ldAddr = 18; + const uint64_t ldSize = 4; + + // Init Store + const MemoryAccessTarget strTarget = {strAddr, 
strSize}; + span strTargetSpan = {&strTarget, 1}; + ON_CALL(*uop, getGeneratedAddresses()).WillByDefault(Return(strTargetSpan)); + ON_CALL(*uop, isStoreAddress()).WillByDefault(Return(true)); + ON_CALL(*uop, isStoreData()).WillByDefault(Return(true)); + uopPtr->setSequenceId(0); + uopPtr->setInstructionId(0); + lsq.addStore(uopPtr); + reorderBuffer.reserve(uopPtr); + // Init load + const MemoryAccessTarget ldTarget = {ldAddr, ldSize}; + span ldTargetSpan = {&ldTarget, 1}; + ON_CALL(*uop2, getGeneratedAddresses()).WillByDefault(Return(ldTargetSpan)); + ON_CALL(*uop2, isLoad()).WillByDefault(Return(true)); + uopPtr2->setSequenceId(1); + uopPtr2->setInstructionId(1); + uopPtr2->setInstructionAddress(4096); + lsq.addLoad(uopPtr2); + reorderBuffer.reserve(uopPtr2); + + EXPECT_EQ(reorderBuffer.size(), 2); + + // Start load "Out of order" + EXPECT_CALL(*uop2, getGeneratedAddresses()).Times(1); + EXPECT_CALL(*uop, getGeneratedAddresses()).Times(1); + lsq.startLoad(uopPtr2); + + // Set store "ready to commit" so that violation gets detected + uopPtr->setCommitReady(); + // Supply Store's data + RegisterValue strData = RegisterValue(0xABCD, strSize); + span strDataSpan = {&strData, 1}; + ON_CALL(*uop, getData()).WillByDefault(Return(strDataSpan)); + EXPECT_CALL(*uop, getData()).Times(1); + lsq.supplyStoreData(uopPtr); + + EXPECT_CALL(*uop, isStoreAddress()).WillOnce(Return(true)); + EXPECT_CALL(*uop, getGeneratedAddresses()).Times(1); // in LSQ + EXPECT_CALL(dataMemory, requestWrite(strTarget, strData)); // in LSQ + EXPECT_CALL(*uop2, getGeneratedAddresses()).Times(1); // in LSQ + unsigned int committed = reorderBuffer.commit(4); + + EXPECT_EQ(committed, 1); + EXPECT_EQ(reorderBuffer.size(), 1); + EXPECT_TRUE(reorderBuffer.shouldFlush()); + EXPECT_EQ(reorderBuffer.getViolatingLoadsCount(), 1); + EXPECT_EQ(lsq.getViolatingLoad(), uopPtr2); + EXPECT_EQ(reorderBuffer.getFlushAddress(), 4096); + EXPECT_EQ(reorderBuffer.getFlushInsnId(), 0); +} + +// Test that a branch is treated 
as expected, will trigger the loop buffer when +// seen enough times (loop detection threshold set to 2) +TEST_F(ReorderBufferTest, branch) { + // Set up branch instruction + const uint64_t insnAddr = 4096; + const uint64_t branchAddr = 1024; + BranchPrediction pred = {true, branchAddr}; + ON_CALL(*uop, isBranch()).WillByDefault(Return(true)); + uopPtr->setSequenceId(0); + uopPtr->setInstructionId(0); + uopPtr->setInstructionAddress(insnAddr); + uopPtr->setBranchPrediction(pred); + uopPtr->setCommitReady(); + + // First pass through ROB -- seen count reset to 0 as new branch + reorderBuffer.reserve(uopPtr); + EXPECT_CALL(*uop, isBranch()).Times(1); + reorderBuffer.commit(1); + EXPECT_NE(loopBoundaryAddr, insnAddr); + + // Second pass through ROB -- seen count = 1 + reorderBuffer.reserve(uopPtr); + EXPECT_CALL(*uop, isBranch()).Times(1); + reorderBuffer.commit(1); + EXPECT_NE(loopBoundaryAddr, insnAddr); + + // Third pass through ROB -- seen count = 2 + reorderBuffer.reserve(uopPtr); + EXPECT_CALL(*uop, isBranch()).Times(1); + reorderBuffer.commit(1); + EXPECT_NE(loopBoundaryAddr, insnAddr); + + // Fourth pass through ROB -- seen count = 3; exceeds detection threshold, + // loopBoundaryAddr updated + reorderBuffer.reserve(uopPtr); + EXPECT_CALL(*uop, isBranch()).Times(1); + reorderBuffer.commit(1); + EXPECT_EQ(loopBoundaryAddr, insnAddr); + + // Update prediction & reset loopBoundaryAddr. 
Flush ROB to reset loopDetected + pred = {false, branchAddr + 64}; + uopPtr->setBranchPrediction(pred); + loopBoundaryAddr = 0; + reorderBuffer.flush(0); + + // Re-do loop detection + // First pass through ROB -- seen count reset to 0 as new branch + reorderBuffer.reserve(uopPtr); + EXPECT_CALL(*uop, isBranch()).Times(1); + reorderBuffer.commit(1); + EXPECT_NE(loopBoundaryAddr, insnAddr); + + // Second pass through ROB -- seen count = 1 + reorderBuffer.reserve(uopPtr); + EXPECT_CALL(*uop, isBranch()).Times(1); + reorderBuffer.commit(1); + EXPECT_NE(loopBoundaryAddr, insnAddr); + + // Third pass through ROB -- seen count = 2 + reorderBuffer.reserve(uopPtr); + EXPECT_CALL(*uop, isBranch()).Times(1); + reorderBuffer.commit(1); + EXPECT_NE(loopBoundaryAddr, insnAddr); + + // Fourth pass through ROB -- seen count = 3; exceeds detection threshold, + // loopBoundaryAddr updated + reorderBuffer.reserve(uopPtr); + EXPECT_CALL(*uop, isBranch()).Times(1); + reorderBuffer.commit(1); + EXPECT_EQ(loopBoundaryAddr, insnAddr); +} + +// Tests that only those destination registers which have been renamed are +// rewound upon a ROB flush +TEST_F(ReorderBufferTest, registerRewind) { + uop->setInstructionId(0); + uop->setSequenceId(0); + uop2->setInstructionId(1); + uop2->setSequenceId(1); + + // Reserve entries in ROB + reorderBuffer.reserve(uopPtr); + reorderBuffer.reserve(uopPtr2); + + // Rename one of the destination registers + Register archReg = {0, 1, 0}; + Register renamedReg = rat.allocate({0, 1}); + EXPECT_EQ(renamedReg.tag, 32); + + // Set destination registers for the to-be-flushed uop2 with the second register + // not being renamed + std::vector destinations = {renamedReg, {0, 2, 0}}; + const span destinationSpan = { + const_cast(destinations.data()), 2}; + EXPECT_CALL(*uop2, getDestinationRegisters()) + .Times(1) + .WillRepeatedly(Return(destinationSpan)); + + // Check that mappings in RAT are correct + EXPECT_EQ(rat.getMapping(archReg).tag, 32); + 
EXPECT_EQ(rat.getMapping(destinations[1]).tag, 2); + + // Flush ROB + reorderBuffer.flush(0); + + // Check rewind occurred on only the first destination register + EXPECT_EQ(rat.getMapping(archReg).tag, 1); + EXPECT_EQ(rat.getMapping(destinations[1]).tag, 2); } } // namespace pipeline diff --git a/test/unit/riscv/ArchInfoTest.cc b/test/unit/riscv/ArchInfoTest.cc new file mode 100644 index 0000000000..a086394633 --- /dev/null +++ b/test/unit/riscv/ArchInfoTest.cc @@ -0,0 +1,62 @@ +#include "gtest/gtest.h" +#include "simeng/arch/riscv/ArchInfo.hh" +#include "simeng/config/SimInfo.hh" +#include "simeng/version.hh" + +namespace simeng { +namespace arch { +namespace riscv { + +class RiscVArchInfoTest : public ::testing::Test { + public: + RiscVArchInfoTest() { + simeng::config::SimInfo::setConfig(SIMENG_SOURCE_DIR + "/configs/DEMO_RISCV.yaml"); + } + + protected: + const std::vector sysRegisterEnums = { + simeng::arch::riscv::riscv_sysreg::RISCV_SYSREG_FFLAGS, + simeng::arch::riscv::riscv_sysreg::RISCV_SYSREG_FRM, + simeng::arch::riscv::riscv_sysreg::RISCV_SYSREG_FCSR, + simeng::arch::riscv::riscv_sysreg::RISCV_SYSREG_CYCLE, + simeng::arch::riscv::riscv_sysreg::RISCV_SYSREG_TIME, + simeng::arch::riscv::riscv_sysreg::RISCV_SYSREG_INSTRET}; + + const std::vector archRegStruct = { + {8, 32}, {8, 32}, {8, static_cast(sysRegisterEnums.size())}}; + + const std::vector physRegStruct = { + {8, 154}, {8, 90}, {8, static_cast(sysRegisterEnums.size())}}; + + const std::vector physRegQuants = { + 154, 90, static_cast(sysRegisterEnums.size())}; +}; + +// Test for the getSysRegEnums() function +TEST_F(RiscVArchInfoTest, getSysRegEnums) { + ArchInfo info = ArchInfo(config::SimInfo::getConfig()); + EXPECT_EQ(info.getSysRegEnums(), sysRegisterEnums); +} + +// Test for the getArchRegStruct() function +TEST_F(RiscVArchInfoTest, getArchRegStruct) { + ArchInfo info = ArchInfo(config::SimInfo::getConfig()); + EXPECT_EQ(info.getArchRegStruct(), archRegStruct); +} + +// Test for the 
getPhysRegStruct() function +TEST_F(RiscVArchInfoTest, getPhysRegStruct) { + ArchInfo info = ArchInfo(config::SimInfo::getConfig()); + EXPECT_EQ(info.getPhysRegStruct(), physRegStruct); +} + +// Test for the getPhysRegQuantities() function +TEST_F(RiscVArchInfoTest, getPhysRegQuantities) { + ArchInfo info = ArchInfo(config::SimInfo::getConfig()); + EXPECT_EQ(info.getPhysRegQuantities(), physRegQuants); +} + +} // namespace riscv +} // namespace arch +} // namespace simeng \ No newline at end of file diff --git a/test/unit/riscv/ArchitectureTest.cc b/test/unit/riscv/ArchitectureTest.cc new file mode 100644 index 0000000000..a934232fe4 --- /dev/null +++ b/test/unit/riscv/ArchitectureTest.cc @@ -0,0 +1,163 @@ +#include + +#include "../ConfigInit.hh" +#include "gtest/gtest.h" +#include "simeng/CoreInstance.hh" +#include "simeng/RegisterFileSet.hh" +#include "simeng/arch/aarch64/Architecture.hh" +#include "simeng/arch/riscv/Architecture.hh" +#include "simeng/span.hh" +#include "simeng/version.hh" + +namespace simeng { +namespace arch { +namespace riscv { + +// RISC-V Tests +class RiscVArchitectureTest : public testing::Test { + public: + RiscVArchitectureTest() + : kernel(config::SimInfo::getConfig()["CPU-Info"]["Special-File-Dir-Path"] + .as()) { + arch = std::make_unique(kernel); + kernel.createProcess(process); + } + + protected: + // Setting core model to complex OoO model to more verbosely test the + // Architecture class. 
+ ConfigInit configInit = ConfigInit(config::ISA::RV64, R"YAML({ + Core: { + Simulation-Mode: outoforder + }, + Ports: { + '0': {Portname: Port 0, Instruction-Group-Support: [INT_SIMPLE, INT_MUL, FLOAT]}, + '1': {Portname: Port 1, Instruction-Group-Support: [INT, FLOAT]}, + '2': {Portname: Port 2, Instruction-Group-Support: [INT_SIMPLE, INT_MUL, BRANCH]}, + '3': {Portname: Port 4, Instruction-Group-Support: [LOAD]}, + '4': {Portname: Port 5, Instruction-Group-Support: [LOAD]}, + '5': {Portname: Port 3, Instruction-Group-Support: [STORE]} + }, + Reservation-Stations: { + '0': {Size: 60, Dispatch-Rate: 4, Ports: [Port 0, Port 1, Port 2, Port 4, Port 5, Port 3]} + }, + Execution-Units: { + '0': {Pipelined: True}, + '1': {Pipelined: True}, + '2': {Pipelined: True}, + '3': {Pipelined: True}, + '4': {Pipelined: True}, + '5': {Pipelined: True} + }, + Latencies: { + '0': {Instruction-Groups: [INT_SIMPLE_ARTH, INT_SIMPLE_LOGICAL], Execution-Latency: 1, Execution-Throughput: 1}, + '1': {Instruction-Groups: [INT_MUL], Execution-Latency: 5, Execution-Throughput: 1}, + '2': {Instruction-Groups: [INT_DIV_OR_SQRT], Execution-Latency: 39, Execution-Throughput: 39}, + '3': {Instruction-Groups: [FLOAT_SIMPLE_CMP], Execution-Latency: 5, Execution-Throughput: 1}, + '4': {Instruction-Groups: [FLOAT_MUL], Execution-Latency: 6, Execution-Throughput: 1}, + '5': {Instruction-Groups: [FLOAT_SIMPLE_CVT], Execution-Latency: 7, Execution-Throughput: 1}, + '6': {Instruction-Groups: [FLOAT_DIV_OR_SQRT], Execution-Latency: 16, Execution-Throughput: 16} + } + })YAML"); + + // addi sp, ra, 2000 + std::array validInstrBytes = {0x13, 0x81, 0x00, 0x7d}; + std::array invalidInstrBytes = {0x7d, 0x00, 0x81, 0xbb}; + + std::unique_ptr arch; + kernel::Linux kernel; + kernel::LinuxProcess process = kernel::LinuxProcess( + span((char*)validInstrBytes.data(), validInstrBytes.size())); +}; + +TEST_F(RiscVArchitectureTest, predecode) { + // Test that mis-aligned instruction address results in error + MacroOp 
output; + uint8_t result = arch->predecode(validInstrBytes.data(), + validInstrBytes.size(), 0x7, output); + EXPECT_EQ(result, 1); + EXPECT_EQ(output[0]->getInstructionAddress(), 0x7); + EXPECT_EQ(output[0]->exceptionEncountered(), true); + + // Test that an invalid instruction returns instruction with an exception + output = MacroOp(); + result = arch->predecode(invalidInstrBytes.data(), invalidInstrBytes.size(), + 0x8, output); + EXPECT_EQ(result, 4); + EXPECT_EQ(output[0]->getInstructionAddress(), 0x8); + EXPECT_EQ(output[0]->exceptionEncountered(), true); + + // Test that an instruction can be properly decoded + output = MacroOp(); + result = arch->predecode(validInstrBytes.data(), validInstrBytes.size(), 0x4, + output); + EXPECT_EQ(result, 4); + EXPECT_EQ(output[0]->getInstructionAddress(), 0x4); + EXPECT_EQ(output[0]->exceptionEncountered(), false); +} + +TEST_F(RiscVArchitectureTest, getSystemRegisterTag) { + // Test incorrect system register will fail + int32_t output = arch->getSystemRegisterTag(-1); + EXPECT_EQ(output, -1); + + // Test for correct behaviour + output = arch->getSystemRegisterTag(RISCV_SYSREG_FFLAGS); + EXPECT_EQ(output, 0); +} + +TEST_F(RiscVArchitectureTest, handleException) { + // Get Instruction + MacroOp insn; + uint8_t bytes = arch->predecode(invalidInstrBytes.data(), + invalidInstrBytes.size(), 0x4, insn); + EXPECT_EQ(bytes, 4); + EXPECT_EQ(insn[0]->getInstructionAddress(), 0x4); + EXPECT_EQ(insn[0]->exceptionEncountered(), true); + + // Get Core + std::string executablePath = ""; + std::vector executableArgs = {}; + std::unique_ptr coreInstance = + std::make_unique(executablePath, executableArgs); + const Core& core = *coreInstance->getCore(); + MemoryInterface& memInt = *coreInstance->getDataMemory(); + auto exceptionHandler = arch->handleException(insn[0], core, memInt); + + bool tickRes = exceptionHandler->tick(); + auto result = exceptionHandler->getResult(); + EXPECT_TRUE(tickRes); + EXPECT_TRUE(result.fatal); + // Instruction 
address for fatal exception is always 0. + EXPECT_EQ(result.instructionAddress, 0x0); +} + +TEST_F(RiscVArchitectureTest, getInitialState) { + std::vector regs = {{RegisterType::GENERAL, 2}}; + std::vector regVals = {{kernel.getInitialStackPointer(), 8}}; + + arch::ProcessStateChange changes = arch->getInitialState(); + EXPECT_EQ(changes.type, arch::ChangeType::REPLACEMENT); + EXPECT_EQ(changes.modifiedRegisters, regs); + EXPECT_EQ(changes.modifiedRegisterValues, regVals); +} + +TEST_F(RiscVArchitectureTest, getMaxInstructionSize) { + EXPECT_EQ(arch->getMaxInstructionSize(), 4); +} + +TEST_F(RiscVArchitectureTest, updateSystemTimerRegisters) { + RegisterFileSet regFile = config::SimInfo::getArchRegStruct(); + Register cycleSystemReg = { + RegisterType::SYSTEM, + static_cast(arch->getSystemRegisterTag(RISCV_SYSREG_CYCLE))}; + + uint64_t ticks = 30; + EXPECT_EQ(regFile.get(cycleSystemReg), RegisterValue(0, 8)); + arch->updateSystemTimerRegisters(®File, ticks); + EXPECT_EQ(regFile.get(cycleSystemReg), RegisterValue(ticks, 8)); +} + +} // namespace riscv +} // namespace arch +} // namespace simeng diff --git a/test/unit/riscv/ExceptionHandlerTest.cc b/test/unit/riscv/ExceptionHandlerTest.cc new file mode 100644 index 0000000000..dc51f781e8 --- /dev/null +++ b/test/unit/riscv/ExceptionHandlerTest.cc @@ -0,0 +1,631 @@ +#include "../ConfigInit.hh" +#include "../MockCore.hh" +#include "../MockInstruction.hh" +#include "../MockMemoryInterface.hh" +#include "gmock/gmock.h" +#include "simeng/ArchitecturalRegisterFileSet.hh" +#include "simeng/arch/riscv/Architecture.hh" +#include "simeng/arch/riscv/ExceptionHandler.hh" +#include "simeng/arch/riscv/Instruction.hh" + +namespace simeng { +namespace arch { +namespace riscv { + +using ::testing::HasSubstr; +using ::testing::Return; +using ::testing::ReturnRef; + +class RiscVExceptionHandlerTest : public ::testing::Test { + public: + RiscVExceptionHandlerTest() + : 
kernel(config::SimInfo::getConfig()["CPU-Info"]["Special-File-Dir-Path"] + .as()), + arch(kernel), + physRegFileSet(config::SimInfo::getArchRegStruct()), + archRegFileSet(physRegFileSet) {} + + protected: + ConfigInit configInit = ConfigInit(config::ISA::RV64, ""); + + MockCore core; + MockMemoryInterface memory; + kernel::Linux kernel; + Architecture arch; + + RegisterFileSet physRegFileSet; + ArchitecturalRegisterFileSet archRegFileSet; + + // addi sp, ra, 2000 --- Just need a valid instruction to hijack + std::array validInstrBytes = {0x13, 0x81, 0x00, 0x7d}; + + /** Helper constants for RISC-V general-purpose registers. */ + static constexpr Register R0 = {RegisterType::GENERAL, 10}; + static constexpr Register R1 = {RegisterType::GENERAL, 11}; + static constexpr Register R2 = {RegisterType::GENERAL, 12}; + static constexpr Register R3 = {RegisterType::GENERAL, 13}; + static constexpr Register R4 = {RegisterType::GENERAL, 14}; + static constexpr Register R5 = {RegisterType::GENERAL, 15}; + static constexpr Register R7 = {RegisterType::GENERAL, 17}; +}; + +// All system calls are tested in /test/regression/riscv/Syscall.cc + +// Test that a syscall is processed successfully +TEST_F(RiscVExceptionHandlerTest, testSyscall) { + // Create "syscall" instruction + uint64_t insnAddr = 0x4; + MacroOp uops; + arch.predecode(validInstrBytes.data(), validInstrBytes.size(), insnAddr, + uops); + InstructionException exception = InstructionException::SupervisorCall; + std::shared_ptr insn = std::make_shared( + arch, static_cast(uops[0].get())->getMetadata(), exception); + insn->setInstructionAddress(insnAddr); + + // Setup register file for `uname` syscall (chosen as minimal functionality) + archRegFileSet.set(R0, RegisterValue(1234, 8)); + archRegFileSet.set(R7, RegisterValue(160, 8)); + + // Create ExceptionHandler + ExceptionHandler handler(insn, core, memory, kernel); + + // Tick exceptionHandler + ON_CALL(core, getArchitecturalRegisterFileSet()) + 
.WillByDefault(ReturnRef(archRegFileSet)); + EXPECT_CALL(core, getArchitecturalRegisterFileSet()).Times(1); + bool retVal = handler.tick(); + ExceptionResult result = handler.getResult(); + + EXPECT_TRUE(retVal); + EXPECT_FALSE(result.fatal); + EXPECT_EQ(result.instructionAddress, insnAddr + 4); + EXPECT_EQ(result.stateChange.type, ChangeType::REPLACEMENT); + std::vector modRegs = {R0}; + EXPECT_EQ(result.stateChange.modifiedRegisters, modRegs); + std::vector modRegVals = {{0ull, 8}}; + EXPECT_EQ(result.stateChange.modifiedRegisterValues, modRegVals); + std::vector modMemTargets = {{1234, 6}, + {1234 + 65, 13}, + {1234 + (65 * 2), 42}, + {1234 + (65 * 3), 35}, + {1234 + (65 * 4), 8}, + {1234 + (65 * 5), 7}}; + EXPECT_EQ(result.stateChange.memoryAddresses, modMemTargets); + std::vector modMemVals = { + RegisterValue("Linux"), + RegisterValue("fedora-riscv"), + RegisterValue("5.5.0-0.rc5.git0.1.1.riscv64.fc32.riscv64"), + RegisterValue("#1 SMP Mon Jan 6 17:31:22 UTC 2020"), + RegisterValue("riscv64"), + RegisterValue("(none)")}; + EXPECT_EQ(result.stateChange.memoryAddressValues, modMemVals); +} + +// Test that `readStringThen()` operates as expected +TEST_F(RiscVExceptionHandlerTest, readStringThen) { + // Create new mock instruction and ExceptionHandler + std::shared_ptr uopPtr(new MockInstruction); + ExceptionHandler handler(uopPtr, core, memory, kernel); + + // Initialise variables + size_t retVal = 0; + char* buffer; + buffer = (char*)malloc(256); + for (int i = 0; i < 256; i++) { + buffer[i] = 'q'; + } + uint64_t addr = 1024; + int maxLen = kernel::Linux::LINUX_PATH_MAX; + + MemoryAccessTarget target1 = {addr, 1}; + MemoryReadResult res1 = {target1, RegisterValue(0xAB, 1), 1}; + span res1Span = span(&res1, 1); + + MemoryAccessTarget target2 = {addr + 1, 1}; + MemoryReadResult res2 = {target2, RegisterValue(static_cast('\0'), 1), + 1}; + span res2Span = span(&res2, 1); + + // On first call to readStringThen, expect return of false and retVal to still + // be 0, 
and buffer to be filled with `q` + MemoryAccessTarget tar = {addr, 1}; + EXPECT_CALL(memory, requestRead(tar, 0)).Times(1); + bool outcome = + handler.readStringThen(buffer, addr, maxLen, [&retVal](auto length) { + retVal = length; + return true; + }); + EXPECT_FALSE(outcome); + EXPECT_EQ(retVal, 0); + for (int i = 0; i < 256; i++) { + EXPECT_EQ(buffer[i], 'q'); + } + + // ResumeHandling (called on tick()) should now be set to `readStringThen()` + // so call this for our second pass. + ON_CALL(memory, getCompletedReads()) + .WillByDefault(Return(span())); + EXPECT_CALL(memory, getCompletedReads()).Times(1); + outcome = handler.tick(); + // No memory reads completed yet so again expect to return false and no change + // to `retval` or buffer + EXPECT_FALSE(outcome); + EXPECT_EQ(retVal, 0); + for (int i = 0; i < 256; i++) { + EXPECT_EQ(buffer[i], 'q'); + } + + // Call tick() again, but mimic a memory read completing + tar = {addr + 1, 1}; + ON_CALL(memory, getCompletedReads()).WillByDefault(Return(res1Span)); + EXPECT_CALL(memory, getCompletedReads()).Times(1); + EXPECT_CALL(memory, requestRead(tar, 0)).Times(1); + outcome = handler.tick(); + // Completed read but still not complete, so outcome should be false, retVal + // unchanged, but some data in the buffer + EXPECT_FALSE(outcome); + EXPECT_EQ(retVal, 0); + for (int i = 0; i < 256; i++) { + if (i == 0) { + EXPECT_EQ(buffer[i], (char)0xAB); + } else { + EXPECT_EQ(buffer[i], 'q'); + } + } + + // Call tick() for a final time, getting the final read result + ON_CALL(memory, getCompletedReads()).WillByDefault(Return(res2Span)); + EXPECT_CALL(memory, getCompletedReads()).Times(1); + outcome = handler.tick(); + // End of string '\0' found so expect `then()` to have been called, the + // outcome to be true, and the buffer again to have updated + EXPECT_TRUE(outcome); + EXPECT_EQ(retVal, 1); + for (int i = 0; i < 256; i++) { + if (i == 0) { + EXPECT_EQ(buffer[i], (char)0xAB); + } else if (i == 1) { + EXPECT_EQ(buffer[i], 
'\0'); + } else { + EXPECT_EQ(buffer[i], 'q'); + } + } +} + +// Test that in `readStringThen()` if max length is 0, then is called straight +// away +TEST_F(RiscVExceptionHandlerTest, readStringThen_maxLen0) { + // Create new mock instruction and ExceptionHandler + std::shared_ptr uopPtr(new MockInstruction); + ExceptionHandler handler(uopPtr, core, memory, kernel); + size_t retVal = 100; + char* buffer; + buffer = (char*)malloc(256); + for (int i = 0; i < 256; i++) { + buffer[i] = 'q'; + } + uint64_t addr = 1024; + int maxLen = 0; + + bool outcome = + handler.readStringThen(buffer, addr, maxLen, [&retVal](auto length) { + retVal = length; + return true; + }); + EXPECT_TRUE(outcome); + EXPECT_EQ(retVal, -1); + for (int i = 0; i < 256; i++) { + EXPECT_EQ(buffer[i], 'q'); + } +} + +// Test that in `readStringThen()` if max length has been met, then() is called +// and no more string is fetched +TEST_F(RiscVExceptionHandlerTest, readStringThen_maxLenReached) { + // Create new mock instruction and ExceptionHandler + std::shared_ptr uopPtr(new MockInstruction); + ExceptionHandler handler(uopPtr, core, memory, kernel); + + // Initialise variables + size_t retVal = 100; + char* buffer; + buffer = (char*)malloc(256); + for (int i = 0; i < 256; i++) { + buffer[i] = 'q'; + } + uint64_t addr = 1024; + int maxLen = 1; + + MemoryAccessTarget target1 = {addr, 1}; + MemoryReadResult res1 = {target1, RegisterValue(0xAB, 1), 1}; + span res1Span = span(&res1, 1); + + // On first call to readStringThen, expect return of false and retVal to still + // be 0, and buffer to be filled with `q` + MemoryAccessTarget tar = {addr, 1}; + EXPECT_CALL(memory, requestRead(tar, 0)).Times(1); + bool outcome = + handler.readStringThen(buffer, addr, maxLen, [&retVal](auto length) { + retVal = length; + return true; + }); + EXPECT_FALSE(outcome); + EXPECT_EQ(retVal, 100); + for (int i = 0; i < 256; i++) { + EXPECT_EQ(buffer[i], 'q'); + } + + // ResumeHandling (called on tick()) should now be set to 
`readStringThen()` + // so call this for our second pass. + ON_CALL(memory, getCompletedReads()) + .WillByDefault(Return(span())); + EXPECT_CALL(memory, getCompletedReads()).Times(1); + outcome = handler.tick(); + // No memory reads completed yet so again expect to return false and no change + // to `retval` or buffer + EXPECT_FALSE(outcome); + EXPECT_EQ(retVal, 100); + for (int i = 0; i < 256; i++) { + EXPECT_EQ(buffer[i], 'q'); + } + + // Call tick() again, but mimic a memory read completing + ON_CALL(memory, getCompletedReads()).WillByDefault(Return(res1Span)); + EXPECT_CALL(memory, getCompletedReads()).Times(1); + outcome = handler.tick(); + // Completed read and maxLength reached. Expect then() to have been called, + // the outcome to be true, and the buffer to have updated. RetVal should be + // maxLength + EXPECT_TRUE(outcome); + EXPECT_EQ(retVal, 1); + for (int i = 0; i < 256; i++) { + if (i == 0) { + EXPECT_EQ(buffer[i], (char)0xAB); + } else { + EXPECT_EQ(buffer[i], 'q'); + } + } +} + +// Test that `readBufferThen()` operates as expected +TEST_F(RiscVExceptionHandlerTest, readBufferThen) { + // Create new mock instruction and ExceptionHandler + std::shared_ptr uopPtr(new MockInstruction); + uopPtr->setSequenceId(5); + ExceptionHandler handler(uopPtr, core, memory, kernel); + + // Initialise needed values for function + uint64_t retVal = 0; + uint64_t ptr = 0; + uint64_t length = 192; + + // Initialise data to "read" from MockMemory + std::vector dataVec(length, 'q'); + std::vector dataVec2(length, 'q'); + // Initialise the two required targets (128-bytes per read request in + // readBufferThen()) + MemoryAccessTarget tar1 = {ptr, 128}; + MemoryAccessTarget tar2 = {ptr + 128, static_cast(length - 128)}; + // Initialise "responses" from the MockMemory + MemoryReadResult res1 = {tar1, RegisterValue(dataVec.data() + ptr, 128), + uopPtr->getSequenceId()}; + MemoryReadResult res2 = { + tar2, RegisterValue(dataVec.data() + ptr + 128, length - 128), + 
uopPtr->getSequenceId()}; + + // Confirm that internal dataBuffer is empty + EXPECT_EQ(handler.dataBuffer.size(), 0); + + // Initial call to readBufferThen - expect resumeHandling to be updated to + // readBufferThen and a memory read request to have occurred + EXPECT_CALL(memory, requestRead(tar1, uopPtr->getSequenceId())).Times(1); + EXPECT_CALL(memory, getCompletedReads()).Times(1); + bool outcome = handler.readBufferThen(ptr, length, [&retVal]() { + retVal = 10; + return true; + }); + EXPECT_FALSE(outcome); + EXPECT_EQ(retVal, 0); + EXPECT_EQ(handler.dataBuffer.size(), 0); + + // Can now call tick() - on call, emulate no reads completed + ON_CALL(memory, getCompletedReads()) + .WillByDefault(Return(span())); + EXPECT_CALL(memory, getCompletedReads()).Times(1); + outcome = handler.tick(); + EXPECT_FALSE(outcome); + EXPECT_EQ(retVal, 0); + EXPECT_EQ(handler.dataBuffer.size(), 0); + + // Call tick() again, simulating completed read + new read requested as still + // data to fetch + ON_CALL(memory, getCompletedReads()) + .WillByDefault(Return(span(&res1, 1))); + // Make sure clearCompletedReads() alters functionality of getCompletedReads() + ON_CALL(memory, clearCompletedReads()) + .WillByDefault(::testing::InvokeWithoutArgs([&]() { + ON_CALL(memory, getCompletedReads()) + .WillByDefault(Return(span())); + })); + EXPECT_CALL(memory, getCompletedReads()).Times(2); + EXPECT_CALL(memory, clearCompletedReads()).Times(1); + EXPECT_CALL(memory, requestRead(tar2, uopPtr->getSequenceId())).Times(1); + outcome = handler.tick(); + EXPECT_FALSE(outcome); + EXPECT_EQ(retVal, 0); + EXPECT_EQ(handler.dataBuffer.size(), 128); + for (int i = 0; i < handler.dataBuffer.size(); i++) { + EXPECT_EQ(handler.dataBuffer[i], 'q'); + } + + // One final call to tick() to get last bits of data from memory and call + // then() + ON_CALL(memory, getCompletedReads()) + .WillByDefault(Return(span(&res2, 1))); + EXPECT_CALL(memory, getCompletedReads()).Times(1); + EXPECT_CALL(memory, 
clearCompletedReads()).Times(1); + outcome = handler.tick(); + EXPECT_TRUE(outcome); + EXPECT_EQ(retVal, 10); + EXPECT_EQ(handler.dataBuffer.size(), length); + for (int i = 0; i < length; i++) { + EXPECT_EQ(handler.dataBuffer[i], static_cast('q')); + } +} + +// Test that `readBufferThen()` calls then if length is 0 +TEST_F(RiscVExceptionHandlerTest, readBufferThen_length0) { + // Create new mock instruction and ExceptionHandler + std::shared_ptr uopPtr(new MockInstruction); + ExceptionHandler handler(uopPtr, core, memory, kernel); + + const size_t expectedVal = 10; + uint64_t retVal = 0; + uint64_t ptr = 0; + uint64_t length = 0; + + bool outcome = handler.readBufferThen(ptr, length, [&retVal]() { + retVal = 10; + return true; + }); + EXPECT_TRUE(outcome); + EXPECT_EQ(retVal, expectedVal); +} + +// Test that all RISC-V exception types print as expected +TEST_F(RiscVExceptionHandlerTest, printException) { + ON_CALL(core, getArchitecturalRegisterFileSet()) + .WillByDefault(ReturnRef(archRegFileSet)); + uint64_t insnAddr = 0x4; + MacroOp uops; + + // Create instruction for EncodingUnallocated + arch.predecode(validInstrBytes.data(), validInstrBytes.size(), insnAddr, + uops); + InstructionException exception = InstructionException::EncodingUnallocated; + std::shared_ptr insn = std::make_shared( + arch, static_cast(uops[0].get())->getMetadata(), exception); + // Create ExceptionHandler + ExceptionHandler handler_0(insn, core, memory, kernel); + // Capture std::cout and tick exceptionHandler + std::stringstream buffer; + std::streambuf* sbuf = std::cout.rdbuf(); // Save cout's buffer + std::cout.rdbuf(buffer.rdbuf()); // Redirect cout to buffer + handler_0.printException(*static_cast(insn.get())); + std::cout.rdbuf(sbuf); // Restore cout + EXPECT_THAT(buffer.str(), HasSubstr("[SimEng:ExceptionHandler] Encountered " + "encoding unallocated exception")); + buffer.str(std::string()); + uops.clear(); + + // Create instruction for ExecutionNotYetImplemented + 
arch.predecode(validInstrBytes.data(), validInstrBytes.size(), insnAddr, + uops); + exception = InstructionException::ExecutionNotYetImplemented; + insn = std::make_shared( + arch, static_cast(uops[0].get())->getMetadata(), exception); + // Create ExceptionHandler + ExceptionHandler handler_1(insn, core, memory, kernel); + // Capture std::cout and tick exceptionHandler + sbuf = std::cout.rdbuf(); // Save cout's buffer + std::cout.rdbuf(buffer.rdbuf()); // Redirect cout to buffer + handler_1.printException(*static_cast(insn.get())); + std::cout.rdbuf(sbuf); // Restore cout + EXPECT_THAT(buffer.str(), + HasSubstr("[SimEng:ExceptionHandler] Encountered execution " + "not-yet-implemented exception")); + buffer.str(std::string()); + uops.clear(); + + // Create instruction for AliasNotYetImplemented + arch.predecode(validInstrBytes.data(), validInstrBytes.size(), insnAddr, + uops); + exception = InstructionException::AliasNotYetImplemented; + insn = std::make_shared( + arch, static_cast(uops[0].get())->getMetadata(), exception); + // Create ExceptionHandler + ExceptionHandler handler_2(insn, core, memory, kernel); + // Capture std::cout and tick exceptionHandler + sbuf = std::cout.rdbuf(); // Save cout's buffer + std::cout.rdbuf(buffer.rdbuf()); // Redirect cout to buffer + handler_2.printException(*static_cast(insn.get())); + std::cout.rdbuf(sbuf); // Restore cout + EXPECT_THAT(buffer.str(), HasSubstr("[SimEng:ExceptionHandler] Encountered " + "alias not-yet-implemented exception")); + buffer.str(std::string()); + uops.clear(); + + // Create instruction for MisalignedPC + arch.predecode(validInstrBytes.data(), validInstrBytes.size(), insnAddr, + uops); + exception = InstructionException::MisalignedPC; + insn = std::make_shared( + arch, static_cast(uops[0].get())->getMetadata(), exception); + // Create ExceptionHandler + ExceptionHandler handler_3(insn, core, memory, kernel); + // Capture std::cout and tick exceptionHandler + sbuf = std::cout.rdbuf(); // Save cout's 
buffer + std::cout.rdbuf(buffer.rdbuf()); // Redirect cout to buffer + handler_3.printException(*static_cast(insn.get())); + std::cout.rdbuf(sbuf); // Restore cout + EXPECT_THAT(buffer.str(), + HasSubstr("[SimEng:ExceptionHandler] Encountered misaligned " + "program counter exception")); + buffer.str(std::string()); + uops.clear(); + + // Create instruction for DataAbort + arch.predecode(validInstrBytes.data(), validInstrBytes.size(), insnAddr, + uops); + exception = InstructionException::DataAbort; + insn = std::make_shared( + arch, static_cast(uops[0].get())->getMetadata(), exception); + // Create ExceptionHandler + ExceptionHandler handler_4(insn, core, memory, kernel); + // Capture std::cout and tick exceptionHandler + sbuf = std::cout.rdbuf(); // Save cout's buffer + std::cout.rdbuf(buffer.rdbuf()); // Redirect cout to buffer + handler_4.printException(*static_cast(insn.get())); + std::cout.rdbuf(sbuf); // Restore cout + EXPECT_THAT( + buffer.str(), + HasSubstr("[SimEng:ExceptionHandler] Encountered data abort exception")); + buffer.str(std::string()); + uops.clear(); + + // Create instruction for SupervisorCall + arch.predecode(validInstrBytes.data(), validInstrBytes.size(), insnAddr, + uops); + exception = InstructionException::SupervisorCall; + insn = std::make_shared( + arch, static_cast(uops[0].get())->getMetadata(), exception); + // Create ExceptionHandler + ExceptionHandler handler_5(insn, core, memory, kernel); + // Capture std::cout and tick exceptionHandler + sbuf = std::cout.rdbuf(); // Save cout's buffer + std::cout.rdbuf(buffer.rdbuf()); // Redirect cout to buffer + handler_5.printException(*static_cast(insn.get())); + std::cout.rdbuf(sbuf); // Restore cout + EXPECT_THAT( + buffer.str(), + HasSubstr( + "[SimEng:ExceptionHandler] Encountered supervisor call exception")); + buffer.str(std::string()); + uops.clear(); + + // Create instruction for HypervisorCall + arch.predecode(validInstrBytes.data(), validInstrBytes.size(), insnAddr, + uops); + 
exception = InstructionException::HypervisorCall; + insn = std::make_shared( + arch, static_cast(uops[0].get())->getMetadata(), exception); + // Create ExceptionHandler + ExceptionHandler handler_6(insn, core, memory, kernel); + // Capture std::cout and tick exceptionHandler + sbuf = std::cout.rdbuf(); // Save cout's buffer + std::cout.rdbuf(buffer.rdbuf()); // Redirect cout to buffer + handler_6.printException(*static_cast(insn.get())); + std::cout.rdbuf(sbuf); // Restore cout + EXPECT_THAT( + buffer.str(), + HasSubstr( + "[SimEng:ExceptionHandler] Encountered hypervisor call exception")); + buffer.str(std::string()); + uops.clear(); + + // Create instruction for SecureMonitorCall + arch.predecode(validInstrBytes.data(), validInstrBytes.size(), insnAddr, + uops); + exception = InstructionException::SecureMonitorCall; + insn = std::make_shared( + arch, static_cast(uops[0].get())->getMetadata(), exception); + // Create ExceptionHandler + ExceptionHandler handler_7(insn, core, memory, kernel); + // Capture std::cout and tick exceptionHandler + sbuf = std::cout.rdbuf(); // Save cout's buffer + std::cout.rdbuf(buffer.rdbuf()); // Redirect cout to buffer + handler_7.printException(*static_cast(insn.get())); + std::cout.rdbuf(sbuf); // Restore cout + EXPECT_THAT(buffer.str(), HasSubstr("[SimEng:ExceptionHandler] Encountered " + "secure monitor call exception")); + buffer.str(std::string()); + uops.clear(); + + // Create instruction for NoAvailablePort + arch.predecode(validInstrBytes.data(), validInstrBytes.size(), insnAddr, + uops); + exception = InstructionException::NoAvailablePort; + insn = std::make_shared( + arch, static_cast(uops[0].get())->getMetadata(), exception); + // Create ExceptionHandler + ExceptionHandler handler_8(insn, core, memory, kernel); + // Capture std::cout and tick exceptionHandler + sbuf = std::cout.rdbuf(); // Save cout's buffer + std::cout.rdbuf(buffer.rdbuf()); // Redirect cout to buffer + handler_8.printException(*static_cast(insn.get())); 
+ std::cout.rdbuf(sbuf); // Restore cout + EXPECT_THAT(buffer.str(), HasSubstr("[SimEng:ExceptionHandler] Encountered " + "unsupported execution port exception")); + buffer.str(std::string()); + uops.clear(); + + // Create instruction for IllegalInstruction + arch.predecode(validInstrBytes.data(), validInstrBytes.size(), insnAddr, + uops); + exception = InstructionException::IllegalInstruction; + insn = std::make_shared( + arch, static_cast(uops[0].get())->getMetadata(), exception); + // Create ExceptionHandler + ExceptionHandler handler_9(insn, core, memory, kernel); + // Capture std::cout and tick exceptionHandler + sbuf = std::cout.rdbuf(); // Save cout's buffer + std::cout.rdbuf(buffer.rdbuf()); // Redirect cout to buffer + handler_9.printException(*static_cast(insn.get())); + std::cout.rdbuf(sbuf); // Restore cout + EXPECT_THAT(buffer.str(), HasSubstr("[SimEng:ExceptionHandler] Encountered " + "illegal instruction exception")); + buffer.str(std::string()); + uops.clear(); + + // Create instruction for PipelineFlush + arch.predecode(validInstrBytes.data(), validInstrBytes.size(), insnAddr, + uops); + exception = InstructionException::PipelineFlush; + insn = std::make_shared( + arch, static_cast(uops[0].get())->getMetadata(), exception); + // Create ExceptionHandler + ExceptionHandler handler_10(insn, core, memory, kernel); + // Capture std::cout and tick exceptionHandler + sbuf = std::cout.rdbuf(); // Save cout's buffer + std::cout.rdbuf(buffer.rdbuf()); // Redirect cout to buffer + handler_10.printException(*static_cast(insn.get())); + std::cout.rdbuf(sbuf); // Restore cout + EXPECT_THAT(buffer.str(), HasSubstr("[SimEng:ExceptionHandler] Encountered " + "unknown atomic operation exception")); + buffer.str(std::string()); + uops.clear(); + + // Create instruction for default case + arch.predecode(validInstrBytes.data(), validInstrBytes.size(), insnAddr, + uops); + exception = InstructionException::None; + insn = std::make_shared( + arch, 
static_cast(uops[0].get())->getMetadata(), exception); + // Create ExceptionHandler + ExceptionHandler handler_11(insn, core, memory, kernel); + // Capture std::cout and tick exceptionHandler + sbuf = std::cout.rdbuf(); // Save cout's buffer + std::cout.rdbuf(buffer.rdbuf()); // Redirect cout to buffer + handler_11.printException(*static_cast(insn.get())); + std::cout.rdbuf(sbuf); // Restore cout + EXPECT_THAT(buffer.str(), + HasSubstr("[SimEng:ExceptionHandler] Encountered unknown (id: " + "0) exception")); + buffer.str(std::string()); + uops.clear(); +} + +} // namespace riscv +} // namespace arch +} // namespace simeng \ No newline at end of file diff --git a/test/unit/riscv/InstructionTest.cc b/test/unit/riscv/InstructionTest.cc new file mode 100644 index 0000000000..37580c4f80 --- /dev/null +++ b/test/unit/riscv/InstructionTest.cc @@ -0,0 +1,620 @@ +#include "../ConfigInit.hh" +#include "../MockArchitecture.hh" +#include "arch/riscv/InstructionMetadata.hh" +#include "gmock/gmock.h" +#include "simeng/arch/riscv/Instruction.hh" +#include "simeng/version.hh" + +namespace simeng { +namespace arch { +namespace riscv { + +// RiscV Instruction Tests +class RiscVInstructionTest : public testing::Test { + public: + RiscVInstructionTest() + : os(config::SimInfo::getConfig()["CPU-Info"]["Special-File-Dir-Path"] + .as()), + arch(os) { + // Create InstructionMetadata objects + cs_open(CS_ARCH_RISCV, CS_MODE_RISCV64, &capstoneHandle); + cs_option(capstoneHandle, CS_OPT_DETAIL, CS_OPT_ON); + + // Create instructions which cover the 3 main types: Arithmetic, Memory, + // Branch. This allows for full testing of the Instruction class. 
+ + // div + cs_insn rawInsn_div; + cs_detail rawDetail_div; + rawInsn_div.detail = &rawDetail_div; + size_t size_div = 4; + uint64_t address_div = 0; + const uint8_t* encoding_div = + reinterpret_cast(divInstrBytes.data()); + cs_disasm_iter(capstoneHandle, &encoding_div, &size_div, &address_div, + &rawInsn_div); + divMetadata = std::make_unique(rawInsn_div); + + // lbu + cs_insn rawInsn_lbu; + cs_detail rawDetail_ldp; + rawInsn_lbu.detail = &rawDetail_ldp; + size_t size_lbu = 4; + uint64_t address_lbu = 0; + const uint8_t* encoding_lbu = + reinterpret_cast(lbuInstrBytes.data()); + cs_disasm_iter(capstoneHandle, &encoding_lbu, &size_lbu, &address_lbu, + &rawInsn_lbu); + lbuMetadata = std::make_unique(rawInsn_lbu); + + // bgeu + cs_insn rawInsn_bgeu; + cs_detail rawDetail_bgeu; + rawInsn_bgeu.detail = &rawDetail_bgeu; + size_t size_bgeu = 4; + uint64_t address_bgeu = 0; + const uint8_t* encoding_bgeu = + reinterpret_cast(bgeuInstrBytes.data()); + cs_disasm_iter(capstoneHandle, &encoding_bgeu, &size_bgeu, &address_bgeu, + &rawInsn_bgeu); + bgeuMetadata = std::make_unique(rawInsn_bgeu); + + const uint8_t* badEncoding = + reinterpret_cast(invalidInstrBytes.data()); + invalidMetadata = std::make_unique(badEncoding); + } + + ~RiscVInstructionTest() { cs_close(&capstoneHandle); } + + protected: + ConfigInit configInit = ConfigInit(config::ISA::RV64, ""); + + // div a3, a3, a0 + std::array divInstrBytes = {0xB3, 0xC6, 0xA6, 0x02}; + // lbu a5, 0(s3) + std::array lbuInstrBytes = {0x83, 0xC7, 0x09, 0x00}; + // bgeu a5, a4, -86 + std::array bgeuInstrBytes = {0xE3, 0xF5, 0xE7, 0xFA}; + std::array invalidInstrBytes = {0x20, 0x00, 0x02, 0x8c}; + + // A Capstone decoding library handle, for decoding instructions. 
+ csh capstoneHandle; + + kernel::Linux os; + Architecture arch; + + std::unique_ptr divMetadata; + std::unique_ptr lbuMetadata; + std::unique_ptr bgeuMetadata; + std::unique_ptr invalidMetadata; + InstructionException exception; +}; + +// Test that a valid instruction is created correctly +TEST_F(RiscVInstructionTest, validInsn) { + // Insn is `div a3, a3, a0` + Instruction insn = Instruction(arch, *divMetadata.get()); + // Define instruction's registers + std::vector destRegs = {{RegisterType::GENERAL, 13}}; + std::vector srcRegs = {{RegisterType::GENERAL, 13}, + {RegisterType::GENERAL, 10}}; + const std::vector ports = {1, 2, 3}; + insn.setExecutionInfo({3, 4, ports}); + insn.setInstructionAddress(0x48); + insn.setInstructionId(11); + insn.setSequenceId(12); + + // Ensure that all instruction values are as expected after creation + BranchPrediction pred = {false, 0}; + bool matchingPred = (insn.getBranchPrediction() == pred) ? true : false; + EXPECT_EQ(&insn.getArchitecture(), &arch); + EXPECT_EQ(insn.getBranchAddress(), 0); + EXPECT_TRUE(matchingPred); + EXPECT_EQ(insn.getBranchType(), BranchType::Unknown); + EXPECT_EQ(insn.getData().size(), 0); + EXPECT_EQ(insn.getDestinationRegisters().size(), destRegs.size()); + for (int i = 0; i < destRegs.size(); i++) { + EXPECT_EQ(insn.getDestinationRegisters()[i], destRegs[i]); + } + EXPECT_EQ(insn.getException(), InstructionException::None); + EXPECT_EQ(insn.getGeneratedAddresses().size(), 0); + EXPECT_EQ(insn.getGroup(), InstructionGroups::INT_DIV_OR_SQRT); + EXPECT_EQ(insn.getInstructionAddress(), 0x48); + EXPECT_EQ(insn.getInstructionId(), 11); + EXPECT_EQ(insn.getKnownOffset(), 0); + EXPECT_EQ(insn.getLatency(), 3); + EXPECT_EQ(insn.getLSQLatency(), 1); + EXPECT_EQ(&insn.getMetadata(), divMetadata.get()); + EXPECT_EQ(insn.getMicroOpIndex(), 0); + EXPECT_EQ(insn.getResults().size(), 1); + EXPECT_EQ(insn.getSequenceId(), 12); + EXPECT_EQ(insn.getSourceOperands().size(), 2); + 
EXPECT_EQ(insn.getSourceRegisters().size(), srcRegs.size()); + for (int i = 0; i < srcRegs.size(); i++) { + EXPECT_EQ(insn.getSourceRegisters()[i], srcRegs[i]); + EXPECT_FALSE(insn.isOperandReady(i)); + } + EXPECT_EQ(insn.getStallCycles(), 4); + EXPECT_EQ(insn.getSupportedPorts(), ports); + + EXPECT_FALSE(insn.canExecute()); + EXPECT_FALSE(insn.isStoreAddress()); + EXPECT_FALSE(insn.isStoreData()); + EXPECT_FALSE(insn.isLoad()); + EXPECT_FALSE(insn.isBranch()); + EXPECT_FALSE(insn.isAtomic()); + EXPECT_FALSE(insn.isFloat()); + EXPECT_FALSE(insn.exceptionEncountered()); + EXPECT_FALSE(insn.hasExecuted()); + EXPECT_FALSE(insn.canCommit()); + EXPECT_TRUE(insn.hasAllData()); + EXPECT_FALSE(insn.wasBranchTaken()); + EXPECT_FALSE(insn.isFlushed()); + EXPECT_FALSE(insn.isMicroOp()); + EXPECT_TRUE(insn.isLastMicroOp()); + EXPECT_FALSE(insn.isWaitingCommit()); +} + +// Test that an invalid instruction can be created - invalid due to byte stream +TEST_F(RiscVInstructionTest, invalidInsn_1) { + Instruction insn = Instruction(arch, *invalidMetadata.get()); + // Define instruction's registers + std::vector destRegs = {}; + std::vector srcRegs = {}; + const std::vector ports = {}; + insn.setExecutionInfo({1, 1, ports}); + insn.setInstructionAddress(0x44); + insn.setInstructionId(13); + insn.setSequenceId(14); + + // Ensure that all instruction values are as expected after creation + BranchPrediction pred = {false, 0}; + bool matchingPred = (insn.getBranchPrediction() == pred) ? 
true : false; + EXPECT_EQ(&insn.getArchitecture(), &arch); + EXPECT_EQ(insn.getBranchAddress(), 0); + EXPECT_TRUE(matchingPred); + EXPECT_EQ(insn.getBranchType(), BranchType::Unknown); + EXPECT_EQ(insn.getData().size(), 0); + EXPECT_EQ(insn.getDestinationRegisters().size(), destRegs.size()); + for (int i = 0; i < destRegs.size(); i++) { + EXPECT_EQ(insn.getDestinationRegisters()[i], destRegs[i]); + } + EXPECT_EQ(insn.getException(), InstructionException::EncodingUnallocated); + EXPECT_EQ(insn.getGeneratedAddresses().size(), 0); + // Default Group + EXPECT_EQ(insn.getGroup(), InstructionGroups::INT_SIMPLE_ARTH); + EXPECT_EQ(insn.getInstructionAddress(), 0x44); + EXPECT_EQ(insn.getInstructionId(), 13); + EXPECT_EQ(insn.getKnownOffset(), 0); + EXPECT_EQ(insn.getLatency(), 1); + EXPECT_EQ(insn.getLSQLatency(), 1); + EXPECT_EQ(&insn.getMetadata(), invalidMetadata.get()); + EXPECT_EQ(insn.getMicroOpIndex(), 0); + EXPECT_EQ(insn.getResults().size(), 0); + EXPECT_EQ(insn.getSequenceId(), 14); + EXPECT_EQ(insn.getSourceOperands().size(), 0); + EXPECT_EQ(insn.getSourceRegisters().size(), srcRegs.size()); + for (int i = 0; i < srcRegs.size(); i++) { + EXPECT_EQ(insn.getSourceRegisters()[i], srcRegs[i]); + EXPECT_FALSE(insn.isOperandReady(i)); + } + EXPECT_EQ(insn.getStallCycles(), 1); + EXPECT_EQ(insn.getSupportedPorts(), ports); + + EXPECT_TRUE(insn.canExecute()); + EXPECT_FALSE(insn.isStoreAddress()); + EXPECT_FALSE(insn.isStoreData()); + EXPECT_FALSE(insn.isLoad()); + EXPECT_FALSE(insn.isBranch()); + EXPECT_FALSE(insn.isAtomic()); + EXPECT_FALSE(insn.isFloat()); + EXPECT_TRUE(insn.exceptionEncountered()); + EXPECT_FALSE(insn.hasExecuted()); + EXPECT_FALSE(insn.canCommit()); + EXPECT_TRUE(insn.hasAllData()); + EXPECT_FALSE(insn.wasBranchTaken()); + EXPECT_FALSE(insn.isFlushed()); + EXPECT_FALSE(insn.isMicroOp()); + EXPECT_TRUE(insn.isLastMicroOp()); + EXPECT_FALSE(insn.isWaitingCommit()); +} + +// Test that an invalid instruction can be created - invalid due to exception 
+// provided +TEST_F(RiscVInstructionTest, invalidInsn_2) { + Instruction insn = Instruction(arch, *invalidMetadata.get(), + InstructionException::HypervisorCall); + // Define instruction's registers + std::vector destRegs = {}; + std::vector srcRegs = {}; + const std::vector ports = {}; + insn.setExecutionInfo({1, 1, ports}); + insn.setInstructionAddress(0x43); + insn.setInstructionId(15); + insn.setSequenceId(16); + + // Ensure that all instruction values are as expected after creation + BranchPrediction pred = {false, 0}; + bool matchingPred = (insn.getBranchPrediction() == pred) ? true : false; + EXPECT_EQ(&insn.getArchitecture(), &arch); + EXPECT_EQ(insn.getBranchAddress(), 0); + EXPECT_TRUE(matchingPred); + EXPECT_EQ(insn.getBranchType(), BranchType::Unknown); + EXPECT_EQ(insn.getData().size(), 0); + EXPECT_EQ(insn.getDestinationRegisters().size(), destRegs.size()); + for (int i = 0; i < destRegs.size(); i++) { + EXPECT_EQ(insn.getDestinationRegisters()[i], destRegs[i]); + } + EXPECT_EQ(insn.getException(), InstructionException::HypervisorCall); + EXPECT_EQ(insn.getGeneratedAddresses().size(), 0); + // Default Group + EXPECT_EQ(insn.getGroup(), InstructionGroups::INT_SIMPLE_ARTH); + EXPECT_EQ(insn.getInstructionAddress(), 0x43); + EXPECT_EQ(insn.getInstructionId(), 15); + EXPECT_EQ(insn.getKnownOffset(), 0); + EXPECT_EQ(insn.getLatency(), 1); + EXPECT_EQ(insn.getLSQLatency(), 1); + EXPECT_EQ(&insn.getMetadata(), invalidMetadata.get()); + EXPECT_EQ(insn.getMicroOpIndex(), 0); + EXPECT_EQ(insn.getResults().size(), 0); + EXPECT_EQ(insn.getSequenceId(), 16); + EXPECT_EQ(insn.getSourceOperands().size(), 0); + EXPECT_EQ(insn.getSourceRegisters().size(), srcRegs.size()); + for (int i = 0; i < srcRegs.size(); i++) { + EXPECT_EQ(insn.getSourceRegisters()[i], srcRegs[i]); + EXPECT_FALSE(insn.isOperandReady(i)); + } + EXPECT_EQ(insn.getStallCycles(), 1); + EXPECT_EQ(insn.getSupportedPorts(), ports); + + EXPECT_TRUE(insn.canExecute()); + 
EXPECT_FALSE(insn.isStoreAddress()); + EXPECT_FALSE(insn.isStoreData()); + EXPECT_FALSE(insn.isLoad()); + EXPECT_FALSE(insn.isBranch()); + EXPECT_FALSE(insn.isAtomic()); + EXPECT_FALSE(insn.isFloat()); + EXPECT_TRUE(insn.exceptionEncountered()); + EXPECT_FALSE(insn.hasExecuted()); + EXPECT_FALSE(insn.canCommit()); + EXPECT_TRUE(insn.hasAllData()); + EXPECT_FALSE(insn.wasBranchTaken()); + EXPECT_FALSE(insn.isFlushed()); + EXPECT_FALSE(insn.isMicroOp()); + EXPECT_TRUE(insn.isLastMicroOp()); + EXPECT_FALSE(insn.isWaitingCommit()); +} + +// Test to ensure that source and operand registers can be renamed correctly +TEST_F(RiscVInstructionTest, renameRegs) { + // Insn is `div a3, a3, a0` + Instruction insn = Instruction(arch, *divMetadata.get()); + // Define instruction's registers + std::vector destRegs = {{RegisterType::GENERAL, 13}}; + std::vector srcRegs = {{RegisterType::GENERAL, 13}, + {RegisterType::GENERAL, 10}}; + // Ensure registers decoded correctly + EXPECT_EQ(insn.getSourceRegisters().size(), srcRegs.size()); + for (int i = 0; i < srcRegs.size(); i++) { + EXPECT_EQ(insn.getSourceRegisters()[i], srcRegs[i]); + } + EXPECT_EQ(insn.getDestinationRegisters().size(), destRegs.size()); + for (int i = 0; i < destRegs.size(); i++) { + EXPECT_EQ(insn.getDestinationRegisters()[i], destRegs[i]); + } + + // Define renamed registers + std::vector destRegs_new = {{RegisterType::GENERAL, 24}}; + std::vector srcRegs_new = {{RegisterType::GENERAL, 13}, + {RegisterType::GENERAL, 97}}; + insn.renameDestination(0, destRegs_new[0]); + insn.renameSource(1, srcRegs_new[1]); + // Ensure renaming functionality works as expected + EXPECT_EQ(insn.getSourceRegisters().size(), srcRegs_new.size()); + for (int i = 0; i < srcRegs_new.size(); i++) { + EXPECT_EQ(insn.getSourceRegisters()[i], srcRegs_new[i]); + } + EXPECT_EQ(insn.getDestinationRegisters().size(), destRegs_new.size()); + for (int i = 0; i < destRegs_new.size(); i++) { + EXPECT_EQ(insn.getDestinationRegisters()[i], 
destRegs_new[i]); + } +} + +// Test that operand values can be properly supplied and change the state of +// `canExecute` +TEST_F(RiscVInstructionTest, supplyOperand) { + // Insn is `div a3, a3, a0` + Instruction insn = Instruction(arch, *divMetadata.get()); + // Define instruction's registers + std::vector destRegs = {{RegisterType::GENERAL, 13}}; + std::vector srcRegs = {{RegisterType::GENERAL, 13}, + {RegisterType::GENERAL, 10}}; + // Check initial state is as expected + EXPECT_FALSE(insn.canExecute()); + EXPECT_FALSE(insn.isOperandReady(0)); + EXPECT_FALSE(insn.isOperandReady(1)); + + // Define mock register values for source registers + RegisterValue val = {0xABBACAFE, 8}; + // Supply values for all source registers + insn.supplyOperand(0, val); + insn.supplyOperand(1, val); + // Ensure Instruction state has updated as expected + EXPECT_TRUE(insn.canExecute()); + EXPECT_TRUE(insn.isOperandReady(0)); + EXPECT_TRUE(insn.isOperandReady(1)); + auto sourceVals = insn.getSourceOperands(); + EXPECT_EQ(sourceVals.size(), 2); + EXPECT_EQ(sourceVals[0], val); + EXPECT_EQ(sourceVals[1], val); + + // Ensure instruction execute updates instruction state as expected, and + // produces the expected result. 
+ EXPECT_FALSE(insn.hasExecuted()); + insn.execute(); + EXPECT_TRUE(insn.hasExecuted()); + auto results = insn.getResults(); + RegisterValue refRes = {0x00000001, 8}; + EXPECT_EQ(results.size(), 1); + EXPECT_EQ(results[0], refRes); +} + +// Test that data can be supplied successfully +TEST_F(RiscVInstructionTest, supplyData) { + // Insn is `lbu a5, 0(s3)` + Instruction insn = Instruction(arch, *lbuMetadata.get()); + // Define instruction's registers + std::vector destRegs = {{RegisterType::GENERAL, 15}}; + std::vector srcRegs = {{RegisterType::GENERAL, 19}}; + + // Check instruction created correctly + EXPECT_FALSE(insn.exceptionEncountered()); + EXPECT_EQ(&insn.getMetadata(), lbuMetadata.get()); + EXPECT_EQ(insn.getGroup(), InstructionGroups::LOAD_INT); + + // Check source and destination registers extracted correctly + EXPECT_EQ(insn.getSourceRegisters().size(), srcRegs.size()); + for (int i = 0; i < srcRegs.size(); i++) { + EXPECT_EQ(insn.getSourceRegisters()[i], srcRegs[i]); + } + EXPECT_EQ(insn.getDestinationRegisters().size(), destRegs.size()); + for (int i = 0; i < destRegs.size(); i++) { + EXPECT_EQ(insn.getDestinationRegisters()[i], destRegs[i]); + } + + // Supply needed operands + EXPECT_FALSE(insn.isOperandReady(0)); + RegisterValue addr = {0x480, 8}; + insn.supplyOperand(0, addr); + EXPECT_TRUE(insn.isOperandReady(0)); + + // Generate memory addresses + EXPECT_EQ(insn.getGeneratedAddresses().size(), 0); + insn.generateAddresses(); + auto generatedAddresses = insn.getGeneratedAddresses(); + EXPECT_EQ(generatedAddresses.size(), 1); + EXPECT_EQ(generatedAddresses[0].address, 0x480); + EXPECT_EQ(generatedAddresses[0].size, 1); + + // Supply required data + EXPECT_FALSE(insn.hasAllData()); + std::vector data = {{123, 1}}; + EXPECT_EQ(generatedAddresses.size(), data.size()); + insn.supplyData(generatedAddresses[0].address, data[0]); + // Ensure data was supplied correctly + auto retrievedData = insn.getData(); + for (int i = 0; i < retrievedData.size(); i++) 
{ + EXPECT_EQ(retrievedData[i], data[i]); + } + EXPECT_TRUE(insn.hasAllData()); +} + +// Test DataAbort Exception is triggered correctly when supplying data +TEST_F(RiscVInstructionTest, supplyData_dataAbort) { + // Insn is `lbu a5, 0(s3)` + Instruction insn = Instruction(arch, *lbuMetadata.get()); + // Define instruction's registers + std::vector destRegs = {{RegisterType::GENERAL, 15}}; + std::vector srcRegs = {{RegisterType::GENERAL, 19}}; + + // Check instruction created correctly + EXPECT_EQ(&insn.getMetadata(), lbuMetadata.get()); + EXPECT_EQ(insn.getGroup(), InstructionGroups::LOAD_INT); + + // Supply needed operands + EXPECT_FALSE(insn.isOperandReady(0)); + RegisterValue addr = {0x480, 8}; + insn.supplyOperand(0, addr); + EXPECT_TRUE(insn.isOperandReady(0)); + + // Generate memory addresses + EXPECT_EQ(insn.getGeneratedAddresses().size(), 0); + insn.generateAddresses(); + auto generatedAddresses = insn.getGeneratedAddresses(); + EXPECT_EQ(generatedAddresses.size(), 1); + EXPECT_EQ(generatedAddresses[0].address, 0x480); + EXPECT_EQ(generatedAddresses[0].size, 1); + + // Trigger data abort + EXPECT_FALSE(insn.exceptionEncountered()); + insn.supplyData(generatedAddresses[0].address, RegisterValue()); + EXPECT_TRUE(insn.exceptionEncountered()); + EXPECT_EQ(insn.getException(), InstructionException::DataAbort); +} + +// Test to check logic around early branch misprediction logic +TEST_F(RiscVInstructionTest, earlyBranchMisprediction) { + // Insn is `div a3, a3, a0` + Instruction insn = Instruction(arch, *divMetadata.get()); + insn.setInstructionAddress(64); + + // Check initial state of an instruction's branch related options + BranchPrediction pred = {false, 0}; + bool matchingPred = (insn.getBranchPrediction() == pred); + EXPECT_TRUE(matchingPred); + EXPECT_FALSE(insn.wasBranchTaken()); + EXPECT_EQ(insn.getBranchAddress(), 0); + EXPECT_EQ(insn.getBranchType(), BranchType::Unknown); + EXPECT_FALSE(insn.isBranch()); + std::tuple tup = {false, 
insn.getInstructionAddress() + 4}; + EXPECT_EQ(insn.checkEarlyBranchMisprediction(), tup); + + // Set prediction and ensure expected state changes / outcomes are seen + pred = {true, 0x4848}; + insn.setBranchPrediction(pred); + matchingPred = (insn.getBranchPrediction() == pred); + EXPECT_TRUE(matchingPred); + EXPECT_FALSE(insn.wasBranchTaken()); + EXPECT_EQ(insn.getBranchAddress(), 0); + EXPECT_EQ(insn.getBranchType(), BranchType::Unknown); + // Check logic of `checkEarlyBranchMisprediction` which is different for + // non-branch instructions + EXPECT_FALSE(insn.isBranch()); + tup = {true, insn.getInstructionAddress() + 4}; + EXPECT_EQ(insn.checkEarlyBranchMisprediction(), tup); +} + +// Test that a correct prediction (branch taken) is handled correctly +TEST_F(RiscVInstructionTest, correctPred_taken) { + // insn is `bgeu a5, a4, -86` + Instruction insn = Instruction(arch, *bgeuMetadata.get()); + insn.setInstructionAddress(400); + + // Check initial state of an instruction's branch related options + BranchPrediction pred = {false, 0}; + bool matchingPred = (insn.getBranchPrediction() == pred); + EXPECT_TRUE(matchingPred); + EXPECT_FALSE(insn.wasBranchTaken()); + EXPECT_EQ(insn.getBranchAddress(), 0); + EXPECT_EQ(insn.getBranchType(), BranchType::Conditional); + EXPECT_TRUE(insn.isBranch()); + std::tuple tup = {false, 0}; + EXPECT_EQ(insn.checkEarlyBranchMisprediction(), tup); + + // Test a correct prediction where branch is taken is handled correctly + pred = {true, 400 - 86}; + insn.setBranchPrediction(pred); + matchingPred = (insn.getBranchPrediction() == pred); + insn.supplyOperand(0, RegisterValue(3, 8)); + insn.supplyOperand(1, RegisterValue(0, 8)); + insn.execute(); + EXPECT_TRUE(matchingPred); + EXPECT_TRUE(insn.wasBranchTaken()); + EXPECT_FALSE(insn.wasBranchMispredicted()); + EXPECT_EQ(insn.getBranchAddress(), pred.target); +} + +// Test that a correct prediction (branch not taken) is handled correctly +TEST_F(RiscVInstructionTest, correctPred_notTaken) { 
+ // insn is `bgeu a5, a4, -86` + Instruction insn = Instruction(arch, *bgeuMetadata.get()); + insn.setInstructionAddress(400); + + // Check initial state of an instruction's branch related options + BranchPrediction pred = {false, 0}; + bool matchingPred = (insn.getBranchPrediction() == pred); + EXPECT_TRUE(matchingPred); + EXPECT_FALSE(insn.wasBranchTaken()); + EXPECT_EQ(insn.getBranchAddress(), 0); + EXPECT_EQ(insn.getBranchType(), BranchType::Conditional); + EXPECT_TRUE(insn.isBranch()); + std::tuple tup = {false, 0}; + EXPECT_EQ(insn.checkEarlyBranchMisprediction(), tup); + + // Test a correct prediction where a branch isn't taken is handled correctly + // imm operand 0x28 has 4 added implicitly by dissassembler + pred = {false, 400 + 4}; + insn.setBranchPrediction(pred); + matchingPred = (insn.getBranchPrediction() == pred); + insn.supplyOperand(0, RegisterValue(0, 8)); + insn.supplyOperand(1, RegisterValue(3, 8)); + insn.execute(); + EXPECT_TRUE(matchingPred); + EXPECT_FALSE(insn.wasBranchTaken()); + EXPECT_FALSE(insn.wasBranchMispredicted()); + EXPECT_EQ(insn.getBranchAddress(), pred.target); +} + +// Test that an incorrect prediction (wrong target) is handled correctly +TEST_F(RiscVInstructionTest, incorrectPred_target) { + // insn is `bgeu a5, a4, -86` + Instruction insn = Instruction(arch, *bgeuMetadata.get()); + insn.setInstructionAddress(400); + + // Check initial state of an instruction's branch related options + BranchPrediction pred = {false, 0}; + bool matchingPred = (insn.getBranchPrediction() == pred); + EXPECT_TRUE(matchingPred); + EXPECT_FALSE(insn.wasBranchTaken()); + EXPECT_EQ(insn.getBranchAddress(), 0); + EXPECT_EQ(insn.getBranchType(), BranchType::Conditional); + EXPECT_TRUE(insn.isBranch()); + std::tuple tup = {false, 0}; + EXPECT_EQ(insn.checkEarlyBranchMisprediction(), tup); + + // Test an incorrect prediction is handled correctly - target is wrong + // imm operand 0x28 has 4 added implicitly by dissassembler + pred = {true, 80 + (0x28 
+ 0x4)}; + insn.setBranchPrediction(pred); + matchingPred = (insn.getBranchPrediction() == pred); + insn.supplyOperand(0, RegisterValue(3, 8)); + insn.supplyOperand(1, RegisterValue(0, 8)); + insn.execute(); + EXPECT_TRUE(matchingPred); + EXPECT_TRUE(insn.wasBranchTaken()); + EXPECT_TRUE(insn.wasBranchMispredicted()); + EXPECT_EQ(insn.getBranchAddress(), 400 - 86); +} + +// Test that an incorrect prediction (wrong taken) is handled correctly +TEST_F(RiscVInstructionTest, incorrectPred_taken) { + // insn is `bgeu a5, a4, -86` + Instruction insn = Instruction(arch, *bgeuMetadata.get()); + insn.setInstructionAddress(400); + + // Check initial state of an instruction's branch related options + BranchPrediction pred = {false, 0}; + bool matchingPred = (insn.getBranchPrediction() == pred); + EXPECT_TRUE(matchingPred); + EXPECT_FALSE(insn.wasBranchTaken()); + EXPECT_EQ(insn.getBranchAddress(), 0); + EXPECT_EQ(insn.getBranchType(), BranchType::Conditional); + EXPECT_TRUE(insn.isBranch()); + std::tuple tup = {false, 0}; + EXPECT_EQ(insn.checkEarlyBranchMisprediction(), tup); + + // Test an incorrect prediction is handled correctly - taken is wrong + // imm operand 0x28 has 4 added implicitly by dissassembler + pred = {true, 400 - 86}; + insn.setBranchPrediction(pred); + matchingPred = (insn.getBranchPrediction() == pred); + insn.supplyOperand(0, RegisterValue(0, 8)); + insn.supplyOperand(1, RegisterValue(3, 8)); + insn.execute(); + EXPECT_TRUE(matchingPred); + EXPECT_FALSE(insn.wasBranchTaken()); + EXPECT_TRUE(insn.wasBranchMispredicted()); + EXPECT_EQ(insn.getBranchAddress(), 400 + 4); +} + +// Test commit and flush setters such as `setFlushed`, `setCommitReady`, etc. 
+TEST_F(RiscVInstructionTest, setters) { + // Insn is `div a3, a3, a0` + Instruction insn = Instruction(arch, *divMetadata.get()); + + EXPECT_FALSE(insn.canCommit()); + insn.setCommitReady(); + EXPECT_TRUE(insn.canCommit()); + + EXPECT_FALSE(insn.isFlushed()); + insn.setFlushed(); + EXPECT_TRUE(insn.isFlushed()); + + EXPECT_FALSE(insn.isWaitingCommit()); + insn.setWaitingCommit(); + EXPECT_TRUE(insn.isWaitingCommit()); +} + +} // namespace riscv +} // namespace arch +} // namespace simeng \ No newline at end of file