diff --git a/CMakeLists.txt b/CMakeLists.txt index cae649dddd..9bd70577f3 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -215,7 +215,7 @@ endif() # saves us from having to build all targets before running the tests add_custom_target(test-all COMMAND ${CMAKE_CTEST_COMMAND} - DEPENDS unittests regression-aarch64 regression-riscv + DEPENDS unittests regression-aarch64 regression-riscv integrationtests ) endif() diff --git a/configs/DEMO_RISCV.yaml b/configs/DEMO_RISCV.yaml index 42e7418b55..5f3387ef1f 100644 --- a/configs/DEMO_RISCV.yaml +++ b/configs/DEMO_RISCV.yaml @@ -139,7 +139,7 @@ Latencies: CPU-Info: # Set Generate-Special-Dir to 'T' to generate the special files directory, or to 'F' to not. # (Not generating the special files directory may require the user to copy over files manually) - Generate-Special-Dir: true + Generate-Special-Dir: True # Core-Count MUST be 1 as multi-core is not supported at this time. (TX2 true value is 32) Core-Count: 1 # Socket-Count MUST be 1 as multi-socket simulations are not supported at this time. (TX2 true value is 2) diff --git a/docs/sphinx/user/configuring_simeng.rst b/docs/sphinx/user/configuring_simeng.rst index cb5b31b215..344585f77b 100644 --- a/docs/sphinx/user/configuring_simeng.rst +++ b/docs/sphinx/user/configuring_simeng.rst @@ -349,9 +349,20 @@ CPU Info These fields are currently only used to generate a replica of the required Special Files directory structure. Generate-Special-Dir - Values are either "True" or "False". - Dictates whether or not SimEng should generate the SpecialFiles directory tree at runtime. - The alternative to this would be to copy in the required SpecialFiles by hand. + Values are either `True` or `False`. + Dictates whether or not SimEng should generate the Special-Files directory tree at runtime. + If your code requires Special-Files but you wish to use your own / existing files from a real system, you will need to set this option to `False`. 
+ The files which are currently generated / supported in SimEng are: + + - `/proc/cpuinfo` + - `/proc/stat` + - `/sys/devices/system/cpu/online` + - `/sys/devices/system/cpu/cpu{0..CoreCount}/topology/core_id` + - `/sys/devices/system/cpu/cpu{0..CoreCount}/topology/physical_package_id` + +Special-File-Dir-Path + Represented as a String; is the **absolute path** to the root directory where the Special-Files will be generated *OR* where existing Special-Files are located. + This is optional, and defaults to `SIMENG_BUILD_DIRECTORY/specialFiles`. The root directory must already exist. Core-Count Defines the total number of Physical cores (Not including threads). diff --git a/src/include/simeng/CoreInstance.hh b/src/include/simeng/CoreInstance.hh index e08dc809ed..2e7b923a65 100644 --- a/src/include/simeng/CoreInstance.hh +++ b/src/include/simeng/CoreInstance.hh @@ -22,7 +22,7 @@ // Program used when no executable is provided; counts down from // 1024*1024, with an independent `orr` at the start of each branch. -uint32_t hex_[] = { +static uint32_t hex_[] = { 0x320C03E0, // orr w0, wzr, #1048576 0x320003E1, // orr w0, wzr, #1 0x71000400, // subs w0, w0, #1 @@ -102,8 +102,11 @@ class CoreInstance { /** Construct the special file directory. */ void createSpecialFileDirectory(); - /** Whether or not the source has been assembled by LLVM. */ - bool assembledSource_ = false; + /** The config file describing the modelled core to be created. */ + ryml::ConstNodeRef config_; + + /** The SimEng Linux kernel object. */ + simeng::kernel::Linux kernel_; /** Reference to source assembled by LLVM. */ char* source_ = nullptr; @@ -111,8 +114,8 @@ class CoreInstance { /** Size of the source code assembled by LLVM. */ size_t sourceSize_ = 0; - /** The config file describing the modelled core to be created. */ - ryml::ConstNodeRef config_; + /** Whether or not the source has been assembled by LLVM. */ + bool assembledSource_ = false; /** Reference to the SimEng linux process object. 
*/ std::unique_ptr process_ = nullptr; @@ -123,9 +126,6 @@ class CoreInstance { /** The process memory space. */ std::shared_ptr processMemory_; - /** The SimEng Linux kernel object. */ - simeng::kernel::Linux kernel_; - /** Whether or not the dataMemory_ must be set manually. */ bool setDataMemory_ = false; diff --git a/src/include/simeng/Instruction.hh b/src/include/simeng/Instruction.hh index 81cf9d7e54..0b326f0ee5 100644 --- a/src/include/simeng/Instruction.hh +++ b/src/include/simeng/Instruction.hh @@ -256,7 +256,7 @@ class Instruction { /** An arbitrary index value for the micro-operation. Its use is based on the * implementation of specific micro-operations. */ - int microOpIndex_; + int microOpIndex_ = 0; }; } // namespace simeng \ No newline at end of file diff --git a/src/include/simeng/RegisterValue.hh b/src/include/simeng/RegisterValue.hh index 1a0c37dd98..96614b93d9 100644 --- a/src/include/simeng/RegisterValue.hh +++ b/src/include/simeng/RegisterValue.hh @@ -131,4 +131,16 @@ class RegisterValue { alignas(8) char value[MAX_LOCAL_BYTES]; }; +inline bool operator==(const RegisterValue& lhs, const RegisterValue& rhs) { + if (lhs.size() == rhs.size()) { + auto lhV = lhs.getAsVector(); + auto rhV = rhs.getAsVector(); + for (int i = 0; i < lhs.size(); i++) { + if (lhV[i] != rhV[i]) return false; + } + return true; + } + return false; +} + } // namespace simeng \ No newline at end of file diff --git a/src/include/simeng/SpecialFileDirGen.hh b/src/include/simeng/SpecialFileDirGen.hh index a907727f54..a60c0d54ca 100644 --- a/src/include/simeng/SpecialFileDirGen.hh +++ b/src/include/simeng/SpecialFileDirGen.hh @@ -4,7 +4,6 @@ #include #include "simeng/config/SimInfo.hh" -#include "simeng/version.hh" namespace simeng { class SpecialFileDirGen { @@ -22,7 +21,7 @@ class SpecialFileDirGen { private: /** Path to the root of the SimEng special files directory. 
*/ - const std::string specialFilesDir_ = SIMENG_BUILD_DIR "/specialFiles"; + const std::string specialFilesDir_; /** Values declared in YAML config file needed to create the Special Files * Directory tree. */ diff --git a/src/include/simeng/arch/aarch64/ExceptionHandler.hh b/src/include/simeng/arch/aarch64/ExceptionHandler.hh index 3e59bc58eb..0f2a7c546c 100644 --- a/src/include/simeng/arch/aarch64/ExceptionHandler.hh +++ b/src/include/simeng/arch/aarch64/ExceptionHandler.hh @@ -96,6 +96,16 @@ class ExceptionHandler : public simeng::arch::ExceptionHandler { static constexpr Register R3 = {RegisterType::GENERAL, 3}; static constexpr Register R4 = {RegisterType::GENERAL, 4}; static constexpr Register R5 = {RegisterType::GENERAL, 5}; + + /** Let the following ExceptionHandlerTest derived classes be a friend of this + * class to allow proper testing of `readStringThen()`, `readBufferThen()` and + * `printException()` functions. */ + friend class AArch64ExceptionHandlerTest_readStringThen_Test; + friend class AArch64ExceptionHandlerTest_readStringThen_maxLen0_Test; + friend class AArch64ExceptionHandlerTest_readStringThen_maxLenReached_Test; + friend class AArch64ExceptionHandlerTest_readBufferThen_Test; + friend class AArch64ExceptionHandlerTest_readBufferThen_length0_Test; + friend class AArch64ExceptionHandlerTest_printException_Test; }; } // namespace aarch64 diff --git a/src/include/simeng/arch/aarch64/Instruction.hh b/src/include/simeng/arch/aarch64/Instruction.hh index d61bb43582..92ac0bc96d 100644 --- a/src/include/simeng/arch/aarch64/Instruction.hh +++ b/src/include/simeng/arch/aarch64/Instruction.hh @@ -13,38 +13,6 @@ namespace simeng { namespace arch { namespace aarch64 { -/** Apply the shift specified by `shiftType` to the unsigned integer `value`, - * shifting by `amount`. 
*/ -template -std::enable_if_t && std::is_unsigned_v, T> shiftValue( - T value, uint8_t shiftType, uint8_t amount) { - switch (shiftType) { - case ARM64_SFT_LSL: - return value << amount; - case ARM64_SFT_LSR: - return value >> amount; - case ARM64_SFT_ASR: - return static_cast>(value) >> amount; - case ARM64_SFT_ROR: { - // Assuming sizeof(T) is a power of 2. - const auto mask = sizeof(T) * 8 - 1; - assert((amount <= mask) && "Rotate amount exceeds type width"); - amount &= mask; - return (value >> amount) | (value << ((-amount) & mask)); - } - case ARM64_SFT_MSL: { - // pad in with ones instead of zeros - const auto mask = (1 << amount) - 1; - return (value << amount) | mask; - } - case ARM64_SFT_INVALID: - return value; - default: - assert(false && "Unknown shift type"); - return 0; - } -} - /** Get the size of the data to be accessed from/to memory. */ inline uint8_t getDataSize(cs_arm64_op op) { // Check from top of the range downwards @@ -203,6 +171,9 @@ const uint8_t NZCV = 3; const uint8_t SYSTEM = 4; /** The [256-byte x (SVL / 8)] SME matrix register za. */ const uint8_t MATRIX = 5; + +/** A special register value representing the zero register. */ +const Register ZERO_REGISTER = {GENERAL, (uint16_t)-1}; } // namespace RegisterType /** A struct holding user-defined execution information for a aarch64 @@ -222,7 +193,6 @@ struct ExecutionInfo { enum class InstructionException { None = 0, EncodingUnallocated, - EncodingNotYetImplemented, ExecutionNotYetImplemented, AliasNotYetImplemented, MisalignedPC, @@ -366,11 +336,6 @@ class Instruction : public simeng::Instruction { /** Retrieve the instruction's associated architecture. */ const Architecture& getArchitecture() const; - /** A special register value representing the zero register. If passed to - * `setSourceRegisters`/`setDestinationRegisters`, the value will be - * automatically supplied as zero. 
*/ - static const Register ZERO_REGISTER; - private: /** A reference to the ISA instance this instruction belongs to. */ const Architecture& architecture_; @@ -380,11 +345,10 @@ class Instruction : public simeng::Instruction { /** A vector of source registers. */ std::vector sourceRegisters; - /** The number of source registers this instruction reads from. */ - uint16_t sourceRegisterCount = 0; /** A vector of destination registers. */ std::vector destinationRegisters; + /** The number of destination registers this instruction writes to. */ uint16_t destinationRegisterCount = 0; @@ -404,15 +368,6 @@ class Instruction : public simeng::Instruction { * registers. */ void decode(); - /** Set the source registers of the instruction, and create a corresponding - * operands vector. Zero register references will be pre-supplied with a value - * of 0. */ - void setSourceRegisters(const std::vector& registers); - - /** Set the destination registers for the instruction, and create a - * corresponding results vector. */ - void setDestinationRegisters(const std::vector& registers); - // Scheduling /** The number of operands that have not yet had values supplied. Used to * determine execution readiness. */ @@ -499,14 +454,6 @@ class Instruction : public simeng::Instruction { * for sending to memory (according to instruction type). Each entry * corresponds to a `memoryAddresses` entry. */ std::vector memoryData; - - // Execution helpers - /** Extend `value` according to `extendType`, and left-shift the result by - * `shift` */ - uint64_t extendValue(uint64_t value, uint8_t extendType, uint8_t shift) const; - - /** Extend `value` using extension/shifting rules defined in `op`. 
*/ - uint64_t extendOffset(uint64_t value, const cs_arm64_op& op) const; }; } // namespace aarch64 diff --git a/src/include/simeng/arch/aarch64/MicroDecoder.hh b/src/include/simeng/arch/aarch64/MicroDecoder.hh index f91f4041d7..f13fb02077 100644 --- a/src/include/simeng/arch/aarch64/MicroDecoder.hh +++ b/src/include/simeng/arch/aarch64/MicroDecoder.hh @@ -30,6 +30,7 @@ class MicroDecoder { const Instruction& macroOp, MacroOp& output, csh capstoneHandle); + private: /** Detect if there's an overlap between the underlying hardware registers * (e.g. z5, v5, q5, d5, s5, h5, and b5). */ bool detectOverlap(arm64_reg registerA, arm64_reg registerB); @@ -67,7 +68,6 @@ class MicroDecoder { csh capstoneHandle, bool lastMicroOp = false, int microOpIndex = 0, uint8_t dataSize = 0); - private: /** Flag to determine whether instruction splitting is enabled. */ const bool instructionSplit_; diff --git a/src/include/simeng/arch/aarch64/helpers/arithmetic.hh b/src/include/simeng/arch/aarch64/helpers/arithmetic.hh index 13485c16fa..cdf8a19ed7 100644 --- a/src/include/simeng/arch/aarch64/helpers/arithmetic.hh +++ b/src/include/simeng/arch/aarch64/helpers/arithmetic.hh @@ -5,204 +5,190 @@ namespace simeng { namespace arch { namespace aarch64 { -class arithmeticHelp { - public: - /** Helper function for instructions with the format `add rd, rn, rm`. - * T represents the type of operands (e.g. for xn, T = uint64_t). - * Returns single value of type T. */ - template - static T add_3ops(std::vector& operands) { - const T n = operands[0].get(); - const T m = operands[1].get(); - return (n + m); - } - /** Helper function for instructions with the format `adc rd, rn, rm`. - * T represents the type of operands (e.g. for xn, T = uint64_t). - * Returns tuple of [resulting value, nzcv]. 
*/ - template - static std::tuple addCarry_3ops( - std::vector& operands) { - const uint8_t carry = operands[0].get() & 0b0010; - const T n = operands[1].get(); - const T m = operands[2].get(); - return AuxFunc::addWithCarry(n, m, carry); - } +/** Helper function for instructions with the format `add rd, rn, rm`. + * T represents the type of operands (e.g. for xn, T = uint64_t). + * Returns single value of type T. */ +template +T add_3ops(std::vector& operands) { + const T n = operands[0].get(); + const T m = operands[1].get(); + return (n + m); +} - /** Helper function for instructions with the format `add rd, rn, rm{, extend - * {#amount}}`. - * T represents the type of operands (e.g. for xn, T = uint64_t). - * Returns tuple of [resulting value, nzcv]. */ - template - static std::tuple addExtend_3ops( - std::vector& operands, - const simeng::arch::aarch64::InstructionMetadata& metadata, - bool calcNZCV) { - const T n = operands[0].get(); - const T m = - AuxFunc::extendValue(operands[1].get(), metadata.operands[2].ext, - metadata.operands[2].shift.value); - if (calcNZCV) return AuxFunc::addWithCarry(n, m, 0); - return {(n + m), 0}; - } +/** Helper function for instructions with the format `adc rd, rn, rm`. + * T represents the type of operands (e.g. for xn, T = uint64_t). + * Returns tuple of [resulting value, nzcv]. */ +template +std::tuple addCarry_3ops(std::vector& operands) { + const uint8_t carry = operands[0].get() & 0b0010; + const T n = operands[1].get(); + const T m = operands[2].get(); + return addWithCarry(n, m, carry); +} - /** Helper function for instructions with the format `add rd, rn, rm{, shift - * #amount}`. - * T represents the type of operands (e.g. for xn, T = uint64_t). - * Returns tuple of [resulting value, nzcv]. 
*/ - template - static std::tuple addShift_3ops( - std::vector& operands, - const simeng::arch::aarch64::InstructionMetadata& metadata, - bool calcNZCV) { - const T n = operands[0].get(); - const T m = - shiftValue(operands[1].get(), metadata.operands[2].shift.type, - metadata.operands[2].shift.value); - if (calcNZCV) return AuxFunc::addWithCarry(n, m, 0); - return {(n + m), 0}; - } +/** Helper function for instructions with the format `add rd, rn, rm{, extend + * {#amount}}`. + * T represents the type of operands (e.g. for xn, T = uint64_t). + * Returns tuple of [resulting value, nzcv]. */ +template +std::tuple addExtend_3ops( + std::vector& operands, + const simeng::arch::aarch64::InstructionMetadata& metadata, bool calcNZCV) { + const T n = operands[0].get(); + const T m = extendValue(operands[1].get(), metadata.operands[2].ext, + metadata.operands[2].shift.value); + if (calcNZCV) return addWithCarry(n, m, 0); + return {(n + m), 0}; +} - /** Helper function for instructions with the format `add rd, rn, #imm{, shift - * #amount}`. - * T represents the type of operands (e.g. for xn, T = uint64_t). - * Returns tuple of [resulting value, nzcv]. */ - template - static std::tuple addShift_imm( - std::vector& operands, - const simeng::arch::aarch64::InstructionMetadata& metadata, - bool calcNZCV) { - const T n = operands[0].get(); - const T m = shiftValue(static_cast(metadata.operands[2].imm), - metadata.operands[2].shift.type, - metadata.operands[2].shift.value); - if (calcNZCV) return AuxFunc::addWithCarry(n, m, 0); - return {(n + m), 0}; - } +/** Helper function for instructions with the format `add rd, rn, rm{, shift + * #amount}`. + * T represents the type of operands (e.g. for xn, T = uint64_t). + * Returns tuple of [resulting value, nzcv]. 
*/ +template +std::tuple addShift_3ops( + std::vector& operands, + const simeng::arch::aarch64::InstructionMetadata& metadata, bool calcNZCV) { + const T n = operands[0].get(); + const T m = shiftValue(operands[1].get(), metadata.operands[2].shift.type, + metadata.operands[2].shift.value); + if (calcNZCV) return addWithCarry(n, m, 0); + return {(n + m), 0}; +} + +/** Helper function for instructions with the format `add rd, rn, #imm{, shift + * #amount}`. + * T represents the type of operands (e.g. for xn, T = uint64_t). + * Returns tuple of [resulting value, nzcv]. */ +template +std::tuple addShift_imm( + std::vector& operands, + const simeng::arch::aarch64::InstructionMetadata& metadata, bool calcNZCV) { + const T n = operands[0].get(); + const T m = shiftValue(static_cast(metadata.operands[2].imm), + metadata.operands[2].shift.type, + metadata.operands[2].shift.value); + if (calcNZCV) return addWithCarry(n, m, 0); + return {(n + m), 0}; +} - /** Helper function for instructions with the format `clz rd, rn`. - * T represents the type of operands (e.g. for xn, T = int64_t). - * Returns single value of type T. */ - template - static T clz_reg(std::vector& operands) { - T x = operands[0].get(); - uint8_t i; - for (i = 0; i < (sizeof(T) * 8); i++) { - // Left-shift x until it's negative or we run out of bits - if (x < 0) { - break; - } - x <<= 1; +/** Helper function for instructions with the format `clz rd, rn`. + * T represents the type of operands (e.g. for xn, T = int64_t). + * Returns single value of type T. */ +template +T clz_reg(std::vector& operands) { + T x = operands[0].get(); + uint8_t i; + for (i = 0; i < (sizeof(T) * 8); i++) { + // Left-shift x until it's negative or we run out of bits + if (x < 0) { + break; } - return i; + x <<= 1; } + return i; +} - /** Helper function for instructions with the format `movk d, #imm`. - * T represents the type of operands (e.g. for xd, T = uint64_t). - * Returns single value of type T. 
*/ - template - static T movkShift_imm( - std::vector& operands, - const simeng::arch::aarch64::InstructionMetadata& metadata) { - // Clear 16-bit region offset by `shift` and replace with immediate - uint8_t shift = metadata.operands[1].shift.value; - T mask = ~(static_cast(0xFFFF) << shift); - T value = - (operands[0].get() & mask) | (metadata.operands[1].imm << shift); - return value; - } +/** Helper function for instructions with the format `movk d, #imm`. + * T represents the type of operands (e.g. for xd, T = uint64_t). + * Returns single value of type T. */ +template +T movkShift_imm(std::vector& operands, + const simeng::arch::aarch64::InstructionMetadata& metadata) { + // Clear 16-bit region offset by `shift` and replace with immediate + uint8_t shift = metadata.operands[1].shift.value; + T mask = ~(static_cast(0xFFFF) << shift); + T value = (operands[0].get() & mask) | (metadata.operands[1].imm << shift); + return value; +} - /** Helper function for instructions with the format `mov d, #imm{, - * lsl #shift}`. - * T represents the type of operands (e.g. for xd, T = uint64_t). - * Returns single value og type uint64_t. */ - template - static uint64_t movnShift_imm( - const simeng::arch::aarch64::InstructionMetadata& metadata, - std::function func) { - uint8_t shift = metadata.operands[1].shift.value; - T value = func(static_cast(metadata.operands[1].imm) << shift); - return static_cast(value); - } +/** Helper function for instructions with the format `mov d, #imm{, + * lsl #shift}`. + * T represents the type of operands (e.g. for xd, T = uint64_t). + * Returns single value of type uint64_t. */ +template +uint64_t movnShift_imm( + const simeng::arch::aarch64::InstructionMetadata& metadata, + std::function func) { + uint8_t shift = metadata.operands[1].shift.value; + T value = func(static_cast(metadata.operands[1].imm) << shift); + return static_cast(value); +} - /** Helper function for instructions with the format `msubl xd, wn, wm, xa`. 
- * D represents the type of the destination register (either int64_t or - * uint64_t). - * N represents the type of the first source register (either - * int32_t or uint32_t). - * Returns single value of type D. */ - template - static D msubl_4ops(std::vector& operands) { - const N n = operands[0].get(); - const N m = operands[1].get(); - const D a = operands[2].get(); - return (a - (n * m)); - } +/** Helper function for instructions with the format `msubl xd, wn, wm, xa`. + * D represents the type of the destination register (either int64_t or + * uint64_t). + * N represents the type of the first source register (either + * int32_t or uint32_t). + * Returns single value of type D. */ +template +D msubl_4ops(std::vector& operands) { + const N n = operands[0].get(); + const N m = operands[1].get(); + const D a = operands[2].get(); + return (a - (n * m)); +} - /** Helper function for instructions with the format `sbc rd, rn, rm`. - * T represents the type of operands (e.g. for xn, T = uint64_t). - * Returns single value of type T. */ - template - static T sbc(std::vector& operands) { - auto nzcv = operands[0].get(); - const T x = operands[1].get(); - const T y = operands[2].get(); - T result; - std::tie(result, std::ignore) = - AuxFunc::addWithCarry(x, ~y, (nzcv >> 1) & 1); - return result; - } +/** Helper function for instructions with the format `sbc rd, rn, rm`. + * T represents the type of operands (e.g. for xn, T = uint64_t). + * Returns single value of type T. */ +template +T sbc(std::vector& operands) { + auto nzcv = operands[0].get(); + const T x = operands[1].get(); + const T y = operands[2].get(); + T result; + std::tie(result, std::ignore) = addWithCarry(x, ~y, (nzcv >> 1) & 1); + return result; +} - /** Helper function for instructions with the format `sub{s} rd, rn, rm{, - * extend #amount}`. - * T represents the type of operands (e.g. for xn, T = uint64_t). - * Returns tuple of [resulting value, nzcv]. 
*/ - template - static std::tuple subExtend_3ops( - std::vector& operands, - const simeng::arch::aarch64::InstructionMetadata& metadata, - bool calcNZCV) { - const T n = operands[0].get(); - const T m = static_cast( - AuxFunc::extendValue(operands[1].get(), metadata.operands[2].ext, - metadata.operands[2].shift.value)); - if (calcNZCV) return AuxFunc::addWithCarry(n, ~m, true); - return {(n - m), 0}; - } +/** Helper function for instructions with the format `sub{s} rd, rn, rm{, + * extend #amount}`. + * T represents the type of operands (e.g. for xn, T = uint64_t). + * Returns tuple of [resulting value, nzcv]. */ +template +std::tuple subExtend_3ops( + std::vector& operands, + const simeng::arch::aarch64::InstructionMetadata& metadata, bool calcNZCV) { + const T n = operands[0].get(); + const T m = + static_cast(extendValue(operands[1].get(), metadata.operands[2].ext, + metadata.operands[2].shift.value)); + if (calcNZCV) return addWithCarry(n, ~m, true); + return {(n - m), 0}; +} - /** Helper function for instructions with the format `sub{s} rd, rn, #imm`. - * T represents the type of operands (e.g. for xn, T = uint64_t). - * Returns single value of type T. */ - template - static std::tuple subShift_imm( - std::vector& operands, - const simeng::arch::aarch64::InstructionMetadata& metadata, - bool calcNZCV) { - const T n = operands[0].get(); - const T m = shiftValue(static_cast(metadata.operands[2].imm), - metadata.operands[2].shift.type, - metadata.operands[2].shift.value); - if (calcNZCV) return AuxFunc::addWithCarry(n, ~m, true); - return {(n - m), 0}; - } +/** Helper function for instructions with the format `sub{s} rd, rn, #imm`. + * T represents the type of operands (e.g. for xn, T = uint64_t). + * Returns single value of type T. 
*/ +template +std::tuple subShift_imm( + std::vector& operands, + const simeng::arch::aarch64::InstructionMetadata& metadata, bool calcNZCV) { + const T n = operands[0].get(); + const T m = shiftValue(static_cast(metadata.operands[2].imm), + metadata.operands[2].shift.type, + metadata.operands[2].shift.value); + if (calcNZCV) return addWithCarry(n, ~m, true); + return {(n - m), 0}; +} + +/** Helper function for instructions with the format `sub{s} rd, rn, rm{, + * shift #amount}`. + * T represents the type of operands (e.g. for xn, T = uint64_t). + * Returns tuple of [resulting value, nzcv]. */ +template +std::tuple subShift_3ops( + std::vector& operands, + const simeng::arch::aarch64::InstructionMetadata& metadata, bool calcNZCV) { + const T n = operands[0].get(); + const T m = shiftValue(operands[1].get(), metadata.operands[2].shift.type, + metadata.operands[2].shift.value); + if (calcNZCV) return addWithCarry(n, ~m, true); + return {(n - m), 0}; +} - /** Helper function for instructions with the format `sub{s} rd, rn, rm{, - * shift #amount}`. - * T represents the type of operands (e.g. for xn, T = uint64_t). - * Returns tuple of [resulting value, nzcv]. 
*/ - template - static std::tuple subShift_3ops( - std::vector& operands, - const simeng::arch::aarch64::InstructionMetadata& metadata, - bool calcNZCV) { - const T n = operands[0].get(); - const T m = - shiftValue(operands[1].get(), metadata.operands[2].shift.type, - metadata.operands[2].shift.value); - if (calcNZCV) return AuxFunc::addWithCarry(n, ~m, true); - return {(n - m), 0}; - } -}; } // namespace aarch64 } // namespace arch } // namespace simeng \ No newline at end of file diff --git a/src/include/simeng/arch/aarch64/helpers/auxiliaryFunctions.hh b/src/include/simeng/arch/aarch64/helpers/auxiliaryFunctions.hh index 036df3f061..2a612cea8f 100644 --- a/src/include/simeng/arch/aarch64/helpers/auxiliaryFunctions.hh +++ b/src/include/simeng/arch/aarch64/helpers/auxiliaryFunctions.hh @@ -11,315 +11,326 @@ namespace simeng { namespace arch { namespace aarch64 { -class AuxFunc { - public: - /** Performs a type agnostic add with carry. */ - template - static std::tuple addWithCarry(T x, T y, bool carryIn) { - T result = x + y + carryIn; - - bool n = (result >> (sizeof(T) * 8 - 1)); - bool z = (result == 0); - - // Trying to calculate whether `result` overflows (`x + y + carryIn > max`). 
- bool c; - if (carryIn && x + 1 == 0) { - // Implies `x` is max; with a carry set, it will definitely overflow - c = true; - } else { - // We know x + carryIn <= max, so can safely subtract and compare against - // y max > x + y + c == max - x > y + c - c = ((std::numeric_limits::max() - x - carryIn) < y); - } - - // Calculate whether signed result overflows - bool v = false; - typedef std::make_signed_t ST; - auto sx = static_cast(x); - auto sy = static_cast(y); - if (sx >= 0) { - // Check if (x + y + c) > MAX - // y > (MAX - x - c) - v = sy > (std::numeric_limits::max() - sx - carryIn); - } else { - // Check if (x + y + c) < MIN - // y < (MIN - x - c) - v = sy < (std::numeric_limits::min() - sx - carryIn); - } - return {result, nzcv(n, z, c, v)}; +/** Returns a correctly formatted nzcv value. */ +inline uint8_t nzcv(bool n, bool z, bool c, bool v) { + return (n << 3) | (z << 2) | (c << 1) | v; +} + +/** Performs a type agnostic unsigned add with carry. */ +template +inline std::enable_if_t && std::is_unsigned_v, + std::tuple> +addWithCarry(T x, T y, bool carryIn) { + T result = x + y + carryIn; + + bool n = (result >> (sizeof(T) * 8 - 1)); + bool z = (result == 0); + + // Trying to calculate whether `result` overflows (`x + y + carryIn > max`). + bool c; + if (carryIn && x + 1 == 0) { + // Implies `x` is max; with a carry set, it will definitely overflow + c = true; + } else { + // We know x + carryIn <= max, so can safely subtract and compare against + // y max > x + y + c == max - x > y + c + c = ((std::numeric_limits::max() - x - carryIn) < y); } - /** Manipulate the bitfield `value` according to the logic of the (U|S)BFM - * Armv9.2-a instructions. 
*/ - template - static std::enable_if_t && std::is_unsigned_v, T> - bitfieldManipulate(T value, T dest, uint8_t rotateBy, uint8_t sourceBits, - bool signExtend = false) { - size_t bits = sizeof(T) * 8; - - T source; - T destMask; - uint8_t highestBit = sourceBits; - if (sourceBits >= rotateBy) { - // Mask of values [rotateBy:source+1] - destMask = (static_cast(-1) << (sourceBits - rotateBy + 1)); - source = value >> rotateBy; - highestBit -= rotateBy; - } else { - T upper = (static_cast(-1) << (bits - rotateBy)); - T lower = (static_cast(-1) >> (rotateBy - sourceBits - 1)); - destMask = upper ^ lower; - source = value << (bits - rotateBy); - highestBit += (bits - rotateBy); - } - - T result = (dest & destMask) | (source & ~destMask); + // Calculate whether signed result overflows + bool v = false; + typedef std::make_signed_t ST; + auto sx = static_cast(x); + auto sy = static_cast(y); + if (sx >= 0) { + // Check if (x + y + c) > MAX + // y > (MAX - x - c) + v = sy > (std::numeric_limits::max() - sx - carryIn); + } else { + // Check if (x + y + c) < MIN + // y < (MIN - x - c) + v = sy < (std::numeric_limits::min() - sx - carryIn); + } - if (!signExtend) { - return result; - } + return {result, nzcv(n, z, c, v)}; +} + +/** Manipulate the bitfield `value` according to the logic of the (U|S)BFM + * Armv9.2-a instructions. 
*/ +template +inline std::enable_if_t && std::is_unsigned_v, T> +bitfieldManipulate(T value, T dest, uint8_t rotateBy, uint8_t sourceBits, + bool signExtend = false) { + size_t bits = sizeof(T) * 8; + + T source; + T destMask; + uint8_t highestBit = sourceBits; + if (sourceBits >= rotateBy) { + // Mask of values [rotateBy:source+1] + destMask = (static_cast(-1) << (sourceBits - rotateBy + 1)); + source = value >> rotateBy; + highestBit -= rotateBy; + } else { + T upper = (static_cast(-1) << (bits - rotateBy)); + T lower = (static_cast(-1) >> (rotateBy - sourceBits - 1)); + destMask = upper ^ lower; + source = value << (bits - rotateBy); + highestBit += (bits - rotateBy); + } - if (highestBit > bits) { - // Nothing to do; implicitly sign-extended - return result; - } + T result = (dest & destMask) | (source & ~destMask); - // Let the compiler do sign-extension for us. - uint8_t shiftAmount = bits - highestBit - 1; - // Shift the bitfield up, and cast to a signed type, so the highest bit is - // now the sign bit - auto shifted = static_cast>(result << shiftAmount); - // Shift the bitfield back to where it was; as it's a signed type, the - // compiler will sign-extend the highest bit - return shifted >> shiftAmount; + if (!signExtend) { + return result; } - /** Function to check if NZCV conditions hold. 
*/ - static bool conditionHolds(uint8_t cond, uint8_t nzcv) { - if (cond == 0b1111) { - return true; - } - - bool inverse = cond & 1; - uint8_t upper = cond >> 1; - bool n = (nzcv >> 3) & 1; - bool z = (nzcv >> 2) & 1; - bool c = (nzcv >> 1) & 1; - bool v = nzcv & 1; - bool result; - switch (upper) { - case 0b000: - result = z; - break; // EQ/NE - case 0b001: - result = c; - break; // CS/CC - case 0b010: - result = n; - break; // MI/PL - case 0b011: - result = v; - break; // VS/VC - case 0b100: - result = (c && !z); - break; // HI/LS - case 0b101: - result = (n == v); - break; // GE/LT - case 0b110: - result = (n == v && !z); - break; // GT/LE - default: // 0b111, AL - result = true; - } + if (highestBit > bits) { + // Nothing to do; implicitly sign-extended + return result; + } - return (inverse ? !result : result); + // Let the compiler do sign-extension for us. + uint8_t shiftAmount = bits - highestBit - 1; + // Shift the bitfield up, and cast to a signed type, so the highest bit is + // now the sign bit + auto shifted = static_cast>(result << shiftAmount); + // Shift the bitfield back to where it was; as it's a signed type, the + // compiler will sign-extend the highest bit + return shifted >> shiftAmount; +} + +/** Function to check if NZCV conditions hold. 
*/ +inline bool conditionHolds(uint8_t cond, uint8_t nzcv) { + bool inverse = cond & 1; + uint8_t upper = cond >> 1; + bool n = (nzcv >> 3) & 1; + bool z = (nzcv >> 2) & 1; + bool c = (nzcv >> 1) & 1; + bool v = nzcv & 1; + bool result; + switch (upper) { + case 0b000: + result = z; + break; // EQ/NE + case 0b001: + result = c; + break; // CS/CC + case 0b010: + result = n; + break; // MI/PL + case 0b011: + result = v; + break; // VS/VC + case 0b100: + result = (c && !z); + break; // HI/LS + case 0b101: + result = (n == v); + break; // GE/LT + case 0b110: + result = (n == v && !z); + break; // GT/LE + default: // 0b111, AL + // AL returns true regardless of inverse value + result = (true ^ inverse); + } + return (result ^ inverse); +} + +/** Extend `value` according to `extendType`, and left-shift the result by + * `shift`. Replicated from Instruction.cc */ +inline uint64_t extendValue(uint64_t value, uint8_t extendType, uint8_t shift) { + if (extendType == ARM64_EXT_INVALID && shift == 0) { + // Special case: an invalid shift type with a shift amount of 0 implies an + // identity operation + return value; } - // Rounding function that rounds a double to nearest integer (64-bit). In - // event of a tie (i.e. 7.5) it will be rounded to the nearest even number. - template - static OUT roundToNearestTiesToEven(IN input) { - IN half = static_cast(0.5); - if (std::fabs(input - std::trunc(input)) == half) { - OUT truncd = static_cast(std::trunc(input)); - // if value is negative, then may need to -1 from truncd, else may need to - // +1. - OUT addand = (truncd > 0) ? 1 : -1; - return ((truncd % 2 == 0) ? 
truncd : (truncd + addand)); - } - // Otherwise round to nearest - return static_cast(std::round(input)); + uint64_t extended; + switch (extendType) { + case ARM64_EXT_UXTB: + extended = static_cast(value); + break; + case ARM64_EXT_UXTH: + extended = static_cast(value); + break; + case ARM64_EXT_UXTW: + extended = static_cast(value); + break; + case ARM64_EXT_UXTX: + extended = value; + break; + case ARM64_EXT_SXTB: + extended = static_cast(value); + break; + case ARM64_EXT_SXTH: + extended = static_cast(value); + break; + case ARM64_EXT_SXTW: + extended = static_cast(value); + break; + case ARM64_EXT_SXTX: + extended = value; + break; + default: + assert(false && "Invalid extension type"); + return 0; } - /** Extend `value` according to `extendType`, and left-shift the result by - * `shift`. Replicated from Instruction.cc */ - static uint64_t extendValue(uint64_t value, uint8_t extendType, - uint8_t shift) { - if (extendType == ARM64_EXT_INVALID && shift == 0) { - // Special case: an invalid shift type with a shift amount of 0 implies an - // identity operation + return extended << shift; +} + +/** Extend `value` using extension/shifting rules defined in `op`. 
*/ +inline uint64_t extendOffset(uint64_t value, const cs_arm64_op& op) { + if (op.ext == 0) { + if (op.shift.value == 0) { return value; } - - uint64_t extended; - switch (extendType) { - case ARM64_EXT_UXTB: - extended = static_cast(value); - break; - case ARM64_EXT_UXTH: - extended = static_cast(value); - break; - case ARM64_EXT_UXTW: - extended = static_cast(value); - break; - case ARM64_EXT_UXTX: - extended = value; - break; - case ARM64_EXT_SXTB: - extended = static_cast(value); - break; - case ARM64_EXT_SXTH: - extended = static_cast(value); - break; - case ARM64_EXT_SXTW: - extended = static_cast(value); - break; - case ARM64_EXT_SXTX: - extended = value; - break; - default: - assert(false && "Invalid extension type"); - return 0; + if (op.shift.type == 1) { + return extendValue(value, ARM64_EXT_UXTX, op.shift.value); } - - return extended << shift; } - - // Rounding function that rounds a float to nearest integer (32-bit). In event - // of a tie (i.e. 7.5) it will be rounded to the nearest even number. - static int32_t floatRoundToNearestTiesToEven(float input) { - if (std::fabs(input - std::trunc(input)) == 0.5f) { - if (static_cast(input - 0.5f) % 2 == 0) { - return static_cast(input - 0.5f); - } else { - return static_cast(input + 0.5f); - } + return extendValue(value, op.ext, op.shift.value); +} + +/** Calculate the corresponding NZCV values from select SVE instructions that + * set the First(N), None(Z), !Last(C) condition flags based on the predicate + * result, and the V flag to 0. */ +inline uint8_t getNZCVfromPred(std::array predResult, + uint64_t VL_bits, int byteCount) { + uint8_t N = (predResult[0] & 1); + uint8_t Z = 1; + // (int)(VL_bits - 1)/512 derives which block of 64-bits within the + // predicate register we're working in. 1ull << (VL_bits / 8) - byteCount) + // derives a 1 in the last position of the current predicate. Both + // dictated by vector length. 
+ uint8_t C = !(predResult[(int)((VL_bits - 1) / 512)] & + 1ull << (((VL_bits / 8) - byteCount) % 64)); + for (int i = 0; i < (int)((VL_bits - 1) / 512) + 1; i++) { + if (predResult[i]) { + Z = 0; + break; } - // Otherwise round to nearest - return static_cast(std::round(input)); } - - /** Calculate the corresponding NZCV values from select SVE instructions that - * set the First(N), None(Z), !Last(C) condition flags based on the predicate - * result, and the V flag to 0. */ - static uint8_t getNZCVfromPred(std::array predResult, - uint64_t VL_bits, int byteCount) { - uint8_t N = (predResult[0] & 1); - uint8_t Z = 1; - // (int)(VL_bits - 1)/512 derives which block of 64-bits within the - // predicate register we're working in. 1ull << (VL_bits / 8) - byteCount) - // derives a 1 in the last position of the current predicate. Both - // dictated by vector length. - uint8_t C = !(predResult[(int)((VL_bits - 1) / 512)] & - 1ull << (((VL_bits / 8) - byteCount) % 64)); - for (int i = 0; i < (int)((VL_bits - 1) / 512) + 1; i++) { - if (predResult[i]) { - Z = 0; - break; - } + return nzcv(N, Z, C, 0); +} + +/** Multiply `a` and `b`, and return the high 64 bits of the result. + * https://stackoverflow.com/a/28904636 */ +inline uint64_t mulhi(uint64_t a, uint64_t b) { + uint64_t a_lo = (uint32_t)a; + uint64_t a_hi = a >> 32; + uint64_t b_lo = (uint32_t)b; + uint64_t b_hi = b >> 32; + + uint64_t a_x_b_hi = a_hi * b_hi; + uint64_t a_x_b_mid = a_hi * b_lo; + uint64_t b_x_a_mid = b_hi * a_lo; + uint64_t a_x_b_lo = a_lo * b_lo; + + uint64_t carry_bit = ((uint64_t)(uint32_t)a_x_b_mid + + (uint64_t)(uint32_t)b_x_a_mid + (a_x_b_lo >> 32)) >> + 32; + + uint64_t multhi = + a_x_b_hi + (a_x_b_mid >> 32) + (b_x_a_mid >> 32) + carry_bit; + + return multhi; +} + +/** Decode the instruction pattern from OperandStr. 
*/ +inline uint16_t sveGetPattern(const std::string operandStr, const uint8_t esize, + const uint16_t VL_) { + const uint16_t elements = VL_ / esize; + const std::vector patterns = { + "pow2", "vl1", "vl2", "vl3", "vl4", "vl5", "vl6", "vl7", "vl8", + "vl16", "vl32", "vl64", "vl128", "vl256", "mul3", "mul4", "all"}; + + // If no pattern present in operandStr then same behaviour as ALL + std::string pattern = "all"; + for (uint8_t i = 0; i < patterns.size(); i++) { + if (operandStr.find(patterns[i]) != std::string::npos) { + pattern = patterns[i]; + // Don't break when pattern found as vl1 will be found in vl128 etc } - return nzcv(N, Z, C, 0); - } - - /** Multiply `a` and `b`, and return the high 64 bits of the result. - * https://stackoverflow.com/a/28904636 */ - static uint64_t mulhi(uint64_t a, uint64_t b) { - uint64_t a_lo = (uint32_t)a; - uint64_t a_hi = a >> 32; - uint64_t b_lo = (uint32_t)b; - uint64_t b_hi = b >> 32; - - uint64_t a_x_b_hi = a_hi * b_hi; - uint64_t a_x_b_mid = a_hi * b_lo; - uint64_t b_x_a_mid = b_hi * a_lo; - uint64_t a_x_b_lo = a_lo * b_lo; - - uint64_t carry_bit = ((uint64_t)(uint32_t)a_x_b_mid + - (uint64_t)(uint32_t)b_x_a_mid + (a_x_b_lo >> 32)) >> - 32; - - uint64_t multhi = - a_x_b_hi + (a_x_b_mid >> 32) + (b_x_a_mid >> 32) + carry_bit; - - return multhi; - } - - /** Returns a correctly formatted nzcv value. */ - static uint8_t nzcv(bool n, bool z, bool c, bool v) { - return (n << 3) | (z << 2) | (c << 1) | v; } - /** Decode the instruction pattern from OperandStr. 
*/ - static uint16_t sveGetPattern(const std::string operandStr, - const uint8_t esize, const uint16_t VL_) { - const uint16_t elements = VL_ / esize; - const std::vector patterns = { - "pow2", "vl1", "vl2", "vl3", "vl4", "vl5", "vl6", "vl7", "vl8", - "vl16", "vl32", "vl64", "vl128", "vl256", "mul3", "mul4", "all"}; - - // If no pattern present in operandStr then same behaviour as ALL - std::string pattern = "all"; - for (uint8_t i = 0; i < patterns.size(); i++) { - if (operandStr.find(patterns[i]) != std::string::npos) { - pattern = patterns[i]; - // Don't break when pattern found as vl1 will be found in vl128 etc - } + if (pattern == "all") + return elements; + else if (pattern == "pow2") { + int n = 1; + while (elements >= std::pow(2, n)) { + n = n + 1; } - - if (pattern == "all") - return elements; - else if (pattern == "pow2") { - int n = 1; - while (elements >= std::pow(2, n)) { - n = n + 1; - } - return std::pow(2, n - 1); - } else if (pattern == "vl1") - return (elements >= 1) ? 1 : 0; - else if (pattern == "vl2") - return (elements >= 2) ? 2 : 0; - else if (pattern == "vl3") - return (elements >= 3) ? 3 : 0; - else if (pattern == "vl4") - return (elements >= 4) ? 4 : 0; - else if (pattern == "vl5") - return (elements >= 5) ? 5 : 0; - else if (pattern == "vl6") - return (elements >= 6) ? 6 : 0; - else if (pattern == "vl7") - return (elements >= 7) ? 7 : 0; - else if (pattern == "vl8") - return (elements >= 8) ? 8 : 0; - else if (pattern == "vl16") - return (elements >= 16) ? 16 : 0; - else if (pattern == "vl32") - return (elements >= 32) ? 32 : 0; - else if (pattern == "vl64") - return (elements >= 64) ? 64 : 0; - else if (pattern == "vl128") - return (elements >= 128) ? 128 : 0; - else if (pattern == "vl256") - return (elements >= 256) ? 
256 : 0; - else if (pattern == "mul4") - return elements - (elements % 4); - else if (pattern == "mul3") - return elements - (elements % 3); - - return 0; + return std::pow(2, n - 1); + } else if (pattern == "vl1") + return (elements >= 1) ? 1 : 0; + else if (pattern == "vl2") + return (elements >= 2) ? 2 : 0; + else if (pattern == "vl3") + return (elements >= 3) ? 3 : 0; + else if (pattern == "vl4") + return (elements >= 4) ? 4 : 0; + else if (pattern == "vl5") + return (elements >= 5) ? 5 : 0; + else if (pattern == "vl6") + return (elements >= 6) ? 6 : 0; + else if (pattern == "vl7") + return (elements >= 7) ? 7 : 0; + else if (pattern == "vl8") + return (elements >= 8) ? 8 : 0; + else if (pattern == "vl16") + return (elements >= 16) ? 16 : 0; + else if (pattern == "vl32") + return (elements >= 32) ? 32 : 0; + else if (pattern == "vl64") + return (elements >= 64) ? 64 : 0; + else if (pattern == "vl128") + return (elements >= 128) ? 128 : 0; + else if (pattern == "vl256") + return (elements >= 256) ? 256 : 0; + else if (pattern == "mul4") + return elements - (elements % 4); + else if (pattern == "mul3") + return elements - (elements % 3); + + return 0; +} + +/** Apply the shift specified by `shiftType` to the unsigned integer `value`, + * shifting by `amount`. */ +template +inline std::enable_if_t && std::is_unsigned_v, T> +shiftValue(T value, uint8_t shiftType, uint8_t amount) { + switch (shiftType) { + case ARM64_SFT_LSL: + return value << amount; + case ARM64_SFT_LSR: + return value >> amount; + case ARM64_SFT_ASR: + return static_cast>(value) >> amount; + case ARM64_SFT_ROR: { + // Assuming sizeof(T) is a power of 2. 
+ const T mask = sizeof(T) * 8 - 1; + assert((amount <= mask) && "Rotate amount exceeds type width"); + amount &= mask; + return (value >> amount) | (value << ((-amount) & mask)); + } + case ARM64_SFT_MSL: { + // pad in with ones instead of zeros + const T mask = (static_cast(1) << static_cast(amount)) - 1; + return (value << amount) | mask; + } + case ARM64_SFT_INVALID: + return value; + default: + assert(false && "Unknown shift type"); + return 0; } -}; +} + } // namespace aarch64 } // namespace arch } // namespace simeng \ No newline at end of file diff --git a/src/include/simeng/arch/aarch64/helpers/bitmanip.hh b/src/include/simeng/arch/aarch64/helpers/bitmanip.hh index 1316e5ab1f..07adc9c73c 100644 --- a/src/include/simeng/arch/aarch64/helpers/bitmanip.hh +++ b/src/include/simeng/arch/aarch64/helpers/bitmanip.hh @@ -5,79 +5,76 @@ namespace simeng { namespace arch { namespace aarch64 { -class bitmanipHelp { - public: - /** Helper function for instructions with the format `bfm rd, rn, #immr, - * #imms`. - * T represents the type of operands (e.g. for xn, T = uint64_t). - * Returns single value of type T. */ - template - static T bfm_2imms(std::vector& operands, - const simeng::arch::aarch64::InstructionMetadata& metadata, - bool signExtend, bool zeroDestReg) { - uint8_t r = metadata.operands[2].imm; - uint8_t s = metadata.operands[3].imm; - T dest, source; - if (!zeroDestReg) { - dest = operands[0].get(); - source = operands[1].get(); - } else { - dest = 0; - source = operands[0].get(); - } - return AuxFunc::bitfieldManipulate(source, dest, r, s, signExtend); - } - /** Helper function for instructions with the format `extr rd, rn, rm, #lsb`. - * T represents the type of operands (e.g. for xn, T = uint64_t). - * Returns single value of type T. 
*/ - template - static T extrLSB_registers( - std::vector& operands, - const simeng::arch::aarch64::InstructionMetadata& metadata) { - T n = operands[0].get(); - T m = operands[1].get(); - int64_t lsb = metadata.operands[3].imm; - if (lsb == 0) return m; - return (m >> lsb) | (n << ((sizeof(T) * 8) - lsb)); +/** Helper function for instructions with the format `bfm rd, rn, #immr, + * #imms`. + * T represents the type of operands (e.g. for xn, T = uint64_t). + * Returns single value of type T. */ +template +T bfm_2imms(std::vector& operands, + const simeng::arch::aarch64::InstructionMetadata& metadata, + bool signExtend, bool zeroDestReg) { + uint8_t r = metadata.operands[2].imm; + uint8_t s = metadata.operands[3].imm; + T dest, source; + if (!zeroDestReg) { + dest = operands[0].get(); + source = operands[1].get(); + } else { + dest = 0; + source = operands[0].get(); } + return bitfieldManipulate(source, dest, r, s, signExtend); +} - /** Helper function for instructions with the format `rbit rd, rn`. - * T represents the type of operands (e.g. for xn, T = uint64_t). - * Returns single value of type uint64_t. */ - template - static uint64_t rbit( - std::vector& operands, - const simeng::arch::aarch64::InstructionMetadata& metadata) { - int width = sizeof(T) * 8; +/** Helper function for instructions with the format `extr rd, rn, rm, #lsb`. + * T represents the type of operands (e.g. for xn, T = uint64_t). + * Returns single value of type T. 
*/ +template +T extrLSB_registers( + std::vector& operands, + const simeng::arch::aarch64::InstructionMetadata& metadata) { + T n = operands[0].get(); + T m = operands[1].get(); + int64_t lsb = metadata.operands[3].imm; + if (lsb == 0) return m; + return (m >> lsb) | (n << ((sizeof(T) * 8) - lsb)); +} - static uint8_t reversedNibble[16] = { - 0b0000, 0b1000, 0b0100, 0b1100, 0b0010, 0b1010, 0b0110, 0b1110, - 0b0001, 0b1001, 0b0101, 0b1101, 0b0011, 0b1011, 0b0111, 0b1111}; +/** Helper function for instructions with the format `rbit rd, rn`. + * T represents the type of operands (e.g. for xn, T = uint64_t). + * Returns single value of type uint64_t. */ +template +uint64_t rbit(std::vector& operands, + const simeng::arch::aarch64::InstructionMetadata& metadata) { + int width = sizeof(T) * 8; - uint64_t n = operands[0].get(); - uint64_t result = 0; - for (int i = 0; i < width; i += 4) { - result <<= 4; - result |= reversedNibble[n & 0b1111]; - n >>= 4; - } - return result; - } + uint8_t reversedNibble[16] = {0b0000, 0b1000, 0b0100, 0b1100, 0b0010, 0b1010, + 0b0110, 0b1110, 0b0001, 0b1001, 0b0101, 0b1101, + 0b0011, 0b1011, 0b0111, 0b1111}; - /** Helper function for instructions with the format `rev rd, rn`. - * T represents the type of operands (e.g. for xn, T = uint64_t). - * Returns array of uint8_t with number of elements = bytes in T. */ - template - static std::array rev( - std::vector& operands) { - auto bytes = operands[0].getAsVector(); - std::array reversed; - // Copy `bytes` backwards onto `reversed` - std::copy(bytes, bytes + sizeof(T), std::rbegin(reversed)); - return reversed; + uint64_t n = operands[0].get(); + uint64_t result = 0; + for (int i = 0; i < width; i += 4) { + result <<= 4; + result |= reversedNibble[n & 0b1111]; + n >>= 4; } -}; + return result; +} + +/** Helper function for instructions with the format `rev rd, rn`. + * T represents the type of operands (e.g. for xn, T = uint64_t). 
+ * Returns array of uint8_t with number of elements = bytes in T. */ +template +std::array rev(std::vector& operands) { + auto bytes = operands[0].getAsVector(); + std::array reversed; + // Copy `bytes` backwards onto `reversed` + std::copy(bytes, bytes + sizeof(T), std::rbegin(reversed)); + return reversed; +} + } // namespace aarch64 } // namespace arch } // namespace simeng \ No newline at end of file diff --git a/src/include/simeng/arch/aarch64/helpers/comparison.hh b/src/include/simeng/arch/aarch64/helpers/comparison.hh index 04ea68ee1a..d2965ab027 100644 --- a/src/include/simeng/arch/aarch64/helpers/comparison.hh +++ b/src/include/simeng/arch/aarch64/helpers/comparison.hh @@ -5,23 +5,20 @@ namespace simeng { namespace arch { namespace aarch64 { -class comparisonHelp { - public: - /** Helper function for instructions with the format `orr rd, rn, rm {shift - * #amount}`. - * T represents the type of operands (e.g. for xn, T = uint64_t). - * Returns single value of type T. */ - template - static T orrShift_3ops( - std::vector& operands, - const simeng::arch::aarch64::InstructionMetadata& metadata) { - const T n = operands[0].get(); - const T m = - shiftValue(operands[1].get(), metadata.operands[2].shift.type, - metadata.operands[2].shift.value); - return (n | m); - } -}; + +/** Helper function for instructions with the format `orr rd, rn, rm {shift + * #amount}`. + * T represents the type of operands (e.g. for xn, T = uint64_t). + * Returns single value of type T. 
*/ +template +T orrShift_3ops(std::vector& operands, + const simeng::arch::aarch64::InstructionMetadata& metadata) { + const T n = operands[0].get(); + const T m = shiftValue(operands[1].get(), metadata.operands[2].shift.type, + metadata.operands[2].shift.value); + return (n | m); +} + } // namespace aarch64 } // namespace arch } // namespace simeng \ No newline at end of file diff --git a/src/include/simeng/arch/aarch64/helpers/conditional.hh b/src/include/simeng/arch/aarch64/helpers/conditional.hh index 4cdfce4061..e45a12f025 100644 --- a/src/include/simeng/arch/aarch64/helpers/conditional.hh +++ b/src/include/simeng/arch/aarch64/helpers/conditional.hh @@ -5,114 +5,109 @@ namespace simeng { namespace arch { namespace aarch64 { -class conditionalHelp { - public: - /** Helper function for instructions with the format `ccmn rn, #imm #nzcv, - * cc`. - * T represents the type of operands (e.g. for xn, T = uint64_t). - * Returns single value of type uint8_t. */ - template - static uint8_t ccmn_imm( - std::vector& operands, - const simeng::arch::aarch64::InstructionMetadata& metadata) { - if (AuxFunc::conditionHolds(metadata.cc, operands[0].get())) { - uint8_t nzcv; - std::tie(std::ignore, nzcv) = AuxFunc::addWithCarry( - operands[1].get(), static_cast(metadata.operands[1].imm), 0); - return nzcv; - } - return static_cast(metadata.operands[2].imm); + +/** Helper function for instructions with the format `ccmn rn, #imm #nzcv, + * cc`. + * T represents the type of operands (e.g. for xn, T = uint64_t). + * Returns single value of type uint8_t. 
*/ +template +uint8_t ccmn_imm(std::vector& operands, + const simeng::arch::aarch64::InstructionMetadata& metadata) { + if (conditionHolds(metadata.cc, operands[0].get())) { + uint8_t nzcv; + std::tie(std::ignore, nzcv) = addWithCarry( + operands[1].get(), static_cast(metadata.operands[1].imm), 0); + return nzcv; } + return static_cast(metadata.operands[2].imm); +} - /** Helper function for instructions with the format `ccmp rn, #imm #nzcv, - * cc`. - * T represents the type of operands (e.g. for xn, T = uint64_t). - * Returns single value of type uint8_t. */ - template - static uint8_t ccmp_imm( - std::vector& operands, - const simeng::arch::aarch64::InstructionMetadata& metadata) { - if (AuxFunc::conditionHolds(metadata.cc, operands[0].get())) { - uint8_t nzcv; - std::tie(std::ignore, nzcv) = AuxFunc::addWithCarry( - operands[1].get(), ~static_cast(metadata.operands[1].imm), 1); - return nzcv; - } - return static_cast(metadata.operands[2].imm); +/** Helper function for instructions with the format `ccmp rn, #imm #nzcv, + * cc`. + * T represents the type of operands (e.g. for xn, T = uint64_t). + * Returns single value of type uint8_t. */ +template +uint8_t ccmp_imm(std::vector& operands, + const simeng::arch::aarch64::InstructionMetadata& metadata) { + if (conditionHolds(metadata.cc, operands[0].get())) { + uint8_t nzcv; + std::tie(std::ignore, nzcv) = addWithCarry( + operands[1].get(), ~static_cast(metadata.operands[1].imm), 1); + return nzcv; } + return static_cast(metadata.operands[2].imm); +} - /** Helper function for instructions with the format `ccmp rn, rm, #nzcv, - * cc`. - * T represents the type of operands (e.g. for xn, T = uint64_t). - * Returns single value of type uint8_t. 
*/ - template - static uint8_t ccmp_reg( - std::vector& operands, - const simeng::arch::aarch64::InstructionMetadata& metadata) { - if (AuxFunc::conditionHolds(metadata.cc, operands[0].get())) { - uint8_t nzcv; - std::tie(std::ignore, nzcv) = - AuxFunc::addWithCarry(operands[1].get(), ~operands[2].get(), 1); - return nzcv; - } - return static_cast(metadata.operands[2].imm); +/** Helper function for instructions with the format `ccmp rn, rm, #nzcv, + * cc`. + * T represents the type of operands (e.g. for xn, T = uint64_t). + * Returns single value of type uint8_t. */ +template +uint8_t ccmp_reg(std::vector& operands, + const simeng::arch::aarch64::InstructionMetadata& metadata) { + if (conditionHolds(metadata.cc, operands[0].get())) { + uint8_t nzcv; + std::tie(std::ignore, nzcv) = + addWithCarry(operands[1].get(), ~operands[2].get(), 1); + return nzcv; } + return static_cast(metadata.operands[2].imm); +} - /** Helper function for instructions with the format `cb rn, #imm`. - * T represents the type of operands (e.g. for xn, T = uint64_t). - * Returns tuple of type [bool branch taken, uint64_t address]. */ - template - static std::tuple condBranch_cmpToZero( - std::vector& operands, - const simeng::arch::aarch64::InstructionMetadata& metadata, - uint64_t instructionAddress, std::function func) { - bool branchTaken; - uint64_t branchAddress; - if (func(operands[0].get())) { - branchTaken = true; - branchAddress = instructionAddress + metadata.operands[1].imm; - } else { - branchTaken = false; - branchAddress = instructionAddress + 4; - } - return {branchTaken, branchAddress}; +/** Helper function for instructions with the format `cb rn, #imm`. + * T represents the type of operands (e.g. for xn, T = uint64_t). + * Returns tuple of type [bool branch taken, uint64_t address]. 
*/ +template +std::tuple condBranch_cmpToZero( + std::vector& operands, + const simeng::arch::aarch64::InstructionMetadata& metadata, + uint64_t instructionAddress, std::function func) { + bool branchTaken; + uint64_t branchAddress; + if (func(operands[0].get())) { + branchTaken = true; + branchAddress = instructionAddress + metadata.operands[1].imm; + } else { + branchTaken = false; + branchAddress = instructionAddress + 4; } + return {branchTaken, branchAddress}; +} - /** Helper function for instructions with the format `cs - * rd, rn, rm, cc`. - * T represents the type of operands (e.g. for xd, T = uint64_t). - * Returns single value of type T. */ - template - static T cs_4ops(std::vector& operands, - const simeng::arch::aarch64::InstructionMetadata& metadata, - std::function func) { - if (AuxFunc::conditionHolds(metadata.cc, operands[0].get())) { - return operands[1].get(); - } - return func(operands[2].get()); +/** Helper function for instructions with the format `cs + * rd, rn, rm, cc`. + * T represents the type of operands (e.g. for xd, T = uint64_t). + * Returns single value of type T. */ +template +T cs_4ops(std::vector& operands, + const simeng::arch::aarch64::InstructionMetadata& metadata, + std::function func) { + if (conditionHolds(metadata.cc, operands[0].get())) { + return operands[1].get(); } + return func(operands[2].get()); +} - /** Helper function for instructions with the format `tb rn, #imm, - * label`. - * T represents the type of operands (e.g. for xn, T = uint64_t). - * Returns tuple of type [bool branch taken, uint64_t address]. */ - template - static std::tuple tbnz_tbz( - std::vector& operands, - const simeng::arch::aarch64::InstructionMetadata& metadata, - uint64_t instructionAddress, bool isNZ) { - bool branchTaken; - uint64_t branchAddress = instructionAddress; - if (operands[0].get() & - (static_cast(1) << metadata.operands[1].imm)) { - branchTaken = isNZ; - } else { - branchTaken = !isNZ; - } - branchAddress += branchTaken ? 
metadata.operands[2].imm : 4; - return {branchTaken, branchAddress}; +/** Helper function for instructions with the format `tb rn, #imm, + * label`. + * T represents the type of operands (e.g. for xn, T = uint64_t). + * Returns tuple of type [bool branch taken, uint64_t address]. */ +template +std::tuple tbnz_tbz( + std::vector& operands, + const simeng::arch::aarch64::InstructionMetadata& metadata, + uint64_t instructionAddress, bool isNZ) { + bool branchTaken; + uint64_t branchAddress = instructionAddress; + if (operands[0].get() & (static_cast(1) << metadata.operands[1].imm)) { + branchTaken = isNZ; + } else { + branchTaken = !isNZ; } -}; + branchAddress += branchTaken ? metadata.operands[2].imm : 4; + return {branchTaken, branchAddress}; +} + } // namespace aarch64 } // namespace arch } // namespace simeng \ No newline at end of file diff --git a/src/include/simeng/arch/aarch64/helpers/divide.hh b/src/include/simeng/arch/aarch64/helpers/divide.hh index f4e226a0b6..b67d08de6f 100644 --- a/src/include/simeng/arch/aarch64/helpers/divide.hh +++ b/src/include/simeng/arch/aarch64/helpers/divide.hh @@ -5,19 +5,18 @@ namespace simeng { namespace arch { namespace aarch64 { -class divideHelp { - public: - /** Helper function for instructions with the format `div rd, rn, rm`. - * T represents the type of operands (e.g. for xd, T = uint64_t). - * Returns single value of type T. */ - template - static T div_3ops(std::vector& operands) { - const T n = operands[0].get(); - const T m = operands[1].get(); - if (m == 0) return 0; - return (n / m); - } -}; + +/** Helper function for instructions with the format `div rd, rn, rm`. + * T represents the type of operands (e.g. for xd, T = uint64_t). + * Returns single value of type T. 
*/ +template +T div_3ops(std::vector& operands) { + const T n = operands[0].get(); + const T m = operands[1].get(); + if (m == 0) return 0; + return (n / m); +} + } // namespace aarch64 } // namespace arch } // namespace simeng \ No newline at end of file diff --git a/src/include/simeng/arch/aarch64/helpers/float.hh b/src/include/simeng/arch/aarch64/helpers/float.hh index 169f4d56a7..59c0912c6a 100644 --- a/src/include/simeng/arch/aarch64/helpers/float.hh +++ b/src/include/simeng/arch/aarch64/helpers/float.hh @@ -1,156 +1,182 @@ #pragma once +#include + #include "auxiliaryFunctions.hh" namespace simeng { namespace arch { namespace aarch64 { -class floatHelp { - public: - /** Helper function for instructions with the format `fabd rd, rn, rm`. - * T represents the type of operands (e.g. for sd T = float). - * Returns correctly formatted RegisterValue. */ - template - static RegisterValue fabd_3ops(std::vector& operands) { - const T n = operands[0].get(); - const T m = operands[1].get(); - return {std::fabs(n - m), 256}; - } - /** Helper function for instructions with the format `fabs rd, rn`. - * T represents the type of operands (e.g. for sd T = float). - * Returns correctly formatted RegisterValue. */ - template - static RegisterValue fabs_2ops(std::vector& operands) { - const T n = operands[0].get(); - return {std::fabs(n), 256}; - } +/** Helper function for instructions with the format `fabd rd, rn, rm`. + * T represents the type of operands (e.g. for sd T = float). + * Returns correctly formatted RegisterValue. */ +template +RegisterValue fabd_3ops(std::vector& operands) { + const T n = operands[0].get(); + const T m = operands[1].get(); + return {std::fabs(n - m), 256}; +} - /** Helper function for instructions with the format `fccmp rn, rm, #nzcv, - * cc`. - * T represents the type of operands (e.g. for sn T = float). - * Returns single value of type uint8_t. 
*/ - template - static uint8_t fccmp( - std::vector& operands, - const simeng::arch::aarch64::InstructionMetadata& metadata) { - if (AuxFunc::conditionHolds(metadata.cc, operands[0].get())) { - T a = operands[1].get(); - T b = operands[2].get(); - if (std::isnan(a) || std::isnan(b)) { - // TODO: Raise exception if NaNs are signalling or fcmpe - return AuxFunc::nzcv(false, false, true, true); - } else if (a == b) { - return AuxFunc::nzcv(false, true, true, false); - } else if (a < b) { - return AuxFunc::nzcv(true, false, false, false); - } else { - return AuxFunc::nzcv(false, false, true, false); - } - } - return static_cast(metadata.operands[2].imm); - } +/** Helper function for instructions with the format `fabs rd, rn`. + * T represents the type of operands (e.g. for sd T = float). + * Returns correctly formatted RegisterValue. */ +template +RegisterValue fabs_2ops(std::vector& operands) { + const T n = operands[0].get(); + return {std::fabs(n), 256}; +} - /** Helper function for instructions with the format `fcmp rn, `. - * T represents the type of operands (e.g. for sn T = float). - * Returns single value of type uint8_t. */ - template - static uint8_t fcmp(std::vector& operands, bool useImm) { - T a = operands[0].get(); - // Dont need to fetch imm as will always be 0.0 - T b = useImm ? 0 : operands[1].get(); +/** Helper function for instructions with the format `fccmp rn, rm, #nzcv, + * cc`. + * T represents the type of operands (e.g. for sn T = float). + * Returns single value of type uint8_t. 
*/ +template +uint8_t fccmp(std::vector& operands, + const simeng::arch::aarch64::InstructionMetadata& metadata) { + if (conditionHolds(metadata.cc, operands[0].get())) { + T a = operands[1].get(); + T b = operands[2].get(); if (std::isnan(a) || std::isnan(b)) { // TODO: Raise exception if NaNs are signalling or fcmpe - return AuxFunc::nzcv(false, false, true, true); + return nzcv(false, false, true, true); } else if (a == b) { - return AuxFunc::nzcv(false, true, true, false); + return nzcv(false, true, true, false); } else if (a < b) { - return AuxFunc::nzcv(true, false, false, false); + return nzcv(true, false, false, false); + } else { + return nzcv(false, false, true, false); } - return AuxFunc::nzcv(false, false, true, false); } + return static_cast(metadata.operands[2].imm); +} - /** Helper function for instructions with the format `fmaxnm rd, rn, rm`. - * T represents the type of operands (e.g. for sd T = float). - * Returns correctly formatted RegisterValue. */ - template - static RegisterValue fmaxnm_3ops(std::vector& operands) { - const T n = operands[0].get(); - const T m = operands[1].get(); - return {std::fmax(n, m), 256}; +/** Helper function for instructions with the format `fcmp rn, `. + * T represents the type of operands (e.g. for sn T = float). + * Returns single value of type uint8_t. */ +template +uint8_t fcmp(std::vector& operands, bool useImm) { + T a = operands[0].get(); + // Dont need to fetch imm as will always be 0.0 + T b = useImm ? 0 : operands[1].get(); + if (std::isnan(a) || std::isnan(b)) { + // TODO: Raise exception if NaNs are signalling or fcmpe + return nzcv(false, false, true, true); + } else if (a == b) { + return nzcv(false, true, true, false); + } else if (a < b) { + return nzcv(true, false, false, false); } + return nzcv(false, false, true, false); +} - /** Helper function for instructions with the format `fmaxnm rd, rn, rm`. - * T represents the type of operands (e.g. for sd T = float). 
- * Returns correctly formatted RegisterValue. */ - template - static RegisterValue fminnm_3ops(std::vector& operands) { - const T n = operands[0].get(); - const T m = operands[1].get(); - return {std::fmin(n, m), 256}; - } +/** Helper function for instructions with the format `fmaxnm rd, rn, rm`. + * T represents the type of operands (e.g. for sd T = float). + * Returns correctly formatted RegisterValue. */ +template +RegisterValue fmaxnm_3ops(std::vector& operands) { + const T n = operands[0].get(); + const T m = operands[1].get(); + return {std::fmax(n, m), 256}; +} - /** Helper function for NEON instructions with the format `fnmsub rd, rn, rm, - * ra`. - * T represents the type of operands (e.g. for sd T = float). - * Returns correctly formatted RegisterValue. */ - template - static RegisterValue fnmsub_4ops(std::vector& operands) { - T n = operands[0].get(); - T m = operands[1].get(); - T a = operands[2].get(); - return {std::fma(n, m, -a), 256}; - } +/** Helper function for instructions with the format `fminnm rd, rn, rm`. + * T represents the type of operands (e.g. for sd T = float). + * Returns correctly formatted RegisterValue. */ +template +RegisterValue fminnm_3ops(std::vector& operands) { + const T n = operands[0].get(); + const T m = operands[1].get(); + return {std::fmin(n, m), 256}; +} - /** Helper function for NEON instructions with the format `fnmadd rd, rn, rm, - * ra`. - * T represents the type of operands (e.g. for sd T = float). - * Returns correctly formatted RegisterValue. */ - template - static RegisterValue fnmadd_4ops(std::vector& operands) { - T n = operands[0].get(); - T m = operands[1].get(); - T a = operands[2].get(); - return {std::fma(-n, m, -a), 256}; - } +/** Helper function for NEON instructions with the format `fnmsub rd, rn, rm, + * ra`. + * T represents the type of operands (e.g. for sd T = float). + * Returns correctly formatted RegisterValue. 
*/ +template +RegisterValue fnmsub_4ops(std::vector& operands) { + T n = operands[0].get(); + T m = operands[1].get(); + T a = operands[2].get(); + return {std::fma(n, m, -a), 256}; +} - /** Helper function for NEON instructions with the format `frintp rd, rn`. - * T represents the type of operands (e.g. for dd T = double). - * Returns correctly formatted RegisterValue. */ - template - static RegisterValue frintpScalar_2ops(std::vector& operands) { - T n = operands[0].get(); - - // Merge always = false due to assumption that FPCR.nep bit = 0 - // (In SimEng the value of this register is not manually set) - T out = 0; - // Input of Infinity or 0 gives output of the same sign - if (n == 0.0 || n == -0.0 || n == INFINITY || n == -INFINITY) - out = n; - else - out = std::ceil(n); - - return {out, 256}; - } +/** Helper function for NEON instructions with the format `fnmadd rd, rn, rm, + * ra`. + * T represents the type of operands (e.g. for sd T = float). + * Returns correctly formatted RegisterValue. */ +template +RegisterValue fnmadd_4ops(std::vector& operands) { + T n = operands[0].get(); + T m = operands[1].get(); + T a = operands[2].get(); + return {std::fma(-n, m, -a), 256}; +} + +/** Helper function for NEON instructions with the format `frintp rd, rn`. + * T represents the type of operands (e.g. for dd T = double). + * Returns correctly formatted RegisterValue. */ +template +RegisterValue frintpScalar_2ops(std::vector& operands) { + T n = operands[0].get(); - /** Helper function for NEON instructions with the format `scvtf rd, - * n`, #fbits. - * D represents the destination vector register type (e.g. for dd, D = - * double). - * N represents the source vector register type (e.g. for wn, N = int32_t). - * Returns correctly formatted RegisterValue. 
*/ - template - static RegisterValue scvtf_FixedPoint( - std::vector& operands, - const simeng::arch::aarch64::InstructionMetadata& metadata) { - N n = operands[0].get(); - const uint8_t fbits = metadata.operands[2].imm; - - D out = static_cast(n) / std::pow(2, fbits); - - return {out, 256}; + // Merge always = false due to assumption that FPCR.nep bit = 0 + // (In SimEng the value of this register is not manually set) + T out = 0; + // Input of Infinity or 0 gives output of the same sign + if (n == 0.0 || n == -0.0 || n == INFINITY || n == -INFINITY) + out = n; + else + out = std::ceil(n); + + return {out, 256}; +} + +/** Helper function for NEON instructions with the format `scvtf rd, + * n`, #fbits. + * D represents the destination vector register type (e.g. for dd, D = + * double). + * N represents the source vector register type (e.g. for wn, N = int32_t). + * Returns correctly formatted RegisterValue. */ +template +RegisterValue scvtf_FixedPoint( + std::vector& operands, + const simeng::arch::aarch64::InstructionMetadata& metadata) { + N n = operands[0].get(); + const uint8_t fbits = metadata.operands[2].imm; + + D out = static_cast(n) / std::pow(2, fbits); + + return {out, 256}; +} + +/** Helper function for NEON instructions with the format fcvtzu rd, rn. + * D represents the destination register type (e.g. for Xd, D = uint64_t). + * N represents the source register type (e.g. for Sd, N = float). + * Returns single value of type D. 
*/ +template +D fcvtzu_integer(std::vector& operands) { + N input = operands[0].get(); + D result = static_cast(0); + + // Check for nan and less than 0 + if (!std::isnan(input) && (input > static_cast(0))) { + if (std::isinf(input)) { + // Account for Infinity + result = std::numeric_limits::max(); + } else if (input > std::numeric_limits::max()) { + // Account for the source value being larger than the + // destination register can support + result = std::numeric_limits::max(); + } else { + result = static_cast(std::trunc(input)); + } } -}; + + return result; +} + } // namespace aarch64 } // namespace arch } // namespace simeng \ No newline at end of file diff --git a/src/include/simeng/arch/aarch64/helpers/load.hh b/src/include/simeng/arch/aarch64/helpers/load.hh deleted file mode 100644 index 08f68d726a..0000000000 --- a/src/include/simeng/arch/aarch64/helpers/load.hh +++ /dev/null @@ -1,14 +0,0 @@ -#pragma once - -#include "auxiliaryFunctions.hh" - -namespace simeng { -namespace arch { -namespace aarch64 { -class loadHelp { - public: - static void tempFunc() { return; } -}; -} // namespace aarch64 -} // namespace arch -} // namespace simeng \ No newline at end of file diff --git a/src/include/simeng/arch/aarch64/helpers/logical.hh b/src/include/simeng/arch/aarch64/helpers/logical.hh index 154bf2e59a..d81c449522 100644 --- a/src/include/simeng/arch/aarch64/helpers/logical.hh +++ b/src/include/simeng/arch/aarch64/helpers/logical.hh @@ -5,105 +5,100 @@ namespace simeng { namespace arch { namespace aarch64 { -class logicalHelp { - public: - /** Helper function for instructions with the format `asrv rd, rn, rm`. - * T represents the type of operands (e.g. for xn, T = int64_t). - * Returns single value of type T. 
*/ - template - static T asrv_3gpr(std::vector& operands) { - const T n = operands[0].get(); - const T m = operands[1].get::type>(); - return n >> (m % (sizeof(T) * 8)); - } - /** Helper function for instructions with the format `bic rd, rn, rm{, shift - * #amount}`. - * T represents the type of operands (e.g. for xn, T = uint64_t). - * Returns tuple of [resulting value, nzcv]. */ - template - static std::tuple bicShift_3ops( - std::vector& operands, - const simeng::arch::aarch64::InstructionMetadata& metadata, - bool calcNZCV) { - const T x = operands[0].get(); - const T y = - ~shiftValue(operands[1].get(), metadata.operands[2].shift.type, - metadata.operands[2].shift.value); - T result = x & y; - bool n = sizeof(T) == 8 ? (static_cast(result) < 0) - : (static_cast(result) < 0); - bool z = (result == 0); - uint8_t nzcv = calcNZCV ? AuxFunc::nzcv(n, z, false, false) : 0; - return {result, nzcv}; - } +/** Helper function for instructions with the format `asrv rd, rn, rm`. + * T represents the type of operands (e.g. for xn, T = int64_t). + * Returns single value of type T. */ +template +T asrv_3gpr(std::vector& operands) { + const T n = operands[0].get(); + const T m = operands[1].get::type>(); + return n >> (m % (sizeof(T) * 8)); +} - /** Helper function for instructions with the format ` rd, rn, - * #imm`. - * T represents the type of operands (e.g. for xn, T = uint64_t). - * Returns tuple of [resulting value, nzcv]. */ - template - static std::tuple logicOp_imm( - std::vector& operands, - const simeng::arch::aarch64::InstructionMetadata& metadata, bool calcNZCV, - std::function func) { - const T n = operands[0].get(); - const T m = static_cast(metadata.operands[2].imm); - T result = func(n, m); - uint8_t nzcv = calcNZCV ? AuxFunc::nzcv(result >> ((sizeof(T) * 8) - 1), - result == 0, false, false) - : 0; - return {result, nzcv}; - } +/** Helper function for instructions with the format `bic rd, rn, rm{, shift + * #amount}`. 
+ * T represents the type of operands (e.g. for xn, T = uint64_t). + * Returns tuple of [resulting value, nzcv]. */ +template +std::tuple bicShift_3ops( + std::vector& operands, + const simeng::arch::aarch64::InstructionMetadata& metadata, bool calcNZCV) { + const T x = operands[0].get(); + const T y = ~shiftValue(operands[1].get(), metadata.operands[2].shift.type, + metadata.operands[2].shift.value); + T result = x & y; + bool n = sizeof(T) == 8 ? (static_cast(result) < 0) + : (static_cast(result) < 0); + bool z = (result == 0); + uint8_t nzcv_ = calcNZCV ? nzcv(n, z, false, false) : 0; + return {result, nzcv_}; +} - /** Helper function for instructions with the format ` rd, rn, - * rm{, shift #amount}`. - * T represents the type of operands (e.g. for xn, T = uint64_t). - * Returns tuple of [resulting value, nzcv]. */ - template - static std::tuple logicOpShift_3ops( - std::vector& operands, - const simeng::arch::aarch64::InstructionMetadata& metadata, bool calcNZCV, - std::function func) { - const T n = operands[0].get(); - const T m = - shiftValue(operands[1].get(), metadata.operands[2].shift.type, - metadata.operands[2].shift.value); - T result = func(n, m); - uint8_t nzcv = calcNZCV ? AuxFunc::nzcv(result >> ((sizeof(T) * 8) - 1), - result == 0, false, false) - : 0; - return {result, nzcv}; - } +/** Helper function for instructions with the format ` rd, rn, + * #imm`. + * T represents the type of operands (e.g. for xn, T = uint64_t). + * Returns tuple of [resulting value, nzcv]. */ +template +std::tuple logicOp_imm( + std::vector& operands, + const simeng::arch::aarch64::InstructionMetadata& metadata, bool calcNZCV, + std::function func) { + const T n = operands[0].get(); + const T m = static_cast(metadata.operands[2].imm); + T result = func(n, m); + uint8_t nzcv_ = calcNZCV ? nzcv(result >> ((sizeof(T) * 8) - 1), result == 0, + false, false) + : 0; + return {result, nzcv_}; +} - /** Helper function for instructions with the format `lsv rd, rn, rm`. 
- * T represents the type of operands (e.g. for xn, T = uint64_t). - * Returns single value of type uint64_t. */ - template - static uint64_t logicalShiftLR_3ops(std::vector& operands, - bool isLSL) { - const T n = operands[0].get(); - const T m = operands[1].get() & ((sizeof(T) * 8) - 1); - uint64_t result = static_cast(isLSL ? n << m : n >> m); - return result; - } +/** Helper function for instructions with the format ` rd, rn, + * rm{, shift #amount}`. + * T represents the type of operands (e.g. for xn, T = uint64_t). + * Returns tuple of [resulting value, nzcv]. */ +template +std::tuple logicOpShift_3ops( + std::vector& operands, + const simeng::arch::aarch64::InstructionMetadata& metadata, bool calcNZCV, + std::function func) { + const T n = operands[0].get(); + const T m = shiftValue(operands[1].get(), metadata.operands[2].shift.type, + metadata.operands[2].shift.value); + T result = func(n, m); + uint8_t nzcv_ = calcNZCV ? nzcv(result >> ((sizeof(T) * 8) - 1), result == 0, + false, false) + : 0; + return {result, nzcv_}; +} - /** Helper function for instructions with the format `rorv rd, rn, rm`. - * T represents the type of operands (e.g. for xn, T = uint64_t). - * Returns single value of type T. */ - template - static T rorv_3ops(std::vector& operands) { - const T n = operands[0].get(); - const T m = operands[1].get(); +/** Helper function for instructions with the format `lsv rd, rn, rm`. + * T represents the type of operands (e.g. for xn, T = uint64_t). + * Returns single value of type uint64_t. */ +template +uint64_t logicalShiftLR_3ops(std::vector& operands, bool isLSL) { + const T n = operands[0].get(); + const T m = operands[1].get() & ((sizeof(T) * 8) - 1); + uint64_t result = static_cast(isLSL ? n << m : n >> m); + return result; +} - const uint16_t data_size = sizeof(T) * 8; - T remainder = m % data_size; +/** Helper function for instructions with the format `rorv rd, rn, rm`. + * T represents the type of operands (e.g. for xn, T = uint64_t). 
+ * Returns single value of type T. */ +template +T rorv_3ops(std::vector& operands) { + const T n = operands[0].get(); + const T m = operands[1].get(); + + const uint16_t data_size = sizeof(T) * 8; + T remainder = m % data_size; + + // Check if any rotation done at all + if (remainder == 0) return n; + return (n >> remainder) + (n << (data_size - remainder)); +} - // Check if any rotation done at all - if (remainder == 0) return n; - return (n >> remainder) + (n << (data_size - remainder)); - } -}; } // namespace aarch64 } // namespace arch } // namespace simeng \ No newline at end of file diff --git a/src/include/simeng/arch/aarch64/helpers/multiply.hh b/src/include/simeng/arch/aarch64/helpers/multiply.hh index d5466bac04..72193ed966 100644 --- a/src/include/simeng/arch/aarch64/helpers/multiply.hh +++ b/src/include/simeng/arch/aarch64/helpers/multiply.hh @@ -5,54 +5,53 @@ namespace simeng { namespace arch { namespace aarch64 { -class multiplyHelp { - public: - /** Helper function for instructions with the format `madd rd, rn, rm, ra`. - * T represents the type of operands (e.g. for xn, T = uint64_t). - * Returns single value of type T. */ - template - static T madd_4ops(std::vector& operands) { - const T n = operands[0].get(); - const T m = operands[1].get(); - const T a = operands[2].get(); - return (a + (n * m)); - } - /** Helper function for instructions with the format `maddl xd, wn, wm, xa`. - * D represents the type of the destination register (either int64_t or - * uint64_t). - * N represents the type of the first source register (either - * int32_t or uint32_t). - * Returns single value of type D. */ - template - static D maddl_4ops(std::vector& operands) { - const D n = static_cast(operands[0].get()); - const D m = static_cast(operands[1].get()); - const D a = operands[2].get(); - return (a + (n * m)); - } +/** Helper function for instructions with the format `madd rd, rn, rm, ra`. + * T represents the type of operands (e.g. for xn, T = uint64_t). 
+ * Returns single value of type T. */ +template +T madd_4ops(std::vector& operands) { + const T n = operands[0].get(); + const T m = operands[1].get(); + const T a = operands[2].get(); + return (a + (n * m)); +} - /** Helper function for instructions with the format `mul rd, rn, rm`. - * T represents the type of operands (e.g. for xn, T = uint64_t). - * Returns single value of type T. */ - template - static T mul_3ops(std::vector& operands) { - const T n = operands[0].get(); - const T m = operands[1].get(); - return (n * m); - } +/** Helper function for instructions with the format `maddl xd, wn, wm, xa`. + * D represents the type of the destination register (either int64_t or + * uint64_t). + * N represents the type of the first source register (either + * int32_t or uint32_t). + * Returns single value of type D. */ +template +D maddl_4ops(std::vector& operands) { + const D n = static_cast(operands[0].get()); + const D m = static_cast(operands[1].get()); + const D a = operands[2].get(); + return (a + (n * m)); +} + +/** Helper function for instructions with the format `mul rd, rn, rm`. + * T represents the type of operands (e.g. for xn, T = uint64_t). + * Returns single value of type T. */ +template +T mul_3ops(std::vector& operands) { + const T n = operands[0].get(); + const T m = operands[1].get(); + return (n * m); +} + +/** Helper function for instructions with the format `msub rd, rn, rm, ra`. + * T represents the type of operands (e.g. for xn, T = uint64_t). + * Returns single value of type T. */ +template +T msub_4ops(std::vector& operands) { + const T n = operands[0].get(); + const T m = operands[1].get(); + const T a = operands[2].get(); + return (a - (n * m)); +} - /** Helper function for instructions with the format `msub rd, rn, rm, ra`. - * T represents the type of operands (e.g. for xn, T = uint64_t). - * Returns single value of type T. 
*/ - template - static T msub_4ops(std::vector& operands) { - const T n = operands[0].get(); - const T m = operands[1].get(); - const T a = operands[2].get(); - return (a - (n * m)); - } -}; } // namespace aarch64 } // namespace arch } // namespace simeng \ No newline at end of file diff --git a/src/include/simeng/arch/aarch64/helpers/neon.hh b/src/include/simeng/arch/aarch64/helpers/neon.hh index c9382ff954..c4978fbc09 100644 --- a/src/include/simeng/arch/aarch64/helpers/neon.hh +++ b/src/include/simeng/arch/aarch64/helpers/neon.hh @@ -5,956 +5,945 @@ namespace simeng { namespace arch { namespace aarch64 { -class neonHelp { - public: - /** Helper function for NEON instructions with the format `add vd, vn, vm`. - * T represents the type of operands (e.g. for vn.2d, T = uint64_t). - * I represents the number of elements in the output array to be updated (e.g. - * for vd.8b I = 8). - * Returns correctly formatted Register Value. */ - template - static RegisterValue vecAdd_3ops(std::vector& operands) { - const T* n = operands[0].getAsVector(); - const T* m = operands[1].getAsVector(); - T out[16 / sizeof(T)] = {0}; - for (int i = 0; i < I; i++) { - out[i] = static_cast(n[i] + m[i]); - } - return {out, 256}; - } - - /** Helper function for NEON instructions with the format `addp vd, vn, vm`. - * T represents the type of operands (e.g. for vn.2d, T = uint64_t). - * I represents the number of elements in the output array to be updated (e.g. - * for vd.8b I = 8). - * Returns correctly formatted Register Value. 
*/ - template - static RegisterValue vecAddp_3ops(std::vector& operands) { - const T* n = operands[0].getAsVector(); - const T* m = operands[1].getAsVector(); - T out[16 / sizeof(T)] = {0}; - uint8_t offset = I / 2; - for (int i = 0; i < I; i++) { - if (i < offset) { - out[i] = static_cast(n[i * 2] + n[(i * 2) + 1]); - } else { - out[i] = - static_cast(m[(i - offset) * 2] + m[((i - offset) * 2) + 1]); - } - } - return {out, 256}; - } - - /** Helper function for NEON instructions with the format `bic vd, vn, vm`. - * T represents the type of operands (e.g. for vn.2d, T = uint64_t). - * I represents the number of elements in the output array to be updated (e.g. - * for vd.8b I = 8). - * Returns correctly formatted Register Value. */ - template - static RegisterValue vecBic_3ops(std::vector& operands) { - const T* n = operands[0].getAsVector(); - const T* m = operands[1].getAsVector(); - T out[16 / sizeof(T)] = {0}; - for (int i = 0; i < I; i++) { - out[i] = n[i] & ~m[i]; - } - return {out, 256}; - } - - /** Helper function for NEON instructions with the format `bic vd, #imm{, lsl - * #shift}`. - * T represents the type of operands (e.g. for vn.2d, T = uint64_t). - * I represents the number of elements in the output array to be updated (e.g. - * for vd.8b I = 8). - * Returns correctly formatted Register Value. */ - template - static RegisterValue vecBicShift_imm( - std::vector& operands, - const simeng::arch::aarch64::InstructionMetadata& metadata) { - const T* d = operands[0].getAsVector(); - T imm = ~shiftValue(static_cast(metadata.operands[1].imm), - metadata.operands[1].shift.type, - metadata.operands[1].shift.value); - T out[16 / sizeof(T)] = {0}; - for (int i = 0; i < I; i++) { - out[i] = d[i] & imm; - } - return {out, 256}; - } - - /** Helper function for NEON instructions with the format `bi vd, vn, - * vm`. - * I represents the number of elements in the output array to be updated - * (e.g. for vd.8b I = 8). - * Returns correctly formatted RegisterValue. 
*/ - template - static RegisterValue vecBitwiseInsert(std::vector& operands, - bool isBif) { - const uint64_t* d = operands[0].getAsVector(); - const uint64_t* n = operands[1].getAsVector(); - const uint64_t* m = operands[2].getAsVector(); - uint64_t out[2] = {0}; - for (int i = 0; i < (I / 8); i++) { - out[i] = isBif ? (d[i] & m[i]) | (n[i] & ~m[i]) - : (d[i] & ~m[i]) | (n[i] & m[i]); - } - return {out, 256}; - } - - /** Helper function for NEON instructions with the format `bsl vd, vn, - * vm`. - * I represents the number of elements in the output array to be updated - * (e.g. for vd.8b I = 8). - * Returns correctly formatted RegisterValue. */ - template - static RegisterValue vecBsl(std::vector& operands) { - const uint64_t* d = operands[0].getAsVector(); - const uint64_t* n = operands[1].getAsVector(); - const uint64_t* m = operands[2].getAsVector(); - uint64_t out[2] = {0}; - for (int i = 0; i < (I / 8); i++) { - out[i] = (d[i] & n[i]) | (~d[i] & m[i]); - } - return {out, 256}; - } - - /** Helper function for instructions with the format `cm vd, vn, `. - * T represents the type of operands (e.g. for vn.2d, T = uint64_t). - * I represents the number of elements in the output array to be updated (e.g. - * for vd.8b I = 8). - * Returns correctly formatted RegisterValue. */ - template - static RegisterValue vecCompare(std::vector& operands, - bool cmpToZero, - std::function func) { - const T* n = operands[0].getAsVector(); - const T* m; - if (!cmpToZero) m = operands[1].getAsVector(); - T out[16 / sizeof(T)] = {0}; - for (int i = 0; i < I; i++) { - out[i] = func(n[i], cmpToZero ? static_cast(0) : m[i]) - ? static_cast(-1) - : 0; - } - return {out, 256}; - } - - /** Helper function for instructions with the format `cnt vd, vn`. - * T represents the type of operands (e.g. for vn.2d, T = uint64_t). - * I represents the number of elements in the output array to be updated (e.g. - * for vd.8b I = 8). - * Returns correctly formatted RegisterValue. 
*/ - template - static RegisterValue vecCountPerByte(std::vector& operands) { - const uint8_t* n = operands[0].getAsVector(); - T out[16 / sizeof(T)] = {0}; - for (int i = 0; i < I; i++) { - for (int j = 0; j < (sizeof(T) * 8); j++) { - // Move queried bit to LSB and extract via an AND operator - out[i] += ((n[i] >> j) & 1); - } - } - return {out, 256}; - } - - /** Helper function for NEON instructions with the format `dup , - * `. - * T represents the type of operands (e.g. for vd.2d, T = uint64_t). - * I represents the number of elements in the output array to be updated (e.g. - * for vd.8b I = 8). - * Returns correctly formatted RegisterValue. */ - template - static RegisterValue vecDup_gprOrIndex( - std::vector& operands, - const simeng::arch::aarch64::InstructionMetadata& metadata, bool useGpr) { - int index = useGpr ? 0 : metadata.operands[1].vector_index; - T element = - useGpr ? operands[0].get() : operands[0].getAsVector()[index]; - T out[16 / sizeof(T)] = {0}; - std::fill_n(std::begin(out), I, element); - return {out, 256}; - } - - /** Helper function for NEON instructions with the format `ext vd, - * vn, vm, #index`. - * T represents the type of operands (e.g. for vn.2d, T = uint64_t). - * I represents the number of elements in the output array to be updated (e.g. - * for vd.8b I = 8). - * Returns correctly formatted RegisterValue. */ - template - static RegisterValue vecExtVecs_index( - std::vector& operands, - const simeng::arch::aarch64::InstructionMetadata& metadata) { - const T* n = operands[0].getAsVector(); - const T* m = operands[1].getAsVector(); - const uint64_t index = static_cast(metadata.operands[3].imm); - T out[16 / sizeof(T)] = {0}; - - for (int i = index; i < I; i++) { - out[i - index] = n[i]; - } - for (int i = 0; i < index; i++) { - out[I - index + i] = m[i]; - } - return {out, 256}; - } - - /** Helper function for NEON instructions with the format `fabd vd.T, vn.T, - * vm.T`. - * T represents the type of operands (e.g. 
for vn.2d, T = double). - * I represents the number of elements in the output array to be updated (e.g. - * for vd.8b I = 8). - * Returns correctly formatted RegisterValue. */ - template - static RegisterValue vecFabd(std::vector& operands) { - const T* n = operands[0].getAsVector(); - const T* m = operands[1].getAsVector(); - T out[16 / sizeof(T)] = {0}; - for (int i = 0; i < I; i++) { - out[i] = std::fabs(n[i] - m[i]); - } - return {out, 256}; - } - - /** Helper function for NEON instructions with the format `fabs vd, vn`. - * T represents the type of operands (e.g. for vn.2d, T = double). - * I represents the number of elements in the output array to be updated (e.g. - * for vd.8b I = 8). - * Returns correctly formatted RegisterValue. */ - template - static RegisterValue vecFabs_2ops(std::vector& operands) { - const T* n = operands[0].getAsVector(); - T out[16 / sizeof(T)] = {0}; - for (int i = 0; i < I; i++) { - out[i] = std::fabs(n[i]); - } - return {out, 256}; - } - - /** Helper function for instructions with the format `fcm vd, vn, `. - * T represents operand type (e.g. vd.2d is double). - * C represents comparison type (e.g. for T=float, comparison type is - * uint32_t). - * I represents the number of elements in the output array to be - * updated (e.g. for vd.8b I = 8). - * Returns correctly formatted RegisterValue. */ - template - static RegisterValue vecFCompare(std::vector& operands, - bool cmpToZero, - std::function func) { - const T* n = operands[0].getAsVector(); - const T* m; - if (!cmpToZero) m = operands[1].getAsVector(); - C out[16 / sizeof(C)] = {0}; - for (int i = 0; i < I; i++) { - out[i] = func(n[i], cmpToZero ? static_cast(0) : m[i]) - ? static_cast(-1) - : 0; - } - return {out, 256}; - } - - /** Helper function for NEON instructions with the format `fcvtl{2} vd, vn`. - * D represents the dest. vector register type (e.g. vd.2d would be double). - * N represents the source vector register type (e.g. vd.4s would be float). 
- * I represents the number of elements in the output array to be updated (e.g. - * for vd.8b I = 8). - * Returns correctly formatted RegisterValue. */ - template - static RegisterValue vecFcvtl(std::vector& operands, - bool isFcvtl2) { - const N* n = operands[0].getAsVector(); - D out[16 / sizeof(D)] = {0}; - for (int i = (isFcvtl2 ? I : 0); i < (isFcvtl2 ? (I * 2) : I); i++) { - out[isFcvtl2 ? (i - I) : i] = static_cast(n[i]); - } - return {out, 256}; - } - - /** Helper function for NEON instructions with the format `fcvtn{2} vd, vn`. - * D represents the dest. vector register type (e.g. vd.2s would be float). - * N represents the source vector register type (e.g. vd.2d would be double). - * I represents the number of elements in the output array to be updated (e.g. - * for vd.8b I = 8). - * Returns correctly formatted RegisterValue. */ - template - static RegisterValue vecFcvtn(std::vector& operands, - bool isFcvtn2) { - const N* n = operands[0].getAsVector(); - D out[16 / sizeof(D)] = {0}; - for (int i = (isFcvtn2 ? (I / 2) : 0); i < I; i++) { - out[i] = static_cast(n[isFcvtn2 ? (i - (I / 2)) : i]); - } - return {out, 256}; - } - - /** Helper function for NEON instructions with the format `fcvtzs vd, vn`. - * D represents the dest. vector register type (e.g. vd.2s would be float). - * N represents the source vector register type (e.g. vd.2d would be double). - * I represents the number of elements in the output array to be updated (e.g. - * for vd.8b I = 8). - * Returns correctly formatted RegisterValue. */ - template - static RegisterValue vecFcvtzs(std::vector& operands) { - const N* n = operands[0].getAsVector(); - D out[16 / sizeof(D)] = {0}; - // TODO: Handle NaNs, denorms, and saturation - for (int i = 0; i < I; i++) { - out[i] = static_cast(std::trunc(n[i])); - } - return {out, 256}; - } - - /** Helper function for NEON instructions with the format `fmla vd, - * vn, vm`. - * T represents the type of operands (e.g. for vn.2d, T = double). 
- * I represents the number of elements in the output array to be updated (e.g. - * for vd.8b I = 8). - * Returns correctly formatted RegisterValue. */ - template - static RegisterValue vecFmla_3vecs(std::vector& operands) { - const T* d = operands[0].getAsVector(); - const T* n = operands[1].getAsVector(); - const T* m = operands[2].getAsVector(); - T out[16 / sizeof(T)] = {0}; - for (int i = 0; i < I; i++) { - out[i] = d[i] + n[i] * m[i]; - } - return {out, 256}; - } - - /** Helper function for NEON instructions with the format `fmla vd, - * vn, vm[index]`. - * T represents the type of operands (e.g. for vn.2d, T = double). - * I represents the number of elements in the output array to be updated (e.g. - * for vd.8b I = 8). - * Returns correctly formatted RegisterValue. */ - template - static RegisterValue vecFmlaIndexed_3vecs( - std::vector& operands, - const simeng::arch::aarch64::InstructionMetadata& metadata) { - const T* d = operands[0].getAsVector(); - const T* n = operands[1].getAsVector(); - int index = metadata.operands[2].vector_index; - const T m = operands[2].getAsVector()[index]; - T out[16 / sizeof(T)] = {0}; - for (int i = 0; i < I; i++) { - out[i] = d[i] + n[i] * m; - } - return {out, 256}; - } - - /** Helper function for NEON instructions with the format `fmls vd, - * vn, vm`. - * T represents the type of operands (e.g. for vn.2d, T = double). - * I represents the number of elements in the output array to be updated (e.g. - * for vd.8b I = 8). - * Returns correctly formatted RegisterValue. */ - template - static RegisterValue vecFmls_3vecs(std::vector& operands) { - const T* d = operands[0].getAsVector(); - const T* n = operands[1].getAsVector(); - const T* m = operands[2].getAsVector(); - T out[16 / sizeof(T)] = {0}; - for (int i = 0; i < I; i++) { - out[i] = d[i] - (n[i] * m[i]); - } - return {out, 256}; - } - - /** Helper function for NEON instructions with the format `fmls vd, - * vn, vm[index]`. - * T represents the type of operands (e.g. 
for vn.2d, T = double). - * I represents the number of elements in the output array to be updated (e.g. - * for vd.8b I = 8). - * Returns correctly formatted RegisterValue. */ - template - static RegisterValue vecFmlsIndexed_3vecs( - std::vector& operands, - const simeng::arch::aarch64::InstructionMetadata& metadata) { - const T* d = operands[0].getAsVector(); - const T* n = operands[1].getAsVector(); - int index = metadata.operands[2].vector_index; - const T m = operands[2].getAsVector()[index]; - T out[16 / sizeof(T)] = {0}; - for (int i = 0; i < I; i++) { - out[i] = d[i] - n[i] * m; - } - return {out, 256}; - } - - /** Helper function for NEON instructions with the format `fmul rd, - * rn, vm[index]`. - * T represents the type of operands (e.g. for vn.2d, T = double). - * I represents the number of elements in the output array to be updated (e.g. - * for vd.8b I = 8). - * Returns correctly formatted RegisterValue. */ - template - static RegisterValue vecFmulIndexed_vecs( - std::vector& operands, - const simeng::arch::aarch64::InstructionMetadata& metadata) { - int index = metadata.operands[2].vector_index; - const T* n = operands[0].getAsVector(); - const T m = operands[1].getAsVector()[index]; - T out[16 / sizeof(T)] = {0}; - for (int i = 0; i < I; i++) { - out[i] = n[i] * m; - } - return {out, 256}; - } - - /** Helper function for NEON instructions with the format `fneg vd, vn`. - * T represents the type of operands (e.g. for vn.2d, T = double). - * I represents the number of elements in the output array to be updated (e.g. - * for vd.8b I = 8). - * Returns correctly formatted RegisterValue. */ - template - static RegisterValue vecFneg_2ops(std::vector& operands) { - const T* n = operands[0].getAsVector(); - T out[16 / sizeof(T)] = {0}; - for (int i = 0; i < I; i++) { - out[i] = -n[i]; - } - return {out, 256}; - } - - /** Helper function for NEON instructions with the format `fsqrt vd, vn`. - * T represents the type of operands (e.g. for vn.2d, T = double). 
- * I represents the number of elements in the output array to be updated (e.g. - * for vd.8b I = 8). - * Returns correctly formatted RegisterValue. */ - template - static RegisterValue vecFsqrt_2ops(std::vector& operands) { - const T* n = operands[0].getAsVector(); - T out[16 / sizeof(T)] = {0}; - for (int i = 0; i < I; i++) { - out[i] = ::sqrt(n[i]); - } - return {out, 256}; - } - - /** Helper function for NEON instructions with the format `frsqrte vd, vn`. - * T represents the type of operands (e.g. for vn.2d, T = double). - * I represents the number of elements in the output array to be updated (e.g. - * for vd.8b I = 8). - * Returns correctly formatted RegisterValue. */ - template - static RegisterValue vecFrsqrte_2ops(std::vector& operands) { - const T* n = operands[0].getAsVector(); - T out[16 / sizeof(T)] = {0}; - for (int i = 0; i < I; i++) { - out[i] = 1.0f / sqrtf(n[i]); - } - return {out, 256}; - } - - /** Helper function for NEON instructions with the format `frsqrts vd, vn, - * vm`. - * T represents the type of operands (e.g. for vn.2d, T = double). - * I represents the number of elements in the output array to be updated (e.g. - * for vd.8b I = 8). - * Returns correctly formatted RegisterValue. */ - template - static RegisterValue vecFrsqrts_3ops(std::vector& operands) { - const T* n = operands[0].getAsVector(); - const T* m = operands[1].getAsVector(); - T out[16 / sizeof(T)] = {0}; - for (int i = 0; i < I; i++) { - out[i] = (3.0f - n[i] * m[i]) / 2.0f; - } - return {out, 256}; - } - - /** Helper function for NEON instructions with the format `ins vd[index], - * vn[index]`. - * T represents the type of operands (e.g. for vn.2d, T = uint64_t). - * I represents the number of elements in the output array to be updated (e.g. - * for vd.8b I = 8). - * Returns correctly formatted RegisterValue. 
*/ - template - static RegisterValue vecIns_2Index( - std::vector& operands, - const simeng::arch::aarch64::InstructionMetadata& metadata) { - const T* d = operands[0].getAsVector(); - const T* n = operands[1].getAsVector(); - - T out[16 / sizeof(T)] = {0}; - for (int i = 0; i < I; i++) { - out[i] = d[i]; - } - out[metadata.operands[0].vector_index] = - n[metadata.operands[1].vector_index]; - return {out, 256}; - } - - /** Helper function for NEON instructions with the format `ins vd[index], - * rn`. - * T represents the vector register type (e.g. vd.16b would be uint8_t). - * R represents the type of the GPR (e.g. wn would be uint32_t). - * I represents the number of elements in the output array to be updated (e.g. - * for vd.8b I = 8). - * Returns correctly formatted RegisterValue. */ - template - static RegisterValue vecInsIndex_gpr( - std::vector& operands, - const simeng::arch::aarch64::InstructionMetadata& metadata) { - const T* d = operands[0].getAsVector(); - const T n = operands[1].get(); - T out[16 / sizeof(T)] = {0}; - - for (int i = 0; i < I; i++) { + +/** Helper function for NEON instructions with the format `add vd, vn, vm`. + * T represents the type of operands (e.g. for vn.2d, T = uint64_t). + * I represents the number of elements in the output array to be updated (e.g. + * for vd.8b I = 8). + * Returns correctly formatted Register Value. */ +template +RegisterValue vecAdd_3ops(std::vector& operands) { + const T* n = operands[0].getAsVector(); + const T* m = operands[1].getAsVector(); + T out[16 / sizeof(T)] = {0}; + for (int i = 0; i < I; i++) { + out[i] = static_cast(n[i] + m[i]); + } + return {out, 256}; +} + +/** Helper function for NEON instructions with the format `addp vd, vn, vm`. + * T represents the type of operands (e.g. for vn.2d, T = uint64_t). + * I represents the number of elements in the output array to be updated (e.g. + * for vd.8b I = 8). + * Returns correctly formatted Register Value. 
*/ +template +RegisterValue vecAddp_3ops(std::vector& operands) { + const T* n = operands[0].getAsVector(); + const T* m = operands[1].getAsVector(); + T out[16 / sizeof(T)] = {0}; + uint8_t offset = I / 2; + for (int i = 0; i < I; i++) { + if (i < offset) { + out[i] = static_cast(n[i * 2] + n[(i * 2) + 1]); + } else { + out[i] = static_cast(m[(i - offset) * 2] + m[((i - offset) * 2) + 1]); + } + } + return {out, 256}; +} + +/** Helper function for NEON instructions with the format `bic vd, vn, vm`. + * T represents the type of operands (e.g. for vn.2d, T = uint64_t). + * I represents the number of elements in the output array to be updated (e.g. + * for vd.8b I = 8). + * Returns correctly formatted Register Value. */ +template +RegisterValue vecBic_3ops(std::vector& operands) { + const T* n = operands[0].getAsVector(); + const T* m = operands[1].getAsVector(); + T out[16 / sizeof(T)] = {0}; + for (int i = 0; i < I; i++) { + out[i] = n[i] & ~m[i]; + } + return {out, 256}; +} + +/** Helper function for NEON instructions with the format `bic vd, #imm{, lsl + * #shift}`. + * T represents the type of operands (e.g. for vn.2d, T = uint64_t). + * I represents the number of elements in the output array to be updated (e.g. + * for vd.8b I = 8). + * Returns correctly formatted Register Value. */ +template +RegisterValue vecBicShift_imm( + std::vector& operands, + const simeng::arch::aarch64::InstructionMetadata& metadata) { + const T* d = operands[0].getAsVector(); + T imm = ~shiftValue(static_cast(metadata.operands[1].imm), + metadata.operands[1].shift.type, + metadata.operands[1].shift.value); + T out[16 / sizeof(T)] = {0}; + for (int i = 0; i < I; i++) { + out[i] = d[i] & imm; + } + return {out, 256}; +} + +/** Helper function for NEON instructions with the format `bi vd, vn, + * vm`. + * I represents the number of elements in the output array to be updated + * (e.g. for vd.8b I = 8). + * Returns correctly formatted RegisterValue. 
*/ +template +RegisterValue vecBitwiseInsert(std::vector& operands, + bool isBif) { + const uint64_t* d = operands[0].getAsVector(); + const uint64_t* n = operands[1].getAsVector(); + const uint64_t* m = operands[2].getAsVector(); + uint64_t out[2] = {0}; + for (int i = 0; i < (I / 8); i++) { + out[i] = + isBif ? (d[i] & m[i]) | (n[i] & ~m[i]) : (d[i] & ~m[i]) | (n[i] & m[i]); + } + return {out, 256}; +} + +/** Helper function for NEON instructions with the format `bsl vd, vn, + * vm`. + * I represents the number of elements in the output array to be updated + * (e.g. for vd.8b I = 8). + * Returns correctly formatted RegisterValue. */ +template +RegisterValue vecBsl(std::vector& operands) { + const uint64_t* d = operands[0].getAsVector(); + const uint64_t* n = operands[1].getAsVector(); + const uint64_t* m = operands[2].getAsVector(); + uint64_t out[2] = {0}; + for (int i = 0; i < (I / 8); i++) { + out[i] = (d[i] & n[i]) | (~d[i] & m[i]); + } + return {out, 256}; +} + +/** Helper function for instructions with the format `cm vd, vn, `. + * T represents the type of operands (e.g. for vn.2d, T = uint64_t). + * I represents the number of elements in the output array to be updated (e.g. + * for vd.8b I = 8). + * Returns correctly formatted RegisterValue. */ +template +RegisterValue vecCompare(std::vector& operands, bool cmpToZero, + std::function func) { + const T* n = operands[0].getAsVector(); + const T* m; + if (!cmpToZero) m = operands[1].getAsVector(); + T out[16 / sizeof(T)] = {0}; + for (int i = 0; i < I; i++) { + out[i] = func(n[i], cmpToZero ? static_cast(0) : m[i]) + ? static_cast(-1) + : 0; + } + return {out, 256}; +} + +/** Helper function for instructions with the format `cnt vd, vn`. + * T represents the type of operands (e.g. for vn.2d, T = uint64_t). + * I represents the number of elements in the output array to be updated (e.g. + * for vd.8b I = 8). + * Returns correctly formatted RegisterValue. 
*/ +template +RegisterValue vecCountPerByte(std::vector& operands) { + const uint8_t* n = operands[0].getAsVector(); + T out[16 / sizeof(T)] = {0}; + for (int i = 0; i < I; i++) { + for (int j = 0; j < (sizeof(T) * 8); j++) { + // Move queried bit to LSB and extract via an AND operator + out[i] += ((n[i] >> j) & 1); + } + } + return {out, 256}; +} + +/** Helper function for NEON instructions with the format `dup , + * `. + * T represents the type of operands (e.g. for vd.2d, T = uint64_t). + * I represents the number of elements in the output array to be updated (e.g. + * for vd.8b I = 8). + * Returns correctly formatted RegisterValue. */ +template +RegisterValue vecDup_gprOrIndex( + std::vector& operands, + const simeng::arch::aarch64::InstructionMetadata& metadata, bool useGpr) { + int index = useGpr ? 0 : metadata.operands[1].vector_index; + T element = + useGpr ? operands[0].get() : operands[0].getAsVector()[index]; + T out[16 / sizeof(T)] = {0}; + std::fill_n(std::begin(out), I, element); + return {out, 256}; +} + +/** Helper function for NEON instructions with the format `ext vd, + * vn, vm, #index`. + * T represents the type of operands (e.g. for vn.2d, T = uint64_t). + * I represents the number of elements in the output array to be updated (e.g. + * for vd.8b I = 8). + * Returns correctly formatted RegisterValue. */ +template +RegisterValue vecExtVecs_index( + std::vector& operands, + const simeng::arch::aarch64::InstructionMetadata& metadata) { + const T* n = operands[0].getAsVector(); + const T* m = operands[1].getAsVector(); + const uint64_t index = static_cast(metadata.operands[3].imm); + T out[16 / sizeof(T)] = {0}; + + for (int i = index; i < I; i++) { + out[i - index] = n[i]; + } + for (int i = 0; i < index; i++) { + out[I - index + i] = m[i]; + } + return {out, 256}; +} + +/** Helper function for NEON instructions with the format `fabd vd.T, vn.T, + * vm.T`. + * T represents the type of operands (e.g. for vn.2d, T = double). 
+ * I represents the number of elements in the output array to be updated (e.g. + * for vd.8b I = 8). + * Returns correctly formatted RegisterValue. */ +template +RegisterValue vecFabd(std::vector& operands) { + const T* n = operands[0].getAsVector(); + const T* m = operands[1].getAsVector(); + T out[16 / sizeof(T)] = {0}; + for (int i = 0; i < I; i++) { + out[i] = std::fabs(n[i] - m[i]); + } + return {out, 256}; +} + +/** Helper function for NEON instructions with the format `fabs vd, vn`. + * T represents the type of operands (e.g. for vn.2d, T = double). + * I represents the number of elements in the output array to be updated (e.g. + * for vd.8b I = 8). + * Returns correctly formatted RegisterValue. */ +template +RegisterValue vecFabs_2ops(std::vector& operands) { + const T* n = operands[0].getAsVector(); + T out[16 / sizeof(T)] = {0}; + for (int i = 0; i < I; i++) { + out[i] = std::fabs(n[i]); + } + return {out, 256}; +} + +/** Helper function for instructions with the format `fcm vd, vn, `. + * T represents operand type (e.g. vd.2d is double). + * C represents comparison type (e.g. for T=float, comparison type is + * uint32_t). + * I represents the number of elements in the output array to be + * updated (e.g. for vd.8b I = 8). + * Returns correctly formatted RegisterValue. */ +template +RegisterValue vecFCompare(std::vector& operands, bool cmpToZero, + std::function func) { + const T* n = operands[0].getAsVector(); + const T* m; + if (!cmpToZero) m = operands[1].getAsVector(); + C out[16 / sizeof(C)] = {0}; + for (int i = 0; i < I; i++) { + out[i] = func(n[i], cmpToZero ? static_cast(0) : m[i]) + ? static_cast(-1) + : 0; + } + return {out, 256}; +} + +/** Helper function for NEON instructions with the format `fcvtl{2} vd, vn`. + * D represents the dest. vector register type (e.g. vd.2d would be double). + * N represents the source vector register type (e.g. vd.4s would be float). + * I represents the number of elements in the output array to be updated (e.g. 
+ * for vd.8b I = 8). + * Returns correctly formatted RegisterValue. */ +template +RegisterValue vecFcvtl(std::vector& operands, bool isFcvtl2) { + const N* n = operands[0].getAsVector(); + D out[16 / sizeof(D)] = {0}; + for (int i = (isFcvtl2 ? I : 0); i < (isFcvtl2 ? (I * 2) : I); i++) { + out[isFcvtl2 ? (i - I) : i] = static_cast(n[i]); + } + return {out, 256}; +} + +/** Helper function for NEON instructions with the format `fcvtn{2} vd, vn`. + * D represents the dest. vector register type (e.g. vd.2s would be float). + * N represents the source vector register type (e.g. vd.2d would be double). + * I represents the number of elements in the output array to be updated (e.g. + * for vd.8b I = 8). + * Returns correctly formatted RegisterValue. */ +template +RegisterValue vecFcvtn(std::vector& operands, bool isFcvtn2) { + const N* n = operands[0].getAsVector(); + D out[16 / sizeof(D)] = {0}; + for (int i = (isFcvtn2 ? (I / 2) : 0); i < I; i++) { + out[i] = static_cast(n[isFcvtn2 ? (i - (I / 2)) : i]); + } + return {out, 256}; +} + +/** Helper function for NEON instructions with the format `fcvtzs vd, vn`. + * D represents the dest. vector register type (e.g. vd.2s would be float). + * N represents the source vector register type (e.g. vd.2d would be double). + * I represents the number of elements in the output array to be updated (e.g. + * for vd.8b I = 8). + * Returns correctly formatted RegisterValue. */ +template +RegisterValue vecFcvtzs(std::vector& operands) { + const N* n = operands[0].getAsVector(); + D out[16 / sizeof(D)] = {0}; + // TODO: Handle NaNs, denorms, and saturation + for (int i = 0; i < I; i++) { + out[i] = static_cast(std::trunc(n[i])); + } + return {out, 256}; +} + +/** Helper function for NEON instructions with the format `fmla vd, + * vn, vm`. + * T represents the type of operands (e.g. for vn.2d, T = double). + * I represents the number of elements in the output array to be updated (e.g. + * for vd.8b I = 8). 
+ * Returns correctly formatted RegisterValue. */ +template +RegisterValue vecFmla_3vecs(std::vector& operands) { + const T* d = operands[0].getAsVector(); + const T* n = operands[1].getAsVector(); + const T* m = operands[2].getAsVector(); + T out[16 / sizeof(T)] = {0}; + for (int i = 0; i < I; i++) { + out[i] = d[i] + n[i] * m[i]; + } + return {out, 256}; +} + +/** Helper function for NEON instructions with the format `fmla vd, + * vn, vm[index]`. + * T represents the type of operands (e.g. for vn.2d, T = double). + * I represents the number of elements in the output array to be updated (e.g. + * for vd.8b I = 8). + * Returns correctly formatted RegisterValue. */ +template +RegisterValue vecFmlaIndexed_3vecs( + std::vector& operands, + const simeng::arch::aarch64::InstructionMetadata& metadata) { + const T* d = operands[0].getAsVector(); + const T* n = operands[1].getAsVector(); + int index = metadata.operands[2].vector_index; + const T m = operands[2].getAsVector()[index]; + T out[16 / sizeof(T)] = {0}; + for (int i = 0; i < I; i++) { + out[i] = d[i] + n[i] * m; + } + return {out, 256}; +} + +/** Helper function for NEON instructions with the format `fmls vd, + * vn, vm`. + * T represents the type of operands (e.g. for vn.2d, T = double). + * I represents the number of elements in the output array to be updated (e.g. + * for vd.8b I = 8). + * Returns correctly formatted RegisterValue. */ +template +RegisterValue vecFmls_3vecs(std::vector& operands) { + const T* d = operands[0].getAsVector(); + const T* n = operands[1].getAsVector(); + const T* m = operands[2].getAsVector(); + T out[16 / sizeof(T)] = {0}; + for (int i = 0; i < I; i++) { + out[i] = d[i] - (n[i] * m[i]); + } + return {out, 256}; +} + +/** Helper function for NEON instructions with the format `fmls vd, + * vn, vm[index]`. + * T represents the type of operands (e.g. for vn.2d, T = double). + * I represents the number of elements in the output array to be updated (e.g. + * for vd.8b I = 8). 
+ * Returns correctly formatted RegisterValue. */ +template +RegisterValue vecFmlsIndexed_3vecs( + std::vector& operands, + const simeng::arch::aarch64::InstructionMetadata& metadata) { + const T* d = operands[0].getAsVector(); + const T* n = operands[1].getAsVector(); + int index = metadata.operands[2].vector_index; + const T m = operands[2].getAsVector()[index]; + T out[16 / sizeof(T)] = {0}; + for (int i = 0; i < I; i++) { + out[i] = d[i] - n[i] * m; + } + return {out, 256}; +} + +/** Helper function for NEON instructions with the format `fmul rd, + * rn, vm[index]`. + * T represents the type of operands (e.g. for vn.2d, T = double). + * I represents the number of elements in the output array to be updated (e.g. + * for vd.8b I = 8). + * Returns correctly formatted RegisterValue. */ +template +RegisterValue vecFmulIndexed_vecs( + std::vector& operands, + const simeng::arch::aarch64::InstructionMetadata& metadata) { + int index = metadata.operands[2].vector_index; + const T* n = operands[0].getAsVector(); + const T m = operands[1].getAsVector()[index]; + T out[16 / sizeof(T)] = {0}; + for (int i = 0; i < I; i++) { + out[i] = n[i] * m; + } + return {out, 256}; +} + +/** Helper function for NEON instructions with the format `fneg vd, vn`. + * T represents the type of operands (e.g. for vn.2d, T = double). + * I represents the number of elements in the output array to be updated (e.g. + * for vd.8b I = 8). + * Returns correctly formatted RegisterValue. */ +template +RegisterValue vecFneg_2ops(std::vector& operands) { + const T* n = operands[0].getAsVector(); + T out[16 / sizeof(T)] = {0}; + for (int i = 0; i < I; i++) { + out[i] = -n[i]; + } + return {out, 256}; +} + +/** Helper function for NEON instructions with the format `fsqrt vd, vn`. + * T represents the type of operands (e.g. for vn.2d, T = double). + * I represents the number of elements in the output array to be updated (e.g. + * for vd.8b I = 8). + * Returns correctly formatted RegisterValue. 
*/ +template +RegisterValue vecFsqrt_2ops(std::vector& operands) { + const T* n = operands[0].getAsVector(); + T out[16 / sizeof(T)] = {0}; + for (int i = 0; i < I; i++) { + out[i] = ::sqrt(n[i]); + } + return {out, 256}; +} + +/** Helper function for NEON instructions with the format `frsqrte vd, vn`. + * T represents the type of operands (e.g. for vn.2d, T = double). + * I represents the number of elements in the output array to be updated (e.g. + * for vd.8b I = 8). + * Returns correctly formatted RegisterValue. */ +template +RegisterValue vecFrsqrte_2ops(std::vector& operands) { + const T* n = operands[0].getAsVector(); + T out[16 / sizeof(T)] = {0}; + for (int i = 0; i < I; i++) { + out[i] = 1.0f / sqrtf(n[i]); + } + return {out, 256}; +} + +/** Helper function for NEON instructions with the format `frsqrts vd, vn, + * vm`. + * T represents the type of operands (e.g. for vn.2d, T = double). + * I represents the number of elements in the output array to be updated (e.g. + * for vd.8b I = 8). + * Returns correctly formatted RegisterValue. */ +template +RegisterValue vecFrsqrts_3ops(std::vector& operands) { + const T* n = operands[0].getAsVector(); + const T* m = operands[1].getAsVector(); + T out[16 / sizeof(T)] = {0}; + for (int i = 0; i < I; i++) { + out[i] = (3.0f - n[i] * m[i]) / 2.0f; + } + return {out, 256}; +} + +/** Helper function for NEON instructions with the format `ins vd[index], + * vn[index]`. + * T represents the type of operands (e.g. for vn.2d, T = uint64_t). + * I represents the number of elements in the output array to be updated (e.g. + * for vd.8b I = 8). + * Returns correctly formatted RegisterValue. 
*/ +template +RegisterValue vecIns_2Index( + std::vector& operands, + const simeng::arch::aarch64::InstructionMetadata& metadata) { + const T* d = operands[0].getAsVector(); + const T* n = operands[1].getAsVector(); + + T out[16 / sizeof(T)] = {0}; + for (int i = 0; i < I; i++) { + out[i] = d[i]; + } + out[metadata.operands[0].vector_index] = n[metadata.operands[1].vector_index]; + return {out, 256}; +} + +/** Helper function for NEON instructions with the format `ins vd[index], + * rn`. + * T represents the vector register type (e.g. vd.16b would be uint8_t). + * R represents the type of the GPR (e.g. wn would be uint32_t). + * I represents the number of elements in the output array to be updated (e.g. + * for vd.8b I = 8). + * Returns correctly formatted RegisterValue. */ +template +RegisterValue vecInsIndex_gpr( + std::vector& operands, + const simeng::arch::aarch64::InstructionMetadata& metadata) { + const T* d = operands[0].getAsVector(); + const T n = operands[1].get(); + T out[16 / sizeof(T)] = {0}; + + for (int i = 0; i < I; i++) { + out[i] = d[i]; + } + out[metadata.operands[0].vector_index] = n; + return {out, 256}; +} + +/** Helper function for NEON instructions with the format ` vd, + * vn`. + * T represents the type of operands (e.g. for vn.2d, T = uint64_t). + * I represents the number of elements in the output array to be updated (e.g. + * for vd.8b I = 8). + * Returns correctly formatted RegisterValue. */ +template +RegisterValue vecLogicOp_2vecs(std::vector& operands, + std::function func) { + const T* n = operands[0].getAsVector(); + T out[16 / sizeof(T)] = {0}; + for (int i = 0; i < I; i++) { + out[i] = func(n[i]); + } + return {out, 256}; +} + +/** Helper function for NEON instructions with the format ` vd, + * vn, vm`. + * T represents the type of operands (e.g. for vn.2d, T = uint64_t). + * I represents the number of elements in the output array to be updated (e.g. + * for vd.8b I = 8). + * Returns correctly formatted RegisterValue. 
*/ +template +RegisterValue vecLogicOp_3vecs(std::vector& operands, + std::function func) { + const T* n = operands[0].getAsVector(); + const T* m = operands[1].getAsVector(); + T out[16 / sizeof(T)] = {0}; + for (int i = 0; i < I; i++) { + out[i] = func(n[i], m[i]); + } + return {out, 256}; +} + +/** Helper function for NEON instructions with the format `umaxp vd, vn, vm`. + * T represents the type of operands (e.g. for vn.2d, T = uint64_t). + * I represents the number of elements in the output array to be updated (e.g. + * for vd.8b I = 8). + * Returns correctly formatted RegisterValue. */ +template +RegisterValue vecUMaxP(std::vector& operands) { + const T* n = operands[0].getAsVector(); + const T* m = operands[1].getAsVector(); + + T out[I]; + for (int i = 0; i < I; i++) { + out[i] = std::max(n[i], m[i]); + } + return {out, 256}; +} + +/** Helper function for NEON instructions with the format `uminp vd, vn, vm`. + * T represents the type of operands (e.g. for vn.2d, T = uint64_t). + * I represents the number of elements in the output array to be updated (e.g. + * for vd.8b I = 8). + * Returns correctly formatted RegisterValue. */ +template +RegisterValue vecUMinP(std::vector& operands) { + const T* n = operands[0].getAsVector(); + const T* m = operands[1].getAsVector(); + + T out[I]; + for (int i = 0; i < I; i++) { + out[i] = std::min(n[i], m[i]); + } + return {out, 256}; +} + +/** Helper function for NEON instructions with the format `maxnmp rd, vn`. + * T represents the type of operands (e.g. for vn.2d, T = uint64_t). + * I represents the number of elements in the output array to be updated (e.g. + * for vd.8b I = 8). + * Returns correctly formatted RegisterValue. */ +template +RegisterValue vecMaxnmp_2ops(std::vector& operands) { + const T* n = operands[0].getAsVector(); + bool isFP = std::is_floating_point::value; + + T out = n[0]; + for (int i = 1; i < I; i++) { + out = isFP ? 
std::fmax(n[i], out) : std::max(n[i], out); + } + return {out, 256}; +} + +/** Helper function for NEON instructions with the format `sminv sd, vn`. + * T represents the type of operands (e.g. for vn.2d, T = uint64_t). + * I represents the number of elements in the output array to be updated (e.g. + * for vd.8b I = 8). + * Returns correctly formatted RegisterValue. */ +template +RegisterValue vecMinv_2ops(std::vector& operands) { + const T* n = operands[0].getAsVector(); + bool isFP = std::is_floating_point::value; + + T out = n[0]; + for (int i = 1; i < I; i++) { + out = isFP ? std::fmin(n[i], out) : std::min(n[i], out); + } + return {out, 256}; +} + +/** Helper function for NEON instructions with the format `movi vd, #imm`. + * T represents the type of operands (e.g. for vn.2d, T = uint64_t). + * I represents the number of elements in the output array to be updated (e.g. + * for vd.8b I = 8). + * Returns correctly formatted RegisterValue. */ +template +RegisterValue vecMovi_imm( + const simeng::arch::aarch64::InstructionMetadata& metadata) { + bool isFP = std::is_floating_point::value; + const T imm = + isFP ? metadata.operands[1].fp : static_cast(metadata.operands[1].imm); + T out[16 / sizeof(T)] = {0}; + std::fill_n(std::begin(out), I, imm); + return {out, 256}; +} + +/** Helper function for NEON instructions with the format `movi vd, #imm{, lsl + * #shift}`. + * T represents the type of operands (e.g. for vn.2d, T = uint64_t). + * I represents the number of elements in the output array to be updated (e.g. + * for vd.8b I = 8). + * Returns correctly formatted RegisterValue. */ +template +RegisterValue vecMoviShift_imm( + const simeng::arch::aarch64::InstructionMetadata& metadata, bool negate) { + const T bits = shiftValue(static_cast(metadata.operands[1].imm), + metadata.operands[1].shift.type, + metadata.operands[1].shift.value); + T out[16 / sizeof(T)] = {0}; + std::fill_n(std::begin(out), I, negate ? 
~bits : bits); + return {out, 256}; +} + +/** Helper function for NEON instructions with the format `scvtf vd, + * vn`. + * D represents the destination vector register type (e.g. for vd.2d, D = + * double). + * N represents the source vector register type (e.g. for vn.2s N = int32_t). + * I represents the number of elements in the output array to be + * updated (e.g. for vd.8b I = 8). + * Returns correctly formated RegisterValue. */ +template +RegisterValue vecScvtf_2vecs(std::vector& operands, + std::function func) { + const N* n = operands[0].getAsVector(); + D out[16 / sizeof(D)] = {0}; + for (int i = 0; i < I; i++) { + out[i] = static_cast(n[i]); + } + return {out, 256}; +} + +/** Helper function for NEON instructions with the format `shl vd, vn, #imm`. + * T represents the type of operands (e.g. for vn.2d, T = uint64_t). + * I represents the number of elements in the output array to be updated (e.g. + * for vd.8b I = 8). + * Returns correctly formatted RegisterValue. */ +template +RegisterValue vecShlShift_vecImm( + std::vector& operands, + const simeng::arch::aarch64::InstructionMetadata& metadata) { + const T* n = operands[0].getAsVector(); + int64_t shift = metadata.operands[2].imm; + T out[16 / sizeof(T)] = {0}; + for (int i = 0; i < I; i++) { + out[i] = static_cast(n[i] << shift); + } + return {out, 256}; +} + +/** Helper function for NEON instructions with the format `shll{2} vd, vn, + * #imm`. + * D represents the destination register type (e.g. for vd.2d D = int64_t). + * N represents the source register type (e.g. for vd.4s D = int32_t). + * I represents the number of elements in the output array to be + * updated (e.g. for vd.8h the I = 8). + * Returns correctly formatted RegisterValue. 
*/ +template +RegisterValue vecShllShift_vecImm( + std::vector& operands, + const simeng::arch::aarch64::InstructionMetadata& metadata, bool isShll2) { + const N* n = operands[0].getAsVector(); + uint64_t shift = metadata.operands[2].imm; + D out[16 / sizeof(D)] = {0}; + int index = isShll2 ? I : 0; + for (int i = 0; i < I; i++) { + out[i] = n[index] << shift; + index++; + } + return {out, 256}; +} + +/** Helper function for NEON instructions with the format `shrn vd, vn, #imm`. + * Ta represents the type of source operand (e.g. for vn.2d, Ta = uint64_t). + * Tb represents the type of destination operand (e.g. for vd.2s, Tb = + * uint32_t). + * I represents the number of elements in the output array to be + * updated (e.g. for vd.8b I = 8). + * Returns correctly formatted RegisterValue. + */ +template +RegisterValue vecShrnShift_imm( + std::vector& operands, + const simeng::arch::aarch64::InstructionMetadata& metadata, + bool shrn2 = false) { + const Ta* n = operands[0].getAsVector(); + + uint64_t shift = metadata.operands[2].imm; + + Tb out[16 / sizeof(Tb)] = {0}; + int index = shrn2 ? I : 0; + for (int i = 0; i < I; i++) { + out[index + i] = static_cast(std::trunc(n[i] >> shift)); + } + return {out, 256}; +} + +/** Helper function for NEON instructions with the format `sshr vd, vn, #imm`. + * T represents the type of operands (e.g. for vn.2d, T = uint64_t). + * I represents the number of elements in the output array to be updated (e.g. + * for vd.8b I = 8). + * Returns correctly formatted RegisterValue. */ +template +RegisterValue vecSshrShift_imm( + std::vector& operands, + const simeng::arch::aarch64::InstructionMetadata& metadata) { + const T* n = operands[1].getAsVector(); + uint64_t shift = metadata.operands[2].imm; + T out[16 / sizeof(T)] = {0}; + for (int i = 0; i < I; i++) { + out[i] = static_cast(std::trunc(n[i] >> shift)); + } + return {out, 256}; +} + +/** Helper function for NEON instructions with the format `addp rd, vn`. 
+ * T represents the type of operands (e.g. for vn.2d, T = uint64_t). + * I represents the number of elements in the output array to be updated (e.g. + * for vd.8b I = 8). + * Returns correctly formatted RegisterValue. */ +template +RegisterValue vecSumElems_2ops(std::vector& operands) { + const T* n = operands[0].getAsVector(); + T out = 0; + for (int i = 0; i < I; i++) { + out += n[i]; + } + return {out, 256}; +} + +/** Helper function for NEON instructions with the format `xtn{2} vd, vn`. + * D represents the type of the dest. register (e.g. for vd.s, D = uint32_t). + * N represents the type of the source register (e.g. for vn.d, N = uint64_t). + * I represents the number of elements in the output vector to be + * updated (i.e. for vd.4s I = 4). + * Returns correctly formatted RegisterValue. */ +template +RegisterValue vecXtn(std::vector& operands, bool isXtn2) { + const D* d; + if (isXtn2) d = operands[0].getAsVector(); + const N* n = operands[isXtn2 ? 1 : 0].getAsVector(); + + D out[16 / sizeof(D)] = {0}; + int index = 0; + + for (int i = 0; i < I; i++) { + if (isXtn2 & (i < (I / 2))) { out[i] = d[i]; - } - out[metadata.operands[0].vector_index] = n; - return {out, 256}; - } - - /** Helper function for NEON instructions with the format ` vd, - * vn`. - * T represents the type of operands (e.g. for vn.2d, T = uint64_t). - * I represents the number of elements in the output array to be updated (e.g. - * for vd.8b I = 8). - * Returns correctly formatted RegisterValue. */ - template - static RegisterValue vecLogicOp_2vecs(std::vector& operands, - std::function func) { - const T* n = operands[0].getAsVector(); - T out[16 / sizeof(T)] = {0}; - for (int i = 0; i < I; i++) { - out[i] = func(n[i]); - } - return {out, 256}; - } - - /** Helper function for NEON instructions with the format ` vd, - * vn, vm`. - * T represents the type of operands (e.g. for vn.2d, T = uint64_t). - * I represents the number of elements in the output array to be updated (e.g. 
- * for vd.8b I = 8). - * Returns correctly formatted RegisterValue. */ - template - static RegisterValue vecLogicOp_3vecs(std::vector& operands, - std::function func) { - const T* n = operands[0].getAsVector(); - const T* m = operands[1].getAsVector(); - T out[16 / sizeof(T)] = {0}; - for (int i = 0; i < I; i++) { - out[i] = func(n[i], m[i]); - } - return {out, 256}; - } - - /** Helper function for NEON instructions with the format `umaxp vd, vn, vm`. - * T represents the type of operands (e.g. for vn.2d, T = uint64_t). - * I represents the number of elements in the output array to be updated (e.g. - * for vd.8b I = 8). - * Returns correctly formatted RegisterValue. */ - template - static RegisterValue vecUMaxP(std::vector& operands) { - const T* n = operands[0].getAsVector(); - const T* m = operands[1].getAsVector(); - - T out[I]; - for (int i = 0; i < I; i++) { - out[i] = std::max(n[i], m[i]); - } - return {out, 256}; - } - - /** Helper function for NEON instructions with the format `uminp vd, vn, vm`. - * T represents the type of operands (e.g. for vn.2d, T = uint64_t). - * I represents the number of elements in the output array to be updated (e.g. - * for vd.8b I = 8). - * Returns correctly formatted RegisterValue. */ - template - static RegisterValue vecUMinP(std::vector& operands) { - const T* n = operands[0].getAsVector(); - const T* m = operands[1].getAsVector(); - - T out[I]; - for (int i = 0; i < I; i++) { - out[i] = std::min(n[i], m[i]); - } - return {out, 256}; - } - - /** Helper function for NEON instructions with the format `maxnmp rd, vn`. - * T represents the type of operands (e.g. for vn.2d, T = uint64_t). - * I represents the number of elements in the output array to be updated (e.g. - * for vd.8b I = 8). - * Returns correctly formatted RegisterValue. 
*/ - template - static RegisterValue vecMaxnmp_2ops(std::vector& operands) { - const T* n = operands[0].getAsVector(); - bool isFP = std::is_floating_point::value; - - T out = n[0]; - for (int i = 1; i < I; i++) { - out = isFP ? std::fmax(n[i], out) : std::max(n[i], out); - } - return {out, 256}; - } - - /** Helper function for NEON instructions with the format `sminv sd, vn`. - * T represents the type of operands (e.g. for vn.2d, T = uint64_t). - * I represents the number of elements in the output array to be updated (e.g. - * for vd.8b I = 8). - * Returns correctly formatted RegisterValue. */ - template - static RegisterValue vecMinv_2ops(std::vector& operands) { - const T* n = operands[0].getAsVector(); - bool isFP = std::is_floating_point::value; - - T out = n[0]; - for (int i = 1; i < I; i++) { - out = isFP ? std::fmin(n[i], out) : std::min(n[i], out); - } - return {out, 256}; - } - - /** Helper function for NEON instructions with the format `movi vd, #imm`. - * T represents the type of operands (e.g. for vn.2d, T = uint64_t). - * I represents the number of elements in the output array to be updated (e.g. - * for vd.8b I = 8). - * Returns correctly formatted RegisterValue. */ - template - static RegisterValue vecMovi_imm( - const simeng::arch::aarch64::InstructionMetadata& metadata) { - bool isFP = std::is_floating_point::value; - const T imm = isFP ? metadata.operands[1].fp - : static_cast(metadata.operands[1].imm); - T out[16 / sizeof(T)] = {0}; - std::fill_n(std::begin(out), I, imm); - return {out, 256}; - } - - /** Helper function for NEON instructions with the format `movi vd, #imm{, lsl - * #shift}`. - * T represents the type of operands (e.g. for vn.2d, T = uint64_t). - * I represents the number of elements in the output array to be updated (e.g. - * for vd.8b I = 8). - * Returns correctly formatted RegisterValue. 
*/ - template - static RegisterValue vecMoviShift_imm( - const simeng::arch::aarch64::InstructionMetadata& metadata, bool negate) { - const T bits = shiftValue(static_cast(metadata.operands[1].imm), - metadata.operands[1].shift.type, - metadata.operands[1].shift.value); - T out[16 / sizeof(T)] = {0}; - std::fill_n(std::begin(out), I, negate ? ~bits : bits); - return {out, 256}; - } - - /** Helper function for NEON instructions with the format `scvtf vd, - * vn`. - * D represents the destination vector register type (e.g. for vd.2d, D = - * double). - * N represents the source vector register type (e.g. for vn.2s N = int32_t). - * I represents the number of elements in the output array to be - * updated (e.g. for vd.8b I = 8). - * Returns correctly formated RegisterValue. */ - template - static RegisterValue vecScvtf_2vecs(std::vector& operands, - std::function func) { - const N* n = operands[0].getAsVector(); - D out[16 / sizeof(D)] = {0}; - for (int i = 0; i < I; i++) { - out[i] = static_cast(n[i]); - } - return {out, 256}; - } - - /** Helper function for NEON instructions with the format `shl vd, vn, #imm`. - * T represents the type of operands (e.g. for vn.2d, T = uint64_t). - * I represents the number of elements in the output array to be updated (e.g. - * for vd.8b I = 8). - * Returns correctly formatted RegisterValue. */ - template - static RegisterValue vecShlShift_vecImm( - std::vector& operands, - const simeng::arch::aarch64::InstructionMetadata& metadata) { - const T* n = operands[0].getAsVector(); - int64_t shift = metadata.operands[2].imm; - T out[16 / sizeof(T)] = {0}; - for (int i = 0; i < I; i++) { - out[i] = static_cast(n[i] << shift); - } - return {out, 256}; - } - - /** Helper function for NEON instructions with the format `shll{2} vd, vn, - * #imm`. - * D represents the destination register type (e.g. for vd.2d D = int64_t). - * N represents the source register type (e.g. for vd.4s D = int32_t). 
- * I represents the number of elements in the output array to be - * updated (e.g. for vd.8h the I = 8). - * Returns correctly formatted RegisterValue. */ - template - static RegisterValue vecShllShift_vecImm( - std::vector& operands, - const simeng::arch::aarch64::InstructionMetadata& metadata, - bool isShll2) { - const N* n = operands[0].getAsVector(); - uint64_t shift = metadata.operands[2].imm; - D out[16 / sizeof(D)] = {0}; - int index = isShll2 ? I : 0; - for (int i = 0; i < I; i++) { - out[i] = n[index] << shift; + } else { + out[i] = static_cast(n[index]); index++; } - return {out, 256}; - } - - /** Helper function for NEON instructions with the format `shrn vd, vn, #imm`. - * Ta represents the type of source operand (e.g. for vn.2d, Ta = uint64_t). - * Tb represents the type of destination operand (e.g. for vd.2s, Tb = - * uint32_t). - * I represents the number of elements in the output array to be - * updated (e.g. for vd.8b I = 8). - * Returns correctly formatted RegisterValue. - */ - template - static RegisterValue vecShrnShift_imm( - std::vector& operands, - const simeng::arch::aarch64::InstructionMetadata& metadata, - bool shrn2 = false) { - const Ta* n = operands[0].getAsVector(); - - uint64_t shift = metadata.operands[2].imm; - - Tb out[16 / sizeof(Tb)] = {0}; - int index = shrn2 ? I : 0; - for (int i = 0; i < I; i++) { - out[index + i] = static_cast(std::trunc(n[i] >> shift)); - } - return {out, 256}; - } - - /** Helper function for NEON instructions with the format `sshr vd, vn, #imm`. - * T represents the type of operands (e.g. for vn.2d, T = uint64_t). - * I represents the number of elements in the output array to be updated (e.g. - * for vd.8b I = 8). - * Returns correctly formatted RegisterValue. 
*/ - template - static RegisterValue vecSshrShift_imm( - std::vector& operands, - const simeng::arch::aarch64::InstructionMetadata& metadata) { - const T* n = operands[1].getAsVector(); - uint64_t shift = metadata.operands[2].imm; - T out[16 / sizeof(T)] = {0}; - for (int i = 0; i < I; i++) { - out[i] = static_cast(std::trunc(n[i] >> shift)); - } - return {out, 256}; - } - - /** Helper function for NEON instructions with the format `addp rd, vn`. - * T represents the type of operands (e.g. for vn.2d, T = uint64_t). - * I represents the number of elements in the output array to be updated (e.g. - * for vd.8b I = 8). - * Returns correctly formatted RegisterValue. */ - template - static RegisterValue vecSumElems_2ops(std::vector& operands) { - const T* n = operands[0].getAsVector(); - T out = 0; - for (int i = 0; i < I; i++) { - out += n[i]; - } - return {out, 256}; - } - - /** Helper function for NEON instructions with the format `xtn{2} vd, vn`. - * D represents the type of the dest. register (e.g. for vd.s, D = uint32_t). - * N represents the type of the source register (e.g. for vn.d, N = uint64_t). - * I represents the number of elements in the output vector to be - * updated (i.e. for vd.4s I = 4). - * Returns correctly formatted RegisterValue. */ - template - static RegisterValue vecXtn(std::vector& operands, - bool isXtn2) { - const D* d; - if (isXtn2) d = operands[0].getAsVector(); - const N* n = operands[isXtn2 ? 1 : 0].getAsVector(); - - D out[16 / sizeof(D)] = {0}; - int index = 0; - - for (int i = 0; i < I; i++) { - if (isXtn2 & (i < (I / 2))) { - out[i] = d[i]; - } else { - out[i] = static_cast(n[index]); - index++; - } - } - return {out, 256}; - } - - /** Helper function for NEON instructions with the format `tbl Vd.Ta, {Vn.16b, - * ... Vn+3.16b}, Vm.Ta`. - * I represents the number of elements in the output vector to be updated - * (i.e. for vd.8b I = 8, vd.16b I = 16). Only 8 or 16 is valid for TBL - * instructions. 
- * Returns correctly formatted RegisterValue. */ - template - static RegisterValue vecTbl( - std::vector& operands, - const simeng::arch::aarch64::InstructionMetadata& metadata) { - // Vd and Vm are only valid in format 8b or 16b - assert(I == 8 || I == 16); - - // Vm contains the indices to fetch from table - const int8_t* Vm = - operands[metadata.operandCount - 2] - .getAsVector(); // final operand is vecMovi_imm - - // All operands except the first and last are the vector registers to - // construct the table from - const uint8_t n_table_regs = metadata.operandCount - 2; - - // Create table from vectors. All table operands must be of 16b format. - int tableSize = 16 * n_table_regs; - uint8_t table[tableSize]; - for (int i = 0; i < n_table_regs; i++) { - const int8_t* currentVector = operands[i].getAsVector(); - for (int j = 0; j < 16; j++) { - table[16 * i + j] = currentVector[j]; - } - } - - int8_t out[16 / sizeof(int8_t)] = {0}; - for (int i = 0; i < I; i++) { - unsigned int index = Vm[i]; - - // If an index is out of range for the table, the result for that lookup - // is 0 - if (index >= tableSize) { - out[i] = 0; - continue; - } - - out[i] = table[index]; - } - return {out, 256}; - } - - /** Helper function for NEON instructions with the format `rev<16,32,64> Vd.T, - * Vn.T`. - * T represents the type of elements to be reversed (e.g. for Vn.d, T = - * uint64_t). - * V represents the variant: 16-bit, 32-bit, 64-bit. (e.g. for 64-bit each - * doubleword of the vector will be reversed). - * I represents the number of elements in the output array to be updated (e.g. - * for vd.8b I = 8). - * It is only valid for T to be a same or smaller width than V. - * Returns correctly formatted RegisterValue. 
*/ - template - static RegisterValue vecRev(std::vector& operands) { - const T* source = operands[0].getAsVector(); - int element_size = (sizeof(T) * 8); - int datasize = I * element_size; - int container_size = V; - int n_containers = datasize / container_size; - int elements_per_container = container_size / element_size; - - int element = 0; - int rev_element; - T out[16 / sizeof(T)] = {0}; - for (int c = 0; c < n_containers; c++) { - rev_element = element + elements_per_container - 1; - for (int e = 0; e < elements_per_container; e++) { - out[rev_element] = source[element]; - element++; - rev_element--; - } - } - return {out, 256}; - } - - /** Helper function for NEON instructions with the format `trn1 Vd.T, Vn.T, - * Vm.T`. - * T represents the type of operands (e.g. for vn.d, T = uint64_t). - * I represents the number of operands (e.g. for vn.8b, I = 8). - * Returns formatted Register Value. */ - template - static RegisterValue vecTrn1(std::vector& operands) { - const T* n = operands[0].getAsVector(); - const T* m = operands[1].getAsVector(); - - T out[16 / sizeof(T)] = {0}; - for (int i = 0; i < I / 2; i++) { - out[2 * i] = n[2 * i]; - out[(2 * i) + 1] = m[2 * i]; - } - - return {out, 256}; } + return {out, 256}; +} + +/** Helper function for NEON instructions with the format `tbl Vd.Ta, {Vn.16b, + * ... Vn+3.16b}, Vm.Ta`. + * I represents the number of elements in the output vector to be updated + * (i.e. for vd.8b I = 8, vd.16b I = 16). Only 8 or 16 is valid for TBL + * instructions. + * Returns correctly formatted RegisterValue. 
*/ +template +RegisterValue vecTbl( + std::vector& operands, + const simeng::arch::aarch64::InstructionMetadata& metadata) { + // Vd and Vm are only valid in format 8b or 16b + assert(I == 8 || I == 16); + + // Vm contains the indices to fetch from table + const int8_t* Vm = + operands[metadata.operandCount - 2] + .getAsVector(); // final operand is vecMovi_imm + + // All operands except the first and last are the vector registers to + // construct the table from + const uint8_t n_table_regs = metadata.operandCount - 2; + + // Create table from vectors. All table operands must be of 16b format. + int tableSize = 16 * n_table_regs; + uint8_t table[tableSize]; + for (int i = 0; i < n_table_regs; i++) { + const int8_t* currentVector = operands[i].getAsVector(); + for (int j = 0; j < 16; j++) { + table[16 * i + j] = currentVector[j]; + } + } + + int8_t out[16 / sizeof(int8_t)] = {0}; + for (int i = 0; i < I; i++) { + unsigned int index = Vm[i]; + + // If an index is out of range for the table, the result for that lookup + // is 0 + if (index >= tableSize) { + out[i] = 0; + continue; + } + + out[i] = table[index]; + } + return {out, 256}; +} + +/** Helper function for NEON instructions with the format `rev<16,32,64> Vd.T, + * Vn.T`. + * T represents the type of elements to be reversed (e.g. for Vn.d, T = + * uint64_t). + * V represents the variant: 16-bit, 32-bit, 64-bit. (e.g. for 64-bit each + * doubleword of the vector will be reversed). + * I represents the number of elements in the output array to be updated (e.g. + * for vd.8b I = 8). + * It is only valid for T to be a same or smaller width than V. + * Returns correctly formatted RegisterValue. 
*/ +template +RegisterValue vecRev(std::vector& operands) { + const T* source = operands[0].getAsVector(); + int element_size = (sizeof(T) * 8); + int datasize = I * element_size; + int container_size = V; + int n_containers = datasize / container_size; + int elements_per_container = container_size / element_size; + + int element = 0; + int rev_element; + T out[16 / sizeof(T)] = {0}; + for (int c = 0; c < n_containers; c++) { + rev_element = element + elements_per_container - 1; + for (int e = 0; e < elements_per_container; e++) { + out[rev_element] = source[element]; + element++; + rev_element--; + } + } + return {out, 256}; +} + +/** Helper function for NEON instructions with the format `trn1 Vd.T, Vn.T, + * Vm.T`. + * T represents the type of operands (e.g. for vn.d, T = uint64_t). + * I represents the number of operands (e.g. for vn.8b, I = 8). + * Returns formatted Register Value. */ +template +RegisterValue vecTrn1(std::vector& operands) { + const T* n = operands[0].getAsVector(); + const T* m = operands[1].getAsVector(); + + T out[16 / sizeof(T)] = {0}; + for (int i = 0; i < I / 2; i++) { + out[2 * i] = n[2 * i]; + out[(2 * i) + 1] = m[2 * i]; + } + + return {out, 256}; +} + +/** Helper function for NEON instructions with the format `trn2 Vd.T, Vn.T, + * Vm.T`. + * T represents the type of operands (e.g. for Vn.d, T = uint64_t). + * I represents the number of operands (e.g. for Vn.8b, I = 8). + * Returns formatted Register Value. */ +template +RegisterValue vecTrn2(std::vector& operands) { + const T* n = operands[0].getAsVector(); + const T* m = operands[1].getAsVector(); + + T out[16 / sizeof(T)] = {0}; + for (int i = 0; i < I / 2; i++) { + out[2 * i] = n[(2 * i) + 1]; + out[(2 * i) + 1] = m[(2 * i) + 1]; + } + + return {out, 256}; +} + +/** Helper function for NEON instructions with the format `uzp<1,2> Vd.T, + * Vn.T, Vm.T`. + * T represents the type of operands (e.g. for Vn.d, T = uint64_t). + * I represents the number of operands (e.g. 
for Vn.8b, I = 8). + * Returns formatted Register Value. */ +template +RegisterValue vecUzp(std::vector& operands, bool isUzp1) { + const T* n = operands[0].getAsVector(); + const T* m = operands[1].getAsVector(); + + T out[16 / sizeof(T)] = {0}; + for (int i = 0; i < I / 2; i++) { + int index = isUzp1 ? (2 * i) : (2 * i) + 1; + out[i] = n[index]; + out[(I / 2) + i] = m[index]; + } + + return {out, 256}; +} + +/** Helper function for NEON instructions with the format `zip<1,2> vd.T, + * vn.T, vm.T`. + * T represents the type of operands (e.g. for vn.d, T = uint64_t). + * I represents the number of elements in the output array to be updated (e.g. + * for vn.8b, I = 8). + * Returns formatted Register Value. */ +template +RegisterValue vecZip(std::vector& operands, bool isZip2) { + const T* n = operands[0].getAsVector(); + const T* m = operands[1].getAsVector(); + + T out[16 / sizeof(T)] = {0}; + int index = isZip2 ? (I / 2) : 0; + for (int i = 0; i < I / 2; i++) { + out[2 * i] = n[index]; + out[(2 * i) + 1] = m[index]; + index++; + } + + return {out, 256}; +} - /** Helper function for NEON instructions with the format `trn2 Vd.T, Vn.T, - * Vm.T`. - * T represents the type of operands (e.g. for Vn.d, T = uint64_t). - * I represents the number of operands (e.g. for Vn.8b, I = 8). - * Returns formatted Register Value. */ - template - static RegisterValue vecTrn2(std::vector& operands) { - const T* n = operands[0].getAsVector(); - const T* m = operands[1].getAsVector(); - - T out[16 / sizeof(T)] = {0}; - for (int i = 0; i < I / 2; i++) { - out[2 * i] = n[(2 * i) + 1]; - out[(2 * i) + 1] = m[(2 * i) + 1]; - } - - return {out, 256}; - } - - /** Helper function for NEON instructions with the format `uzp<1,2> Vd.T, - * Vn.T, Vm.T`. - * T represents the type of operands (e.g. for Vn.d, T = uint64_t). - * I represents the number of operands (e.g. for Vn.8b, I = 8). - * Returns formatted Register Value. 
*/ - template - static RegisterValue vecUzp(std::vector& operands, - bool isUzp1) { - const T* n = operands[0].getAsVector(); - const T* m = operands[1].getAsVector(); - - T out[16 / sizeof(T)] = {0}; - for (int i = 0; i < I / 2; i++) { - int index = isUzp1 ? (2 * i) : (2 * i) + 1; - out[i] = n[index]; - out[(I / 2) + i] = m[index]; - } - - return {out, 256}; - } - - /** Helper function for NEON instructions with the format `zip<1,2> vd.T, - * vn.T, vm.T`. - * T represents the type of operands (e.g. for vn.d, T = uint64_t). - * I represents the number of elements in the output array to be updated (e.g. - * for vn.8b, I = 8). - * Returns formatted Register Value. */ - template - static RegisterValue vecZip(std::vector& operands, - bool isZip2) { - const T* n = operands[0].getAsVector(); - const T* m = operands[1].getAsVector(); - - T out[16 / sizeof(T)] = {0}; - int index = isZip2 ? (I / 2) : 0; - for (int i = 0; i < I / 2; i++) { - out[2 * i] = n[index]; - out[(2 * i) + 1] = m[index]; - index++; - } - - return {out, 256}; - } -}; } // namespace aarch64 } // namespace arch } // namespace simeng \ No newline at end of file diff --git a/src/include/simeng/arch/aarch64/helpers/store.hh b/src/include/simeng/arch/aarch64/helpers/store.hh deleted file mode 100644 index 18d3d6f915..0000000000 --- a/src/include/simeng/arch/aarch64/helpers/store.hh +++ /dev/null @@ -1,14 +0,0 @@ -#pragma once - -#include "auxiliaryFunctions.hh" - -namespace simeng { -namespace arch { -namespace aarch64 { -class storeHelp { - public: - static void tempFunc() { return; } -}; -} // namespace aarch64 -} // namespace arch -} // namespace simeng \ No newline at end of file diff --git a/src/include/simeng/arch/aarch64/helpers/sve.hh b/src/include/simeng/arch/aarch64/helpers/sve.hh index 0e9a800f9f..fa8df45589 100644 --- a/src/include/simeng/arch/aarch64/helpers/sve.hh +++ b/src/include/simeng/arch/aarch64/helpers/sve.hh @@ -8,1713 +8,1714 @@ namespace simeng { namespace arch { namespace aarch64 { 
-class sveHelp { - public: - /** Helper function for SVE instructions with the format `add zd, zn, zm`. - * T represents the type of operands (e.g. for zn.d, T = uint64_t). - * Returns correctly formatted RegisterValue. */ - template - static RegisterValue sveAdd_3ops(std::vector& operands, - const uint16_t VL_bits) { - const T* n = operands[0].getAsVector(); - const T* m = operands[1].getAsVector(); - - const uint16_t partition_num = VL_bits / (sizeof(T) * 8); - T out[256 / sizeof(T)] = {0}; - for (int i = 0; i < partition_num; i++) { - out[i] = n[i] + m[i]; - } - return {out, 256}; - } - - /** Helper function for SVE instructions with the format `add zd, zn, #imm`. - * T represents the type of operands (e.g. for zn.d, T = uint64_t). - * Returns correctly formatted RegisterValue. */ - template - static RegisterValue sveAdd_imm( - std::vector& operands, - const simeng::arch::aarch64::InstructionMetadata& metadata, - const uint16_t VL_bits) { - const T* n = operands[0].getAsVector(); - const T imm = static_cast(metadata.operands[2].imm); - - const uint16_t partition_num = VL_bits / (sizeof(T) * 8); - T out[256 / sizeof(T)] = {0}; - for (int i = 0; i < partition_num; i++) { - out[i] = n[i] + imm; - } - return {out, 256}; - } - - /** Helper function for SVE instructions with the format `add zdn, pg/m, zdn, - * const`. - * T represents the type of operands (e.g. for zn.d, T = uint64_t). - * Returns correctly formatted RegisterValue. */ - template - static RegisterValue sveAddPredicated_const( - std::vector& operands, - const simeng::arch::aarch64::InstructionMetadata& metadata, - const uint16_t VL_bits) { - bool isFP = std::is_floating_point::value; - const uint64_t* p = operands[0].getAsVector(); - const T* d = operands[1].getAsVector(); - const auto con = isFP ? 
metadata.operands[3].fp : metadata.operands[3].imm; - - const uint16_t partition_num = VL_bits / (sizeof(T) * 8); - T out[256 / sizeof(T)] = {0}; - for (int i = 0; i < partition_num; i++) { - uint64_t shifted_active = 1ull << ((i % (64 / sizeof(T))) * sizeof(T)); - if (p[i / (64 / sizeof(T))] & shifted_active) - out[i] = d[i] + con; - else - out[i] = d[i]; - } - return {out, 256}; - } - - /** Helper function for SVE instructions with the format `add zdn, pg/m, zdn, - * zm`. - * T represents the type of operands (e.g. for zdn.d, T = uint64_t). - * Returns correctly formatted RegisterValue. */ - template - static RegisterValue sveAddPredicated_vecs( - std::vector& operands, const uint16_t VL_bits) { - const uint64_t* p = operands[0].getAsVector(); - const T* d = operands[1].getAsVector(); - const T* m = operands[2].getAsVector(); - - const uint16_t partition_num = VL_bits / (sizeof(T) * 8); - T out[256 / sizeof(T)] = {0}; - for (int i = 0; i < partition_num; i++) { - uint64_t shifted_active = 1ull << ((i % (64 / sizeof(T))) * sizeof(T)); - if (p[i / (64 / sizeof(T))] & shifted_active) - out[i] = d[i] + m[i]; - else - out[i] = d[i]; - } - return {out, 256}; - } - - /** Helper function for NEON instructions with the format `addv dd, pg, zn`. - * T represents the type of operands (e.g. for zn.s, T = uint32_t). - * Returns correctly formatted RegisterValue. */ - template - static RegisterValue sveAddvPredicated(std::vector& operands, - const uint16_t VL_bits) { - const uint64_t* p = operands[0].getAsVector(); - const T* n = operands[1].getAsVector(); - - const uint16_t partition_num = VL_bits / (sizeof(T) * 8); - uint64_t out = 0; - - for (int i = 0; i < partition_num; i++) { - uint64_t shifted_active = 1ull << ((i % (64 / sizeof(T))) * sizeof(T)); - if (p[i / (64 / sizeof(T))] & shifted_active) - out += static_cast(n[i]); - } - return {out, 256}; - } - - /** Helper function for SVE instructions with the format `adr zd, [zn, zm{, - * lsl #<1,2,3>}]`. 
- * T represents the type of operands (e.g. for zn.d, T = uint64_t). - * Returns correctly formatted RegisterValue. */ - template - static RegisterValue sveAdr_packedOffsets( - std::vector& operands, - const simeng::arch::aarch64::InstructionMetadata& metadata, - const uint16_t VL_bits) { - const T* n = operands[0].getAsVector(); - const T* m = operands[1].getAsVector(); - - const uint16_t partition_num = VL_bits / (sizeof(T) * 8); - T out[256 / sizeof(T)] = {0}; - const int mbytes = 1 << metadata.operands[2].shift.value; - for (int i = 0; i < partition_num; i++) { - out[i] = n[i] + (m[i] * mbytes); - } - return {out, 256}; - } - - /** Helper function for instructions with the format `cmp pd, pg/z, zn, `. - * T represents the type of operands (e.g. for zn.d, T = uint64_t). - * Returns tuple of type [pred result (array of 4 uint64_t), nzcv]. */ - template - static std::tuple, uint8_t> sveCmpPredicated_toPred( - std::vector& operands, - const simeng::arch::aarch64::InstructionMetadata& metadata, - const uint16_t VL_bits, bool cmpToImm, std::function func) { - const uint64_t* p = operands[0].getAsVector(); - const T* n = operands[1].getAsVector(); - const T* m; - T imm; - if (cmpToImm) - imm = static_cast(metadata.operands[3].imm); - else - m = operands[2].getAsVector(); - - const uint16_t partition_num = VL_bits / (sizeof(T) * 8); - std::array out = {0, 0, 0, 0}; - - for (int i = 0; i < partition_num; i++) { - uint64_t shifted_active = 1ull << ((i % (64 / sizeof(T))) * sizeof(T)); - if (p[i / (64 / sizeof(T))] & shifted_active) { - if (cmpToImm) - out[i / (64 / sizeof(T))] |= (func(n[i], imm)) ? (shifted_active) : 0; - else - out[i / (64 / sizeof(T))] |= - (func(n[i], m[i])) ? (shifted_active) : 0; - } - } - // Byte count = sizeof(T) as destination predicate is predicate of T bytes. - return {out, AuxFunc::getNZCVfromPred(out, VL_bits, sizeof(T))}; - } - - /** Helper function for SVE instructions with the format `cnt rd{, - * pattern{, #imm}}`. 
- * T represents the type of operation (e.g. for CNTD, T = uint64_t). - * Returns single value of type uint64_t. */ - template - static uint64_t sveCnt_gpr( - const simeng::arch::aarch64::InstructionMetadata& metadata, - const uint16_t VL_bits) { - const uint8_t imm = static_cast(metadata.operands[1].imm); - - const uint16_t elems = - AuxFunc::sveGetPattern(metadata.operandStr, (sizeof(T) * 8), VL_bits); - return (uint64_t)(elems * imm); - } - - /** Helper function for SVE instructions with the format `cntp xd, pg, pn`. - * T represents the type of operands (e.g. for pn.d, T = uint64_t). - * Returns single value of type uint64_t. */ - template - static uint64_t sveCntp(std::vector& operands, +/** Helper function for SVE instructions with the format `add zd, zn, zm`. + * T represents the type of operands (e.g. for zn.d, T = uint64_t). + * Returns correctly formatted RegisterValue. */ +template +RegisterValue sveAdd_3ops(std::vector& operands, const uint16_t VL_bits) { - const uint64_t* pg = operands[0].getAsVector(); - const uint64_t* pn = operands[1].getAsVector(); - - const uint16_t partition_num = VL_bits / (sizeof(T) * 8); - uint64_t count = 0; - - for (int i = 0; i < partition_num; i++) { - uint64_t shifted_active = 1ull << ((i % (64 / sizeof(T))) * sizeof(T)); - if (pg[i / (64 / sizeof(T))] & shifted_active) { - count += (pn[i / (64 / sizeof(T))] & shifted_active) ? 1 : 0; - } - } - return count; - } - - /** Helper function for SVE instructions with the format `fcm pd, - * pg/z, zn, zm`. - * T represents the type of operands (e.g. for zn.d, T = uint64_t). - * Returns an array of 4 uint64_t elements. 
*/ - template - static std::array sveComparePredicated_vecsToPred( - std::vector& operands, - const simeng::arch::aarch64::InstructionMetadata& metadata, - const uint16_t VL_bits, bool cmpToZero, std::function func) { - const uint64_t* p = operands[0].getAsVector(); - const T* n = operands[1].getAsVector(); - const T* m; - if (!cmpToZero) m = operands[2].getAsVector(); - - const uint16_t partition_num = VL_bits / (sizeof(T) * 8); - std::array out = {0}; - - for (int i = 0; i < partition_num; i++) { - uint64_t shifted_active = 1ull << ((i % (64 / sizeof(T))) * sizeof(T)); - if (p[i / (64 / sizeof(T))] & shifted_active) { - out[i / (64 / sizeof(T))] |= - (func(n[i], cmpToZero ? 0.0 : m[i])) ? shifted_active : 0; - } - } - return out; + const T* n = operands[0].getAsVector(); + const T* m = operands[1].getAsVector(); + + const uint16_t partition_num = VL_bits / (sizeof(T) * 8); + T out[256 / sizeof(T)] = {0}; + for (int i = 0; i < partition_num; i++) { + out[i] = n[i] + m[i]; + } + return {out, 256}; +} + +/** Helper function for SVE instructions with the format `add zd, zn, #imm`. + * T represents the type of operands (e.g. for zn.d, T = uint64_t). + * Returns correctly formatted RegisterValue. */ +template +RegisterValue sveAdd_imm( + std::vector& operands, + const simeng::arch::aarch64::InstructionMetadata& metadata, + const uint16_t VL_bits) { + const T* n = operands[0].getAsVector(); + const T imm = static_cast(metadata.operands[2].imm); + + const uint16_t partition_num = VL_bits / (sizeof(T) * 8); + T out[256 / sizeof(T)] = {0}; + for (int i = 0; i < partition_num; i++) { + out[i] = n[i] + imm; + } + return {out, 256}; +} + +/** Helper function for SVE instructions with the format `add zdn, pg/m, zdn, + * const`. + * T represents the type of operands (e.g. for zn.d, T = uint64_t). + * Returns correctly formatted RegisterValue. 
*/ +template +RegisterValue sveAddPredicated_const( + std::vector& operands, + const simeng::arch::aarch64::InstructionMetadata& metadata, + const uint16_t VL_bits) { + bool isFP = std::is_floating_point::value; + const uint64_t* p = operands[0].getAsVector(); + const T* d = operands[1].getAsVector(); + const auto con = isFP ? metadata.operands[3].fp : metadata.operands[3].imm; + + const uint16_t partition_num = VL_bits / (sizeof(T) * 8); + T out[256 / sizeof(T)] = {0}; + for (int i = 0; i < partition_num; i++) { + uint64_t shifted_active = 1ull << ((i % (64 / sizeof(T))) * sizeof(T)); + if (p[i / (64 / sizeof(T))] & shifted_active) + out[i] = d[i] + con; + else + out[i] = d[i]; } + return {out, 256}; +} - /** Helper function for SVE instructions with the format `cpy zd, pg/z, #imm{, - * shift}`. - * T represents the type of operands (e.g. for zd.d, T = int64_t). - * Returns correctly formatted RegisterValue. */ - template - static RegisterValue sveCpy_imm( - std::vector& operands, - const simeng::arch::aarch64::InstructionMetadata& metadata, - const uint16_t VL_bits) { - const uint64_t* p = operands[0].getAsVector(); - const int16_t imm = metadata.operands[2].imm; - - const uint16_t partition_num = VL_bits / (sizeof(T) * 8); - T out[256 / sizeof(T)] = {0}; - - for (int i = 0; i < partition_num; i++) { - uint64_t shifted_active = 1ull << ((i % (64 / sizeof(T))) * sizeof(T)); - if (p[i / (64 / sizeof(T))] & shifted_active) { - out[i] = imm; - } else { - out[i] = 0; - } - } - return {out, 256}; - } - - /** Helper function for SVE instructions with the format `dec xdn{, - * pattern{, MUL #imm}}`. - * T represents the type of operation (e.g. for DECD, T = uint64_t). - * Returns single value of type uint64_t. 
*/ - template - static int64_t sveDec_scalar( - std::vector& operands, - const simeng::arch::aarch64::InstructionMetadata& metadata, - const uint16_t VL_bits) { - const int64_t n = operands[0].get(); - const uint8_t imm = static_cast(metadata.operands[1].imm); - const uint16_t elems = - AuxFunc::sveGetPattern(metadata.operandStr, sizeof(T) * 8, VL_bits); - return (n - static_cast(elems * imm)); - } - - /** Helper function for SVE instructions with the format `dup zd, <#imm{, - * shift}, n>`. - * T represents the type of operands (e.g. for zd.d, T = uint64_t). - * Returns correctly formatted RegisterValue. */ - template - static RegisterValue sveDup_immOrScalar( - std::vector& operands, - const simeng::arch::aarch64::InstructionMetadata& metadata, - const uint16_t VL_bits, bool useImm) { - bool isFP = std::is_floating_point::value; - T imm; - if (useImm) - imm = isFP ? metadata.operands[1].fp - : static_cast(metadata.operands[1].imm); +/** Helper function for SVE instructions with the format `add zdn, pg/m, zdn, + * zm`. + * T represents the type of operands (e.g. for zdn.d, T = uint64_t). + * Returns correctly formatted RegisterValue. */ +template +RegisterValue sveAddPredicated_vecs(std::vector& operands, + const uint16_t VL_bits) { + const uint64_t* p = operands[0].getAsVector(); + const T* d = operands[1].getAsVector(); + const T* m = operands[2].getAsVector(); + + const uint16_t partition_num = VL_bits / (sizeof(T) * 8); + T out[256 / sizeof(T)] = {0}; + for (int i = 0; i < partition_num; i++) { + uint64_t shifted_active = 1ull << ((i % (64 / sizeof(T))) * sizeof(T)); + if (p[i / (64 / sizeof(T))] & shifted_active) + out[i] = d[i] + m[i]; else - imm = operands[0].get(); - const uint16_t partition_num = VL_bits / (sizeof(T) * 8); - T out[256 / sizeof(T)] = {0}; - - for (int i = 0; i < partition_num; i++) { - out[i] = imm; - } - return {out, 256}; - } - - /** Helper function for SVE instructions with the format `dup zd, zn[#imm]`. 
- * T represents the type of operands (e.g. for zn.d, T = uint64_t). - * Returns correctly formatted RegisterValue. */ - template - static RegisterValue sveDup_vecIndexed( - std::vector& operands, - const simeng::arch::aarch64::InstructionMetadata& metadata, - const uint16_t VL_bits) { - const uint16_t index = - static_cast(metadata.operands[1].vector_index); - const T* n = operands[0].getAsVector(); - - const uint16_t partition_num = VL_bits / (sizeof(T) * 8); - T out[256 / sizeof(T)] = {0}; - - if (index < (VL_bits / (sizeof(T) * 8))) { - const T element = n[index]; - for (int i = 0; i < partition_num; i++) { - out[i] = element; - } - } - return {out, 256}; + out[i] = d[i]; } + return {out, 256}; +} - /** Helper function for SVE instructions with the format `fabs zd, - * pg/z, zn`. - * T represents the type of operands (e.g. for zn.d, T = uint64_t). - * Returns correctly formatted RegisterValue. */ - template - static RegisterValue sveFabsPredicated(std::vector& operands, - const uint16_t VL_bits) { - const T* d = operands[0].getAsVector(); - const uint64_t* p = operands[1].getAsVector(); - const T* n = operands[2].getAsVector(); - - const uint16_t partition_num = VL_bits / (sizeof(T) * 8); - T out[256 / sizeof(T)] = {0}; - +/** Helper function for NEON instructions with the format `addv dd, pg, zn`. + * T represents the type of operands (e.g. for zn.s, T = uint32_t). + * Returns correctly formatted RegisterValue. 
*/ +template +RegisterValue sveAddvPredicated(std::vector& operands, + const uint16_t VL_bits) { + const uint64_t* p = operands[0].getAsVector(); + const T* n = operands[1].getAsVector(); + + const uint16_t partition_num = VL_bits / (sizeof(T) * 8); + uint64_t out = 0; + + for (int i = 0; i < partition_num; i++) { + uint64_t shifted_active = 1ull << ((i % (64 / sizeof(T))) * sizeof(T)); + if (p[i / (64 / sizeof(T))] & shifted_active) + out += static_cast(n[i]); + } + return {out, 256}; +} + +/** Helper function for SVE instructions with the format `adr zd, [zn, zm{, + * lsl #<1,2,3>}]`. + * T represents the type of operands (e.g. for zn.d, T = uint64_t). + * Returns correctly formatted RegisterValue. */ +template +RegisterValue sveAdr_packedOffsets( + std::vector& operands, + const simeng::arch::aarch64::InstructionMetadata& metadata, + const uint16_t VL_bits) { + const T* n = operands[0].getAsVector(); + const T* m = operands[1].getAsVector(); + + const uint16_t partition_num = VL_bits / (sizeof(T) * 8); + T out[256 / sizeof(T)] = {0}; + + const int mbytes = 1 << metadata.operands[2].shift.value; + for (int i = 0; i < partition_num; i++) { + out[i] = n[i] + (m[i] * mbytes); + } + return {out, 256}; +} + +/** Helper function for instructions with the format `cmp pd, pg/z, zn, `. + * T represents the type of operands (e.g. for zn.d, T = uint64_t). + * Returns tuple of type [pred result (array of 4 uint64_t), nzcv]. 
*/ +template +std::tuple, uint8_t> sveCmpPredicated_toPred( + std::vector& operands, + const simeng::arch::aarch64::InstructionMetadata& metadata, + const uint16_t VL_bits, bool cmpToImm, std::function func) { + const uint64_t* p = operands[0].getAsVector(); + const T* n = operands[1].getAsVector(); + const T* m; + T imm; + if (cmpToImm) + imm = static_cast(metadata.operands[3].imm); + else + m = operands[2].getAsVector(); + + const uint16_t partition_num = VL_bits / (sizeof(T) * 8); + std::array out = {0, 0, 0, 0}; + + for (int i = 0; i < partition_num; i++) { + uint64_t shifted_active = 1ull << ((i % (64 / sizeof(T))) * sizeof(T)); + if (p[i / (64 / sizeof(T))] & shifted_active) { + if (cmpToImm) + out[i / (64 / sizeof(T))] |= (func(n[i], imm)) ? (shifted_active) : 0; + else + out[i / (64 / sizeof(T))] |= (func(n[i], m[i])) ? (shifted_active) : 0; + } + } + // Byte count = sizeof(T) as destination predicate is predicate of T bytes. + return {out, getNZCVfromPred(out, VL_bits, sizeof(T))}; +} + +/** Helper function for SVE instructions with the format `cnt rd{, + * pattern{, #imm}}`. + * T represents the type of operation (e.g. for CNTD, T = uint64_t). + * Returns single value of type uint64_t. */ +template +uint64_t sveCnt_gpr(const simeng::arch::aarch64::InstructionMetadata& metadata, + const uint16_t VL_bits) { + const uint8_t imm = static_cast(metadata.operands[1].imm); + + const uint16_t elems = + sveGetPattern(metadata.operandStr, (sizeof(T) * 8), VL_bits); + return (uint64_t)(elems * imm); +} + +/** Helper function for SVE instructions with the format `cntp xd, pg, pn`. + * T represents the type of operands (e.g. for pn.d, T = uint64_t). + * Returns single value of type uint64_t. 
*/ +template +uint64_t sveCntp(std::vector& operands, const uint16_t VL_bits) { + const uint64_t* pg = operands[0].getAsVector(); + const uint64_t* pn = operands[1].getAsVector(); + + const uint16_t partition_num = VL_bits / (sizeof(T) * 8); + uint64_t count = 0; + + for (int i = 0; i < partition_num; i++) { + uint64_t shifted_active = 1ull << ((i % (64 / sizeof(T))) * sizeof(T)); + if (pg[i / (64 / sizeof(T))] & shifted_active) { + count += (pn[i / (64 / sizeof(T))] & shifted_active) ? 1 : 0; + } + } + return count; +} + +/** Helper function for SVE instructions with the format `fcm pd, + * pg/z, zn, zm`. + * T represents the type of operands (e.g. for zn.d, T = uint64_t). + * Returns an array of 4 uint64_t elements. */ +template +std::array sveComparePredicated_vecsToPred( + std::vector& operands, + const simeng::arch::aarch64::InstructionMetadata& metadata, + const uint16_t VL_bits, bool cmpToZero, std::function func) { + const uint64_t* p = operands[0].getAsVector(); + const T* n = operands[1].getAsVector(); + const T* m; + if (!cmpToZero) m = operands[2].getAsVector(); + + const uint16_t partition_num = VL_bits / (sizeof(T) * 8); + std::array out = {0}; + + for (int i = 0; i < partition_num; i++) { + uint64_t shifted_active = 1ull << ((i % (64 / sizeof(T))) * sizeof(T)); + if (p[i / (64 / sizeof(T))] & shifted_active) { + out[i / (64 / sizeof(T))] |= + (func(n[i], cmpToZero ? 0.0 : m[i])) ? shifted_active : 0; + } + } + return out; +} + +/** Helper function for SVE instructions with the format `cpy zd, pg/z, #imm{, + * shift}`. + * T represents the type of operands (e.g. for zd.d, T = int64_t). + * Returns correctly formatted RegisterValue. 
*/ +template +RegisterValue sveCpy_imm( + std::vector& operands, + const simeng::arch::aarch64::InstructionMetadata& metadata, + const uint16_t VL_bits) { + const uint64_t* p = operands[0].getAsVector(); + const int16_t imm = metadata.operands[2].imm; + + const uint16_t partition_num = VL_bits / (sizeof(T) * 8); + T out[256 / sizeof(T)] = {0}; + + for (int i = 0; i < partition_num; i++) { + uint64_t shifted_active = 1ull << ((i % (64 / sizeof(T))) * sizeof(T)); + if (p[i / (64 / sizeof(T))] & shifted_active) { + out[i] = imm; + } else { + out[i] = 0; + } + } + return {out, 256}; +} + +/** Helper function for SVE instructions with the format `dec xdn{, + * pattern{, MUL #imm}}`. + * T represents the type of operation (e.g. for DECD, T = uint64_t). + * Returns single value of type uint64_t. */ +template +int64_t sveDec_scalar( + std::vector& operands, + const simeng::arch::aarch64::InstructionMetadata& metadata, + const uint16_t VL_bits) { + const int64_t n = operands[0].get(); + const uint8_t imm = static_cast(metadata.operands[1].imm); + const uint16_t elems = + sveGetPattern(metadata.operandStr, sizeof(T) * 8, VL_bits); + return (n - static_cast(elems * imm)); +} + +/** Helper function for SVE instructions with the format `dup zd, <#imm{, + * shift}, n>`. + * T represents the type of operands (e.g. for zd.d, T = uint64_t). + * Returns correctly formatted RegisterValue. */ +template +RegisterValue sveDup_immOrScalar( + std::vector& operands, + const simeng::arch::aarch64::InstructionMetadata& metadata, + const uint16_t VL_bits, bool useImm) { + bool isFP = std::is_floating_point::value; + T imm; + if (useImm) + imm = isFP ? 
metadata.operands[1].fp + : static_cast(metadata.operands[1].imm); + else + imm = operands[0].get(); + const uint16_t partition_num = VL_bits / (sizeof(T) * 8); + T out[256 / sizeof(T)] = {0}; + + for (int i = 0; i < partition_num; i++) { + out[i] = imm; + } + return {out, 256}; +} + +/** Helper function for SVE instructions with the format `dup zd, zn[#imm]`. + * T represents the type of operands (e.g. for zn.d, T = uint64_t). + * Returns correctly formatted RegisterValue. */ +template +RegisterValue sveDup_vecIndexed( + std::vector& operands, + const simeng::arch::aarch64::InstructionMetadata& metadata, + const uint16_t VL_bits) { + const uint16_t index = + static_cast(metadata.operands[1].vector_index); + const T* n = operands[0].getAsVector(); + + const uint16_t partition_num = VL_bits / (sizeof(T) * 8); + T out[256 / sizeof(T)] = {0}; + + if (index < (VL_bits / (sizeof(T) * 8))) { + const T element = n[index]; for (int i = 0; i < partition_num; i++) { - uint64_t shifted_active = 1ull << ((i % (64 / sizeof(T))) * sizeof(T)); - if (p[i / (64 / sizeof(T))] & shifted_active) { - out[i] = ::fabs(n[i]); - } else { - out[i] = d[i]; - } + out[i] = element; } - return {out, 256}; } + return {out, 256}; +} - /** Helper function for SVE instructions with the format `fadda rd, - * pg, rn, zm`. - * T represents the type of operands (e.g. for zm.d, T = uint64_t). - * Returns correctly formatted RegisterValue. 
*/ - template - static RegisterValue sveFaddaPredicated(std::vector& operands, - const uint16_t VL_bits) { - const uint64_t* p = operands[0].getAsVector(); - const T n = operands[1].get(); - const T* m = operands[2].getAsVector(); - - const uint16_t partition_num = VL_bits / (sizeof(T) * 8); - T out[256 / sizeof(T)] = {0}; - out[0] = n; - - for (int i = 0; i < partition_num; i++) { - uint64_t shifted_active = 1ull << ((i % (64 / sizeof(T))) * sizeof(T)); - if (p[i / (64 / sizeof(T))] & shifted_active) { - out[0] += m[i]; +/** Helper function for SVE instructions with the format `fabs zd, + * pg/z, zn`. + * T represents the type of operands (e.g. for zn.d, T = uint64_t). + * Returns correctly formatted RegisterValue. */ +template +RegisterValue sveFabsPredicated(std::vector& operands, + const uint16_t VL_bits) { + const T* d = operands[0].getAsVector(); + const uint64_t* p = operands[1].getAsVector(); + const T* n = operands[2].getAsVector(); + + const uint16_t partition_num = VL_bits / (sizeof(T) * 8); + T out[256 / sizeof(T)] = {0}; + + for (int i = 0; i < partition_num; i++) { + uint64_t shifted_active = 1ull << ((i % (64 / sizeof(T))) * sizeof(T)); + if (p[i / (64 / sizeof(T))] & shifted_active) { + out[i] = ::fabs(n[i]); + } else { + out[i] = d[i]; + } + } + return {out, 256}; +} + +/** Helper function for SVE instructions with the format `fadda rd, + * pg, rn, zm`. + * T represents the type of operands (e.g. for zm.d, T = uint64_t). + * Returns correctly formatted RegisterValue. 
*/ +template +RegisterValue sveFaddaPredicated(std::vector& operands, + const uint16_t VL_bits) { + const uint64_t* p = operands[0].getAsVector(); + const T n = operands[1].get(); + const T* m = operands[2].getAsVector(); + + const uint16_t partition_num = VL_bits / (sizeof(T) * 8); + T out[256 / sizeof(T)] = {0}; + out[0] = n; + + for (int i = 0; i < partition_num; i++) { + uint64_t shifted_active = 1ull << ((i % (64 / sizeof(T))) * sizeof(T)); + if (p[i / (64 / sizeof(T))] & shifted_active) { + out[0] += m[i]; + } + } + return {out, 256}; +} + +/** Helper function for SVE instructions with the format `fcadd zdn, pg/m, + * zdn, zm, #imm`. + * T represents the type of operands (e.g. for zm.d, T = double). + * Returns correctly formatted RegisterValue. */ +template +RegisterValue sveFcaddPredicated( + std::vector& operands, + const simeng::arch::aarch64::InstructionMetadata& metadata, + const uint16_t VL_bits) { + const uint64_t* p = operands[0].getAsVector(); + const T* dn = operands[1].getAsVector(); + const T* m = operands[2].getAsVector(); + const uint32_t imm = metadata.operands[4].imm; + + const uint16_t partition_num = VL_bits / (sizeof(T) * 8); + T out[256 / sizeof(T)] = {0}; + + for (int i = 0; i < (partition_num / 2); i++) { + T acc_r = dn[2 * i]; + T acc_i = dn[2 * i + 1]; + T elt2_r = m[2 * i]; + T elt2_i = m[2 * i + 1]; + + uint64_t shifted_active1 = 1ull + << (((2 * i) % (64 / sizeof(T))) * sizeof(T)); + uint64_t shifted_active2 = + 1ull << (((2 * i + 1) % (64 / sizeof(T))) * sizeof(T)); + if (p[(2 * i) / (64 / sizeof(T))] & shifted_active1) { + if (imm == 90) { + elt2_i = 0.0 - elt2_i; } + acc_r = acc_r + elt2_i; } - return {out, 256}; - } - - /** Helper function for SVE instructions with the format `fcadd zdn, pg/m, - * zdn, zm, #imm`. - * T represents the type of operands (e.g. for zm.d, T = double). - * Returns correctly formatted RegisterValue. 
*/ - template - static RegisterValue sveFcaddPredicated( - std::vector& operands, - const simeng::arch::aarch64::InstructionMetadata& metadata, - const uint16_t VL_bits) { - const uint64_t* p = operands[0].getAsVector(); - const T* dn = operands[1].getAsVector(); - const T* m = operands[2].getAsVector(); - const uint32_t imm = metadata.operands[4].imm; - - const uint16_t partition_num = VL_bits / (sizeof(T) * 8); - T out[256 / sizeof(T)] = {0}; - - for (int i = 0; i < (partition_num / 2); i++) { - T acc_r = dn[2 * i]; - T acc_i = dn[2 * i + 1]; - T elt2_r = m[2 * i]; - T elt2_i = m[2 * i + 1]; - - uint64_t shifted_active1 = 1ull - << (((2 * i) % (64 / sizeof(T))) * sizeof(T)); - uint64_t shifted_active2 = - 1ull << (((2 * i + 1) % (64 / sizeof(T))) * sizeof(T)); - if (p[(2 * i) / (64 / sizeof(T))] & shifted_active1) { - if (imm == 90) { - elt2_i = 0.0 - elt2_i; - } - acc_r = acc_r + elt2_i; + if (p[(2 * i + 1) / (64 / sizeof(T))] & shifted_active2) { + if (imm == 270) { + elt2_r = 0.0 - elt2_r; } - if (p[(2 * i + 1) / (64 / sizeof(T))] & shifted_active2) { - if (imm == 270) { - elt2_r = 0.0 - elt2_r; - } - acc_i = acc_i + elt2_r; + acc_i = acc_i + elt2_r; + } + out[2 * i] = acc_r; + out[2 * i + 1] = acc_i; + } + return {out, 256}; +} + +/** Helper function for SVE instructions with the format `fcmla zda, pg/m, + * zn, zm, #imm`. + * T represents the type of operands (e.g. for zm.d, T = double). + * Returns correctly formatted RegisterValue. */ +template +RegisterValue sveFcmlaPredicated( + std::vector& operands, + const simeng::arch::aarch64::InstructionMetadata& metadata, + const uint16_t VL_bits) { + const T* da = operands[0].getAsVector(); + const uint64_t* p = operands[1].getAsVector(); + const T* n = operands[2].getAsVector(); + const T* m = operands[3].getAsVector(); + const uint32_t imm = metadata.operands[4].imm; + + const uint16_t partition_num = VL_bits / (sizeof(T) * 8); + T out[256 / sizeof(T)] = {0}; + + int sel_a = (imm == 0 || imm == 180) ? 
0 : 1; + int sel_b = (imm == 0 || imm == 180) ? 1 : 0; + bool neg_i = (imm == 180 || imm == 270) ? true : false; + bool neg_r = (imm == 90 || imm == 180) ? true : false; + for (int i = 0; i < (partition_num / 2); i++) { + T addend_r = da[2 * i]; + T addend_i = da[2 * i + 1]; + T elt1_a = n[2 * i + sel_a]; + T elt2_a = m[2 * i + sel_a]; + T elt2_b = m[2 * i + sel_b]; + uint64_t shifted_active1 = 1ull + << (((2 * i) % (64 / sizeof(T))) * sizeof(T)); + uint64_t shifted_active2 = + 1ull << (((2 * i + 1) % (64 / sizeof(T))) * sizeof(T)); + if (p[(2 * i) / (64 / sizeof(T))] & shifted_active1) { + if (neg_r) { + elt2_a = 0.0 - elt2_a; } - out[2 * i] = acc_r; - out[2 * i + 1] = acc_i; + addend_r = addend_r + (elt1_a * elt2_a); } - return {out, 256}; - } - - /** Helper function for SVE instructions with the format `fcmla zda, pg/m, - * zn, zm, #imm`. - * T represents the type of operands (e.g. for zm.d, T = double). - * Returns correctly formatted RegisterValue. */ - template - static RegisterValue sveFcmlaPredicated( - std::vector& operands, - const simeng::arch::aarch64::InstructionMetadata& metadata, - const uint16_t VL_bits) { - const T* da = operands[0].getAsVector(); - const uint64_t* p = operands[1].getAsVector(); - const T* n = operands[2].getAsVector(); - const T* m = operands[3].getAsVector(); - const uint32_t imm = metadata.operands[4].imm; - - const uint16_t partition_num = VL_bits / (sizeof(T) * 8); - T out[256 / sizeof(T)] = {0}; - - int sel_a = (imm == 0 || imm == 180) ? 0 : 1; - int sel_b = (imm == 0 || imm == 180) ? 1 : 0; - bool neg_i = (imm == 180 || imm == 270) ? true : false; - bool neg_r = (imm == 90 || imm == 180) ? 
true : false; - for (int i = 0; i < (partition_num / 2); i++) { - T addend_r = da[2 * i]; - T addend_i = da[2 * i + 1]; - T elt1_a = n[2 * i + sel_a]; - T elt2_a = m[2 * i + sel_a]; - T elt2_b = m[2 * i + sel_b]; - uint64_t shifted_active1 = 1ull - << (((2 * i) % (64 / sizeof(T))) * sizeof(T)); - uint64_t shifted_active2 = - 1ull << (((2 * i + 1) % (64 / sizeof(T))) * sizeof(T)); - if (p[(2 * i) / (64 / sizeof(T))] & shifted_active1) { - if (neg_r) { - elt2_a = 0.0 - elt2_a; - } - addend_r = addend_r + (elt1_a * elt2_a); + if (p[(2 * i + 1) / (64 / sizeof(T))] & shifted_active2) { + if (neg_i) { + elt2_b = 0.0 - elt2_b; } - if (p[(2 * i + 1) / (64 / sizeof(T))] & shifted_active2) { - if (neg_i) { - elt2_b = 0.0 - elt2_b; - } - addend_i = addend_i + (elt1_a * elt2_b); - } - out[2 * i] = addend_r; - out[2 * i + 1] = addend_i; + addend_i = addend_i + (elt1_a * elt2_b); + } + out[2 * i] = addend_r; + out[2 * i + 1] = addend_i; + } + return {out, 256}; +} + +/** Helper function for SVE instructions with the format `fcpy zd, pg/m, + * #const`. + * T represents the type of operands (e.g. for zd.d, T = double). + * Returns correctly formatted RegisterValue. */ +template +RegisterValue sveFcpy_imm( + std::vector& operands, + const simeng::arch::aarch64::InstructionMetadata& metadata, + const uint16_t VL_bits) { + const T* dn = operands[0].getAsVector(); + const uint64_t* p = operands[1].getAsVector(); + const T imm = metadata.operands[2].fp; + + const uint16_t partition_num = VL_bits / (sizeof(T) * 8); + T out[256 / sizeof(T)] = {0}; + + for (int i = 0; i < partition_num; i++) { + uint64_t shifted_active = 1ull << ((i % (64 / sizeof(T))) * sizeof(T)); + if (p[i / (64 / sizeof(T))] & shifted_active) { + out[i] = imm; + } else { + out[i] = dn[i]; } - return {out, 256}; } + return {out, 256}; +} - /** Helper function for SVE instructions with the format `fcpy zd, pg/m, - * #const`. - * T represents the type of operands (e.g. for zd.d, T = double). 
- * Returns correctly formatted RegisterValue. */ - template - static RegisterValue sveFcpy_imm( - std::vector& operands, - const simeng::arch::aarch64::InstructionMetadata& metadata, - const uint16_t VL_bits) { - const T* dn = operands[0].getAsVector(); - const uint64_t* p = operands[1].getAsVector(); - const T imm = metadata.operands[2].fp; - - const uint16_t partition_num = VL_bits / (sizeof(T) * 8); - T out[256 / sizeof(T)] = {0}; - - for (int i = 0; i < partition_num; i++) { - uint64_t shifted_active = 1ull << ((i % (64 / sizeof(T))) * sizeof(T)); - if (p[i / (64 / sizeof(T))] & shifted_active) { - out[i] = imm; - } else { - out[i] = dn[i]; - } - } - return {out, 256}; - } - - /** Helper function for SVE instructions with the format `fcvt zd, - * pg/m, zn`. - * D represents the destination vector register type (e.g. zd.s would be - * int32_t). - * N represents the source vector register type (e.g. zn.d would be double). - * Returns correctly formatted RegisterValue. */ - template - static RegisterValue sveFcvtPredicated(std::vector& operands, - const uint16_t VL_bits) { - const D* d = operands[0].getAsVector(); - const uint64_t* p = operands[1].getAsVector(); - const N* n = operands[2].getAsVector(); - - // Stores size of largest type out of D and N - int lts = std::max(sizeof(D), sizeof(N)); - bool sourceLarger = (sizeof(D) < sizeof(N)) ? true : false; - bool sameDandN = (sizeof(D) == sizeof(N)) ? true : false; - - const uint16_t partition_num = VL_bits / (lts * 8); - D out[256 / sizeof(D)] = {0}; - - for (int i = 0; i < partition_num; i++) { - uint64_t shifted_active = 1ull << ((i % (64 / lts)) * lts); - int indexOut = (sourceLarger) ? (2 * i) : i; - int indexN = (!sameDandN) && (!sourceLarger) ? 
(2 * i) : i; - - if (p[i / (64 / lts)] & shifted_active) { - if (n[indexN] > std::numeric_limits::max()) - out[indexOut] = std::numeric_limits::max(); - else if (n[indexN] < std::numeric_limits::lowest()) - out[indexOut] = std::numeric_limits::lowest(); - else - out[indexOut] = static_cast(n[indexN]); - } else { - out[indexOut] = d[indexOut]; - } - if (sourceLarger) out[indexOut + 1] = d[indexOut + 1]; - } - return {out, 256}; - } - - /** Helper function for SVE instructions with the format `fcvtzs zd, - * pg/m, zn`. - * D represents the destination vector register type (e.g. zd.s would be - * int32_t). - * N represents the source vector register type (e.g. zn.d would be double). - * Returns correctly formatted RegisterValue. */ - template - static RegisterValue sveFcvtzsPredicated(std::vector& operands, - const uint16_t VL_bits) { - const D* d = operands[0].getAsVector(); - const uint64_t* p = operands[1].getAsVector(); - const N* n = operands[2].getAsVector(); - - // Stores size of largest type out of D and N - int lts = std::max(sizeof(D), sizeof(N)); - bool sameType = (sizeof(D) == sizeof(N)) ? true : false; - bool sourceLarger = (sizeof(D) < sizeof(N)) ? true : false; - - const uint16_t partition_num = VL_bits / (lts * 8); - D out[256 / sizeof(D)] = {0}; - - for (int i = 0; i < partition_num; i++) { - uint64_t shifted_active = 1ull << ((i % (64 / lts)) * lts); - int indexOut = (sourceLarger) ? (2 * i) : i; - int indexN = ((!sourceLarger) & (!sameType)) ? (2 * i) : i; - - if (p[i / (64 / lts)] & shifted_active) { - if (n[indexN] > std::numeric_limits::max()) - out[indexOut] = std::numeric_limits::max(); - else if (n[indexN] < std::numeric_limits::lowest()) - out[indexOut] = std::numeric_limits::lowest(); - else - out[indexOut] = static_cast(std::trunc(n[indexN])); - // Can be set to 0xFFFFFFFF as will only occur when D=int32_t. - if (sourceLarger) out[indexOut + 1] = (n[indexN] < 0) ? 
0xFFFFFFFFu : 0; - } else { - out[indexOut] = d[indexOut]; - if (sourceLarger) out[indexOut + 1] = d[indexOut + 1]; - } - } - return {out, 256}; - } - - /** Helper function for SVE instructions with the format `fmad zd, pg/m, zn, - * zm`. - * T represents the type of operands (e.g. for zn.d, T = double). - * Returns correctly formatted RegisterValue. */ - template - static RegisterValue sveFmadPredicated_vecs( - std::vector& operands, const uint16_t VL_bits) { - const T* d = operands[0].getAsVector(); - const uint64_t* p = operands[1].getAsVector(); - const T* n = operands[2].getAsVector(); - const T* m = operands[3].getAsVector(); - - const uint16_t partition_num = VL_bits / (sizeof(T) * 8); - T out[256 / sizeof(T)] = {0}; - for (int i = 0; i < partition_num; i++) { - uint64_t shifted_active = 1ull << ((i % (64 / sizeof(T))) * sizeof(T)); - if (p[i / (64 / sizeof(T))] & shifted_active) - out[i] = m[i] + (d[i] * n[i]); - else - out[i] = d[i]; - } - return {out, 256}; - } - - /** Helper function for SVE instructions with the format `fmls zd, pg/m, zn, - * zm`. - * T represents the type of operands (e.g. for zn.d, T = double). - * Returns correctly formatted RegisterValue. */ - template - static RegisterValue sveFmlsPredicated_vecs( - std::vector& operands, const uint16_t VL_bits) { - const T* d = operands[0].getAsVector(); - const uint64_t* p = operands[1].getAsVector(); - const T* n = operands[2].getAsVector(); - const T* m = operands[3].getAsVector(); - - const uint16_t partition_num = VL_bits / (sizeof(T) * 8); - T out[256 / sizeof(T)] = {0}; - for (int i = 0; i < partition_num; i++) { - uint64_t shifted_active = 1ull << ((i % (64 / sizeof(T))) * sizeof(T)); - if (p[i / (64 / sizeof(T))] & shifted_active) - out[i] = d[i] + (-n[i] * m[i]); +/** Helper function for SVE instructions with the format `fcvt zd, + * pg/m, zn`. + * D represents the destination vector register type (e.g. zd.s would be + * int32_t). + * N represents the source vector register type (e.g. 
zn.d would be double). + * Returns correctly formatted RegisterValue. */ +template +RegisterValue sveFcvtPredicated(std::vector& operands, + const uint16_t VL_bits) { + const D* d = operands[0].getAsVector(); + const uint64_t* p = operands[1].getAsVector(); + const N* n = operands[2].getAsVector(); + + // Stores size of largest type out of D and N + int lts = std::max(sizeof(D), sizeof(N)); + bool sourceLarger = (sizeof(D) < sizeof(N)) ? true : false; + bool sameDandN = (sizeof(D) == sizeof(N)) ? true : false; + + const uint16_t partition_num = VL_bits / (lts * 8); + D out[256 / sizeof(D)] = {0}; + + for (int i = 0; i < partition_num; i++) { + uint64_t shifted_active = 1ull << ((i % (64 / lts)) * lts); + int indexOut = (sourceLarger) ? (2 * i) : i; + int indexN = (!sameDandN) && (!sourceLarger) ? (2 * i) : i; + + if (p[i / (64 / lts)] & shifted_active) { + if (n[indexN] > std::numeric_limits::max()) + out[indexOut] = std::numeric_limits::max(); + else if (n[indexN] < std::numeric_limits::lowest()) + out[indexOut] = std::numeric_limits::lowest(); else - out[i] = d[i]; - } - return {out, 256}; - } - - /** Helper function for SVE instructions with the format `fmsb zd, pg/m, zn, - * zm`. - * T represents the type of operands (e.g. for zn.d, T = double). - * Returns correctly formatted RegisterValue. 
*/ - template - static RegisterValue sveFmsbPredicated_vecs( - std::vector& operands, const uint16_t VL_bits) { - const T* d = operands[0].getAsVector(); - const uint64_t* p = operands[1].getAsVector(); - const T* n = operands[2].getAsVector(); - const T* m = operands[3].getAsVector(); - - const uint16_t partition_num = VL_bits / (sizeof(T) * 8); - T out[256 / sizeof(T)] = {0}; - for (int i = 0; i < partition_num; i++) { - uint64_t shifted_active = 1ull << ((i % (64 / sizeof(T))) * sizeof(T)); - if (p[i / (64 / sizeof(T))] & shifted_active) - out[i] = m[i] + (-d[i] * n[i]); + out[indexOut] = static_cast(n[indexN]); + } else { + out[indexOut] = d[indexOut]; + } + if (sourceLarger) out[indexOut + 1] = d[indexOut + 1]; + } + return {out, 256}; +} + +/** Helper function for SVE instructions with the format `fcvtzs zd, + * pg/m, zn`. + * D represents the destination vector register type (e.g. zd.s would be + * int32_t). + * N represents the source vector register type (e.g. zn.d would be double). + * Returns correctly formatted RegisterValue. */ +template +RegisterValue sveFcvtzsPredicated(std::vector& operands, + const uint16_t VL_bits) { + const D* d = operands[0].getAsVector(); + const uint64_t* p = operands[1].getAsVector(); + const N* n = operands[2].getAsVector(); + + // Stores size of largest type out of D and N + int lts = std::max(sizeof(D), sizeof(N)); + bool sameType = (sizeof(D) == sizeof(N)) ? true : false; + bool sourceLarger = (sizeof(D) < sizeof(N)) ? true : false; + + const uint16_t partition_num = VL_bits / (lts * 8); + D out[256 / sizeof(D)] = {0}; + + for (int i = 0; i < partition_num; i++) { + uint64_t shifted_active = 1ull << ((i % (64 / lts)) * lts); + int indexOut = (sourceLarger) ? (2 * i) : i; + int indexN = ((!sourceLarger) & (!sameType)) ? 
(2 * i) : i; + + if (p[i / (64 / lts)] & shifted_active) { + if (n[indexN] > std::numeric_limits::max()) + out[indexOut] = std::numeric_limits::max(); + else if (n[indexN] < std::numeric_limits::lowest()) + out[indexOut] = std::numeric_limits::lowest(); else - out[i] = d[i]; - } - return {out, 256}; - } - - /** Helper function for SVE instructions with the format `fmul zd, zn, zm`. - * T represents the type of operands (e.g. for zn.d, T = double). - * Returns correctly formatted RegisterValue. */ - template - static RegisterValue sveFmul_3ops(std::vector& operands, - const uint16_t VL_bits) { - const T* n = operands[0].getAsVector(); - const T* m = operands[1].getAsVector(); - - const uint16_t partition_num = VL_bits / (sizeof(T) * 8); - T out[256 / sizeof(T)] = {0}; - for (int i = 0; i < partition_num; i++) { - out[i] = n[i] * m[i]; + out[indexOut] = static_cast(std::trunc(n[indexN])); + // Can be set to 0xFFFFFFFF as will only occur when D=int32_t. + if (sourceLarger) out[indexOut + 1] = (n[indexN] < 0) ? 0xFFFFFFFFu : 0; + } else { + out[indexOut] = d[indexOut]; + if (sourceLarger) out[indexOut + 1] = d[indexOut + 1]; } - return {out, 256}; } + return {out, 256}; +} - /** Helper function for SVE instructions with the format `fneg zd, pg/m, zn`. - * T represents the type of operands (e.g. for zn.d, T = double). - * Returns correctly formatted RegisterValue. 
*/ - template - static RegisterValue sveFnegPredicated(std::vector& operands, - const uint16_t VL_bits) { - const T* d = operands[0].getAsVector(); - const uint64_t* p = operands[1].getAsVector(); - const T* n = operands[2].getAsVector(); - - const uint16_t partition_num = VL_bits / (sizeof(T) * 8); - T out[256 / sizeof(T)] = {0}; - - for (int i = 0; i < partition_num; i++) { - uint64_t shifted_active = 1ull << ((i % (64 / sizeof(T))) * sizeof(T)); - if (p[i / (64 / sizeof(T))] & shifted_active) - out[i] = -n[i]; - else - out[i] = d[i]; - } - return {out, 256}; +/** Helper function for SVE instructions with the format `fmad zd, pg/m, zn, + * zm`. + * T represents the type of operands (e.g. for zn.d, T = double). + * Returns correctly formatted RegisterValue. */ +template +RegisterValue sveFmadPredicated_vecs(std::vector& operands, + const uint16_t VL_bits) { + const T* d = operands[0].getAsVector(); + const uint64_t* p = operands[1].getAsVector(); + const T* n = operands[2].getAsVector(); + const T* m = operands[3].getAsVector(); + + const uint16_t partition_num = VL_bits / (sizeof(T) * 8); + T out[256 / sizeof(T)] = {0}; + for (int i = 0; i < partition_num; i++) { + uint64_t shifted_active = 1ull << ((i % (64 / sizeof(T))) * sizeof(T)); + if (p[i / (64 / sizeof(T))] & shifted_active) + out[i] = m[i] + (d[i] * n[i]); + else + out[i] = d[i]; } + return {out, 256}; +} - /** Helper function for SVE instructions with the format `fnmls zd, pg/m, zn, - * zm`. - * T represents the type of operands (e.g. for zn.d, T = double). - * Returns correctly formatted RegisterValue. 
*/ - template - static RegisterValue sveFnmlsPredicated(std::vector& operands, - const uint16_t VL_bits) { - const T* d = operands[0].getAsVector(); - const uint64_t* p = operands[1].getAsVector(); - const T* n = operands[2].getAsVector(); - const T* m = operands[3].getAsVector(); - - const uint16_t partition_num = VL_bits / (sizeof(T) * 8); - T out[256 / sizeof(T)] = {0}; - - for (int i = 0; i < partition_num; i++) { - uint64_t shifted_active = 1ull << ((i % (64 / sizeof(T))) * sizeof(T)); - if (p[i / (64 / sizeof(T))] & shifted_active) - out[i] = -d[i] + (n[i] * m[i]); - else - out[i] = d[i]; - } - return {out, 256}; +/** Helper function for SVE instructions with the format `fmls zd, pg/m, zn, + * zm`. + * T represents the type of operands (e.g. for zn.d, T = double). + * Returns correctly formatted RegisterValue. */ +template +RegisterValue sveFmlsPredicated_vecs(std::vector& operands, + const uint16_t VL_bits) { + const T* d = operands[0].getAsVector(); + const uint64_t* p = operands[1].getAsVector(); + const T* n = operands[2].getAsVector(); + const T* m = operands[3].getAsVector(); + + const uint16_t partition_num = VL_bits / (sizeof(T) * 8); + T out[256 / sizeof(T)] = {0}; + for (int i = 0; i < partition_num; i++) { + uint64_t shifted_active = 1ull << ((i % (64 / sizeof(T))) * sizeof(T)); + if (p[i / (64 / sizeof(T))] & shifted_active) + out[i] = d[i] + (-n[i] * m[i]); + else + out[i] = d[i]; } + return {out, 256}; +} - /** Helper function for SVE instructions with the format `fnmsb zdn, pg/m, zm, - * za`. - * T represents the type of operands (e.g. for zdn.d, T = double). - * Returns correctly formatted RegisterValue. 
*/ - template - static RegisterValue sveFnmsbPredicated(std::vector& operands, - const uint16_t VL_bits) { - const T* n = operands[0].getAsVector(); - const uint64_t* p = operands[1].getAsVector(); - const T* m = operands[2].getAsVector(); - const T* a = operands[3].getAsVector(); - - const uint16_t partition_num = VL_bits / (sizeof(T) * 8); - T out[256 / sizeof(T)] = {0}; +/** Helper function for SVE instructions with the format `fmsb zd, pg/m, zn, + * zm`. + * T represents the type of operands (e.g. for zn.d, T = double). + * Returns correctly formatted RegisterValue. */ +template +RegisterValue sveFmsbPredicated_vecs(std::vector& operands, + const uint16_t VL_bits) { + const T* d = operands[0].getAsVector(); + const uint64_t* p = operands[1].getAsVector(); + const T* n = operands[2].getAsVector(); + const T* m = operands[3].getAsVector(); + + const uint16_t partition_num = VL_bits / (sizeof(T) * 8); + T out[256 / sizeof(T)] = {0}; + for (int i = 0; i < partition_num; i++) { + uint64_t shifted_active = 1ull << ((i % (64 / sizeof(T))) * sizeof(T)); + if (p[i / (64 / sizeof(T))] & shifted_active) + out[i] = m[i] + (-d[i] * n[i]); + else + out[i] = d[i]; + } + return {out, 256}; +} + +/** Helper function for SVE instructions with the format `fmul zd, zn, zm`. + * T represents the type of operands (e.g. for zn.d, T = double). + * Returns correctly formatted RegisterValue. */ +template +RegisterValue sveFmul_3ops(std::vector& operands, + const uint16_t VL_bits) { + const T* n = operands[0].getAsVector(); + const T* m = operands[1].getAsVector(); + + const uint16_t partition_num = VL_bits / (sizeof(T) * 8); + T out[256 / sizeof(T)] = {0}; + for (int i = 0; i < partition_num; i++) { + out[i] = n[i] * m[i]; + } + return {out, 256}; +} + +/** Helper function for SVE instructions with the format `fneg zd, pg/m, zn`. + * T represents the type of operands (e.g. for zn.d, T = double). + * Returns correctly formatted RegisterValue. 
*/ +template +RegisterValue sveFnegPredicated(std::vector& operands, + const uint16_t VL_bits) { + const T* d = operands[0].getAsVector(); + const uint64_t* p = operands[1].getAsVector(); + const T* n = operands[2].getAsVector(); - for (int i = 0; i < partition_num; i++) { - uint64_t shifted_active = 1ull << ((i % (64 / sizeof(T))) * sizeof(T)); - if (p[i / (64 / sizeof(T))] & shifted_active) - out[i] = -a[i] + n[i] * m[i]; - else - out[i] = n[i]; - } - return {out, 256}; - } - - /** Helper function for SVE instructions with the format `frintn zd, pg/m, - * zn`. - * D represents the destination vector register type (e.g. zd.s would be - * int32_t). - * N represents the source vector register type (e.g. zn.d would be - * double). - * Returns correctly formatted RegisterValue. */ - template - static RegisterValue sveFrintnPredicated(std::vector& operands, - const uint16_t VL_bits) { - const D* d = operands[0].getAsVector(); - const uint64_t* p = operands[1].getAsVector(); - const N* n = operands[2].getAsVector(); - - const uint16_t partition_num = VL_bits / (sizeof(N) * 8); - D out[256 / sizeof(D)] = {0}; + const uint16_t partition_num = VL_bits / (sizeof(T) * 8); + T out[256 / sizeof(T)] = {0}; - for (int i = 0; i < partition_num; i++) { - uint64_t shifted_active = 1ull << ((i % (64 / sizeof(N))) * sizeof(N)); - if (p[i / (64 / sizeof(N))] & shifted_active) { - out[i] = AuxFunc::roundToNearestTiesToEven(n[i]); + for (int i = 0; i < partition_num; i++) { + uint64_t shifted_active = 1ull << ((i % (64 / sizeof(T))) * sizeof(T)); + if (p[i / (64 / sizeof(T))] & shifted_active) + out[i] = -n[i]; + else + out[i] = d[i]; + } + return {out, 256}; +} + +/** Helper function for SVE instructions with the format `fnmls zd, pg/m, zn, + * zm`. + * T represents the type of operands (e.g. for zn.d, T = double). + * Returns correctly formatted RegisterValue. 
*/ +template +RegisterValue sveFnmlsPredicated(std::vector& operands, + const uint16_t VL_bits) { + const T* d = operands[0].getAsVector(); + const uint64_t* p = operands[1].getAsVector(); + const T* n = operands[2].getAsVector(); + const T* m = operands[3].getAsVector(); + + const uint16_t partition_num = VL_bits / (sizeof(T) * 8); + T out[256 / sizeof(T)] = {0}; + + for (int i = 0; i < partition_num; i++) { + uint64_t shifted_active = 1ull << ((i % (64 / sizeof(T))) * sizeof(T)); + if (p[i / (64 / sizeof(T))] & shifted_active) + out[i] = -d[i] + (n[i] * m[i]); + else + out[i] = d[i]; + } + return {out, 256}; +} + +/** Helper function for SVE instructions with the format `fnmsb zdn, pg/m, zm, + * za`. + * T represents the type of operands (e.g. for zdn.d, T = double). + * Returns correctly formatted RegisterValue. */ +template +RegisterValue sveFnmsbPredicated(std::vector& operands, + const uint16_t VL_bits) { + const T* n = operands[0].getAsVector(); + const uint64_t* p = operands[1].getAsVector(); + const T* m = operands[2].getAsVector(); + const T* a = operands[3].getAsVector(); + + const uint16_t partition_num = VL_bits / (sizeof(T) * 8); + T out[256 / sizeof(T)] = {0}; + + for (int i = 0; i < partition_num; i++) { + uint64_t shifted_active = 1ull << ((i % (64 / sizeof(T))) * sizeof(T)); + if (p[i / (64 / sizeof(T))] & shifted_active) + out[i] = -a[i] + n[i] * m[i]; + else + out[i] = n[i]; + } + return {out, 256}; +} + +/** Helper function for SVE instructions with the format `frintn zd, pg/m, + * zn`. + * T represents the vector type (e.g. zd.s would be float). + * Returns correctly formatted RegisterValue. 
*/ +template +std::enable_if_t, RegisterValue> +sveFrintnPredicated(std::vector& operands, + const uint16_t VL_bits) { + const T* d = operands[0].getAsVector(); + const uint64_t* p = operands[1].getAsVector(); + const T* n = operands[2].getAsVector(); + + const uint16_t partition_num = VL_bits / (sizeof(T) * 8); + T out[256 / sizeof(T)] = {0}; + + for (int i = 0; i < partition_num; i++) { + uint64_t shifted_active = 1ull << ((i % (64 / sizeof(T))) * sizeof(T)); + if (p[i / (64 / sizeof(T))] & shifted_active) { + // Get truncation + T trunc = std::trunc(n[i]); + // On tie, round to nearest even + if (std::fabs(n[i] - trunc) == static_cast(0.5)) { + T addand = (trunc > static_cast(0.0)) ? static_cast(1) + : static_cast(-1); + // If odd, add the addand + out[i] = (std::fmod(trunc, static_cast(2.0)) == static_cast(0.0)) + ? trunc + : (trunc + addand); } else { - out[i] = d[i]; + // Else, round to nearest + out[i] = std::round(n[i]); } - } - return {out, 256}; - } - - /** Helper function for SVE instructions with the format `fsqrt zd, - * pg/m, zn`. - * T represents the type of operands (e.g. for zn.d, T = double). - * Returns correctly formatted RegisterValue. */ - template - static RegisterValue sveFsqrtPredicated_2vecs( - std::vector& operands, const uint16_t VL_bits) { - const T* d = operands[0].getAsVector(); - const uint64_t* p = operands[1].getAsVector(); - const T* n = operands[2].getAsVector(); - - const uint16_t partition_num = VL_bits / (sizeof(T) * 8); - T out[256 / sizeof(T)] = {0}; - for (int i = 0; i < partition_num; i++) { - uint64_t shifted_active = 1ull << ((i % (64 / sizeof(T))) * sizeof(T)); - if (p[i / (64 / sizeof(T))] & shifted_active) - out[i] = ::sqrt(n[i]); - else - out[i] = d[i]; - } - return {out, 256}; - } - - /** Helper function for SVE instructions with the format `inc - * xdn{, pattern{, MUL #imm}}`. - * T represents the type of operation (e.g. for INCB, T = int8_t). - * Returns single value of type int64_t. 
*/ - template - static int64_t sveInc_gprImm( - std::vector& operands, - const simeng::arch::aarch64::InstructionMetadata& metadata, - const uint16_t VL_bits) { - const int64_t n = operands[0].get(); - const uint8_t imm = static_cast(metadata.operands[1].imm); - const uint16_t elems = - AuxFunc::sveGetPattern(metadata.operandStr, sizeof(T) * 8, VL_bits); - int64_t out = n + (elems * imm); - return out; - } - - /** Helper function for SVE instructions with the format `inc - * zdn{, pattern{, #imm}}`. - * T represents the type of operands (e.g. for zdn.d, T = int64_t). - * Returns correctly formatted RegisterValue. */ - template - static RegisterValue sveInc_imm( - std::vector& operands, - const simeng::arch::aarch64::InstructionMetadata& metadata, - const uint16_t VL_bits) { - const T* n = operands[0].getAsVector(); - const uint8_t imm = static_cast(metadata.operands[1].imm); - - const uint16_t partition_num = VL_bits / (sizeof(T) * 8); - typename std::make_signed::type out[256 / sizeof(T)] = {0}; - const uint16_t elems = - AuxFunc::sveGetPattern(metadata.operandStr, sizeof(T) * 8, VL_bits); - - for (int i = 0; i < partition_num; i++) { - out[i] = n[i] + (elems * imm); - } - return {out, 256}; - } - - /** Helper function for SVE instructions with the format `incp xdn, pm`. - * T represents the type of operands (e.g. for pm.d, T = uint64_t). - * Returns single value of type uint64_t. 
*/ - template - static uint64_t sveIncp_gpr(std::vector& operands, - const uint16_t VL_bits) { - const uint64_t dn = operands[0].get(); - const uint64_t* p = operands[1].getAsVector(); - - const uint16_t partition_num = VL_bits / (sizeof(T) * 8); - uint64_t count = 0; - - for (int i = 0; i < partition_num; i++) { - uint64_t shifted_active = 1ull << ((i % (64 / sizeof(T))) * sizeof(T)); - if (p[i / (64 / sizeof(T))] & shifted_active) { - count++; + } else { + out[i] = d[i]; + } + } + return {out, 256}; +} + +/** Helper function for SVE instructions with the format `fsqrt zd, + * pg/m, zn`. + * T represents the type of operands (e.g. for zn.d, T = double). + * Returns correctly formatted RegisterValue. */ +template +RegisterValue sveFsqrtPredicated_2vecs(std::vector& operands, + const uint16_t VL_bits) { + const T* d = operands[0].getAsVector(); + const uint64_t* p = operands[1].getAsVector(); + const T* n = operands[2].getAsVector(); + + const uint16_t partition_num = VL_bits / (sizeof(T) * 8); + T out[256 / sizeof(T)] = {0}; + for (int i = 0; i < partition_num; i++) { + uint64_t shifted_active = 1ull << ((i % (64 / sizeof(T))) * sizeof(T)); + if (p[i / (64 / sizeof(T))] & shifted_active) + out[i] = ::sqrt(n[i]); + else + out[i] = d[i]; + } + return {out, 256}; +} + +/** Helper function for SVE instructions with the format `inc + * xdn{, pattern{, MUL #imm}}`. + * T represents the type of operation (e.g. for INCB, T = int8_t). + * Returns single value of type int64_t. */ +template +int64_t sveInc_gprImm( + std::vector& operands, + const simeng::arch::aarch64::InstructionMetadata& metadata, + const uint16_t VL_bits) { + const int64_t n = operands[0].get(); + const uint8_t imm = static_cast(metadata.operands[1].imm); + const uint16_t elems = + sveGetPattern(metadata.operandStr, sizeof(T) * 8, VL_bits); + int64_t out = n + (elems * imm); + return out; +} + +/** Helper function for SVE instructions with the format `inc + * zdn{, pattern{, #imm}}`. 
+ * T represents the type of operands (e.g. for zdn.d, T = int64_t). + * Returns correctly formatted RegisterValue. */ +template +RegisterValue sveInc_imm( + std::vector& operands, + const simeng::arch::aarch64::InstructionMetadata& metadata, + const uint16_t VL_bits) { + const T* n = operands[0].getAsVector(); + const uint8_t imm = static_cast(metadata.operands[1].imm); + + const uint16_t partition_num = VL_bits / (sizeof(T) * 8); + typename std::make_signed::type out[256 / sizeof(T)] = {0}; + const uint16_t elems = + sveGetPattern(metadata.operandStr, sizeof(T) * 8, VL_bits); + + for (int i = 0; i < partition_num; i++) { + out[i] = n[i] + (elems * imm); + } + return {out, 256}; +} + +/** Helper function for SVE instructions with the format `incp xdn, pm`. + * T represents the type of operands (e.g. for pm.d, T = uint64_t). + * Returns single value of type uint64_t. */ +template +uint64_t sveIncp_gpr(std::vector& operands, + const uint16_t VL_bits) { + const uint64_t dn = operands[0].get(); + const uint64_t* p = operands[1].getAsVector(); + + const uint16_t partition_num = VL_bits / (sizeof(T) * 8); + uint64_t count = 0; + + for (int i = 0; i < partition_num; i++) { + uint64_t shifted_active = 1ull << ((i % (64 / sizeof(T))) * sizeof(T)); + if (p[i / (64 / sizeof(T))] & shifted_active) { + count++; + } + } + return dn + count; +} + +/** Helper function for SVE instructions with the format `index zd, <#imm, + * rn>, <#imm, rm>`. + * D represents the vector register type (e.g. zd.b would be int8_t). + * N represents the GPR type (e.g. for xn, xm, D = int64). + * Returns correctly formatted RegisterValue. */ +template +RegisterValue sveIndex( + std::vector& operands, + const simeng::arch::aarch64::InstructionMetadata& metadata, + const uint16_t VL_bits, bool op1isImm, bool op2isImm) { + const int op2Index = op1isImm ? 0 : 1; + const auto n = op1isImm ? static_cast(metadata.operands[1].imm) + : static_cast(operands[0].get()); + const auto m = op2isImm ? 
static_cast(metadata.operands[2].imm) + : static_cast(operands[op2Index].get()); + + const uint16_t partition_num = VL_bits / (sizeof(D) * 8); + D out[256 / sizeof(D)] = {0}; + + for (int i = 0; i < partition_num; i++) { + out[i] = static_cast(n + (i * m)); + } + return {out, 256}; +} + +/** Helper function for SVE instructions with the format ` + * pd, pg/z, pn, pm`. + * T represents the type of operands (e.g. for pn.d, T = uint64_t). + * Returns correctly formatted RegisterValue. */ +template +std::array sveLogicOp_preds( + std::vector& operands, const uint16_t VL_bits, + std::function func) { + const uint64_t* p = operands[0].getAsVector(); + const uint64_t* n = operands[1].getAsVector(); + const uint64_t* m = operands[2].getAsVector(); + + const uint16_t partition_num = VL_bits / (sizeof(T) * 8); + std::array out = {0}; + + for (int i = 0; i < partition_num; i++) { + uint64_t shifted_active = 1ull << ((i % (64 / sizeof(T))) * sizeof(T)); + if (p[i / (64 / sizeof(T))] & shifted_active) { + out[i / (64 / sizeof(T))] |= + (func(n[i / (64 / sizeof(T))], m[i / (64 / sizeof(T))]) & + shifted_active); + } + } + return out; +} + +/** Helper function for SVE instructions with the format ` + * zd, pg/m, zn, zm`. + * T represents the type of operands (e.g. for zn.d, T = uint64_t). + * Returns correctly formatted RegisterValue. 
*/ +template +RegisterValue sveLogicOpPredicated_3vecs(std::vector& operands, + const uint16_t VL_bits, + std::function func) { + const uint64_t* p = operands[0].getAsVector(); + const T* dn = operands[1].getAsVector(); + const T* m = operands[2].getAsVector(); + + const uint16_t partition_num = VL_bits / (sizeof(T) * 8); + T out[256 / sizeof(T)] = {0}; + for (int i = 0; i < partition_num; i++) { + uint64_t shifted_active = 1ull << ((i % (64 / sizeof(T))) * sizeof(T)); + if (p[i / (64 / sizeof(T))] & shifted_active) + out[i] = func(dn[i], m[i]); + else + out[i] = dn[i]; + } + return {out, 256}; +} + +/** Helper function for SVE instructions with the format ` + * zd, zn, zm`. + * T represents the type of operands (e.g. for zn.d, T = uint64_t). + * Returns correctly formatted RegisterValue. */ +template +RegisterValue sveLogicOpUnPredicated_3vecs(std::vector& operands, + const uint16_t VL_bits, + std::function func) { + const T* n = operands[0].getAsVector(); + const T* m = operands[1].getAsVector(); + + const uint16_t partition_num = VL_bits / (sizeof(T) * 8); + T out[256 / sizeof(T)] = {0}; + for (int i = 0; i < partition_num; i++) { + out[i] = func(n[i], m[i]); + } + return {out, 256}; +} + +/** Helper function for SVE instructions with the format `lsl sz, zn, #imm`. + * T represents the type of operands (e.g. for zn.d, T = uint64_t). + * Returns correctly formatted RegisterValue. */ +template +RegisterValue sveLsl_imm( + std::vector& operands, + const simeng::arch::aarch64::InstructionMetadata& metadata, + const uint16_t VL_bits) { + const T* n = operands[0].getAsVector(); + const T imm = static_cast(metadata.operands[2].imm); + + const uint16_t partition_num = VL_bits / (sizeof(T) * 8); + typename std::make_signed::type out[256 / sizeof(T)] = {0}; + + for (int i = 0; i < partition_num; i++) { + out[i] = (n[i] << imm); + } + return {out, 256}; +} + +/** Helper function for SVE instructions with the format `max zdn, zdn, + * #imm`. 
+ * T represents the type of operands (e.g. for zdn.d, T = uint64_t). + * Returns correctly formatted RegisterValue. */ +template +RegisterValue sveMax_vecImm( + std::vector& operands, + const simeng::arch::aarch64::InstructionMetadata& metadata, + const uint16_t VL_bits) { + const T* n = operands[0].getAsVector(); + T imm = static_cast(metadata.operands[2].imm); + + const uint16_t partition_num = VL_bits / (sizeof(T) * 8); + T out[256 / sizeof(T)] = {0}; + + for (int i = 0; i < partition_num; i++) { + out[i] = std::max(n[i], imm); + } + return {out, 256}; +} + +/** Helper function for SVE instructions with the format `max zdn, zdn, + * #imm`. + * T represents the type of operands (e.g. for zdn.d, T = uint64_t). + * Returns correctly formatted RegisterValue. */ +template +RegisterValue sveMaxPredicated_vecs(std::vector& operands, + const uint16_t VL_bits) { + const T* d = operands[0].getAsVector(); + const uint64_t* p = operands[1].getAsVector(); + const T* n = operands[2].getAsVector(); + const T* m = operands[3].getAsVector(); + + const uint16_t partition_num = VL_bits / (sizeof(T) * 8); + T out[256 / sizeof(T)] = {0}; + + for (int i = 0; i < partition_num; i++) { + uint64_t shifted_active = 1ull << ((i % (64 / sizeof(T))) * sizeof(T)); + if (p[i / (64 / sizeof(T))] & shifted_active) { + out[i] = std::max(n[i], m[i]); + } else + out[i] = d[i]; + } + return {out, 256}; +} + +/** Helper function for SVE instructions with the format `fmla zd, pg/m, zn, + * zm`. + * T represents the type of operands (e.g. for zn.d, T = uint64_t). + * Returns correctly formatted RegisterValue. 
*/ +template +RegisterValue sveMlaPredicated_vecs(std::vector& operands, + const uint16_t VL_bits) { + const T* d = operands[0].getAsVector(); + const uint64_t* p = operands[1].getAsVector(); + const T* n = operands[2].getAsVector(); + const T* m = operands[3].getAsVector(); + + const uint16_t partition_num = VL_bits / (sizeof(T) * 8); + T out[256 / sizeof(T)] = {0}; + for (int i = 0; i < partition_num; i++) { + uint64_t shifted_active = 1ull << ((i % (64 / sizeof(T))) * sizeof(T)); + if (p[i / (64 / sizeof(T))] & shifted_active) + out[i] = d[i] + (n[i] * m[i]); + else + out[i] = d[i]; + } + return {out, 256}; +} + +/** Helper function for SVE instructions with the format `fmla zda, zn, + * zm[index]`. + * T represents the type of operands (e.g. for zn.d, T = uint64_t). + * Returns correctly formatted RegisterValue. */ +template +RegisterValue sveMlaIndexed_vecs( + std::vector& operands, + const simeng::arch::aarch64::InstructionMetadata& metadata, + const uint16_t VL_bits) { + const T* d = operands[0].getAsVector(); + const T* n = operands[1].getAsVector(); + const T* m = operands[2].getAsVector(); + const size_t index = static_cast(metadata.operands[2].vector_index); + + const uint16_t elemsPer128 = 128 / (sizeof(T) * 8); + const uint16_t partition_num = VL_bits / (sizeof(T) * 8); + T out[256 / sizeof(T)] = {0}; + + for (size_t i = 0; i < partition_num; i += elemsPer128) { + const T zm_elem = m[i + index]; + for (size_t j = 0; j < elemsPer128; j++) { + out[i + j] = d[i + j] + (n[i + j] * zm_elem); + } + } + + return {out, 256}; +} + +/** Helper function for SVE instructions with the format `movprfx zd, + * pg/z, zn`. + * T represents the type of operands (e.g. for zd.d, T = uint64_t). + * Returns correctly formatted RegisterValue. 
*/ +template +RegisterValue sveMovprfxPredicated_destToZero( + std::vector& operands, const uint16_t VL_bits) { + // TODO: Adopt hint logic of the MOVPRFX instruction + const uint64_t* p = operands[0].getAsVector(); + const T* n = operands[1].getAsVector(); + + const uint16_t partition_num = VL_bits / (sizeof(T) * 8); + T out[256 / sizeof(T)] = {0}; + + for (int i = 0; i < partition_num; i++) { + uint64_t shifted_active = 1ull << ((i % (64 / sizeof(T))) * sizeof(T)); + if (p[i / (64 / sizeof(T))] & shifted_active) { + out[i] = n[i]; + } else { + out[i] = 0; + } + } + return {out, 256}; +} + +/** Helper function for SVE instructions with the format `movprfx zd, + * pg/m, zn`. + * T represents the type of operands (e.g. for zn.d, T = uint64_t). + * Returns correctly formatted RegisterValue. */ +template +RegisterValue sveMovprfxPredicated_destUnchanged( + std::vector& operands, const uint16_t VL_bits) { + // TODO: Adopt hint logic of the MOVPRFX instruction + const T* d = operands[0].getAsVector(); + const uint64_t* p = operands[1].getAsVector(); + const T* n = operands[2].getAsVector(); + + const uint16_t partition_num = VL_bits / (sizeof(T) * 8); + T out[256 / sizeof(T)] = {0}; + + for (int i = 0; i < partition_num; i++) { + uint64_t shifted_active = 1ull << ((i % (64 / sizeof(T))) * sizeof(T)); + if (p[i / (64 / sizeof(T))] & shifted_active) { + out[i] = n[i]; + } else { + out[i] = d[i]; + } + } + return {out, 256}; +} + +/** Helper function for SVE instructions with the format `mul zdn, pg/m, zdn, + * `. + * T represents the type of operands (e.g. for zn.d, T = uint64_t). + * Returns correctly formatted RegisterValue. 
*/ +template +RegisterValue sveMulPredicated( + std::vector& operands, + const simeng::arch::aarch64::InstructionMetadata& metadata, + const uint16_t VL_bits, bool useImm) { + bool isFP = std::is_floating_point::value; + const uint64_t* p = operands[0].getAsVector(); + const T* n = operands[1].getAsVector(); + const T* m; + T imm; + if (useImm) + imm = isFP ? metadata.operands[3].fp : metadata.operands[3].imm; + else + m = operands[2].getAsVector(); + + const uint16_t partition_num = VL_bits / (sizeof(T) * 8); + T out[256 / sizeof(T)] = {0}; + + for (int i = 0; i < partition_num; i++) { + uint64_t shifted_active = 1ull << ((i % (64 / sizeof(T))) * sizeof(T)); + if (p[i / (64 / sizeof(T))] & shifted_active) { + out[i] = n[i] * (useImm ? imm : m[i]); + } else + out[i] = n[i]; + } + return {out, 256}; +} + +/** Helper function for SVE instructions with the format `mulh zdn, pg/m, zdn, + * zm`. + * T represents the type of operands (e.g. for zn.s, T = int32_t). + * TT represents the type twice the length of T (e.g. for T = int8_t, TT = + * int16_T). + * Returns correctly formatted RegisterValue. */ +// TODO : Support for int64_t mulh operations. +template +RegisterValue sveMulhPredicated(std::vector& operands, + const uint16_t VL_bits) { + const uint64_t* p = operands[0].getAsVector(); + const T* n = operands[1].getAsVector(); + const T* m = operands[2].getAsVector(); + + const uint16_t partition_num = VL_bits / (sizeof(T) * 8); + T out[256 / sizeof(T)] = {0}; + + for (int i = 0; i < partition_num; i++) { + uint64_t shifted_active = 1ull << ((i % (64 / sizeof(T))) * sizeof(T)); + if (p[i / (64 / sizeof(T))] & shifted_active) { + bool isNeg = false; + T a = n[i]; + T b = m[i]; + if (a < 0) { + isNeg = !isNeg; + a = 0 - a; } - } - return dn + count; - } - - /** Helper function for SVE instructions with the format `index zd, <#imm, - * rn>, <#imm, rm>`. - * D represents the vector register type (e.g. zd.b would be int8_t). - * N represents the GPR type (e.g. 
for xn, xm, D = int64). - * Returns correctly formatted RegisterValue. */ - template - static RegisterValue sveIndex( - std::vector& operands, - const simeng::arch::aarch64::InstructionMetadata& metadata, - const uint16_t VL_bits, bool op1isImm, bool op2isImm) { - const int op2Index = op1isImm ? 0 : 1; - const auto n = op1isImm ? static_cast(metadata.operands[1].imm) - : static_cast(operands[0].get()); - const auto m = op2isImm ? static_cast(metadata.operands[2].imm) - : static_cast(operands[op2Index].get()); - - const uint16_t partition_num = VL_bits / (sizeof(D) * 8); - D out[256 / sizeof(D)] = {0}; - - for (int i = 0; i < partition_num; i++) { - out[i] = static_cast(n + (i * m)); - } - return {out, 256}; - } - - /** Helper function for SVE instructions with the format ` - * pd, pg/z, pn, pm`. - * T represents the type of operands (e.g. for pn.d, T = uint64_t). - * Returns correctly formatted RegisterValue. */ - template - static std::array sveLogicOp_preds( - std::vector& operands, const uint16_t VL_bits, - std::function func) { - const uint64_t* p = operands[0].getAsVector(); - const uint64_t* n = operands[1].getAsVector(); - const uint64_t* m = operands[2].getAsVector(); - - const uint16_t partition_num = VL_bits / (sizeof(T) * 8); - std::array out = {0}; - - for (int i = 0; i < partition_num; i++) { - uint64_t shifted_active = 1ull << ((i % (64 / sizeof(T))) * sizeof(T)); - if (p[i / (64 / sizeof(T))] & shifted_active) { - out[i / (64 / sizeof(T))] |= - (func(n[i / (64 / sizeof(T))], m[i / (64 / sizeof(T))]) & - shifted_active); + if (b < 0) { + isNeg = !isNeg; + b = 0 - b; } - } - return out; - } - - /** Helper function for SVE instructions with the format ` - * zd, pg/m, zn, zm`. - * T represents the type of operands (e.g. for zn.d, T = uint64_t). - * Returns correctly formatted RegisterValue. 
*/ - template - static RegisterValue sveLogicOpPredicated_3vecs( - std::vector& operands, const uint16_t VL_bits, - std::function func) { - const uint64_t* p = operands[0].getAsVector(); - const T* dn = operands[1].getAsVector(); - const T* m = operands[2].getAsVector(); - - const uint16_t partition_num = VL_bits / (sizeof(T) * 8); - T out[256 / sizeof(T)] = {0}; - for (int i = 0; i < partition_num; i++) { + TT tmp = (static_cast(a) * static_cast(b)); + if (isNeg) tmp = 0 - tmp; + + out[i] = static_cast(tmp >> (sizeof(T) * 8)); + } else + out[i] = n[i]; + } + return {out, 256}; +} + +/** Helper function for SVE instructions with the format `orr zd, zn, + * zm`. + * T represents the type of operands (e.g. for zn.d, T = uint64_t). + * Returns correctly formatted RegisterValue. */ +template +RegisterValue sveOrr_3vecs(std::vector& operands, + const uint16_t VL_bits) { + const T* n = operands[0].getAsVector(); + const T* m = operands[1].getAsVector(); + + const uint16_t partition_num = VL_bits / (sizeof(T) * 8); + T out[256 / sizeof(T)] = {0}; + + for (int i = 0; i < partition_num; i++) { + out[i] = n[i] | m[i]; + } + return {out, 256}; +} + +/** Helper function for SVE2 instructions with the format `psel pd, pn, + * pm.t[wa, #imm]`. + * T represents the type of operands (e.g. for pm.d, T = + * uint64_t). Returns an array of 4 uint64_t elements. 
*/ +template +std::array svePsel( + std::vector& operands, + const simeng::arch::aarch64::InstructionMetadata& metadata, + const uint16_t VL_bits) { + const uint64_t* pn = operands[0].getAsVector(); + const uint64_t* pm = operands[1].getAsVector(); + const uint32_t wa = operands[2].get(); + const uint32_t imm = metadata.operands[2].sme_index.disp; + + const uint16_t partition_num = VL_bits / (sizeof(T) * 8); + + uint32_t index = (wa + imm) % partition_num; + uint64_t shifted_active = 1ull << ((index % (64 / sizeof(T))) * sizeof(T)); + + std::array out = {0, 0, 0, 0}; + if (pm[index / (64 / sizeof(T))] & shifted_active) { + out = {pn[0], pn[1], pn[2], pn[3]}; + } + + return out; +} + +/** Helper function for SVE instructions with the format `ptrue pd{, + * pattern}. + * T represents the type of operands (e.g. for pd.d, T = uint64_t). + * Returns an array of 4 uint64_t elements. */ +template +std::array svePtrue( + const simeng::arch::aarch64::InstructionMetadata& metadata, + const uint16_t VL_bits) { + const uint16_t partition_num = VL_bits / (sizeof(T) * 8); + std::array out = {0, 0, 0, 0}; + + // Get pattern + const uint16_t count = + sveGetPattern(metadata.operandStr, sizeof(T) * 8, VL_bits); + // Exit early if count == 0 + if (count == 0) return out; + + for (int i = 0; i < partition_num; i++) { + if (i < count) { uint64_t shifted_active = 1ull << ((i % (64 / sizeof(T))) * sizeof(T)); - if (p[i / (64 / sizeof(T))] & shifted_active) - out[i] = func(dn[i], m[i]); - else - out[i] = dn[i]; + out[i / (64 / sizeof(T))] |= shifted_active; } - return {out, 256}; - } - - /** Helper function for SVE instructions with the format ` - * zd, zn, zm`. - * T represents the type of operands (e.g. for zn.d, T = uint64_t). - * Returns correctly formatted RegisterValue. 
*/ - template - static RegisterValue sveLogicOpUnPredicated_3vecs( - std::vector& operands, const uint16_t VL_bits, - std::function func) { - const T* n = operands[0].getAsVector(); - const T* m = operands[1].getAsVector(); - - const uint16_t partition_num = VL_bits / (sizeof(T) * 8); - T out[256 / sizeof(T)] = {0}; - for (int i = 0; i < partition_num; i++) { - out[i] = func(n[i], m[i]); - } - return {out, 256}; } + return out; +} - /** Helper function for SVE instructions with the format `lsl sz, zn, #imm`. - * T represents the type of operands (e.g. for zn.d, T = uint64_t). - * Returns correctly formatted RegisterValue. */ - template - static RegisterValue sveLsl_imm( - std::vector& operands, - const simeng::arch::aarch64::InstructionMetadata& metadata, - const uint16_t VL_bits) { - const T* n = operands[0].getAsVector(); - const T imm = static_cast(metadata.operands[2].imm); +/** Helper function for SVE instructions with the format `punpk pd.h, + * pn.b`. + * If `isHI` = false, then PUNPKLO is performed. + * Returns an array of 4 uint64_t elements. */ +std::array svePunpk(std::vector& operands, + const uint16_t VL_bits, bool isHi) { + const uint64_t* n = operands[0].getAsVector(); - const uint16_t partition_num = VL_bits / (sizeof(T) * 8); - typename std::make_signed::type out[256 / sizeof(T)] = {0}; + const uint16_t partition_num = VL_bits / 8; + std::array out = {0, 0, 0, 0}; + uint16_t index = isHi ? (partition_num / 2) : 0; - for (int i = 0; i < partition_num; i++) { - out[i] = (n[i] << imm); + for (int i = 0; i < partition_num / 2; i++) { + if (n[index / 64] & 1ull << index % 64) { + out[i / 32] |= 1ull << ((i * 2) % 64); } - return {out, 256}; + index++; } + return out; +} - /** Helper function for SVE instructions with the format `max zdn, zdn, - * #imm`. - * T represents the type of operands (e.g. for zdn.d, T = uint64_t). - * Returns correctly formatted RegisterValue. 
*/ - template - static RegisterValue sveMax_vecImm( - std::vector& operands, - const simeng::arch::aarch64::InstructionMetadata& metadata, - const uint16_t VL_bits) { - const T* n = operands[0].getAsVector(); - T imm = static_cast(metadata.operands[2].imm); - - const uint16_t partition_num = VL_bits / (sizeof(T) * 8); - T out[256 / sizeof(T)] = {0}; - - for (int i = 0; i < partition_num; i++) { - out[i] = std::max(n[i], imm); - } - return {out, 256}; - } - - /** Helper function for SVE instructions with the format `max zdn, zdn, - * #imm`. - * T represents the type of operands (e.g. for zdn.d, T = uint64_t). - * Returns correctly formatted RegisterValue. */ - template - static RegisterValue sveMaxPredicated_vecs( - std::vector& operands, const uint16_t VL_bits) { - const T* d = operands[0].getAsVector(); - const uint64_t* p = operands[1].getAsVector(); - const T* n = operands[2].getAsVector(); - const T* m = operands[3].getAsVector(); - - const uint16_t partition_num = VL_bits / (sizeof(T) * 8); - T out[256 / sizeof(T)] = {0}; - - for (int i = 0; i < partition_num; i++) { - uint64_t shifted_active = 1ull << ((i % (64 / sizeof(T))) * sizeof(T)); - if (p[i / (64 / sizeof(T))] & shifted_active) { - out[i] = std::max(n[i], m[i]); - } else - out[i] = d[i]; - } - return {out, 256}; - } - - /** Helper function for SVE instructions with the format `fmla zd, pg/m, zn, - * zm`. - * T represents the type of operands (e.g. for zn.d, T = uint64_t). - * Returns correctly formatted RegisterValue. 
*/ - template - static RegisterValue sveMlaPredicated_vecs( - std::vector& operands, const uint16_t VL_bits) { - const T* d = operands[0].getAsVector(); - const uint64_t* p = operands[1].getAsVector(); - const T* n = operands[2].getAsVector(); - const T* m = operands[3].getAsVector(); - - const uint16_t partition_num = VL_bits / (sizeof(T) * 8); - T out[256 / sizeof(T)] = {0}; - for (int i = 0; i < partition_num; i++) { - uint64_t shifted_active = 1ull << ((i % (64 / sizeof(T))) * sizeof(T)); - if (p[i / (64 / sizeof(T))] & shifted_active) - out[i] = d[i] + (n[i] * m[i]); - else - out[i] = d[i]; - } - return {out, 256}; - } - - /** Helper function for SVE instructions with the format `fmla zda, zn, - * zm[index]`. - * T represents the type of operands (e.g. for zn.d, T = uint64_t). - * Returns correctly formatted RegisterValue. */ - template - static RegisterValue sveMlaIndexed_vecs( - std::vector& operands, - const simeng::arch::aarch64::InstructionMetadata& metadata, - const uint16_t VL_bits) { - const T* d = operands[0].getAsVector(); - const T* n = operands[1].getAsVector(); - const T* m = operands[2].getAsVector(); - const size_t index = static_cast(metadata.operands[2].vector_index); - - const uint16_t elemsPer128 = 128 / (sizeof(T) * 8); - const uint16_t partition_num = VL_bits / (sizeof(T) * 8); - T out[256 / sizeof(T)] = {0}; - - for (size_t i = 0; i < partition_num; i += elemsPer128) { - const T zm_elem = m[i + index]; - for (size_t j = 0; j < elemsPer128; j++) { - out[i + j] = d[i + j] + (n[i + j] * zm_elem); - } - } - - return {out, 256}; - } - - /** Helper function for SVE instructions with the format `movprfx zd, - * pg/z, zn`. - * T represents the type of operands (e.g. for zd.d, T = uint64_t). - * Returns correctly formatted RegisterValue. 
*/ - template - static RegisterValue sveMovprfxPredicated_destToZero( - std::vector& operands, const uint16_t VL_bits) { - // TODO: Adopt hint logic of the MOVPRFX instruction - const uint64_t* p = operands[0].getAsVector(); - const T* n = operands[1].getAsVector(); +/** Helper function for SVE instructions with the format `rev pd, pn`. + * T represents the type of operands (e.g. for pd.d, T = uint64_t). + * Returns an array of 4 uint64_t elements. */ +template +std::array sveRev_predicates(std::vector& operands, + const uint16_t VL_bits) { + const uint64_t* n = operands[0].getAsVector(); + + const uint16_t partition_num = VL_bits / (sizeof(T) * 8); + std::array out = {0, 0, 0, 0}; + uint16_t index = partition_num - 1; + + for (int i = 0; i < partition_num; i++) { + uint64_t rev_shifted_active = 1ull + << ((index % (64 / sizeof(T))) * sizeof(T)); + uint64_t shifted_active = 1ull << ((i % (64 / sizeof(T))) * sizeof(T)); + out[index / (64 / (sizeof(T)))] |= + ((n[i / (64 / (sizeof(T)))] & shifted_active) == shifted_active) + ? rev_shifted_active + : 0; + index--; + } + return out; +} + +/** Helper function for SVE instructions with the format `rev zd, zn`. + * T represents the type of operands (e.g. for zn.d, T = uint64_t). + * Returns correctly formatted RegisterValue. 
*/ +template +RegisterValue sveRev_vecs(std::vector& operands, + const uint16_t VL_bits) { + const T* n = operands[0].getAsVector(); - const uint16_t partition_num = VL_bits / (sizeof(T) * 8); - T out[256 / sizeof(T)] = {0}; + const uint16_t partition_num = VL_bits / (sizeof(T) * 8); + T out[256 / sizeof(T)] = {0}; + uint16_t index = partition_num - 1; - for (int i = 0; i < partition_num; i++) { - uint64_t shifted_active = 1ull << ((i % (64 / sizeof(T))) * sizeof(T)); - if (p[i / (64 / sizeof(T))] & shifted_active) { - out[i] = n[i]; - } else { - out[i] = 0; - } - } - return {out, 256}; + for (int i = 0; i < partition_num; i++) { + out[i] = n[index]; + index--; } + return {out, 256}; +} - /** Helper function for SVE instructions with the format `movprfx zd, - * pg/m, zn`. - * T represents the type of operands (e.g. for zn.d, T = uint64_t). - * Returns correctly formatted RegisterValue. */ - template - static RegisterValue sveMovprfxPredicated_destUnchanged( - std::vector& operands, const uint16_t VL_bits) { - // TODO: Adopt hint logic of the MOVPRFX instruction - const T* d = operands[0].getAsVector(); - const uint64_t* p = operands[1].getAsVector(); - const T* n = operands[2].getAsVector(); - - const uint16_t partition_num = VL_bits / (sizeof(T) * 8); - T out[256 / sizeof(T)] = {0}; - - for (int i = 0; i < partition_num; i++) { - uint64_t shifted_active = 1ull << ((i % (64 / sizeof(T))) * sizeof(T)); - if (p[i / (64 / sizeof(T))] & shifted_active) { - out[i] = n[i]; - } else { - out[i] = d[i]; - } - } - return {out, 256}; - } - - /** Helper function for SVE instructions with the format `mul zdn, pg/m, zdn, - * `. - * T represents the type of operands (e.g. for zn.d, T = uint64_t). - * Returns correctly formatted RegisterValue. 
*/ - template - static RegisterValue sveMulPredicated( - std::vector& operands, - const simeng::arch::aarch64::InstructionMetadata& metadata, - const uint16_t VL_bits, bool useImm) { - bool isFP = std::is_floating_point::value; - const uint64_t* p = operands[0].getAsVector(); - const T* n = operands[1].getAsVector(); - const T* m; - T imm; - if (useImm) - imm = isFP ? metadata.operands[3].fp : metadata.operands[3].imm; +/** Helper function for SVE instructions with the format `sel zd, pg, zn, + * zm`. + * T represents the type of operands (e.g. for zn.d, T = uint64_t). + * Returns correctly formatted RegisterValue. */ +template +RegisterValue sveSel_zpzz(std::vector& operands, + const uint16_t VL_bits) { + const uint64_t* p = operands[0].getAsVector(); + const T* n = operands[1].getAsVector(); + const T* m = operands[2].getAsVector(); + + const uint16_t partition_num = VL_bits / (sizeof(T) * 8); + T out[256 / sizeof(T)] = {0}; + for (int i = 0; i < partition_num; i++) { + uint64_t shifted_active = 1ull << ((i % (64 / sizeof(T))) * sizeof(T)); + if (p[i / (64 / sizeof(T))] & shifted_active) + out[i] = n[i]; else - m = operands[2].getAsVector(); - - const uint16_t partition_num = VL_bits / (sizeof(T) * 8); - T out[256 / sizeof(T)] = {0}; - - for (int i = 0; i < partition_num; i++) { - uint64_t shifted_active = 1ull << ((i % (64 / sizeof(T))) * sizeof(T)); - if (p[i / (64 / sizeof(T))] & shifted_active) { - out[i] = n[i] * (useImm ? imm : m[i]); - } else - out[i] = n[i]; - } - return {out, 256}; - } - - /** Helper function for SVE instructions with the format `mulh zdn, pg/m, zdn, - * zm`. - * T represents the type of operands (e.g. for zn.s, T = int32_t). - * TT represents the type twice the length of T (e.g. for T = int8_t, TT = - * int16_T). - * Returns correctly formatted RegisterValue. */ - // TODO : Support for int64_t mulh operations. 
- template - static RegisterValue sveMulhPredicated(std::vector& operands, - const uint16_t VL_bits) { - const uint64_t* p = operands[0].getAsVector(); - const T* n = operands[1].getAsVector(); - const T* m = operands[2].getAsVector(); - - const uint16_t partition_num = VL_bits / (sizeof(T) * 8); - T out[256 / sizeof(T)] = {0}; - - for (int i = 0; i < partition_num; i++) { - uint64_t shifted_active = 1ull << ((i % (64 / sizeof(T))) * sizeof(T)); - if (p[i / (64 / sizeof(T))] & shifted_active) { - bool isNeg = false; - T a = n[i]; - T b = m[i]; - if (a < 0) { - isNeg = !isNeg; - a = 0 - a; - } - if (b < 0) { - isNeg = !isNeg; - b = 0 - b; - } - TT tmp = (static_cast(a) * static_cast(b)); - if (isNeg) tmp = 0 - tmp; - - out[i] = static_cast(tmp >> (sizeof(T) * 8)); - } else - out[i] = n[i]; - } - return {out, 256}; - } - - /** Helper function for SVE instructions with the format `orr zd, zn, - * zm`. - * T represents the type of operands (e.g. for zn.d, T = uint64_t). - * Returns correctly formatted RegisterValue. */ - template - static RegisterValue sveOrr_3vecs(std::vector& operands, - const uint16_t VL_bits) { - const T* n = operands[0].getAsVector(); - const T* m = operands[1].getAsVector(); - - const uint16_t partition_num = VL_bits / (sizeof(T) * 8); - T out[256 / sizeof(T)] = {0}; - - for (int i = 0; i < partition_num; i++) { - out[i] = n[i] | m[i]; - } - return {out, 256}; - } - - /** Helper function for SVE2 instructions with the format `psel pd, pn, - * pm.t[wa, #imm]`. - * T represents the type of operands (e.g. for pm.d, T = - * uint64_t). Returns an array of 4 uint64_t elements. 
*/ - template - static std::array svePsel( - std::vector& operands, - const simeng::arch::aarch64::InstructionMetadata& metadata, - const uint16_t VL_bits) { - const uint64_t* pn = operands[0].getAsVector(); - const uint64_t* pm = operands[1].getAsVector(); - const uint32_t wa = operands[2].get(); - const uint32_t imm = metadata.operands[2].sme_index.disp; - - const uint16_t partition_num = VL_bits / (sizeof(T) * 8); - - uint32_t index = (wa + imm) % partition_num; + out[i] = m[i]; + } + return {out, 256}; +} + +/** Helper function for SVE instructions with the format `sminv rd, pg, zn`. + * T represents the type of operands (e.g. for zn.d, T = uint64_t). + * Returns correctly formatted RegisterValue. */ +template +RegisterValue sveSminv(std::vector& operands, + const uint16_t VL_bits) { + const uint64_t* p = operands[0].getAsVector(); + const T* n = operands[1].getAsVector(); + + const uint16_t partition_num = VL_bits / (sizeof(T) * 8); + T out = std::numeric_limits::max(); + + for (int i = 0; i < partition_num; i++) { + uint64_t shifted_active = 1ull << ((i % (64 / sizeof(T))) * sizeof(T)); + if (p[i / (64 / sizeof(T))] & shifted_active) out = std::min(out, n[i]); + } + return {out, 256}; +} + +/** Helper function for SVE instructions with the format `Sub zd, zn, + * zm`. + * T represents the type of operands (e.g. for zn.d, T = uint64_t). + * Returns correctly formatted RegisterValue. */ +template +RegisterValue sveSub_3vecs(std::vector& operands, + const uint16_t VL_bits) { + const T* n = operands[0].getAsVector(); + const T* m = operands[1].getAsVector(); + + const uint16_t partition_num = VL_bits / (sizeof(T) * 8); + T out[256 / sizeof(T)] = {0}; + + for (int i = 0; i < partition_num; i++) { + out[i] = n[i] - m[i]; + } + return {out, 256}; +} + +/** Helper function for SVE instructions with the format `Sub zdn, pg/m, zdn, + * zm`. + * T represents the type of operands (e.g. for zn.d, T = uint64_t). + * Returns correctly formatted RegisterValue. 
*/ +template +RegisterValue sveSubrPredicated_3vecs(std::vector& operands, + const uint16_t VL_bits) { + const uint64_t* p = operands[0].getAsVector(); + const T* dn = operands[1].getAsVector(); + const T* m = operands[2].getAsVector(); + + const uint16_t partition_num = VL_bits / (sizeof(T) * 8); + T out[256 / sizeof(T)] = {0}; + + for (int i = 0; i < partition_num; i++) { + uint64_t shifted_active = 1ull << ((i % (64 / sizeof(T))) * sizeof(T)); + if (p[i / (64 / sizeof(T))] & shifted_active) { + out[i] = m[i] - dn[i]; + } else { + out[i] = dn[i]; + } + } + return {out, 256}; +} + +/** Helper function for SVE instructions with the format `Sub zdn, pg/m, zdn, + * #imm`. + * T represents the type of operands (e.g. for zdn.d, T = uint64_t). + * Returns correctly formatted RegisterValue. */ +template +RegisterValue sveSubPredicated_imm( + std::vector& operands, + const simeng::arch::aarch64::InstructionMetadata& metadata, + const uint16_t VL_bits) { + bool isFP = std::is_floating_point::value; + const uint64_t* p = operands[0].getAsVector(); + const T* dn = operands[1].getAsVector(); + const auto imm = isFP ? metadata.operands[3].fp : metadata.operands[3].imm; + + const uint16_t partition_num = VL_bits / (sizeof(T) * 8); + T out[256 / sizeof(T)] = {0}; + + for (int i = 0; i < partition_num; i++) { + uint64_t shifted_active = 1ull << ((i % (64 / sizeof(T))) * sizeof(T)); + if (p[i / (64 / sizeof(T))] & shifted_active) { + out[i] = dn[i] - imm; + } else { + out[i] = dn[i]; + } + } + return {out, 256}; +} + +/** Helper function for SVE instructions with the format `sxt zd, pg, + * zn`. + * T represents the type of vector registers (e.g. for zd.d, T = int64_t). + * C represents the type of the cast required - is linked to instruction + * variant used (i.e. sxtw requires int32_t). + * Returns correctly formatted RegisterValue. 
*/ +template +RegisterValue sveSxtPredicated(std::vector& operands, + const uint16_t VL_bits) { + const T* d = operands[0].getAsVector(); + const uint64_t* p = operands[1].getAsVector(); + const T* n = operands[2].getAsVector(); + + const uint16_t partition_num = VL_bits / (sizeof(T) * 8); + T out[256 / sizeof(T)] = {0}; + for (int i = 0; i < partition_num; i++) { + uint64_t shifted_active = 1ull << ((i % (64 / sizeof(T))) * sizeof(T)); + if (p[i / (64 / sizeof(T))] & shifted_active) { + // Cast to C to get 'least significant sub-element' + // Then cast back to T to sign-extend this 'sub-element' + out[i] = static_cast(static_cast(n[i])); + } else { + out[i] = d[i]; + } + } + return {out, 256}; +} + +/** Helper function for SVE instructions with the format `trn1 zd, zn, zm`. + * T represents the type of operands (e.g. for zn.d, T = uint64_t). + * Returns correctly formatted RegisterValue. */ +template +RegisterValue sveTrn1_3vecs(std::vector& operands, + const uint16_t VL_bits) { + const T* n = operands[0].getAsVector(); + const T* m = operands[1].getAsVector(); + + const uint16_t partition_num = VL_bits / (sizeof(T) * 8); + T out[256 / sizeof(T)] = {0}; + + for (int i = 0; i < (partition_num / 2); i++) { + out[2 * i] = n[(2 * i)]; + out[(2 * i) + 1] = m[(2 * i)]; + } + return {out, 256}; +} + +/** Helper function for SVE instructions with the format `trn2 zd, zn, zm`. + * T represents the type of operands (e.g. for zn.d, T = uint64_t). + * Returns correctly formatted RegisterValue. 
*/ +template +RegisterValue sveTrn2_3vecs(std::vector& operands, + const uint16_t VL_bits) { + const T* n = operands[0].getAsVector(); + const T* m = operands[1].getAsVector(); + + const uint16_t partition_num = VL_bits / (sizeof(T) * 8); + T out[256 / sizeof(T)] = {0}; + + for (int i = 0; i < (partition_num / 2); i++) { + out[2 * i] = n[(2 * i) + 1]; + out[(2 * i) + 1] = m[(2 * i) + 1]; + } + return {out, 256}; +} + +/** Helper function for SVE instructions with the format `unpk>hi,lo> zd, + * zn`. + * D represents the type of the destination register (e.g. int32_t for + * zd.s). + * N represents the type of the source register (e.g. int8_t for zn.b). + * Returns correctly formatted RegisterValue. */ +template +RegisterValue sveUnpk_vecs(std::vector& operands, + const uint16_t VL_bits, bool isHi) { + const N* n = operands[0].getAsVector(); + + const uint16_t partition_num = VL_bits / (sizeof(D) * 8); + D out[256 / sizeof(D)] = {0}; + + for (int i = 0; i < partition_num; i++) { + int index = isHi ? (partition_num + i) : i; + out[i] = static_cast(n[index]); + } + return {out, 256}; +} + +/** Helper function for SVE instructions with the format `uqdec + * d{, pattern{, MUL #imm}}`. + * D represents the type of dest. register(e.g. uint32_t for wd). + * N represents the type of the operation (e.g. for UQDECH, N = 16u). + * Returns single value of type uint64_t. */ +template +uint64_t sveUqdec(std::vector& operands, + const simeng::arch::aarch64::InstructionMetadata& metadata, + const uint16_t VL_bits) { + const D d = operands[0].get(); + const uint8_t imm = metadata.operands[1].imm; + const uint16_t count = sveGetPattern(metadata.operandStr, N, VL_bits); + + // The range of possible values does not fit in the range of any integral + // type, so a double is used as an intermediate value. The end result must + // be saturated to fit in uint64_t. 
+ auto intermediate = double(d) - (imm * count); + if (intermediate < 0) { + return (uint64_t)0; + } + return (uint64_t)(d - (imm * count)); +} + +/** Helper function for SVE instructions with the format `uzp<1,2> zd, zn, + * zm`. + * T represents the type of operands (e.g. for zn.d, T = uint64_t). + * Returns correctly formatted RegisterValue. */ +template +RegisterValue sveUzp_vecs(std::vector& operands, + const uint16_t VL_bits, bool isUzp1) { + const T* n = operands[0].getAsVector(); + const T* m = operands[1].getAsVector(); + + const uint16_t partition_num = VL_bits / (sizeof(T) * 8); + T out[256 / sizeof(T)] = {0}; + + for (int i = 0; i < partition_num / 2; i++) { + // UZP1 concatenates even elements. UZP2 concatenates odd. + int index = isUzp1 ? (2 * i) : (2 * i) + 1; + out[i] = n[index]; + } + for (int i = 0; i < partition_num / 2; i++) { + int index = isUzp1 ? (2 * i) : (2 * i) + 1; + out[partition_num / 2 + i] = m[index]; + } + return {out, 256}; +} + +/** Helper function for SVE instructions with the format `whilelo pd, + * n, m`. + * T represents the type of operands n and m (e.g. for wn, T = uint32_t). + * P represents the type of operand p (e.g. for pd.b, P = uint8_t). + * Returns tuple of type [pred results (array of 4 uint64_t), nzcv]. */ +template +std::tuple, uint8_t> sveWhilelo( + std::vector& operands, const uint16_t VL_bits, + bool calcNZCV) { + const T n = operands[0].get(); + const T m = operands[1].get(); + + const uint16_t partition_num = VL_bits / (sizeof(P) * 8); + std::array out = {0, 0, 0, 0}; + uint16_t index = 0; + + for (int i = 0; i < partition_num; i++) { + // Determine whether lane should be active and shift to align with + // element in predicate register. + uint64_t shifted_active = + (n + i) < m ? 
1ull << ((i % (64 / (sizeof(P))) * (sizeof(P)))) : 0; + out[index / (64 / (sizeof(P)))] = + out[index / (64 / (sizeof(P)))] | shifted_active; + index++; + } + // Byte count = sizeof(P) as destination predicate is predicate of P + // bytes. + uint8_t nzcv = calcNZCV ? getNZCVfromPred(out, VL_bits, sizeof(P)) : 0; + return {out, nzcv}; +} + +/** Helper function for SVE instructions with the format `zip<1,2> pd, pn, + * pm`. + * T represents the type of operands (e.g. for pn.d, T = uint64_t). + * Returns an array of 4 uint64_t elements. */ +template +std::array sveZip_preds(std::vector& operands, + const uint16_t VL_bits, bool isZip2) { + const uint64_t* n = operands[0].getAsVector(); + const uint64_t* m = operands[1].getAsVector(); + + const uint16_t partition_num = VL_bits / (sizeof(T) * 8); + std::array out = {0, 0, 0, 0}; + + bool interleave = false; + int index = isZip2 ? (partition_num / 2) : 0; + for (int i = 0; i < partition_num; i++) { uint64_t shifted_active = 1ull << ((index % (64 / sizeof(T))) * sizeof(T)); - - std::array out = {0, 0, 0, 0}; - if (pm[index / (64 / sizeof(T))] & shifted_active) { - out = {pn[0], pn[1], pn[2], pn[3]}; - } - - return out; - } - - /** Helper function for SVE instructions with the format `ptrue pd{, - * pattern}. - * T represents the type of operands (e.g. for pd.d, T = uint64_t). - * Returns an array of 4 uint64_t elements. 
*/ - template - static std::array svePtrue( - const simeng::arch::aarch64::InstructionMetadata& metadata, - const uint16_t VL_bits) { - const uint16_t partition_num = VL_bits / (sizeof(T) * 8); - std::array out = {0, 0, 0, 0}; - - // Get pattern - const uint16_t count = - AuxFunc::sveGetPattern(metadata.operandStr, sizeof(T) * 8, VL_bits); - // Exit early if count == 0 - if (count == 0) return out; - - for (int i = 0; i < partition_num; i++) { - if (i < count) { - uint64_t shifted_active = 1ull << ((i % (64 / sizeof(T))) * sizeof(T)); - out[i / (64 / sizeof(T))] |= shifted_active; - } - } - return out; - } - - /** Helper function for SVE instructions with the format `punpk pd.h, - * pn.b`. - * If `isHI` = false, then PUNPKLO is performed. - * Returns an array of 4 uint64_t elements. */ - static std::array svePunpk(std::vector& operands, - const uint16_t VL_bits, bool isHi) { - const uint64_t* n = operands[0].getAsVector(); - - const uint16_t partition_num = VL_bits / 8; - std::array out = {0, 0, 0, 0}; - uint16_t index = isHi ? (partition_num / 2) : 0; - - for (int i = 0; i < partition_num / 2; i++) { - if (n[index / 64] & 1ull << index % 64) { - out[i / 32] |= 1ull << ((i * 2) % 64); - } + if (interleave) { + out[i / (64 / sizeof(T))] |= + ((m[index / (64 / sizeof(T))] & shifted_active) == shifted_active) + ? static_cast(1ull + << ((i % (64 / sizeof(T))) * sizeof(T))) + : 0; index++; - } - return out; - } - - /** Helper function for SVE instructions with the format `rev pd, pn`. - * T represents the type of operands (e.g. for pd.d, T = uint64_t). - * Returns an array of 4 uint64_t elements. 
*/ - template - static std::array sveRev_predicates( - std::vector& operands, const uint16_t VL_bits) { - const uint64_t* n = operands[0].getAsVector(); - - const uint16_t partition_num = VL_bits / (sizeof(T) * 8); - std::array out = {0, 0, 0, 0}; - uint16_t index = partition_num - 1; - - for (int i = 0; i < partition_num; i++) { - uint64_t rev_shifted_active = 1ull - << ((index % (64 / sizeof(T))) * sizeof(T)); - uint64_t shifted_active = 1ull << ((i % (64 / sizeof(T))) * sizeof(T)); - out[index / (64 / (sizeof(T)))] |= - ((n[i / (64 / (sizeof(T)))] & shifted_active) == shifted_active) - ? rev_shifted_active + } else { + out[i / (64 / sizeof(T))] |= + ((n[index / (64 / sizeof(T))] & shifted_active) == shifted_active) + ? static_cast(1ull + << ((i % (64 / sizeof(T))) * sizeof(T))) : 0; - index--; - } - return out; - } - - /** Helper function for SVE instructions with the format `rev zd, zn`. - * T represents the type of operands (e.g. for zn.d, T = uint64_t). - * Returns correctly formatted RegisterValue. */ - template - static RegisterValue sveRev_vecs(std::vector& operands, - const uint16_t VL_bits) { - const T* n = operands[0].getAsVector(); - - const uint16_t partition_num = VL_bits / (sizeof(T) * 8); - T out[256 / sizeof(T)] = {0}; - uint16_t index = partition_num - 1; - - for (int i = 0; i < partition_num; i++) { - out[i] = n[index]; - index--; } - return {out, 256}; - } - - /** Helper function for SVE instructions with the format `sel zd, pg, zn, - * zm`. - * T represents the type of operands (e.g. for zn.d, T = uint64_t). - * Returns correctly formatted RegisterValue. 
*/ - template - static RegisterValue sveSel_zpzz(std::vector& operands, - const uint16_t VL_bits) { - const uint64_t* p = operands[0].getAsVector(); - const T* n = operands[1].getAsVector(); - const T* m = operands[2].getAsVector(); - - const uint16_t partition_num = VL_bits / (sizeof(T) * 8); - T out[256 / sizeof(T)] = {0}; - for (int i = 0; i < partition_num; i++) { - uint64_t shifted_active = 1ull << ((i % (64 / sizeof(T))) * sizeof(T)); - if (p[i / (64 / sizeof(T))] & shifted_active) - out[i] = n[i]; - else - out[i] = m[i]; - } - return {out, 256}; - } - - /** Helper function for SVE instructions with the format `sminv rd, pg, zn`. - * T represents the type of operands (e.g. for zn.d, T = uint64_t). - * Returns correctly formatted RegisterValue. */ - template - static RegisterValue sveSminv(std::vector& operands, - const uint16_t VL_bits) { - const uint64_t* p = operands[0].getAsVector(); - const T* n = operands[1].getAsVector(); - - const uint16_t partition_num = VL_bits / (sizeof(T) * 8); - T out = std::numeric_limits::max(); - - for (int i = 0; i < partition_num; i++) { - uint64_t shifted_active = 1ull << ((i % (64 / sizeof(T))) * sizeof(T)); - if (p[i / (64 / sizeof(T))] & shifted_active) out = std::min(out, n[i]); - } - return {out, 256}; - } - - /** Helper function for SVE instructions with the format `Sub zd, zn, - * zm`. - * T represents the type of operands (e.g. for zn.d, T = uint64_t). - * Returns correctly formatted RegisterValue. */ - template - static RegisterValue sveSub_3vecs(std::vector& operands, - const uint16_t VL_bits) { - const T* n = operands[0].getAsVector(); - const T* m = operands[1].getAsVector(); - - const uint16_t partition_num = VL_bits / (sizeof(T) * 8); - T out[256 / sizeof(T)] = {0}; - - for (int i = 0; i < partition_num; i++) { - out[i] = n[i] - m[i]; - } - return {out, 256}; - } - - /** Helper function for SVE instructions with the format `Sub zdn, pg/m, zdn, - * zm`. - * T represents the type of operands (e.g. 
for zn.d, T = uint64_t). - * Returns correctly formatted RegisterValue. */ - template - static RegisterValue sveSubrPredicated_3vecs( - std::vector& operands, const uint16_t VL_bits) { - const uint64_t* p = operands[0].getAsVector(); - const T* dn = operands[1].getAsVector(); - const T* m = operands[2].getAsVector(); - - const uint16_t partition_num = VL_bits / (sizeof(T) * 8); - T out[256 / sizeof(T)] = {0}; - - for (int i = 0; i < partition_num; i++) { - uint64_t shifted_active = 1ull << ((i % (64 / sizeof(T))) * sizeof(T)); - if (p[i / (64 / sizeof(T))] & shifted_active) { - out[i] = m[i] - dn[i]; - } else { - out[i] = dn[i]; - } - } - return {out, 256}; - } - - /** Helper function for SVE instructions with the format `Sub zdn, pg/m, zdn, - * #imm`. - * T represents the type of operands (e.g. for zdn.d, T = uint64_t). - * Returns correctly formatted RegisterValue. */ - template - static RegisterValue sveSubPredicated_imm( - std::vector& operands, - const simeng::arch::aarch64::InstructionMetadata& metadata, - const uint16_t VL_bits) { - bool isFP = std::is_floating_point::value; - const uint64_t* p = operands[0].getAsVector(); - const T* dn = operands[1].getAsVector(); - const auto imm = isFP ? metadata.operands[3].fp : metadata.operands[3].imm; - - const uint16_t partition_num = VL_bits / (sizeof(T) * 8); - T out[256 / sizeof(T)] = {0}; - - for (int i = 0; i < partition_num; i++) { - uint64_t shifted_active = 1ull << ((i % (64 / sizeof(T))) * sizeof(T)); - if (p[i / (64 / sizeof(T))] & shifted_active) { - out[i] = dn[i] - imm; - } else { - out[i] = dn[i]; - } - } - return {out, 256}; - } - - /** Helper function for SVE instructions with the format `sxt zd, pg, - * zn`. - * T represents the type of vector registers (e.g. for zd.d, T = int64_t). - * C represents the type of the cast required - is linked to instruction - * variant used (i.e. sxtw requires int32_t). - * Returns correctly formatted RegisterValue. 
*/ - template - static RegisterValue sveSxtPredicated(std::vector& operands, - const uint16_t VL_bits) { - const T* d = operands[0].getAsVector(); - const uint64_t* p = operands[1].getAsVector(); - const T* n = operands[2].getAsVector(); - - const uint16_t partition_num = VL_bits / (sizeof(T) * 8); - T out[256 / sizeof(T)] = {0}; - for (int i = 0; i < partition_num; i++) { - uint64_t shifted_active = 1ull << ((i % (64 / sizeof(T))) * sizeof(T)); - if (p[i / (64 / sizeof(T))] & shifted_active) { - // Cast to C to get 'least significant sub-element' - // Then cast back to T to sign-extend this 'sub-element' - out[i] = static_cast(static_cast(n[i])); - } else { - out[i] = d[i]; - } - } - return {out, 256}; - } - - /** Helper function for SVE instructions with the format `trn1 zd, zn, zm`. - * T represents the type of operands (e.g. for zn.d, T = uint64_t). - * Returns correctly formatted RegisterValue. */ - template - static RegisterValue sveTrn1_3vecs(std::vector& operands, - const uint16_t VL_bits) { - const T* n = operands[0].getAsVector(); - const T* m = operands[1].getAsVector(); - - const uint16_t partition_num = VL_bits / (sizeof(T) * 8); - T out[256 / sizeof(T)] = {0}; - - for (int i = 0; i < (partition_num / 2); i++) { - out[2 * i] = n[(2 * i)]; - out[(2 * i) + 1] = m[(2 * i)]; - } - return {out, 256}; - } - - /** Helper function for SVE instructions with the format `trn2 zd, zn, zm`. - * T represents the type of operands (e.g. for zn.d, T = uint64_t). - * Returns correctly formatted RegisterValue. 
*/ - template - static RegisterValue sveTrn2_3vecs(std::vector& operands, - const uint16_t VL_bits) { - const T* n = operands[0].getAsVector(); - const T* m = operands[1].getAsVector(); - - const uint16_t partition_num = VL_bits / (sizeof(T) * 8); - T out[256 / sizeof(T)] = {0}; - - for (int i = 0; i < (partition_num / 2); i++) { - out[2 * i] = n[(2 * i) + 1]; - out[(2 * i) + 1] = m[(2 * i) + 1]; - } - return {out, 256}; - } - - /** Helper function for SVE instructions with the format `unpk>hi,lo> zd, - * zn`. - * D represents the type of the destination register (e.g. int32_t for - * zd.s). - * N represents the type of the source register (e.g. int8_t for zn.b). - * Returns correctly formatted RegisterValue. */ - template - static RegisterValue sveUnpk_vecs(std::vector& operands, - const uint16_t VL_bits, bool isHi) { - const N* n = operands[0].getAsVector(); - - const uint16_t partition_num = VL_bits / (sizeof(D) * 8); - D out[256 / sizeof(D)] = {0}; - - for (int i = 0; i < partition_num; i++) { - int index = isHi ? (partition_num + i) : i; - out[i] = static_cast(n[index]); - } - return {out, 256}; - } - - /** Helper function for SVE instructions with the format `uqdec - * d{, pattern{, MUL #imm}}`. - * D represents the type of dest. register(e.g. uint32_t for wd). - * N represents the type of the operation (e.g. for UQDECH, N = 16u). - * Returns single value of type uint64_t. */ - template - static uint64_t sveUqdec( - std::vector& operands, - const simeng::arch::aarch64::InstructionMetadata& metadata, - const uint16_t VL_bits) { - const D d = operands[0].get(); - const uint8_t imm = metadata.operands[1].imm; - const uint16_t count = - AuxFunc::sveGetPattern(metadata.operandStr, N, VL_bits); - - // The range of possible values does not fit in the range of any integral - // type, so a double is used as an intermediate value. The end result must - // be saturated to fit in uint64_t. 
- auto intermediate = double(d) - (imm * count); - if (intermediate < 0) { - return (uint64_t)0; - } - return (uint64_t)(d - (imm * count)); - } - - /** Helper function for SVE instructions with the format `uzp<1,2> zd, zn, - * zm`. - * T represents the type of operands (e.g. for zn.d, T = uint64_t). - * Returns correctly formatted RegisterValue. */ - template - static RegisterValue sveUzp_vecs(std::vector& operands, - const uint16_t VL_bits, bool isUzp1) { - const T* n = operands[0].getAsVector(); - const T* m = operands[1].getAsVector(); - - const uint16_t partition_num = VL_bits / (sizeof(T) * 8); - T out[256 / sizeof(T)] = {0}; - - for (int i = 0; i < partition_num / 2; i++) { - // UZP1 concatenates even elements. UZP2 concatenates odd. - int index = isUzp1 ? (2 * i) : (2 * i) + 1; - out[i] = n[index]; - } - for (int i = 0; i < partition_num / 2; i++) { - int index = isUzp1 ? (2 * i) : (2 * i) + 1; - out[partition_num / 2 + i] = m[index]; - } - return {out, 256}; - } - - /** Helper function for SVE instructions with the format `whilelo pd, - * n, m`. - * T represents the type of operands n and m (e.g. for wn, T = uint32_t). - * P represents the type of operand p (e.g. for pd.b, P = uint8_t). - * Returns tuple of type [pred results (array of 4 uint64_t), nzcv]. */ - template - static std::tuple, uint8_t> sveWhilelo( - std::vector& operands, const uint16_t VL_bits, - bool calcNZCV) { - const T n = operands[0].get(); - const T m = operands[1].get(); - - const uint16_t partition_num = VL_bits / (sizeof(P) * 8); - std::array out = {0, 0, 0, 0}; - uint16_t index = 0; - - for (int i = 0; i < partition_num; i++) { - // Determine whether lane should be active and shift to align with - // element in predicate register. - uint64_t shifted_active = - (n + i) < m ? 
1ull << ((i % (64 / (sizeof(P))) * (sizeof(P)))) : 0; - out[index / (64 / (sizeof(P)))] = - out[index / (64 / (sizeof(P)))] | shifted_active; + interleave = !interleave; + } + return out; +} + +/** Helper function for SVE instructions with the format `zip<1,2> zd, zn, + * zm`. + * T represents the type of operands (e.g. for zn.d, T = uint64_t). + * Returns correctly formatted RegisterValue. */ +template +RegisterValue sveZip_vecs(std::vector& operands, + const uint16_t VL_bits, bool isZip2) { + const T* n = operands[0].getAsVector(); + const T* m = operands[1].getAsVector(); + + const uint16_t partition_num = VL_bits / (sizeof(T) * 8); + T out[256 / sizeof(T)] = {0}; + + bool interleave = false; + int index = isZip2 ? (partition_num / 2) : 0; + for (int i = 0; i < partition_num; i++) { + if (interleave) { + out[i] = m[index]; index++; + } else { + out[i] = n[index]; } - // Byte count = sizeof(P) as destination predicate is predicate of P - // bytes. - uint8_t nzcv = - calcNZCV ? AuxFunc::getNZCVfromPred(out, VL_bits, sizeof(P)) : 0; - return {out, nzcv}; - } - - /** Helper function for SVE instructions with the format `zip<1,2> pd, pn, - * pm`. - * T represents the type of operands (e.g. for pn.d, T = uint64_t). - * Returns an array of 4 uint64_t elements. */ - template - static std::array sveZip_preds( - std::vector& operands, const uint16_t VL_bits, - bool isZip2) { - const uint64_t* n = operands[0].getAsVector(); - const uint64_t* m = operands[1].getAsVector(); - - const uint16_t partition_num = VL_bits / (sizeof(T) * 8); - std::array out = {0, 0, 0, 0}; - - bool interleave = false; - int index = isZip2 ? (partition_num / 2) : 0; - for (int i = 0; i < partition_num; i++) { - uint64_t shifted_active = 1ull - << ((index % (64 / sizeof(T))) * sizeof(T)); - if (interleave) { - out[i / (64 / sizeof(T))] |= - ((m[index / (64 / sizeof(T))] & shifted_active) == shifted_active) - ? 
static_cast(1ull - << ((i % (64 / sizeof(T))) * sizeof(T))) - : 0; - index++; - } else { - out[i / (64 / sizeof(T))] |= - ((n[index / (64 / sizeof(T))] & shifted_active) == shifted_active) - ? static_cast(1ull - << ((i % (64 / sizeof(T))) * sizeof(T))) - : 0; - } - interleave = !interleave; - } - return out; - } - - /** Helper function for SVE instructions with the format `zip<1,2> zd, zn, - * zm`. - * T represents the type of operands (e.g. for zn.d, T = uint64_t). - * Returns correctly formatted RegisterValue. */ - template - static RegisterValue sveZip_vecs(std::vector& operands, - const uint16_t VL_bits, bool isZip2) { - const T* n = operands[0].getAsVector(); - const T* m = operands[1].getAsVector(); - - const uint16_t partition_num = VL_bits / (sizeof(T) * 8); - T out[256 / sizeof(T)] = {0}; - - bool interleave = false; - int index = isZip2 ? (partition_num / 2) : 0; - for (int i = 0; i < partition_num; i++) { - if (interleave) { - out[i] = m[index]; - index++; - } else { - out[i] = n[index]; - } - interleave = !interleave; - } - return {out, 256}; - } - - /** Helper function for SVE instructions store instructions to merge - * consecutive active elements into blocks to be written. - * T represents the size of the vector elements (e.g. for zn.d, T = uint64_t). - * C represents the size of the memory elements (e.g. for st1w, C = uint32_t). - * Return a vector of RegisterValues. */ - template - static std::vector sve_merge_store_data(const T* d, - const uint64_t* p, - uint16_t vl_bits) { - std::vector outputData; - - uint16_t numVecElems = (vl_bits / (8 * sizeof(T))); - // Determine how many predicate elements are present per uint64_t. - uint16_t predsPer64 = (64 / sizeof(T)); - - // Determine size of array based on the size of the memory access (This is - // the C specifier in sve instructions) - std::array mData; - uint16_t mdSize = 0; - - for (uint16_t x = 0; x < numVecElems; x++) { - // Determine mask to get predication for active element. 
- uint64_t shiftedActive = 1ull << ((x % predsPer64) * sizeof(T)); - if (p[x / predsPer64] & shiftedActive) { - mData[mdSize] = static_cast(d[x]); - mdSize++; - } else if (mdSize) { - outputData.push_back( - RegisterValue((char*)mData.data(), mdSize * sizeof(C))); - mdSize = 0; - } - } - if (mdSize) { + interleave = !interleave; + } + return {out, 256}; +} + +/** Helper function for SVE instructions store instructions to merge + * consecutive active elements into blocks to be written. + * T represents the size of the vector elements (e.g. for zn.d, T = uint64_t). + * C represents the size of the memory elements (e.g. for st1w, C = uint32_t). + * Return a vector of RegisterValues. */ +template +std::vector sve_merge_store_data(const T* d, const uint64_t* p, + uint16_t vl_bits) { + std::vector outputData; + + uint16_t numVecElems = (vl_bits / (8 * sizeof(T))); + // Determine how many predicate elements are present per uint64_t. + uint16_t predsPer64 = (64 / sizeof(T)); + + // Determine size of array based on the size of the memory access (This is + // the C specifier in sve instructions) + std::array mData; + uint16_t mdSize = 0; + + for (uint16_t x = 0; x < numVecElems; x++) { + // Determine mask to get predication for active element. 
+ uint64_t shiftedActive = 1ull << ((x % predsPer64) * sizeof(T)); + if (p[x / predsPer64] & shiftedActive) { + mData[mdSize] = static_cast(d[x]); + mdSize++; + } else if (mdSize) { outputData.push_back( RegisterValue((char*)mData.data(), mdSize * sizeof(C))); + mdSize = 0; } - return outputData; } -}; + if (mdSize) { + outputData.push_back( + RegisterValue((char*)mData.data(), mdSize * sizeof(C))); + } + return outputData; +} + } // namespace aarch64 } // namespace arch } // namespace simeng diff --git a/src/include/simeng/arch/riscv/ExceptionHandler.hh b/src/include/simeng/arch/riscv/ExceptionHandler.hh index 02d29c93bb..c422f0e8b6 100644 --- a/src/include/simeng/arch/riscv/ExceptionHandler.hh +++ b/src/include/simeng/arch/riscv/ExceptionHandler.hh @@ -96,6 +96,16 @@ class ExceptionHandler : public simeng::arch::ExceptionHandler { static constexpr Register R3 = {RegisterType::GENERAL, 13}; static constexpr Register R4 = {RegisterType::GENERAL, 14}; static constexpr Register R5 = {RegisterType::GENERAL, 15}; + + /** Let the following ExceptionHandlerTest derived classes be a friend of this + * class to allow proper testing of `readStringThen()`, `readBufferThen()` and + * `printException()` functions. */ + friend class RiscVExceptionHandlerTest_readStringThen_Test; + friend class RiscVExceptionHandlerTest_readStringThen_maxLen0_Test; + friend class RiscVExceptionHandlerTest_readStringThen_maxLenReached_Test; + friend class RiscVExceptionHandlerTest_readBufferThen_Test; + friend class RiscVExceptionHandlerTest_readBufferThen_length0_Test; + friend class RiscVExceptionHandlerTest_printException_Test; }; } // namespace riscv diff --git a/src/include/simeng/arch/riscv/Instruction.hh b/src/include/simeng/arch/riscv/Instruction.hh index 2ba1c03d8c..b3064d6b48 100644 --- a/src/include/simeng/arch/riscv/Instruction.hh +++ b/src/include/simeng/arch/riscv/Instruction.hh @@ -23,6 +23,9 @@ const uint8_t GENERAL = 0; const uint8_t FLOAT = 1; /** The system registers. 
*/ const uint8_t SYSTEM = 2; + +/** A special register value representing the zero register. */ +const Register ZERO_REGISTER = {GENERAL, (uint16_t)0}; } // namespace RegisterType /** A struct holding user-defined execution information for a aarch64 @@ -42,7 +45,6 @@ struct executionInfo { enum class InstructionException { None = 0, EncodingUnallocated, - EncodingNotYetImplemented, ExecutionNotYetImplemented, AliasNotYetImplemented, MisalignedPC, @@ -167,11 +169,6 @@ class Instruction : public simeng::Instruction { /** Retrieve the instruction's associated architecture. */ const Architecture& getArchitecture() const; - /** A special register value representing the zero register. If passed to - * `setSourceRegisters`/`setDestinationRegisters`, the value will be - * automatically supplied as zero. */ - static const Register ZERO_REGISTER; - /** The maximum number of source registers any supported RISC-V instruction * can have. */ static const uint8_t MAX_SOURCE_REGISTERS = 3; diff --git a/src/include/simeng/config/ModelConfig.hh b/src/include/simeng/config/ModelConfig.hh index b16fae5585..8c18b7e0c9 100644 --- a/src/include/simeng/config/ModelConfig.hh +++ b/src/include/simeng/config/ModelConfig.hh @@ -15,6 +15,7 @@ #include "simeng/config/ExpectationNode.hh" #include "simeng/config/yaml/ryml.hh" +#include "simeng/version.hh" namespace simeng { namespace config { @@ -115,6 +116,9 @@ class ModelConfig { /** A string stream containing information about invalid values. */ std::ostringstream invalid_; + + /** The default special file directory. 
*/ + std::string defaultSpecialFilePath_ = SIMENG_BUILD_DIR "/specialFiles/"; }; // namespace ModelConfig } // namespace config diff --git a/src/include/simeng/config/SimInfo.hh b/src/include/simeng/config/SimInfo.hh index 7a247b843e..333014f168 100644 --- a/src/include/simeng/config/SimInfo.hh +++ b/src/include/simeng/config/SimInfo.hh @@ -51,7 +51,7 @@ class SimInfo { getInstance()->modelConfig_.reGenerateDefault(ISA::RV64, force); // Update config path to be the default string - getInstance()->setConfigPath(DEFAULT_STR); + getInstance()->configFilePath_ = DEFAULT_STR; // Replace the validated config with the new default config getInstance()->validatedConfig_ = getInstance()->modelConfig_.getConfig(); @@ -62,11 +62,6 @@ class SimInfo { /** A getter function to retrieve the config file path. */ static std::string getConfigPath() { return getInstance()->configFilePath_; } - /** A setter function to set the config file path. */ - static void setConfigPath(std::string path) { - getInstance()->configFilePath_ = path; - } - /** A getter function to retrieve the simulation mode of the current SimEng * instance. */ static SimulationMode getSimMode() { return getInstance()->mode_; } diff --git a/src/include/simeng/kernel/Linux.hh b/src/include/simeng/kernel/Linux.hh index 0908d59006..b8ec954599 100644 --- a/src/include/simeng/kernel/Linux.hh +++ b/src/include/simeng/kernel/Linux.hh @@ -130,6 +130,9 @@ struct linux_dirent64 { to Linux system calls. */ class Linux { public: + Linux(const std::string specialFiledirPath) + : specialFilesDir_(specialFiledirPath) {} + /** Create a new Linux process running above this kernel. */ void createProcess(const LinuxProcess& process); @@ -252,7 +255,7 @@ class Linux { std::unordered_map specialPathTranslations_; /** Path to the root of the replacement special files. 
*/ - const std::string specialFilesDir_ = SIMENG_BUILD_DIR "/specialFiles"; + const std::string specialFilesDir_; /** Vector of all currently supported special file paths & files.*/ std::vector supportedSpecialFiles_; diff --git a/src/include/simeng/pipeline/A64FXPortAllocator.hh b/src/include/simeng/pipeline/A64FXPortAllocator.hh index 74f27faf25..22261abad2 100644 --- a/src/include/simeng/pipeline/A64FXPortAllocator.hh +++ b/src/include/simeng/pipeline/A64FXPortAllocator.hh @@ -21,17 +21,21 @@ const uint8_t BR = 5; * described in the A64FX Microarchitecture manual. */ class A64FXPortAllocator : public PortAllocator { public: + /** Constructor for the A64FXPortAllocator object. */ A64FXPortAllocator(const std::vector>& portArrangement); + /** Allocate a port for the specified instruction group; returns the allocated + * port. */ uint16_t allocate(const std::vector& ports) override; + /** Inform the allocator that an instruction was issued to the specified port. + */ void issued(uint16_t port) override; + /** Inform the allocator that an instruction will not issue to its + * allocated port. */ void deallocate(uint16_t port) override; - /** A mapping from issye ports to instruction attribute */ - uint8_t attributeMapping(const std::vector& ports); - /** Set function from DispatchIssueUnit to retrieve reservation * station sizes during execution. */ void setRSSizeGetter( @@ -41,6 +45,9 @@ class A64FXPortAllocator : public PortAllocator { void tick() override; private: + /** A mapping from issue ports to instruction attribute */ + uint8_t attributeMapping(const std::vector& ports); + /** An approximate estimation of the index of an instruction within the input * buffer of the dispatch unit. 
Increments slot at each allocation thus cannot * account for nullptr entries in buffer.*/ diff --git a/src/include/simeng/pipeline/DispatchIssueUnit.hh b/src/include/simeng/pipeline/DispatchIssueUnit.hh index 2e533cf125..dd8654d921 100644 --- a/src/include/simeng/pipeline/DispatchIssueUnit.hh +++ b/src/include/simeng/pipeline/DispatchIssueUnit.hh @@ -30,8 +30,7 @@ struct ReservationStation { uint16_t capacity; /** Number of instructions that can be dispatched to this unit per cycle. */ uint16_t dispatchRate; - /** Current number of non-stalled instructions - * in reservation station */ + /** Current number of instructions in reservation station */ uint16_t currentSize; /** Issue ports belonging to reservation station */ std::vector ports; @@ -75,9 +74,6 @@ class DispatchIssueUnit { void forwardOperands(const span& destinations, const span& values); - /** Set the scoreboard entry for the provided register as ready. */ - void setRegisterReady(Register reg); - /** Clear the RS of all flushed instructions. */ void purgeFlushed(); diff --git a/src/include/simeng/pipeline/ExecuteUnit.hh b/src/include/simeng/pipeline/ExecuteUnit.hh index 8a480fac10..14d8b47e7c 100644 --- a/src/include/simeng/pipeline/ExecuteUnit.hh +++ b/src/include/simeng/pipeline/ExecuteUnit.hh @@ -48,9 +48,9 @@ class ExecuteUnit { * discovered misprediction. */ uint64_t getFlushAddress() const; - /** Retrieve the sequence ID associated with the most recently discovered + /** Retrieve the instruction ID associated with the most recently discovered * misprediction. */ - uint64_t getFlushSeqId() const; + uint64_t getFlushInsnId() const; /** Purge flushed instructions from the internal pipeline and clear any active * stall, if applicable. 
*/ diff --git a/src/include/simeng/pipeline/RegisterAliasTable.hh b/src/include/simeng/pipeline/RegisterAliasTable.hh index e3a30ea7b1..43b8e0db4c 100644 --- a/src/include/simeng/pipeline/RegisterAliasTable.hh +++ b/src/include/simeng/pipeline/RegisterAliasTable.hh @@ -15,7 +15,7 @@ class RegisterAliasTable { * structure, and the corresponding numbers of physical registers that should * be available. */ RegisterAliasTable(std::vector architecturalStructure, - std::vector physicalStructure); + std::vector physicalRegisterCounts); /** Retrieve the current physical register assigned to the provided * architectural register. */ @@ -43,9 +43,6 @@ class RegisterAliasTable { * is reinstated to the mapping table, and the provided register is freed. */ void rewind(Register physical); - /** Free the provided physical register. */ - void free(Register physical); - private: /** The register mapping tables. Holds a map of architectural -> physical * register mappings for each register type. */ diff --git a/src/include/simeng/pipeline/ReorderBuffer.hh b/src/include/simeng/pipeline/ReorderBuffer.hh index 179d9bb689..a98471c2e8 100644 --- a/src/include/simeng/pipeline/ReorderBuffer.hh +++ b/src/include/simeng/pipeline/ReorderBuffer.hh @@ -59,7 +59,7 @@ class ReorderBuffer { unsigned int commit(unsigned int maxCommitSize); /** Flush all instructions with a sequence ID greater than `afterSeqId`. */ - void flush(uint64_t afterSeqId); + void flush(uint64_t afterInsnId); /** Retrieve the current size of the ROB. */ unsigned int size() const; @@ -75,9 +75,9 @@ class ReorderBuffer { * discovered memory order violation. */ uint64_t getFlushAddress() const; - /** Retrieve the sequence ID associated with the most recently discovered + /** Retrieve the instruction ID associated with the most recently discovered * memory order violation. */ - uint64_t getFlushSeqId() const; + uint64_t getFlushInsnId() const; /** Get the number of instructions the ROB has committed. 
*/ uint64_t getInstructionsCommittedCount() const; diff --git a/src/lib/CoreInstance.cc b/src/lib/CoreInstance.cc index 7ab82ecf73..af4ee73b9d 100644 --- a/src/lib/CoreInstance.cc +++ b/src/lib/CoreInstance.cc @@ -5,16 +5,20 @@ namespace simeng { CoreInstance::CoreInstance(std::string executablePath, std::vector executableArgs, ryml::ConstNodeRef config) - : config_(config) { + : config_(config), + kernel_(kernel::Linux( + config_["CPU-Info"]["Special-File-Dir-Path"].as())) { generateCoreModel(executablePath, executableArgs); } CoreInstance::CoreInstance(char* assembledSource, size_t sourceSize, ryml::ConstNodeRef config) - : config_(config) { - source_ = assembledSource; - sourceSize_ = sourceSize; - assembledSource_ = true; + : config_(config), + kernel_(kernel::Linux( + config_["CPU-Info"]["Special-File-Dir-Path"].as())), + source_(assembledSource), + sourceSize_(sourceSize), + assembledSource_(true) { // Pass an empty string for executablePath and empty vector of strings for // executableArgs. generateCoreModel("", std::vector{}); diff --git a/src/lib/FixedLatencyMemoryInterface.cc b/src/lib/FixedLatencyMemoryInterface.cc index 6ccf48a147..4bf57f1f40 100644 --- a/src/lib/FixedLatencyMemoryInterface.cc +++ b/src/lib/FixedLatencyMemoryInterface.cc @@ -1,6 +1,6 @@ #include "simeng/FixedLatencyMemoryInterface.hh" -#include +#include namespace simeng { @@ -24,8 +24,12 @@ void FixedLatencyMemoryInterface::tick() { if (request.write) { // Write: write data directly to memory - assert(target.address + target.size <= size_ && - "Attempted to write beyond memory limit"); + if (target.address + target.size > size_) { + std::cerr << "[SimEng:FixedLatencyMemoryInterface] Attempted to write " + "beyond memory limit." 
+ << std::endl; + exit(1); + } auto ptr = memory_ + target.address; // Copy the data from the RegisterValue to memory diff --git a/src/lib/FlatMemoryInterface.cc b/src/lib/FlatMemoryInterface.cc index 8360626e3f..730e615101 100644 --- a/src/lib/FlatMemoryInterface.cc +++ b/src/lib/FlatMemoryInterface.cc @@ -1,6 +1,5 @@ #include "simeng/FlatMemoryInterface.hh" -#include #include namespace simeng { @@ -25,8 +24,12 @@ void FlatMemoryInterface::requestRead(const MemoryAccessTarget& target, void FlatMemoryInterface::requestWrite(const MemoryAccessTarget& target, const RegisterValue& data) { - assert(target.address + target.size <= size_ && - "Attempted to write beyond memory limit"); + if (target.address + target.size > size_) { + std::cerr << "[SimEng:FlatLatencyMemoryInterface] Attempted to write " + "beyond memory limit." + << std::endl; + exit(1); + } auto ptr = memory_ + target.address; // Copy the data from the RegisterValue to memory diff --git a/src/lib/SpecialFileDirGen.cc b/src/lib/SpecialFileDirGen.cc index 1c18ab51c3..0acf8984eb 100644 --- a/src/lib/SpecialFileDirGen.cc +++ b/src/lib/SpecialFileDirGen.cc @@ -5,7 +5,9 @@ namespace simeng { SpecialFileDirGen::SpecialFileDirGen(ryml::ConstNodeRef config) - : coreCount_(config["CPU-Info"]["Core-Count"].as()), + : specialFilesDir_( + config["CPU-Info"]["Special-File-Dir-Path"].as()), + coreCount_(config["CPU-Info"]["Core-Count"].as()), socketCount_(config["CPU-Info"]["Socket-Count"].as()), smt_(config["CPU-Info"]["SMT"].as()), bogoMIPS_(config["CPU-Info"]["BogoMIPS"].as()), @@ -23,12 +25,12 @@ void SpecialFileDirGen::RemoveExistingSFDir() { const std::string rm_input = "rm -r " + specialFilesDir_; system(rm_input.c_str()); } - const std::string mk_input = "mkdir " + specialFilesDir_; - system(mk_input.c_str()); return; } void SpecialFileDirGen::GenerateSFDir() { + // Create root special files directory + system(("mkdir -p " + specialFilesDir_).c_str()); // Define frequently accessed root directories in special 
file tree const std::string proc_dir = specialFilesDir_ + "/proc/"; const std::string online_dir = specialFilesDir_ + "/sys/devices/system/cpu/"; diff --git a/src/lib/arch/aarch64/Instruction.cc b/src/lib/arch/aarch64/Instruction.cc index a3e0e698ad..df4e55c07f 100644 --- a/src/lib/arch/aarch64/Instruction.cc +++ b/src/lib/arch/aarch64/Instruction.cc @@ -8,9 +8,6 @@ namespace simeng { namespace arch { namespace aarch64 { -const Register Instruction::ZERO_REGISTER = {RegisterType::GENERAL, - (uint16_t)-1}; - Instruction::Instruction(const Architecture& architecture, const InstructionMetadata& metadata, MicroOpInfo microOpInfo) @@ -37,7 +34,8 @@ Instruction::Instruction(const Architecture& architecture, InstructionException Instruction::getException() const { return exception_; } const span Instruction::getSourceRegisters() const { - return {const_cast(sourceRegisters.data()), sourceRegisterCount}; + return {const_cast(sourceRegisters.data()), + sourceRegisters.size()}; } const span Instruction::getSourceOperands() const { @@ -45,6 +43,10 @@ const span Instruction::getSourceOperands() const { } const span Instruction::getDestinationRegisters() const { + // The `destinationRegisterCount` is used here as the span count value because + // there may be n number of zero registers in the latter indexes of the + // `destinationRegisters` vector. These cannot be written to and hence + // shouldn't be included in the returned span. 
return {const_cast(destinationRegisters.data()), destinationRegisterCount}; } @@ -55,6 +57,7 @@ bool Instruction::isOperandReady(int index) const { void Instruction::renameSource(uint16_t i, Register renamed) { sourceRegisters[i] = renamed; } + void Instruction::renameDestination(uint16_t i, Register renamed) { destinationRegisters[i] = renamed; } @@ -95,6 +98,10 @@ span Instruction::getData() const { bool Instruction::canExecute() const { return (operandsPending == 0); } const span Instruction::getResults() const { + // The `destinationRegisterCount` is used here as the span count value because + // there may be n number of values attributed to zero registers in the latter + // indexes of the `results` vector. Zero registers cannot be written to and + // hence shouldn't be included in the returned span. return {const_cast(results.data()), destinationRegisterCount}; } @@ -185,6 +192,7 @@ void Instruction::setExecutionInfo(const ExecutionInfo& info) { stallCycles_ = info.stallCycles; supportedPorts_ = info.ports; } + const std::vector& Instruction::getSupportedPorts() { if (supportedPorts_.size() == 0) { exception_ = InstructionException::NoAvailablePort; @@ -199,64 +207,6 @@ const Architecture& Instruction::getArchitecture() const { return architecture_; } -/** Extend `value` according to `extendType`, and left-shift the result by - * `shift` */ -uint64_t Instruction::extendValue(uint64_t value, uint8_t extendType, - uint8_t shift) const { - if (extendType == ARM64_EXT_INVALID && shift == 0) { - // Special case: an invalid shift type with a shift amount of 0 implies an - // identity operation - return value; - } - - uint64_t extended; - switch (extendType) { - case ARM64_EXT_UXTB: - extended = static_cast(value); - break; - case ARM64_EXT_UXTH: - extended = static_cast(value); - break; - case ARM64_EXT_UXTW: - extended = static_cast(value); - break; - case ARM64_EXT_UXTX: - extended = value; - break; - case ARM64_EXT_SXTB: - extended = static_cast(value); - break; 
- case ARM64_EXT_SXTH: - extended = static_cast(value); - break; - case ARM64_EXT_SXTW: - extended = static_cast(value); - break; - case ARM64_EXT_SXTX: - extended = value; - break; - default: - assert(false && "Invalid extension type"); - return 0; - } - - return extended << shift; -} - -/** Extend `value` using extension/shifting rules defined in `op`. */ -uint64_t Instruction::extendOffset(uint64_t value, - const cs_arm64_op& op) const { - if (op.ext == 0) { - if (op.shift.value == 0) { - return value; - } - if (op.shift.type == 1) { - return extendValue(value, ARM64_EXT_UXTX, op.shift.value); - } - } - return extendValue(value, op.ext, op.shift.value); -} - } // namespace aarch64 } // namespace arch } // namespace simeng diff --git a/src/lib/arch/aarch64/InstructionMetadata.cc b/src/lib/arch/aarch64/InstructionMetadata.cc index 6421664ce0..59f529e3c6 100644 --- a/src/lib/arch/aarch64/InstructionMetadata.cc +++ b/src/lib/arch/aarch64/InstructionMetadata.cc @@ -1463,6 +1463,9 @@ InstructionMetadata::InstructionMetadata(const cs_insn& insn) case Opcode::AArch64_STRXui: operands[1].access = CS_AC_READ; break; + case Opcode::AArch64_PFALSE: + operands[0].access = CS_AC_WRITE; + break; case Opcode::AArch64_STR_PXI: [[fallthrough]]; case Opcode::AArch64_STR_ZXI: diff --git a/src/lib/arch/aarch64/Instruction_address.cc b/src/lib/arch/aarch64/Instruction_address.cc index 3878d80648..42b2fed7be 100644 --- a/src/lib/arch/aarch64/Instruction_address.cc +++ b/src/lib/arch/aarch64/Instruction_address.cc @@ -2,6 +2,7 @@ #include #include "InstructionMetadata.hh" +#include "simeng/arch/aarch64/helpers/auxiliaryFunctions.hh" namespace simeng { namespace arch { diff --git a/src/lib/arch/aarch64/Instruction_decode.cc b/src/lib/arch/aarch64/Instruction_decode.cc index 4e60ae1b62..bc90f8a2f9 100644 --- a/src/lib/arch/aarch64/Instruction_decode.cc +++ b/src/lib/arch/aarch64/Instruction_decode.cc @@ -17,6 +17,7 @@ namespace aarch64 { constexpr bool bit(uint32_t value, uint8_t start) 
{ return (value >> start) & 1; } + // Extract bits `start` to `start+width` of `value` constexpr uint32_t bits(uint32_t value, uint8_t start, uint8_t width) { return ((value >> start) & ((1 << width) - 1)); @@ -24,6 +25,7 @@ constexpr uint32_t bits(uint32_t value, uint8_t start, uint8_t width) { // Generate a general purpose register identifier with tag `tag` constexpr Register genReg(uint16_t tag) { return {RegisterType::GENERAL, tag}; } + // Generate a NZCV register identifier constexpr Register nzcvReg() { return {RegisterType::NZCV, 0}; } @@ -97,7 +99,7 @@ Register csRegToRegister(arm64_reg reg) { // ARM64_REG_WZR and _XZR are zero registers, and don't read if (reg == ARM64_REG_WZR || reg == ARM64_REG_XZR) { - return Instruction::ZERO_REGISTER; + return RegisterType::ZERO_REGISTER; } // ARM64_REG_SP and _WSP are stack pointer registers, stored in r31 of the @@ -197,55 +199,58 @@ void Instruction::decode() { sourceRegisters.push_back( csRegToRegister(static_cast(metadata.implicitSources[i]))); operandsPending++; - sourceRegisterCount++; } bool accessesMemory = false; + uint16_t zrDestRegs = 0; // Extract explicit register accesses for (size_t i = 0; i < metadata.operandCount; i++) { const auto& op = metadata.operands[i]; if (op.type == ARM64_OP_REG) { // Register operand - if ((op.access & cs_ac_type::CS_AC_WRITE) && op.reg != ARM64_REG_WZR && - op.reg != ARM64_REG_XZR) { - // Determine the data type the instruction operates on based on the - // register operand used - // Belongs to the predicate group if the destination register is a - // predicate - if (op.reg >= ARM64_REG_V0) { - isVectorData_ = true; - } else if (op.reg >= ARM64_REG_ZAB0 || op.reg == ARM64_REG_ZA) { - isSMEData_ = true; - } else if (op.reg >= ARM64_REG_Z0) { - isSVEData_ = true; - } else if (op.reg <= ARM64_REG_S31 && op.reg >= ARM64_REG_Q0) { - isScalarData_ = true; - } else if (op.reg <= ARM64_REG_P15 && op.reg >= ARM64_REG_P0) { - isPredicate_ = true; - } else if (op.reg <= ARM64_REG_H31 
&& op.reg >= ARM64_REG_B0) { - isScalarData_ = true; - } + if ((op.access & cs_ac_type::CS_AC_WRITE)) { + if (op.reg != ARM64_REG_WZR && op.reg != ARM64_REG_XZR) { + // Determine the data type the instruction operates on based on the + // register operand used + // Belongs to the predicate group if the destination register is a + // predicate + if (op.reg >= ARM64_REG_V0) { + isVectorData_ = true; + } else if (op.reg >= ARM64_REG_ZAB0 || op.reg == ARM64_REG_ZA) { + isSMEData_ = true; + } else if (op.reg >= ARM64_REG_Z0) { + isSVEData_ = true; + } else if (op.reg <= ARM64_REG_S31 && op.reg >= ARM64_REG_Q0) { + isScalarData_ = true; + } else if (op.reg <= ARM64_REG_P15 && op.reg >= ARM64_REG_P0) { + isPredicate_ = true; + } else if (op.reg <= ARM64_REG_H31 && op.reg >= ARM64_REG_B0) { + isScalarData_ = true; + } - if ((op.reg >= ARM64_REG_ZAB0 && op.reg < ARM64_REG_V0) || - (op.reg == ARM64_REG_ZA)) { - // Add all Matrix register rows as destination operands - std::vector regs = - getZARowVectors(op.reg, architecture_.getStreamingVectorLength()); - for (int i = 0; i < regs.size(); i++) { - destinationRegisters.push_back(regs[i]); + if ((op.reg >= ARM64_REG_ZAB0 && op.reg < ARM64_REG_V0) || + (op.reg == ARM64_REG_ZA)) { + // Add all Matrix register rows as destination operands + std::vector regs = getZARowVectors( + op.reg, architecture_.getStreamingVectorLength()); + for (int i = 0; i < regs.size(); i++) { + destinationRegisters.push_back(regs[i]); + destinationRegisterCount++; + // If WRITE, also need to add to source registers to maintain + // unaltered row values + sourceRegisters.push_back(regs[i]); + operandsPending++; + } + } else { + // Add register writes to destinations, but skip zero-register + // destinations + destinationRegisters.push_back(csRegToRegister(op.reg)); destinationRegisterCount++; - // If WRITE, also need to add to source registers to maintain - // unaltered row values - sourceRegisters.push_back(regs[i]); - sourceRegisterCount++; - 
operandsPending++; } } else { - // Add register writes to destinations, but skip zero-register - // destinations - destinationRegisters.push_back(csRegToRegister(op.reg)); - destinationRegisterCount++; + // Need to allocate extra space in results vector for zero destination + zrDestRegs++; } } if (op.access & cs_ac_type::CS_AC_READ) { @@ -256,14 +261,12 @@ void Instruction::decode() { getZARowVectors(op.reg, architecture_.getStreamingVectorLength()); for (int i = 0; i < regs.size(); i++) { sourceRegisters.push_back(regs[i]); - sourceRegisterCount++; operandsPending++; } } else { // Add register reads to destinations sourceRegisters.push_back(csRegToRegister(op.reg)); operandsPending++; - sourceRegisterCount++; } if (op.shift.value > 0) isNoShift_ = false; // Identify shift operands } @@ -271,7 +274,6 @@ void Instruction::decode() { accessesMemory = true; sourceRegisters.push_back(csRegToRegister(op.mem.base)); operandsPending++; - sourceRegisterCount++; if (metadata.writeback) { // Writeback instructions modify the base address @@ -282,7 +284,6 @@ void Instruction::decode() { // Register offset; add to sources sourceRegisters.push_back(csRegToRegister(op.mem.index)); operandsPending++; - sourceRegisterCount++; } } else if (op.type == ARM64_OP_SME_INDEX) { // SME instruction with index std::vector regs; @@ -297,7 +298,6 @@ void Instruction::decode() { // un-updated rows for (int i = 0; i < regs.size(); i++) { sourceRegisters.push_back(regs[i]); - sourceRegisterCount++; operandsPending++; if (op.access & cs_ac_type::CS_AC_WRITE) { destinationRegisters.push_back(regs[i]); @@ -314,25 +314,20 @@ void Instruction::decode() { } else if (op.access & cs_ac_type::CS_AC_READ) { sourceRegisters.push_back(csRegToRegister(op.sme_index.reg)); operandsPending++; - sourceRegisterCount++; } } // Register that is base of index will always be a source operand sourceRegisters.push_back(csRegToRegister(op.sme_index.base)); operandsPending++; - sourceRegisterCount++; } else if (op.type 
== ARM64_OP_REG_MRS) { int32_t sysRegTag = architecture_.getSystemRegisterTag(op.imm); if (sysRegTag == -1) { exceptionEncountered_ = true; exception_ = InstructionException::UnmappedSysReg; - // Clear any registered operands - sourceRegisterCount = 0; - destinationRegisterCount = 0; + return; } else { sourceRegisters.push_back( {RegisterType::SYSTEM, static_cast(sysRegTag)}); - sourceRegisterCount++; operandsPending++; } } else if (op.type == ARM64_OP_REG_MSR) { @@ -340,9 +335,7 @@ void Instruction::decode() { if (sysRegTag == -1) { exceptionEncountered_ = true; exception_ = InstructionException::UnmappedSysReg; - // Clear any registered operands - sourceRegisterCount = 0; - destinationRegisterCount = 0; + return; } else { destinationRegisters.push_back( {RegisterType::SYSTEM, static_cast(sysRegTag)}); @@ -638,15 +631,17 @@ void Instruction::decode() { } // Allocate enough entries in results vector - results.resize(destinationRegisterCount + 1); + results.resize(destinationRegisterCount + zrDestRegs); // Allocate enough entries in the operands vector - operands.resize(sourceRegisterCount + 1); + operands.resize(sourceRegisters.size()); // Catch zero register references and pre-complete those operands - for (uint16_t i = 0; i < sourceRegisterCount; i++) { - if (sourceRegisters[i] == Instruction::ZERO_REGISTER) { - operands[i] = RegisterValue(0, 8); - operandsPending--; + if (!(isSMEData_)) { + for (uint16_t i = 0; i < sourceRegisters.size(); i++) { + if (sourceRegisters[i] == RegisterType::ZERO_REGISTER) { + operands[i] = RegisterValue(0, 8); + operandsPending--; + } } } } diff --git a/src/lib/arch/aarch64/Instruction_execute.cc b/src/lib/arch/aarch64/Instruction_execute.cc index 03fe5a5410..7f75fb4340 100644 --- a/src/lib/arch/aarch64/Instruction_execute.cc +++ b/src/lib/arch/aarch64/Instruction_execute.cc @@ -10,11 +10,9 @@ #include "simeng/arch/aarch64/helpers/conditional.hh" #include "simeng/arch/aarch64/helpers/divide.hh" #include 
"simeng/arch/aarch64/helpers/float.hh" -#include "simeng/arch/aarch64/helpers/load.hh" #include "simeng/arch/aarch64/helpers/logical.hh" #include "simeng/arch/aarch64/helpers/multiply.hh" #include "simeng/arch/aarch64/helpers/neon.hh" -#include "simeng/arch/aarch64/helpers/store.hh" #include "simeng/arch/aarch64/helpers/sve.hh" namespace simeng { @@ -106,7 +104,7 @@ void Instruction::execute() { } else { switch (metadata.opcode) { case Opcode::AArch64_ADCXr: { // adc xd, xn, xm - auto [result, nzcv] = arithmeticHelp::addCarry_3ops(operands); + auto [result, nzcv] = addCarry_3ops(operands); results[0] = result; break; } @@ -119,56 +117,52 @@ void Instruction::execute() { break; } case Opcode::AArch64_ADDPv16i8: { // addp vd.16b, vn.16b, vm.16b - results[0] = neonHelp::vecAddp_3ops(operands); + results[0] = vecAddp_3ops(operands); break; } case Opcode::AArch64_ADDPv2i64: { // addp vd.2d, vn.2d, vm.2d - results[0] = neonHelp::vecAddp_3ops(operands); + results[0] = vecAddp_3ops(operands); break; } case Opcode::AArch64_ADDPv2i64p: { // addp dd, vn.2d - results[0] = neonHelp::vecSumElems_2ops(operands); + results[0] = vecSumElems_2ops(operands); break; } case Opcode::AArch64_ADDPv4i32: { // addp vd.4s, vn.4s, vm.4s - results[0] = neonHelp::vecAddp_3ops(operands); + results[0] = vecAddp_3ops(operands); break; } case Opcode::AArch64_ADDPv8i16: { // addp vd.8h, vn.8h, vm.8h - results[0] = neonHelp::vecAddp_3ops(operands); + results[0] = vecAddp_3ops(operands); break; } case Opcode::AArch64_ADDSWri: { // adds wd, wn, #imm{, shift} - auto [result, nzcv] = - arithmeticHelp::addShift_imm(operands, metadata, true); + auto [result, nzcv] = addShift_imm(operands, metadata, true); results[0] = nzcv; results[1] = {result, 8}; break; } case Opcode::AArch64_ADDSWrs: { // adds wd, wn, wm{, shift} - auto [result, nzcv] = - arithmeticHelp::addShift_3ops(operands, metadata, true); + auto [result, nzcv] = addShift_3ops(operands, metadata, true); results[0] = nzcv; results[1] = {result, 8}; 
break; } case Opcode::AArch64_ADDSWrx: { // adds wd, wn, wm{, extend {#amount}} auto [result, nzcv] = - arithmeticHelp::addExtend_3ops(operands, metadata, true); + addExtend_3ops(operands, metadata, true); results[0] = nzcv; results[1] = {result, 8}; break; } case Opcode::AArch64_ADDSXri: { // adds xd, xn, #imm{, shift} - auto [result, nzcv] = - arithmeticHelp::addShift_imm(operands, metadata, true); + auto [result, nzcv] = addShift_imm(operands, metadata, true); results[0] = nzcv; results[1] = result; break; } case Opcode::AArch64_ADDSXrs: { // adds xd, xn, xm{, shift} - auto [result, nzcv] = - arithmeticHelp::addShift_3ops(operands, metadata, true); + auto [result, nzcv] = addShift_3ops(operands, metadata, true); results[0] = nzcv; results[1] = result; break; @@ -176,7 +170,7 @@ void Instruction::execute() { case Opcode::AArch64_ADDSXrx: // adds xd, xn, wm{, extend {#amount}} case Opcode::AArch64_ADDSXrx64: { // adds xd, xn, xm{, extend {#amount}} auto [result, nzcv] = - arithmeticHelp::addExtend_3ops(operands, metadata, true); + addExtend_3ops(operands, metadata, true); results[0] = nzcv; results[1] = RegisterValue(result, 8); break; @@ -190,135 +184,130 @@ void Instruction::execute() { break; } case Opcode::AArch64_ADDVv4i16v: { // addv hd, vn.4h - results[0] = neonHelp::vecSumElems_2ops(operands); + results[0] = vecSumElems_2ops(operands); break; } case Opcode::AArch64_ADDVv4i32v: { // addv sd, vn.4s - results[0] = neonHelp::vecSumElems_2ops(operands); + results[0] = vecSumElems_2ops(operands); break; } case Opcode::AArch64_ADDVv8i8v: { // addv bd, vn.8b - results[0] = neonHelp::vecSumElems_2ops(operands); + results[0] = vecSumElems_2ops(operands); break; } case Opcode::AArch64_ADDWri: { // add wd, wn, #imm{, shift} - auto [result, nzcv] = - arithmeticHelp::addShift_imm(operands, metadata, false); + auto [result, nzcv] = addShift_imm(operands, metadata, false); results[0] = {result, 8}; break; } case Opcode::AArch64_ADDWrs: { // add wd, wn, wm{, shift #amount} 
auto [result, nzcv] = - arithmeticHelp::addShift_3ops(operands, metadata, false); + addShift_3ops(operands, metadata, false); results[0] = {result, 8}; break; } case Opcode::AArch64_ADDWrx: { // add wd, wn, wm{, extend #amount} auto [result, nzcv] = - arithmeticHelp::addExtend_3ops(operands, metadata, false); + addExtend_3ops(operands, metadata, false); results[0] = {result, 8}; break; } case Opcode::AArch64_ADDXri: { // add xd, xn, #imm{, shift} - auto [result, nzcv] = - arithmeticHelp::addShift_imm(operands, metadata, false); + auto [result, nzcv] = addShift_imm(operands, metadata, false); results[0] = result; break; } case Opcode::AArch64_ADDXrs: { // add xd, xn, xm, {shift #amount} auto [result, nzcv] = - arithmeticHelp::addShift_3ops(operands, metadata, false); + addShift_3ops(operands, metadata, false); results[0] = result; break; } case Opcode::AArch64_ADDXrx: // add xd, xn, wm{, extend {#amount}} case Opcode::AArch64_ADDXrx64: { // add xd, xn, xm{, extend {#amount}} auto [result, nzcv] = - arithmeticHelp::addExtend_3ops(operands, metadata, false); + addExtend_3ops(operands, metadata, false); results[0] = result; break; } case Opcode::AArch64_ADD_ZI_B: { // add zdn.b, zdn.b, imm{, shift} - results[0] = sveHelp::sveAdd_imm(operands, metadata, VL_bits); + results[0] = sveAdd_imm(operands, metadata, VL_bits); break; } case Opcode::AArch64_ADD_ZI_D: { // add zdn.d, zdn.d, imm{, shift} - results[0] = sveHelp::sveAdd_imm(operands, metadata, VL_bits); + results[0] = sveAdd_imm(operands, metadata, VL_bits); break; } case Opcode::AArch64_ADD_ZI_H: { // add zdn.h zdn.h, imm{, shift} - results[0] = sveHelp::sveAdd_imm(operands, metadata, VL_bits); + results[0] = sveAdd_imm(operands, metadata, VL_bits); break; } case Opcode::AArch64_ADD_ZI_S: { // add zdn.s, zdn.s, imm{, shift} - results[0] = sveHelp::sveAdd_imm(operands, metadata, VL_bits); + results[0] = sveAdd_imm(operands, metadata, VL_bits); break; } case Opcode::AArch64_ADD_ZPmZ_B: { // add zdn.b, pg/m, zdn.b, 
zm.b - results[0] = sveHelp::sveAddPredicated_vecs(operands, VL_bits); + results[0] = sveAddPredicated_vecs(operands, VL_bits); break; } case Opcode::AArch64_ADD_ZPmZ_D: { // add zdn.d, pg/m, zdn.d, zm.d - results[0] = - sveHelp::sveAddPredicated_vecs(operands, VL_bits); + results[0] = sveAddPredicated_vecs(operands, VL_bits); break; } case Opcode::AArch64_ADD_ZPmZ_H: { // add zdn.h, pg/m, zdn.h, zm.h - results[0] = - sveHelp::sveAddPredicated_vecs(operands, VL_bits); + results[0] = sveAddPredicated_vecs(operands, VL_bits); break; } case Opcode::AArch64_ADD_ZPmZ_S: { // add zdn.s, pg/m, zdn.s, zm.s - results[0] = - sveHelp::sveAddPredicated_vecs(operands, VL_bits); + results[0] = sveAddPredicated_vecs(operands, VL_bits); break; } case Opcode::AArch64_ADD_ZZZ_B: { // add zd.b, zn.b, zm.b - results[0] = sveHelp::sveAdd_3ops(operands, VL_bits); + results[0] = sveAdd_3ops(operands, VL_bits); break; } case Opcode::AArch64_ADD_ZZZ_D: { // add zd.d, zn.d, zm.d - results[0] = sveHelp::sveAdd_3ops(operands, VL_bits); + results[0] = sveAdd_3ops(operands, VL_bits); break; } case Opcode::AArch64_ADD_ZZZ_H: { // add zd.h, zn.h, zm.h - results[0] = sveHelp::sveAdd_3ops(operands, VL_bits); + results[0] = sveAdd_3ops(operands, VL_bits); break; } case Opcode::AArch64_ADD_ZZZ_S: { // add zd.s, zn.s, zm.s - results[0] = sveHelp::sveAdd_3ops(operands, VL_bits); + results[0] = sveAdd_3ops(operands, VL_bits); break; } case Opcode::AArch64_ADDv16i8: { // add vd.16b, vn.16b, vm.16b - results[0] = neonHelp::vecAdd_3ops(operands); + results[0] = vecAdd_3ops(operands); break; } case Opcode::AArch64_ADDv1i64: { // add dd, dn, dm - results[0] = neonHelp::vecAdd_3ops(operands); + results[0] = vecAdd_3ops(operands); break; } case Opcode::AArch64_ADDv2i32: { // add vd.2s, vn.2s, vm.2s - results[0] = neonHelp::vecAdd_3ops(operands); + results[0] = vecAdd_3ops(operands); break; } case Opcode::AArch64_ADDv2i64: { // add vd.2d, vn.2d, vm.2d - results[0] = neonHelp::vecAdd_3ops(operands); + results[0] 
= vecAdd_3ops(operands); break; } case Opcode::AArch64_ADDv4i16: { // add vd.4h, vn.4h, vm.4h - results[0] = neonHelp::vecAdd_3ops(operands); + results[0] = vecAdd_3ops(operands); break; } case Opcode::AArch64_ADDv4i32: { // add vd.4s, vn.4s, vm.4s - results[0] = neonHelp::vecAdd_3ops(operands); + results[0] = vecAdd_3ops(operands); break; } case Opcode::AArch64_ADDv8i16: { // add vd.8h, vn.8h, vm.8h - results[0] = neonHelp::vecAdd_3ops(operands); + results[0] = vecAdd_3ops(operands); break; } case Opcode::AArch64_ADDv8i8: { // add vd.8b, vn.8b, vm.8b - results[0] = neonHelp::vecAdd_3ops(operands); + results[0] = vecAdd_3ops(operands); break; } case Opcode::AArch64_ADR: { // adr xd, #imm @@ -336,20 +325,20 @@ void Instruction::execute() { case Opcode::AArch64_ADR_LSL_ZZZ_D_1: // adr zd.d, [zn.d, zm.d, lsl #1] case Opcode::AArch64_ADR_LSL_ZZZ_D_2: // adr zd.d, [zn.d, zm.d, lsl #2] case Opcode::AArch64_ADR_LSL_ZZZ_D_3: { // adr zd.d, [zn.d, zm.d, lsl #3] - results[0] = sveHelp::sveAdr_packedOffsets(operands, metadata, - VL_bits); + results[0] = + sveAdr_packedOffsets(operands, metadata, VL_bits); break; } case Opcode::AArch64_ADR_LSL_ZZZ_S_0: // adr zd.s, [zn.s, zm.s] case Opcode::AArch64_ADR_LSL_ZZZ_S_1: // adr zd.s, [zn.s, zm.s, lsl #1] case Opcode::AArch64_ADR_LSL_ZZZ_S_2: // adr zd.s, [zn.s, zm.s, lsl #2] case Opcode::AArch64_ADR_LSL_ZZZ_S_3: { // adr zd.s, [zn.s, zm.s, lsl #3] - results[0] = sveHelp::sveAdr_packedOffsets(operands, metadata, - VL_bits); + results[0] = + sveAdr_packedOffsets(operands, metadata, VL_bits); break; } case Opcode::AArch64_ANDSWri: { // ands wd, wn, #imm - auto [result, nzcv] = logicalHelp::logicOp_imm( + auto [result, nzcv] = logicOp_imm( operands, metadata, true, [](uint32_t x, uint32_t y) -> uint32_t { return x & y; }); results[0] = nzcv; @@ -357,7 +346,7 @@ void Instruction::execute() { break; } case Opcode::AArch64_ANDSWrs: { // ands wd, wn, wm{, shift #amount} - auto [result, nzcv] = logicalHelp::logicOpShift_3ops( + auto [result, 
nzcv] = logicOpShift_3ops( operands, metadata, true, [](uint32_t x, uint32_t y) -> uint32_t { return x & y; }); results[0] = nzcv; @@ -365,7 +354,7 @@ void Instruction::execute() { break; } case Opcode::AArch64_ANDSXri: { // ands xd, xn, #imm - auto [result, nzcv] = logicalHelp::logicOp_imm( + auto [result, nzcv] = logicOp_imm( operands, metadata, true, [](uint64_t x, uint64_t y) -> uint64_t { return x & y; }); results[0] = nzcv; @@ -373,7 +362,7 @@ void Instruction::execute() { break; } case Opcode::AArch64_ANDSXrs: { // ands xd, xn, xm{, shift #amount} - auto [result, nzcv] = logicalHelp::logicOpShift_3ops( + auto [result, nzcv] = logicOpShift_3ops( operands, metadata, true, [](uint64_t x, uint64_t y) -> uint64_t { return x & y; }); results[0] = nzcv; @@ -381,35 +370,35 @@ void Instruction::execute() { break; } case Opcode::AArch64_ANDWri: { // and wd, wn, #imm - auto [result, nzcv] = logicalHelp::logicOp_imm( + auto [result, nzcv] = logicOp_imm( operands, metadata, false, [](uint32_t x, uint32_t y) -> uint32_t { return x & y; }); results[0] = {result, 8}; break; } case Opcode::AArch64_ANDWrs: { // and wd, wn, wm{, shift #amount} - auto [result, nzcv] = logicalHelp::logicOpShift_3ops( + auto [result, nzcv] = logicOpShift_3ops( operands, metadata, false, [](uint32_t x, uint32_t y) -> uint32_t { return x & y; }); results[0] = {result, 8}; break; } case Opcode::AArch64_ANDXri: { // and xd, xn, #imm - auto [result, nzcv] = logicalHelp::logicOp_imm( + auto [result, nzcv] = logicOp_imm( operands, metadata, false, [](uint64_t x, uint64_t y) -> uint64_t { return x & y; }); results[0] = result; break; } case Opcode::AArch64_ANDXrs: { // and xd, xn, xm{, shift #amount} - auto [result, nzcv] = logicalHelp::logicOpShift_3ops( + auto [result, nzcv] = logicOpShift_3ops( operands, metadata, false, [](uint64_t x, uint64_t y) -> uint64_t { return x & y; }); results[0] = result; break; } case Opcode::AArch64_AND_PPzPP: { // and pd.b, pg/z, pn.b, pm.b - results[0] = 
sveHelp::sveLogicOp_preds( + results[0] = sveLogicOp_preds( operands, VL_bits, [](uint64_t x, uint64_t y) -> uint64_t { return x & y; }); break; @@ -427,45 +416,45 @@ void Instruction::execute() { break; } case Opcode::AArch64_AND_ZPmZ_B: { // and zdn.b, pg/m, zdn.b, zm.b - results[0] = sveHelp::sveLogicOpPredicated_3vecs( + results[0] = sveLogicOpPredicated_3vecs( operands, VL_bits, [](uint8_t x, uint8_t y) -> uint8_t { return x & y; }); break; } case Opcode::AArch64_AND_ZPmZ_D: { // and zdn.d, pg/m, zdn.d, zm.d - results[0] = sveHelp::sveLogicOpPredicated_3vecs( + results[0] = sveLogicOpPredicated_3vecs( operands, VL_bits, [](uint64_t x, uint64_t y) -> uint64_t { return x & y; }); break; } case Opcode::AArch64_AND_ZPmZ_H: { // and zdn.h, pg/m, zdn.h, zm.h - results[0] = sveHelp::sveLogicOpPredicated_3vecs( + results[0] = sveLogicOpPredicated_3vecs( operands, VL_bits, [](uint16_t x, uint16_t y) -> uint16_t { return x & y; }); break; } case Opcode::AArch64_AND_ZPmZ_S: { // and zdn.s, pg/m, zdn.s, zm.s - results[0] = sveHelp::sveLogicOpPredicated_3vecs( + results[0] = sveLogicOpPredicated_3vecs( operands, VL_bits, [](uint32_t x, uint32_t y) -> uint32_t { return x & y; }); break; } case Opcode::AArch64_ANDv16i8: { // and vd.16b, vn.16b, vm.16b - results[0] = neonHelp::vecLogicOp_3vecs( + results[0] = vecLogicOp_3vecs( operands, [](uint8_t x, uint8_t y) -> uint8_t { return x & y; }); break; } case Opcode::AArch64_ANDv8i8: { // and vd.8b, vn.8b, vm.8b - results[0] = neonHelp::vecLogicOp_3vecs( + results[0] = vecLogicOp_3vecs( operands, [](uint8_t x, uint8_t y) -> uint8_t { return x & y; }); break; } case Opcode::AArch64_ASRVWr: { // asrv wd, wn, wm - results[0] = {logicalHelp::asrv_3gpr(operands), 8}; + results[0] = {asrv_3gpr(operands), 8}; break; } case Opcode::AArch64_ASRVXr: { // asrv xd, xn, xm - results[0] = logicalHelp::asrv_3gpr(operands); + results[0] = asrv_3gpr(operands); break; } case Opcode::AArch64_B: { // b label @@ -474,68 +463,63 @@ void 
Instruction::execute() { break; } case Opcode::AArch64_BFMWri: { // bfm wd, wn, #immr, #imms - results[0] = { - bitmanipHelp::bfm_2imms(operands, metadata, false, false), - 8}; + results[0] = {bfm_2imms(operands, metadata, false, false), 8}; break; } case Opcode::AArch64_BFMXri: { // bfm xd, xn, #immr, #imms - results[0] = - bitmanipHelp::bfm_2imms(operands, metadata, false, false); + results[0] = bfm_2imms(operands, metadata, false, false); break; } case Opcode::AArch64_BICSWrs: { // bics wd, wn, wm{, shift #amount} - auto [result, nzcv] = - logicalHelp::bicShift_3ops(operands, metadata, true); + auto [result, nzcv] = bicShift_3ops(operands, metadata, true); results[0] = nzcv; results[1] = {result, 8}; break; } case Opcode::AArch64_BICSXrs: { // bics xd, xn, xm{, shift #amount} - auto [result, nzcv] = - logicalHelp::bicShift_3ops(operands, metadata, true); + auto [result, nzcv] = bicShift_3ops(operands, metadata, true); results[0] = nzcv; results[1] = result; break; } case Opcode::AArch64_BICWrs: { // bic wd, wn, wm{, shift #amount} auto [result, nzcv] = - logicalHelp::bicShift_3ops(operands, metadata, false); + bicShift_3ops(operands, metadata, false); results[0] = {result, 8}; break; } case Opcode::AArch64_BICXrs: { // bic xd, xn, xm{, shift #amount} auto [result, nzcv] = - logicalHelp::bicShift_3ops(operands, metadata, false); + bicShift_3ops(operands, metadata, false); results[0] = result; break; } case Opcode::AArch64_BICv16i8: { // bic vd.16b, vn.16b, vm.16b - results[0] = neonHelp::vecBic_3ops(operands); + results[0] = vecBic_3ops(operands); break; } case Opcode::AArch64_BICv4i32: { // bic vd.4s, #imm{, lsl #shift} - results[0] = neonHelp::vecBicShift_imm(operands, metadata); + results[0] = vecBicShift_imm(operands, metadata); break; } case Opcode::AArch64_BICv8i16: { // bic vd.8h, #imm{, lsl #shift} - results[0] = neonHelp::vecBicShift_imm(operands, metadata); + results[0] = vecBicShift_imm(operands, metadata); break; } case Opcode::AArch64_BICv8i8: { // 
bic vd.8b, vn.8b, vm.8b - results[0] = neonHelp::vecBic_3ops(operands); + results[0] = vecBic_3ops(operands); break; } case Opcode::AArch64_BIFv16i8: { // bif vd.16b, vn.16b, vm.16b - results[0] = neonHelp::vecBitwiseInsert<16>(operands, true); + results[0] = vecBitwiseInsert<16>(operands, true); break; } case Opcode::AArch64_BITv16i8: { // bit vd.16b, vn.16b, vm.16b - results[0] = neonHelp::vecBitwiseInsert<16>(operands, false); + results[0] = vecBitwiseInsert<16>(operands, false); break; } case Opcode::AArch64_BITv8i8: { // bit vd.8b, vn.8b, vm.8b - results[0] = neonHelp::vecBitwiseInsert<8>(operands, false); + results[0] = vecBitwiseInsert<8>(operands, false); break; } case Opcode::AArch64_BL: { // bl #imm @@ -560,11 +544,11 @@ void Instruction::execute() { break; } case Opcode::AArch64_BSLv16i8: { // bsl vd.16b, vn.16b, vm.16b - results[0] = neonHelp::vecBsl<16>(operands); + results[0] = vecBsl<16>(operands); break; } case Opcode::AArch64_Bcc: { // b.cond label - if (AuxFunc::conditionHolds(metadata.cc, operands[0].get())) { + if (conditionHolds(metadata.cc, operands[0].get())) { branchTaken_ = true; branchAddress_ = instructionAddress_ + metadata.operands[0].imm; } else { @@ -590,7 +574,7 @@ void Instruction::execute() { break; } case Opcode::AArch64_CBNZW: { // cbnz wn, #imm - auto [taken, addr] = conditionalHelp::condBranch_cmpToZero( + auto [taken, addr] = condBranch_cmpToZero( operands, metadata, instructionAddress_, [](uint32_t x) -> bool { return x != 0; }); branchTaken_ = taken; @@ -598,7 +582,7 @@ void Instruction::execute() { break; } case Opcode::AArch64_CBNZX: { // cbnz xn, #imm - auto [taken, addr] = conditionalHelp::condBranch_cmpToZero( + auto [taken, addr] = condBranch_cmpToZero( operands, metadata, instructionAddress_, [](uint64_t x) -> bool { return x != 0; }); branchTaken_ = taken; @@ -606,7 +590,7 @@ void Instruction::execute() { break; } case Opcode::AArch64_CBZW: { // cbz wn, #imm - auto [taken, addr] = 
conditionalHelp::condBranch_cmpToZero( + auto [taken, addr] = condBranch_cmpToZero( operands, metadata, instructionAddress_, [](uint32_t x) -> bool { return x == 0; }); branchTaken_ = taken; @@ -614,7 +598,7 @@ void Instruction::execute() { break; } case Opcode::AArch64_CBZX: { // cbz xn, #imm - auto [taken, addr] = conditionalHelp::condBranch_cmpToZero( + auto [taken, addr] = condBranch_cmpToZero( operands, metadata, instructionAddress_, [](uint64_t x) -> bool { return x == 0; }); branchTaken_ = taken; @@ -622,77 +606,77 @@ void Instruction::execute() { break; } case Opcode::AArch64_CCMNWi: { // ccmn wn, #imm, #nzcv, cc - results[0] = conditionalHelp::ccmn_imm(operands, metadata); + results[0] = ccmn_imm(operands, metadata); break; } case Opcode::AArch64_CCMNXi: { // ccmn xn, #imm, #nzcv, cc - results[0] = conditionalHelp::ccmn_imm(operands, metadata); + results[0] = ccmn_imm(operands, metadata); break; } case Opcode::AArch64_CCMPWi: { // ccmp wn, #imm, #nzcv, cc - results[0] = conditionalHelp::ccmp_imm(operands, metadata); + results[0] = ccmp_imm(operands, metadata); break; } case Opcode::AArch64_CCMPWr: { // ccmp wn, wm, #nzcv, cc - results[0] = conditionalHelp::ccmp_reg(operands, metadata); + results[0] = ccmp_reg(operands, metadata); break; } case Opcode::AArch64_CCMPXi: { // ccmp xn, #imm, #nzcv, cc - results[0] = conditionalHelp::ccmp_imm(operands, metadata); + results[0] = ccmp_imm(operands, metadata); break; } case Opcode::AArch64_CCMPXr: { // ccmp xn, xm, #nzcv, cc - results[0] = conditionalHelp::ccmp_reg(operands, metadata); + results[0] = ccmp_reg(operands, metadata); break; } case Opcode::AArch64_CLZXr: { // clz xd, xn - results[0] = arithmeticHelp::clz_reg(operands); + results[0] = clz_reg(operands); break; } case Opcode::AArch64_CMEQv16i8: { // cmeq vd.16b, vn.16b, vm.16b - results[0] = neonHelp::vecCompare( + results[0] = vecCompare( operands, false, [](uint8_t x, uint8_t y) -> bool { return (x == y); }); break; } case Opcode::AArch64_CMEQv16i8rz: { 
// cmeq vd.16b, vn.16b, #0 - results[0] = neonHelp::vecCompare( + results[0] = vecCompare( operands, true, [](uint8_t x, uint8_t y) -> bool { return (x == y); }); break; } case Opcode::AArch64_CMEQv4i32: { // cmeq vd.4s, vn.4s, vm.4s - results[0] = neonHelp::vecCompare( + results[0] = vecCompare( operands, false, [](uint32_t x, uint32_t y) -> bool { return (x == y); }); break; } case Opcode::AArch64_CMEQv8i8: { // cmeq vd.8b, vn.8b, vm.8b - results[0] = neonHelp::vecCompare( + results[0] = vecCompare( operands, false, [](int8_t x, int8_t y) -> bool { return (x == y); }); break; } case Opcode::AArch64_CMEQv8i8rz: { // cmeq vd.8b, vn.8b, #0 - results[0] = neonHelp::vecCompare( + results[0] = vecCompare( operands, true, [](int8_t x, int8_t y) -> bool { return (x == y); }); break; } case Opcode::AArch64_CMHIv4i32: { // cmhi vd.4s, vn.4s, vm.4s - results[0] = neonHelp::vecCompare( + results[0] = vecCompare( operands, false, [](uint32_t x, uint32_t y) -> bool { return (x > y); }); break; } case Opcode::AArch64_CMHSv16i8: { // cmhs vd.16b, vn.16b, vm.16b - results[0] = neonHelp::vecCompare( + results[0] = vecCompare( operands, false, [](int8_t x, int8_t y) -> bool { return (x >= y); }); break; } case Opcode::AArch64_CMPEQ_PPzZI_B: { // cmpeq pd.b, pg/z, zn.b, #imm - auto [output, nzcv] = sveHelp::sveCmpPredicated_toPred( + auto [output, nzcv] = sveCmpPredicated_toPred( operands, metadata, VL_bits, true, [](uint8_t x, uint8_t y) -> bool { return x == y; }); results[0] = nzcv; @@ -700,7 +684,7 @@ void Instruction::execute() { break; } case Opcode::AArch64_CMPEQ_PPzZI_D: { // cmpeq pd.d, pg/z, zn.d, #imm - auto [output, nzcv] = sveHelp::sveCmpPredicated_toPred( + auto [output, nzcv] = sveCmpPredicated_toPred( operands, metadata, VL_bits, true, [](uint64_t x, uint64_t y) -> bool { return x == y; }); results[0] = nzcv; @@ -708,7 +692,7 @@ void Instruction::execute() { break; } case Opcode::AArch64_CMPEQ_PPzZI_H: { // cmpeq pd.h, pg/z, zn.h, #imm - auto [output, nzcv] = 
sveHelp::sveCmpPredicated_toPred( + auto [output, nzcv] = sveCmpPredicated_toPred( operands, metadata, VL_bits, true, [](uint16_t x, uint16_t y) -> bool { return x == y; }); results[0] = nzcv; @@ -716,7 +700,7 @@ void Instruction::execute() { break; } case Opcode::AArch64_CMPEQ_PPzZI_S: { // cmpeq pd.s, pg/z, zn.s, #imm - auto [output, nzcv] = sveHelp::sveCmpPredicated_toPred( + auto [output, nzcv] = sveCmpPredicated_toPred( operands, metadata, VL_bits, true, [](uint32_t x, uint32_t y) -> bool { return x == y; }); results[0] = nzcv; @@ -724,7 +708,7 @@ void Instruction::execute() { break; } case Opcode::AArch64_CMPEQ_PPzZZ_B: { // cmpeq pd.b, pg/z, zn.b, zm.b - auto [output, nzcv] = sveHelp::sveCmpPredicated_toPred( + auto [output, nzcv] = sveCmpPredicated_toPred( operands, metadata, VL_bits, false, [](uint8_t x, uint8_t y) -> bool { return x == y; }); results[0] = nzcv; @@ -732,7 +716,7 @@ void Instruction::execute() { break; } case Opcode::AArch64_CMPEQ_PPzZZ_D: { // cmpeq pd.d, pg/z, zn.d, zm.d - auto [output, nzcv] = sveHelp::sveCmpPredicated_toPred( + auto [output, nzcv] = sveCmpPredicated_toPred( operands, metadata, VL_bits, false, [](uint64_t x, uint64_t y) -> bool { return x == y; }); results[0] = nzcv; @@ -740,7 +724,7 @@ void Instruction::execute() { break; } case Opcode::AArch64_CMPEQ_PPzZZ_H: { // cmpeq pd.h, pg/z, zn.h, zm.h - auto [output, nzcv] = sveHelp::sveCmpPredicated_toPred( + auto [output, nzcv] = sveCmpPredicated_toPred( operands, metadata, VL_bits, false, [](uint16_t x, uint16_t y) -> bool { return x == y; }); results[0] = nzcv; @@ -748,7 +732,7 @@ void Instruction::execute() { break; } case Opcode::AArch64_CMPEQ_PPzZZ_S: { // cmpeq pd.s, pg/z, zn.s, zm.s - auto [output, nzcv] = sveHelp::sveCmpPredicated_toPred( + auto [output, nzcv] = sveCmpPredicated_toPred( operands, metadata, VL_bits, false, [](uint32_t x, uint32_t y) -> bool { return x == y; }); results[0] = nzcv; @@ -756,7 +740,7 @@ void Instruction::execute() { break; } case 
Opcode::AArch64_CMPGT_PPzZZ_B: { // cmpgt pd.b, pg/z, zn.b, zm.b - auto [output, nzcv] = sveHelp::sveCmpPredicated_toPred( + auto [output, nzcv] = sveCmpPredicated_toPred( operands, metadata, VL_bits, false, [](int8_t x, int8_t y) -> bool { return x > y; }); results[0] = nzcv; @@ -764,7 +748,7 @@ void Instruction::execute() { break; } case Opcode::AArch64_CMPGT_PPzZZ_D: { // cmpgt pd.d, pg/z, zn.d, zm.d - auto [output, nzcv] = sveHelp::sveCmpPredicated_toPred( + auto [output, nzcv] = sveCmpPredicated_toPred( operands, metadata, VL_bits, false, [](int64_t x, int64_t y) -> bool { return x > y; }); results[0] = nzcv; @@ -772,7 +756,7 @@ void Instruction::execute() { break; } case Opcode::AArch64_CMPGT_PPzZZ_H: { // cmpgt pd.h, pg/z, zn.h, zm.h - auto [output, nzcv] = sveHelp::sveCmpPredicated_toPred( + auto [output, nzcv] = sveCmpPredicated_toPred( operands, metadata, VL_bits, false, [](int16_t x, int16_t y) -> bool { return x > y; }); results[0] = nzcv; @@ -780,7 +764,7 @@ void Instruction::execute() { break; } case Opcode::AArch64_CMPGT_PPzZZ_S: { // cmpgt pd.s, pg/z, zn.s, zm.s - auto [output, nzcv] = sveHelp::sveCmpPredicated_toPred( + auto [output, nzcv] = sveCmpPredicated_toPred( operands, metadata, VL_bits, false, [](int32_t x, int32_t y) -> bool { return x > y; }); results[0] = nzcv; @@ -788,7 +772,7 @@ void Instruction::execute() { break; } case Opcode::AArch64_CMPHI_PPzZZ_B: { // cmphi pd.b, pg/z, zn.b, zm.b - auto [output, nzcv] = sveHelp::sveCmpPredicated_toPred( + auto [output, nzcv] = sveCmpPredicated_toPred( operands, metadata, VL_bits, false, [](uint8_t x, uint8_t y) -> bool { return x > y; }); results[0] = nzcv; @@ -796,7 +780,7 @@ void Instruction::execute() { break; } case Opcode::AArch64_CMPHI_PPzZZ_D: { // cmphi pd.d, pg/z, zn.d, zm.d - auto [output, nzcv] = sveHelp::sveCmpPredicated_toPred( + auto [output, nzcv] = sveCmpPredicated_toPred( operands, metadata, VL_bits, false, [](uint64_t x, uint64_t y) -> bool { return x > y; }); results[0] = nzcv; 
@@ -804,7 +788,7 @@ void Instruction::execute() { break; } case Opcode::AArch64_CMPHI_PPzZZ_H: { // cmphi pd.h, pg/z, zn.h, zm.h - auto [output, nzcv] = sveHelp::sveCmpPredicated_toPred( + auto [output, nzcv] = sveCmpPredicated_toPred( operands, metadata, VL_bits, false, [](uint16_t x, uint16_t y) -> bool { return x > y; }); results[0] = nzcv; @@ -812,7 +796,7 @@ void Instruction::execute() { break; } case Opcode::AArch64_CMPHI_PPzZZ_S: { // cmphi pd.s, pg/z, zn.s, zm.s - auto [output, nzcv] = sveHelp::sveCmpPredicated_toPred( + auto [output, nzcv] = sveCmpPredicated_toPred( operands, metadata, VL_bits, false, [](uint32_t x, uint32_t y) -> bool { return x > y; }); results[0] = nzcv; @@ -820,7 +804,7 @@ void Instruction::execute() { break; } case Opcode::AArch64_CMPNE_PPzZI_B: { // cmpne pd.b, pg/z. zn.b, #imm - auto [output, nzcv] = sveHelp::sveCmpPredicated_toPred( + auto [output, nzcv] = sveCmpPredicated_toPred( operands, metadata, VL_bits, true, [](int8_t x, int8_t y) -> bool { return x != y; }); results[0] = nzcv; @@ -828,7 +812,7 @@ void Instruction::execute() { break; } case Opcode::AArch64_CMPNE_PPzZI_D: { // cmpne pd.d, pg/z. zn.d, #imm - auto [output, nzcv] = sveHelp::sveCmpPredicated_toPred( + auto [output, nzcv] = sveCmpPredicated_toPred( operands, metadata, VL_bits, true, [](int64_t x, int64_t y) -> bool { return x != y; }); results[0] = nzcv; @@ -836,7 +820,7 @@ void Instruction::execute() { break; } case Opcode::AArch64_CMPNE_PPzZI_H: { // cmpne pd.h, pg/z. zn.h, #imm - auto [output, nzcv] = sveHelp::sveCmpPredicated_toPred( + auto [output, nzcv] = sveCmpPredicated_toPred( operands, metadata, VL_bits, true, [](int16_t x, int16_t y) -> bool { return x != y; }); results[0] = nzcv; @@ -844,7 +828,7 @@ void Instruction::execute() { break; } case Opcode::AArch64_CMPNE_PPzZI_S: { // cmpne pd.s, pg/z. 
zn.s, #imm - auto [output, nzcv] = sveHelp::sveCmpPredicated_toPred( + auto [output, nzcv] = sveCmpPredicated_toPred( operands, metadata, VL_bits, true, [](int32_t x, int32_t y) -> bool { return x != y; }); results[0] = nzcv; @@ -852,7 +836,7 @@ void Instruction::execute() { break; } case Opcode::AArch64_CMPNE_PPzZZ_B: { // cmpne pd.b, pg/z, zn.b, zm.b - auto [output, nzcv] = sveHelp::sveCmpPredicated_toPred( + auto [output, nzcv] = sveCmpPredicated_toPred( operands, metadata, VL_bits, false, [](int8_t x, int8_t y) -> bool { return x != y; }); results[0] = nzcv; @@ -860,7 +844,7 @@ void Instruction::execute() { break; } case Opcode::AArch64_CMPNE_PPzZZ_D: { // cmpne pd.d, pg/z, zn.d, zm.d - auto [output, nzcv] = sveHelp::sveCmpPredicated_toPred( + auto [output, nzcv] = sveCmpPredicated_toPred( operands, metadata, VL_bits, false, [](int64_t x, int64_t y) -> bool { return x != y; }); results[0] = nzcv; @@ -868,7 +852,7 @@ void Instruction::execute() { break; } case Opcode::AArch64_CMPNE_PPzZZ_H: { // cmpne pd.h, pg/z, zn.h, zm.h - auto [output, nzcv] = sveHelp::sveCmpPredicated_toPred( + auto [output, nzcv] = sveCmpPredicated_toPred( operands, metadata, VL_bits, false, [](int16_t x, int16_t y) -> bool { return x != y; }); results[0] = nzcv; @@ -876,7 +860,7 @@ void Instruction::execute() { break; } case Opcode::AArch64_CMPNE_PPzZZ_S: { // cmpne pd.s, pg/z, zn.s, zm.s - auto [output, nzcv] = sveHelp::sveCmpPredicated_toPred( + auto [output, nzcv] = sveCmpPredicated_toPred( operands, metadata, VL_bits, false, [](int32_t x, int32_t y) -> bool { return x != y; }); results[0] = nzcv; @@ -884,123 +868,118 @@ void Instruction::execute() { break; } case Opcode::AArch64_CNTB_XPiI: { // cntb xd{, pattern{, #imm}} - results[0] = sveHelp::sveCnt_gpr(metadata, VL_bits); + results[0] = sveCnt_gpr(metadata, VL_bits); break; } case Opcode::AArch64_CNTD_XPiI: { // cntd xd{, pattern{, #imm}} - results[0] = sveHelp::sveCnt_gpr(metadata, VL_bits); + results[0] = sveCnt_gpr(metadata, 
VL_bits); break; } case Opcode::AArch64_CNTH_XPiI: { // cnth xd{, pattern{, #imm}} - results[0] = sveHelp::sveCnt_gpr(metadata, VL_bits); + results[0] = sveCnt_gpr(metadata, VL_bits); break; } case Opcode::AArch64_CNTP_XPP_B: { // cntp xd, pg, pn.b - results[0] = sveHelp::sveCntp(operands, VL_bits); + results[0] = sveCntp(operands, VL_bits); break; } case Opcode::AArch64_CNTP_XPP_D: { // cntp xd, pg, pn.d - results[0] = sveHelp::sveCntp(operands, VL_bits); + results[0] = sveCntp(operands, VL_bits); break; } case Opcode::AArch64_CNTP_XPP_H: { // cntp xd, pg, pn.h - results[0] = sveHelp::sveCntp(operands, VL_bits); + results[0] = sveCntp(operands, VL_bits); break; } case Opcode::AArch64_CNTP_XPP_S: { // cntp xd, pg, pn.s - results[0] = sveHelp::sveCntp(operands, VL_bits); + results[0] = sveCntp(operands, VL_bits); break; } case Opcode::AArch64_CNTW_XPiI: { // cntw xd{, pattern{, #imm}} - results[0] = sveHelp::sveCnt_gpr(metadata, VL_bits); + results[0] = sveCnt_gpr(metadata, VL_bits); break; } case Opcode::AArch64_CNTv8i8: { // cnt vd.8b, vn.8b - results[0] = neonHelp::vecCountPerByte(operands); + results[0] = vecCountPerByte(operands); break; } case Opcode::AArch64_CPY_ZPzI_B: { // cpy zd.b, pg/z, #imm{, shift} - results[0] = sveHelp::sveCpy_imm(operands, metadata, VL_bits); + results[0] = sveCpy_imm(operands, metadata, VL_bits); break; } case Opcode::AArch64_CPY_ZPzI_D: { // cpy zd.d, pg/z, #imm{, shift} - results[0] = sveHelp::sveCpy_imm(operands, metadata, VL_bits); + results[0] = sveCpy_imm(operands, metadata, VL_bits); break; } case Opcode::AArch64_CPY_ZPzI_H: { // cpy zd.h, pg/z, #imm{, shift} - results[0] = sveHelp::sveCpy_imm(operands, metadata, VL_bits); + results[0] = sveCpy_imm(operands, metadata, VL_bits); break; } case Opcode::AArch64_CPY_ZPzI_S: { // cpy zd.s, pg/z, #imm{, shift} - results[0] = sveHelp::sveCpy_imm(operands, metadata, VL_bits); + results[0] = sveCpy_imm(operands, metadata, VL_bits); break; } case Opcode::AArch64_DUPi32: { // dup vd, 
vn.s[index] - results[0] = - neonHelp::vecDup_gprOrIndex(operands, metadata, false); + results[0] = vecDup_gprOrIndex(operands, metadata, false); break; } case Opcode::AArch64_DUPi64: { // dup vd, vn.d[index] - results[0] = - neonHelp::vecDup_gprOrIndex(operands, metadata, false); + results[0] = vecDup_gprOrIndex(operands, metadata, false); break; } case Opcode::AArch64_CSELWr: { // csel wd, wn, wm, cc results[0] = { - conditionalHelp::cs_4ops( - operands, metadata, [](uint32_t x) -> uint32_t { return x; }), + cs_4ops(operands, metadata, + [](uint32_t x) -> uint32_t { return x; }), 8}; break; } case Opcode::AArch64_CSELXr: { // csel xd, xn, xm, cc - results[0] = conditionalHelp::cs_4ops( + results[0] = cs_4ops( operands, metadata, [](uint64_t x) -> uint64_t { return x; }); break; } case Opcode::AArch64_CSINCWr: { // csinc wd, wn, wm, cc - results[0] = {conditionalHelp::cs_4ops( - operands, metadata, - [](uint32_t x) -> uint32_t { return x + 1; }), - 8}; + results[0] = { + cs_4ops(operands, metadata, + [](uint32_t x) -> uint32_t { return x + 1; }), + 8}; break; } case Opcode::AArch64_CSINCXr: { // csinc xd, xn, xm, cc - results[0] = conditionalHelp::cs_4ops( + results[0] = cs_4ops( operands, metadata, [](uint64_t x) -> uint64_t { return x + 1; }); break; } case Opcode::AArch64_CSINVWr: { // csinv wd, wn, wm, cc results[0] = { - conditionalHelp::cs_4ops( - operands, metadata, [](uint32_t x) -> uint32_t { return ~x; }), + cs_4ops(operands, metadata, + [](uint32_t x) -> uint32_t { return ~x; }), 8}; break; } case Opcode::AArch64_CSINVXr: { // csinv xd, xn, xm, cc - results[0] = conditionalHelp::cs_4ops( + results[0] = cs_4ops( operands, metadata, [](uint64_t x) -> uint64_t { return ~x; }); break; } case Opcode::AArch64_CSNEGWr: { // csneg wd, wn, wm, cc - results[0] = { - conditionalHelp::cs_4ops( - operands, metadata, [](int32_t x) -> int32_t { return -x; }), - 8}; + results[0] = {cs_4ops(operands, metadata, + [](int32_t x) -> int32_t { return -x; }), + 8}; break; } 
case Opcode::AArch64_CSNEGXr: { // csneg xd, xn, xm, cc - results[0] = conditionalHelp::cs_4ops( + results[0] = cs_4ops( operands, metadata, [](uint64_t x) -> uint64_t { return -x; }); break; } case Opcode::AArch64_DECB_XPiI: { // decb xdn{, pattern{, MUL #imm}} - results[0] = - sveHelp::sveDec_scalar(operands, metadata, VL_bits); + results[0] = sveDec_scalar(operands, metadata, VL_bits); break; } case Opcode::AArch64_DECD_XPiI: { // decd xdn{, pattern{, MUL #imm}} - results[0] = - sveHelp::sveDec_scalar(operands, metadata, VL_bits); + results[0] = sveDec_scalar(operands, metadata, VL_bits); break; } case Opcode::AArch64_DMB: { // dmb option|#imm @@ -1017,48 +996,47 @@ void Instruction::execute() { break; } case Opcode::AArch64_DUP_ZI_B: { // dup zd.b, #imm{, shift} - results[0] = sveHelp::sveDup_immOrScalar(operands, metadata, - VL_bits, true); + results[0] = + sveDup_immOrScalar(operands, metadata, VL_bits, true); break; } case Opcode::AArch64_DUP_ZI_D: { // dup zd.d, #imm{, shift} - results[0] = sveHelp::sveDup_immOrScalar(operands, metadata, - VL_bits, true); + results[0] = + sveDup_immOrScalar(operands, metadata, VL_bits, true); break; } case Opcode::AArch64_DUP_ZI_H: { // dup zd.h, #imm{, shift} - results[0] = sveHelp::sveDup_immOrScalar(operands, metadata, - VL_bits, true); + results[0] = + sveDup_immOrScalar(operands, metadata, VL_bits, true); break; } case Opcode::AArch64_DUP_ZI_S: { // dup zd.s, #imm{, shift} - results[0] = sveHelp::sveDup_immOrScalar(operands, metadata, - VL_bits, true); + results[0] = + sveDup_immOrScalar(operands, metadata, VL_bits, true); break; } case Opcode::AArch64_DUP_ZR_B: { // dup zd.b, wn - results[0] = sveHelp::sveDup_immOrScalar(operands, metadata, - VL_bits, false); + results[0] = + sveDup_immOrScalar(operands, metadata, VL_bits, false); break; } case Opcode::AArch64_DUP_ZR_D: { // dup zd.d, xn - results[0] = sveHelp::sveDup_immOrScalar(operands, metadata, - VL_bits, false); + results[0] = + sveDup_immOrScalar(operands, 
metadata, VL_bits, false); break; } case Opcode::AArch64_DUP_ZR_H: { // dup zd.h, wn - results[0] = sveHelp::sveDup_immOrScalar(operands, metadata, - VL_bits, false); + results[0] = + sveDup_immOrScalar(operands, metadata, VL_bits, false); break; } case Opcode::AArch64_DUP_ZR_S: { // dup zd.s, wn - results[0] = sveHelp::sveDup_immOrScalar(operands, metadata, - VL_bits, false); + results[0] = + sveDup_immOrScalar(operands, metadata, VL_bits, false); break; } case Opcode::AArch64_DUP_ZZI_D: { // dup zd.d, zn.d[#imm] - results[0] = - sveHelp::sveDup_vecIndexed(operands, metadata, VL_bits); + results[0] = sveDup_vecIndexed(operands, metadata, VL_bits); break; } case Opcode::AArch64_DUP_ZZI_Q: { // dup zd.q, zn.q[#imm] @@ -1083,126 +1061,116 @@ void Instruction::execute() { break; } case Opcode::AArch64_DUP_ZZI_S: { // dup zd.s, zn.s[#imm] - results[0] = - sveHelp::sveDup_vecIndexed(operands, metadata, VL_bits); + results[0] = sveDup_vecIndexed(operands, metadata, VL_bits); break; } case Opcode::AArch64_DUPv16i8gpr: { // dup vd.16b, wn - results[0] = - neonHelp::vecDup_gprOrIndex(operands, metadata, true); + results[0] = vecDup_gprOrIndex(operands, metadata, true); break; } case Opcode::AArch64_DUPv2i32gpr: { // dup vd.2s, wn - results[0] = - neonHelp::vecDup_gprOrIndex(operands, metadata, true); + results[0] = vecDup_gprOrIndex(operands, metadata, true); break; } case Opcode::AArch64_DUPv2i32lane: { // dup vd.2s, vn.s[index] - results[0] = - neonHelp::vecDup_gprOrIndex(operands, metadata, false); + results[0] = vecDup_gprOrIndex(operands, metadata, false); break; } case Opcode::AArch64_DUPv2i64gpr: { // dup vd.2d, xn - results[0] = - neonHelp::vecDup_gprOrIndex(operands, metadata, true); + results[0] = vecDup_gprOrIndex(operands, metadata, true); break; } case Opcode::AArch64_DUPv2i64lane: { // dup vd.2d, vn.d[index] - results[0] = - neonHelp::vecDup_gprOrIndex(operands, metadata, false); + results[0] = vecDup_gprOrIndex(operands, metadata, false); break; } case 
Opcode::AArch64_DUPv4i16gpr: { // dup vd.4h, wn - results[0] = - neonHelp::vecDup_gprOrIndex(operands, metadata, true); + results[0] = vecDup_gprOrIndex(operands, metadata, true); break; } case Opcode::AArch64_DUPv4i32gpr: { // dup vd.4s, wn - results[0] = - neonHelp::vecDup_gprOrIndex(operands, metadata, true); + results[0] = vecDup_gprOrIndex(operands, metadata, true); break; } case Opcode::AArch64_DUPv4i32lane: { // dup vd.4s, vn.s[index] - results[0] = - neonHelp::vecDup_gprOrIndex(operands, metadata, false); + results[0] = vecDup_gprOrIndex(operands, metadata, false); break; } case Opcode::AArch64_DUPv8i16gpr: { // dup vd.8h, wn - results[0] = - neonHelp::vecDup_gprOrIndex(operands, metadata, true); + results[0] = vecDup_gprOrIndex(operands, metadata, true); break; } case Opcode::AArch64_EORWri: { // eor wd, wn, #imm - auto [result, nzcv] = logicalHelp::logicOp_imm( + auto [result, nzcv] = logicOp_imm( operands, metadata, false, [](uint32_t x, uint32_t y) -> uint32_t { return x ^ y; }); results[0] = {result, 8}; break; } case Opcode::AArch64_EORWrs: { // eor wd, wn, wm{, shift #imm} - auto [result, nzcv] = logicalHelp::logicOpShift_3ops( + auto [result, nzcv] = logicOpShift_3ops( operands, metadata, false, [](uint32_t x, uint32_t y) -> uint32_t { return x ^ y; }); results[0] = {result, 8}; break; } case Opcode::AArch64_EORXri: { // eor xd, xn, #imm - auto [result, nzcv] = logicalHelp::logicOp_imm( + auto [result, nzcv] = logicOp_imm( operands, metadata, false, [](uint64_t x, uint64_t y) -> uint64_t { return x ^ y; }); results[0] = result; break; } case Opcode::AArch64_EORXrs: { // eor xd, xn, xm{, shift #amount} - auto [result, nzcv] = logicalHelp::logicOpShift_3ops( + auto [result, nzcv] = logicOpShift_3ops( operands, metadata, false, [](uint64_t x, uint64_t y) -> uint64_t { return x ^ y; }); results[0] = result; break; } case Opcode::AArch64_EOR_PPzPP: { - results[0] = sveHelp::sveLogicOp_preds( + results[0] = sveLogicOp_preds( operands, VL_bits, [](uint64_t 
x, uint64_t y) -> uint64_t { return x ^ y; }); break; } case Opcode::AArch64_EOR_ZPmZ_B: { // eor zdn.b, pg/m, zdn.b, zm.b - results[0] = sveHelp::sveLogicOpPredicated_3vecs( + results[0] = sveLogicOpPredicated_3vecs( operands, VL_bits, [](uint8_t x, uint8_t y) -> uint8_t { return x ^ y; }); break; } case Opcode::AArch64_EOR_ZPmZ_D: { // eor zdn.d, pg/m, zdn.d, zm.d - results[0] = sveHelp::sveLogicOpPredicated_3vecs( + results[0] = sveLogicOpPredicated_3vecs( operands, VL_bits, [](uint64_t x, uint64_t y) -> uint64_t { return x ^ y; }); break; } case Opcode::AArch64_EOR_ZPmZ_H: { // eor zdn.h, pg/m, zdn.h, zm.h - results[0] = sveHelp::sveLogicOpPredicated_3vecs( + results[0] = sveLogicOpPredicated_3vecs( operands, VL_bits, [](uint16_t x, uint16_t y) -> uint16_t { return x ^ y; }); break; } case Opcode::AArch64_EOR_ZPmZ_S: { // eor zdn.s, pg/m, zdn.s, zm.s - results[0] = sveHelp::sveLogicOpPredicated_3vecs( + results[0] = sveLogicOpPredicated_3vecs( operands, VL_bits, [](uint32_t x, uint32_t y) -> uint32_t { return x ^ y; }); break; } case Opcode::AArch64_EOR_ZZZ: { // eor zd.d, zn.d, zm.d - results[0] = sveHelp::sveLogicOpUnPredicated_3vecs( + results[0] = sveLogicOpUnPredicated_3vecs( operands, VL_bits, [](uint64_t x, uint64_t y) -> uint64_t { return x ^ y; }); break; } case Opcode::AArch64_EORv16i8: { // eor vd.16b, vn.16b, vm.16b - results[0] = neonHelp::vecLogicOp_3vecs( + results[0] = vecLogicOp_3vecs( operands, [](uint8_t x, uint8_t y) -> uint8_t { return x ^ y; }); break; } case Opcode::AArch64_EORv8i8: { // eor vd.8b, vn.8b, vm.8b - results[0] = neonHelp::vecLogicOp_3vecs( + results[0] = vecLogicOp_3vecs( operands, [](uint8_t x, uint8_t y) -> uint8_t { return x ^ y; }); break; } @@ -1235,355 +1203,347 @@ void Instruction::execute() { break; } case Opcode::AArch64_EXTRWrri: { // extr wd, wn, wm, #lsb - results[0] = { - bitmanipHelp::extrLSB_registers(operands, metadata), 8}; + results[0] = {extrLSB_registers(operands, metadata), 8}; break; } case 
Opcode::AArch64_EXTRXrri: { // extr xd, xn, xm, #lsb - results[0] = - bitmanipHelp::extrLSB_registers(operands, metadata); + results[0] = extrLSB_registers(operands, metadata); break; } case Opcode::AArch64_EXTv16i8: { // ext vd.16b, vn.16b, vm.16b, #index - results[0] = - neonHelp::vecExtVecs_index(operands, metadata); + results[0] = vecExtVecs_index(operands, metadata); break; } case Opcode::AArch64_EXTv8i8: { // ext vd.8b, vn.8b, vm.8b, #index - results[0] = neonHelp::vecExtVecs_index(operands, metadata); + results[0] = vecExtVecs_index(operands, metadata); break; } case Opcode::AArch64_FABDv2f64: { // fabd vd.2d, vn.2d, vm.2d - results[0] = neonHelp::vecFabd(operands); + results[0] = vecFabd(operands); break; } case Opcode::AArch64_FABDv4f32: { // fabd vd.4s, vn.4s, vm.4s - results[0] = neonHelp::vecFabd(operands); + results[0] = vecFabd(operands); break; } case Opcode::AArch64_FABD32: { // fabd sd, sn, sm - results[0] = floatHelp::fabd_3ops(operands); + results[0] = fabd_3ops(operands); break; } case Opcode::AArch64_FABD64: { // fabd dd, dn, dm - results[0] = floatHelp::fabd_3ops(operands); + results[0] = fabd_3ops(operands); break; } case Opcode::AArch64_FABSDr: { // fabs dd, dn - results[0] = floatHelp::fabs_2ops(operands); + results[0] = fabs_2ops(operands); break; } case Opcode::AArch64_FABSSr: { // fabs sd, sn - results[0] = floatHelp::fabs_2ops(operands); + results[0] = fabs_2ops(operands); break; } case Opcode::AArch64_FABS_ZPmZ_D: { // fabs zd.d, pg/m, zn.d - results[0] = sveHelp::sveFabsPredicated(operands, VL_bits); + results[0] = sveFabsPredicated(operands, VL_bits); break; } case Opcode::AArch64_FABS_ZPmZ_S: { // fabs zd.s, pg/m, zn.s - results[0] = sveHelp::sveFabsPredicated(operands, VL_bits); + results[0] = sveFabsPredicated(operands, VL_bits); break; } case Opcode::AArch64_FABSv2f64: { // fabs vd.2d, vn.2d - results[0] = neonHelp::vecFabs_2ops(operands); + results[0] = vecFabs_2ops(operands); break; } case Opcode::AArch64_FABSv4f32: { // fabs 
vd.4s, vn.4s - results[0] = neonHelp::vecFabs_2ops(operands); + results[0] = vecFabs_2ops(operands); break; } case Opcode::AArch64_FADDA_VPZ_D: { // fadda dd, pg/m, dn, zm.d - results[0] = sveHelp::sveFaddaPredicated(operands, VL_bits); + results[0] = sveFaddaPredicated(operands, VL_bits); break; } case Opcode::AArch64_FADDA_VPZ_S: { // fadda sd, pg/m, sn, zm.s - results[0] = sveHelp::sveFaddaPredicated(operands, VL_bits); + results[0] = sveFaddaPredicated(operands, VL_bits); break; } case Opcode::AArch64_FADDDrr: { // fadd dd, dn, dm - results[0] = {arithmeticHelp::add_3ops(operands), 256}; + results[0] = {add_3ops(operands), 256}; break; } case Opcode::AArch64_FADDPv2f32: { // faddp vd.2s, vn.2s, vm.2s - results[0] = neonHelp::vecAddp_3ops(operands); + results[0] = vecAddp_3ops(operands); break; } case Opcode::AArch64_FADDPv2f64: { // faddp vd.2d, vn.2d, vm.2d - results[0] = neonHelp::vecAddp_3ops(operands); + results[0] = vecAddp_3ops(operands); break; } case Opcode::AArch64_FADDPv2i32p: { // faddp dd, vn.2s - results[0] = neonHelp::vecSumElems_2ops(operands); + results[0] = vecSumElems_2ops(operands); break; } case Opcode::AArch64_FADDPv2i64p: { // faddp dd, vn.2d - results[0] = neonHelp::vecSumElems_2ops(operands); + results[0] = vecSumElems_2ops(operands); break; } case Opcode::AArch64_FADDPv4f32: { // faddp vd.4s, vn.4s, vm.4s - results[0] = neonHelp::vecAddp_3ops(operands); + results[0] = vecAddp_3ops(operands); break; } case Opcode::AArch64_FADDSrr: { // fadd sd, sn, sm - results[0] = {arithmeticHelp::add_3ops(operands), 256}; + results[0] = {add_3ops(operands), 256}; break; } case Opcode::AArch64_FADD_ZPmI_D: { // fadd zdn.d, pg/m, zdn.d, const - results[0] = sveHelp::sveAddPredicated_const(operands, metadata, - VL_bits); + results[0] = + sveAddPredicated_const(operands, metadata, VL_bits); break; } case Opcode::AArch64_FADD_ZPmI_S: { // fadd zdn.s, pg/m, zdn.s, const - results[0] = - sveHelp::sveAddPredicated_const(operands, metadata, VL_bits); + 
results[0] = sveAddPredicated_const(operands, metadata, VL_bits); break; } case Opcode::AArch64_FADD_ZPmZ_D: { // fadd zdn.d, pg/m, zdn.d, zm.d - results[0] = sveHelp::sveAddPredicated_vecs(operands, VL_bits); + results[0] = sveAddPredicated_vecs(operands, VL_bits); break; } case Opcode::AArch64_FADD_ZPmZ_S: { // fadd zdn.s, pg/m, zdn.s, zm.s - results[0] = sveHelp::sveAddPredicated_vecs(operands, VL_bits); + results[0] = sveAddPredicated_vecs(operands, VL_bits); break; } case Opcode::AArch64_FADD_ZZZ_D: { // fadd zd.d, zn.d, zm.d - results[0] = sveHelp::sveAdd_3ops(operands, VL_bits); + results[0] = sveAdd_3ops(operands, VL_bits); break; } case Opcode::AArch64_FADD_ZZZ_S: { // fadd zd.s, zn.s, zm.s - results[0] = sveHelp::sveAdd_3ops(operands, VL_bits); + results[0] = sveAdd_3ops(operands, VL_bits); break; } case Opcode::AArch64_FADDv2f32: { // fadd vd.2s, vn.2s, vm.2s - results[0] = neonHelp::vecAdd_3ops(operands); + results[0] = vecAdd_3ops(operands); break; } case Opcode::AArch64_FADDv2f64: { // fadd vd.2d, vn.2d, vm.2d - results[0] = neonHelp::vecAdd_3ops(operands); + results[0] = vecAdd_3ops(operands); break; } case Opcode::AArch64_FADDv4f32: { // fadd vd.4s, vn.4s, vm.4s - results[0] = neonHelp::vecAdd_3ops(operands); + results[0] = vecAdd_3ops(operands); break; } case Opcode::AArch64_FCADD_ZPmZ_D: { // fcadd zdn.d, pg/m, zdn.d, zm.d, // #imm - results[0] = - sveHelp::sveFcaddPredicated(operands, metadata, VL_bits); + results[0] = sveFcaddPredicated(operands, metadata, VL_bits); break; } case Opcode::AArch64_FCCMPDrr: // fccmp sn, sm, #nzcv, cc case Opcode::AArch64_FCCMPEDrr: { // fccmpe sn, sm, #nzcv, cc - results[0] = floatHelp::fccmp(operands, metadata); + results[0] = fccmp(operands, metadata); break; } case Opcode::AArch64_FCCMPESrr: { // fccmpe sn, sm, #nzcv, cc - results[0] = floatHelp::fccmp(operands, metadata); + results[0] = fccmp(operands, metadata); break; } case Opcode::AArch64_FCCMPSrr: { // fccmp sn, sm, #nzcv, cc - results[0] = 
floatHelp::fccmp(operands, metadata); + results[0] = fccmp(operands, metadata); break; } case Opcode::AArch64_FCMEQv2i32rz: { // fcmeq vd.2s, vd.2s, #0.0 - results[0] = neonHelp::vecFCompare( + results[0] = vecFCompare( operands, true, [](float x, float y) -> bool { return x == y; }); break; } case Opcode::AArch64_FCMEQv4i32rz: { // fcmeq vd.4s vn.4s, #0.0 - results[0] = neonHelp::vecFCompare( + results[0] = vecFCompare( operands, true, [](float x, float y) -> bool { return x == y; }); break; } case Opcode::AArch64_FCMGE_PPzZ0_D: { // fcmge pd.d, pg/z, zn.d, #0.0 - results[0] = sveHelp::sveComparePredicated_vecsToPred( + results[0] = sveComparePredicated_vecsToPred( operands, metadata, VL_bits, true, [](double x, double y) -> bool { return x >= y; }); break; } case Opcode::AArch64_FCMGE_PPzZ0_S: { // fcmge pd.s, pg/z, zn.s, #0.0 - results[0] = sveHelp::sveComparePredicated_vecsToPred( + results[0] = sveComparePredicated_vecsToPred( operands, metadata, VL_bits, true, [](float x, float y) -> bool { return x >= y; }); break; } case Opcode::AArch64_FCMGE_PPzZZ_D: { // fcmge pd.d, pg/z, zn.d, zm.d - results[0] = sveHelp::sveComparePredicated_vecsToPred( + results[0] = sveComparePredicated_vecsToPred( operands, metadata, VL_bits, false, [](double x, double y) -> bool { return x >= y; }); break; } case Opcode::AArch64_FCMGE_PPzZZ_S: { // fcmge pd.s, pg/z, zn.s, zm.s - results[0] = sveHelp::sveComparePredicated_vecsToPred( + results[0] = sveComparePredicated_vecsToPred( operands, metadata, VL_bits, false, [](float x, float y) -> bool { return x >= y; }); break; } case Opcode::AArch64_FCMGEv2f32: { // fcmge vd.2s, vn.2s, vm.2s - results[0] = neonHelp::vecFCompare( + results[0] = vecFCompare( operands, false, [](float x, float y) -> bool { return x >= y; }); break; } case Opcode::AArch64_FCMGEv2f64: { // fcmge vd.2d, vn.2d, vm.2d - results[0] = neonHelp::vecFCompare( + results[0] = vecFCompare( operands, false, [](float x, double y) -> bool { return x >= y; }); break; } case 
Opcode::AArch64_FCMGEv2i64rz: { // fcmge vd.2d, vn.2d, 0.0 - results[0] = neonHelp::vecFCompare( + results[0] = vecFCompare( operands, true, [](double x, double y) -> bool { return x >= y; }); break; } case Opcode::AArch64_FCMGEv4f32: { // fcmge vd.4s, vn.4s, vm.4s - results[0] = neonHelp::vecFCompare( + results[0] = vecFCompare( operands, false, [](float x, float y) -> bool { return x >= y; }); break; } case Opcode::AArch64_FCMGEv4i32rz: { // fcmge vd.4s, vn.4s, 0.0 - results[0] = neonHelp::vecFCompare( + results[0] = vecFCompare( operands, true, [](float x, float y) -> bool { return x >= y; }); break; } case Opcode::AArch64_FCMGT_PPzZ0_D: { // fcmgt pd.d, pg/z, zn.d, #0.0 - results[0] = sveHelp::sveComparePredicated_vecsToPred( + results[0] = sveComparePredicated_vecsToPred( operands, metadata, VL_bits, true, [](double x, double y) -> bool { return x > y; }); break; } case Opcode::AArch64_FCMGT_PPzZ0_S: { // fcmgt pd.s, pg/z, zn.s, #0.0 - results[0] = sveHelp::sveComparePredicated_vecsToPred( + results[0] = sveComparePredicated_vecsToPred( operands, metadata, VL_bits, true, [](float x, float y) -> bool { return x > y; }); break; } case Opcode::AArch64_FCMGT_PPzZZ_D: { // fcmgt pd.d, pg/z, zn.d, zm.d - results[0] = sveHelp::sveComparePredicated_vecsToPred( + results[0] = sveComparePredicated_vecsToPred( operands, metadata, VL_bits, false, [](double x, double y) -> bool { return x > y; }); break; } case Opcode::AArch64_FCMGT_PPzZZ_S: { // fcmgt pd.s, pg/z, zn.s, zm. 
- results[0] = sveHelp::sveComparePredicated_vecsToPred( + results[0] = sveComparePredicated_vecsToPred( operands, metadata, VL_bits, false, [](float x, float y) -> bool { return x > y; }); break; } case Opcode::AArch64_FCMGTv2i32rz: { // fcmgt vd.2s, vn.2s, #0.0 - results[0] = neonHelp::vecFCompare( + results[0] = vecFCompare( operands, true, [](float x, float y) -> bool { return x > y; }); break; } case Opcode::AArch64_FCMGTv2i64rz: { // fcmgt vd.2d, vn.2d, #0.0 - results[0] = neonHelp::vecFCompare( + results[0] = vecFCompare( operands, true, [](double x, double y) -> bool { return x > y; }); break; } case Opcode::AArch64_FCMGTv2f64: { // fcmgt vd.2d, vn.2d, vm.2d - results[0] = neonHelp::vecFCompare( + results[0] = vecFCompare( operands, false, [](double x, double y) -> bool { return x > y; }); break; } case Opcode::AArch64_FCMGTv4f32: { // fcmgt vd.4s, vn.4s, vm.4s - results[0] = neonHelp::vecFCompare( + results[0] = vecFCompare( operands, false, [](float x, float y) -> bool { return x > y; }); break; } case Opcode::AArch64_FCMGTv4i32rz: { // fcmgt vd.4s, vn.4s, #0.0 - results[0] = neonHelp::vecFCompare( + results[0] = vecFCompare( operands, true, [](float x, float y) -> bool { return x > y; }); break; } case Opcode::AArch64_FCMLA_ZPmZZ_D: { // fcmla zda, pg/m, zn, zm, #imm - results[0] = - sveHelp::sveFcmlaPredicated(operands, metadata, VL_bits); + results[0] = sveFcmlaPredicated(operands, metadata, VL_bits); break; } case Opcode::AArch64_FCMLE_PPzZ0_D: { // fcmle pd.d, pg/z, zn.d, #0.0 - results[0] = sveHelp::sveComparePredicated_vecsToPred( + results[0] = sveComparePredicated_vecsToPred( operands, metadata, VL_bits, true, [](double x, double y) -> bool { return x <= y; }); break; } case Opcode::AArch64_FCMLE_PPzZ0_S: { // fcmle pd.s, pg/z, zn.s, #0.0 - results[0] = sveHelp::sveComparePredicated_vecsToPred( + results[0] = sveComparePredicated_vecsToPred( operands, metadata, VL_bits, true, [](float x, float y) -> bool { return x <= y; }); break; } case 
Opcode::AArch64_FCMLT_PPzZ0_S: { // fcmlt pd.s, pg/z, zn.s, #0.0 - results[0] = sveHelp::sveComparePredicated_vecsToPred( + results[0] = sveComparePredicated_vecsToPred( operands, metadata, VL_bits, true, [](float x, float y) -> bool { return x < y; }); break; } case Opcode::AArch64_FCMLTv2i32rz: { // fcmlt vd.2s, vn.2s, #0.0 - results[0] = neonHelp::vecFCompare( + results[0] = vecFCompare( operands, true, [](float x, float y) -> bool { return x < y; }); break; } case Opcode::AArch64_FCMLTv2i64rz: { // fcmlt vd.2d, vn.2d, #0.0 - results[0] = neonHelp::vecFCompare( + results[0] = vecFCompare( operands, true, [](double x, double y) -> bool { return x < y; }); break; } case Opcode::AArch64_FCMLTv4i32rz: { // fcmlt vd.4s, vn.4s, #0.0 - results[0] = neonHelp::vecFCompare( + results[0] = vecFCompare( operands, true, [](float x, float y) -> bool { return x < y; }); break; } case Opcode::AArch64_FCMPDri: { // fcmp dn, #imm - results[0] = floatHelp::fcmp(operands, true); + results[0] = fcmp(operands, true); break; } case Opcode::AArch64_FCMPDrr: { // fcmp dn, dm - results[0] = floatHelp::fcmp(operands, false); + results[0] = fcmp(operands, false); break; } case Opcode::AArch64_FCMPEDri: { // fcmpe dn, #imm - results[0] = floatHelp::fcmp(operands, true); + results[0] = fcmp(operands, true); break; } case Opcode::AArch64_FCMPEDrr: { // fcmpe dn, dm - results[0] = floatHelp::fcmp(operands, false); + results[0] = fcmp(operands, false); break; } case Opcode::AArch64_FCMPESri: { // fcmpe sn, #imm - results[0] = floatHelp::fcmp(operands, true); + results[0] = fcmp(operands, true); break; } case Opcode::AArch64_FCMPESrr: { // fcmpe sn, sm - results[0] = floatHelp::fcmp(operands, false); + results[0] = fcmp(operands, false); break; } case Opcode::AArch64_FCMPSri: { // fcmp sn, #imm - results[0] = floatHelp::fcmp(operands, true); + results[0] = fcmp(operands, true); break; } case Opcode::AArch64_FCMPSrr: { // fcmp sn, sm - results[0] = floatHelp::fcmp(operands, false); + results[0] = 
fcmp(operands, false); break; } case Opcode::AArch64_FCPY_ZPmI_D: { // fcpy zd.d, pg/m, #const - results[0] = sveHelp::sveFcpy_imm(operands, metadata, VL_bits); + results[0] = sveFcpy_imm(operands, metadata, VL_bits); break; } case Opcode::AArch64_FCPY_ZPmI_S: { // fcpy zd.s, pg/m, #const - results[0] = sveHelp::sveFcpy_imm(operands, metadata, VL_bits); + results[0] = sveFcpy_imm(operands, metadata, VL_bits); break; } case Opcode::AArch64_FCSELDrrr: { // fcsel dd, dn, dm, cond - results[0] = { - conditionalHelp::cs_4ops( - operands, metadata, [](double x) -> double { return x; }), - 256}; + results[0] = {cs_4ops(operands, metadata, + [](double x) -> double { return x; }), + 256}; break; } case Opcode::AArch64_FCSELSrrr: { // fcsel sd, sn, sm, cond - results[0] = { - conditionalHelp::cs_4ops(operands, metadata, - [](float x) -> float { return x; }), - 256}; + results[0] = {cs_4ops(operands, metadata, + [](float x) -> float { return x; }), + 256}; break; } case Opcode::AArch64_FCVTASUWDr: { // fcvtas wd, dn @@ -1597,28 +1557,28 @@ void Instruction::execute() { } case Opcode::AArch64_FCVTDSr: { // fcvt dd, sn // TODO: Handle NaNs, denorms, and saturation? 
- results[0] = neonHelp::vecFcvtl(operands, false); + results[0] = vecFcvtl(operands, false); break; } case Opcode::AArch64_FCVTLv2i32: { // fcvtl vd.2d, vn.2s - results[0] = neonHelp::vecFcvtl(operands, false); + results[0] = vecFcvtl(operands, false); break; } case Opcode::AArch64_FCVTLv4i32: { // fcvtl2 vd.2d, vn.4s - results[0] = neonHelp::vecFcvtl(operands, true); + results[0] = vecFcvtl(operands, true); break; } case Opcode::AArch64_FCVTNv2i32: { // fcvtn vd.2s, vn.2d - results[0] = neonHelp::vecFcvtn(operands, false); + results[0] = vecFcvtn(operands, false); break; } case Opcode::AArch64_FCVTNv4i32: { // fcvtn2 vd.4s, vn.2d - results[0] = neonHelp::vecFcvtn(operands, true); + results[0] = vecFcvtn(operands, true); break; } case Opcode::AArch64_FCVTSDr: { // fcvt sd, dn // TODO: Handle NaNs, denorms, and saturation? - results[0] = neonHelp::vecFcvtl(operands, false); + results[0] = vecFcvtl(operands, false); break; } case Opcode::AArch64_FCVTZSUWDr: { // fcvtzs wd, dn @@ -1640,234 +1600,212 @@ void Instruction::execute() { break; } case Opcode::AArch64_FCVTZS_ZPmZ_DtoD: { // fcvtzs zd.d, pg/m, zn.d - results[0] = - sveHelp::sveFcvtzsPredicated(operands, VL_bits); + results[0] = sveFcvtzsPredicated(operands, VL_bits); break; } case Opcode::AArch64_FCVTZS_ZPmZ_DtoS: { // fcvtzs zd.s, pg/m, zn.d - results[0] = - sveHelp::sveFcvtzsPredicated(operands, VL_bits); + results[0] = sveFcvtzsPredicated(operands, VL_bits); break; } case Opcode::AArch64_FCVTZS_ZPmZ_StoD: { // fcvtzs zd.d, pg/m, zn.s - results[0] = - sveHelp::sveFcvtzsPredicated(operands, VL_bits); + results[0] = sveFcvtzsPredicated(operands, VL_bits); break; } case Opcode::AArch64_FCVTZS_ZPmZ_StoS: { // fcvtzs zd.s, pg/m, zn.s - results[0] = - sveHelp::sveFcvtzsPredicated(operands, VL_bits); + results[0] = sveFcvtzsPredicated(operands, VL_bits); break; } case Opcode::AArch64_FCVTZSv2f64: { // fcvtzs vd.2d, vn.2d - results[0] = neonHelp::vecFcvtzs(operands); + results[0] = vecFcvtzs(operands); break; } 
case Opcode::AArch64_FCVTZUUWDr: { // fcvtzu wd, dn - // TODO: Handle NaNs, denorms, and saturation - results[0] = { - static_cast(std::trunc(operands[0].get())), 8}; + results[0] = {fcvtzu_integer(operands), 8}; break; } case Opcode::AArch64_FCVTZUUWSr: { // fcvtzu wd, sn - // TODO: Handle NaNs, denorms, and saturation - results[0] = { - static_cast(std::trunc(operands[0].get())), 8}; + results[0] = {fcvtzu_integer(operands), 8}; break; } case Opcode::AArch64_FCVTZUUXDr: { // fcvtzu xd, dn - // TODO: Handle NaNs, denorms, and saturation - results[0] = - static_cast(std::trunc(operands[0].get())); + results[0] = {fcvtzu_integer(operands), 8}; break; } case Opcode::AArch64_FCVTZUUXSr: { // fcvtzu xd, sn - // TODO: Handle NaNs, denorms, and saturation - results[0] = static_cast(std::trunc(operands[0].get())); + results[0] = {fcvtzu_integer(operands), 8}; break; } case Opcode::AArch64_FCVTZUv1i64: { // fcvtzu dd, dn - // TODO: Handle NaNs, denorms, and saturation - results[0] = { - static_cast(std::trunc(operands[0].get())), 256}; + results[0] = {fcvtzu_integer(operands), 256}; break; } case Opcode::AArch64_FCVT_ZPmZ_DtoS: { // fcvt zd.s, pg/m, zn.d - results[0] = - sveHelp::sveFcvtPredicated(operands, VL_bits); + results[0] = sveFcvtPredicated(operands, VL_bits); break; } case Opcode::AArch64_FCVT_ZPmZ_StoD: { // fcvt zd.d, pg/m, zn.s - results[0] = - sveHelp::sveFcvtPredicated(operands, VL_bits); + results[0] = sveFcvtPredicated(operands, VL_bits); break; } case Opcode::AArch64_FDIVDrr: { // fdiv dd, dn, dm - results[0] = {divideHelp::div_3ops(operands), 256}; + results[0] = {div_3ops(operands), 256}; break; } case Opcode::AArch64_FDIVR_ZPmZ_D: { // fdivr zdn.d, pg/m, zdn.d, zm.d - results[0] = sveHelp::sveLogicOpPredicated_3vecs( + results[0] = sveLogicOpPredicated_3vecs( operands, VL_bits, [](double x, double y) -> double { return (y / x); }); break; } case Opcode::AArch64_FDIVR_ZPmZ_S: { // fdivr zdn.s, pg/m, zdn.s, zm.s - results[0] = 
sveHelp::sveLogicOpPredicated_3vecs( + results[0] = sveLogicOpPredicated_3vecs( operands, VL_bits, [](float x, float y) -> float { return (y / x); }); break; } case Opcode::AArch64_FDIVSrr: { // fdiv sd, sn, sm - results[0] = {divideHelp::div_3ops(operands), 256}; + results[0] = {div_3ops(operands), 256}; break; } case Opcode::AArch64_FDIV_ZPmZ_D: { // fdiv zdn.d, pg/m, zdn.d, zm.d - results[0] = sveHelp::sveLogicOpPredicated_3vecs( + results[0] = sveLogicOpPredicated_3vecs( operands, VL_bits, [](double x, double y) -> double { return (x / y); }); break; } case Opcode::AArch64_FDIVv2f64: { // fdiv vd.2d, vn.2d, vm.2d - results[0] = neonHelp::vecLogicOp_3vecs( + results[0] = vecLogicOp_3vecs( operands, [](double x, double y) -> double { return x / y; }); break; } case Opcode::AArch64_FDUP_ZI_D: { // fdup zd.d, #imm - results[0] = sveHelp::sveDup_immOrScalar(operands, metadata, - VL_bits, true); + results[0] = + sveDup_immOrScalar(operands, metadata, VL_bits, true); break; } case Opcode::AArch64_FDUP_ZI_S: { // fdup zd.s, #imm - results[0] = sveHelp::sveDup_immOrScalar(operands, metadata, - VL_bits, true); + results[0] = + sveDup_immOrScalar(operands, metadata, VL_bits, true); break; } case Opcode::AArch64_FMADDDrrr: { // fmadd dn, dm, da - results[0] = {multiplyHelp::madd_4ops(operands), 256}; + results[0] = {madd_4ops(operands), 256}; break; } case Opcode::AArch64_FMADDSrrr: { // fmadd sn, sm, sa - results[0] = {multiplyHelp::madd_4ops(operands), 256}; + results[0] = {madd_4ops(operands), 256}; break; } case Opcode::AArch64_FMAD_ZPmZZ_D: { // fmad zd.d, pg/m, zn.d, zm.d - results[0] = sveHelp::sveFmadPredicated_vecs(operands, VL_bits); + results[0] = sveFmadPredicated_vecs(operands, VL_bits); break; } case Opcode::AArch64_FMAD_ZPmZZ_S: { // fmad zd.s, pg/m, zn.s, zm.s - results[0] = sveHelp::sveFmadPredicated_vecs(operands, VL_bits); + results[0] = sveFmadPredicated_vecs(operands, VL_bits); break; } case Opcode::AArch64_FMAXNMDrr: { // fmaxnm dd, dn, dm - 
results[0] = floatHelp::fmaxnm_3ops(operands); + results[0] = fmaxnm_3ops(operands); break; } case Opcode::AArch64_FMAXNMPv2i64p: { // fmaxnmp dd, vd.2d - results[0] = neonHelp::vecMaxnmp_2ops(operands); + results[0] = vecMaxnmp_2ops(operands); break; } case Opcode::AArch64_FMAXNMSrr: { // fmaxnm sd, sn, sm - results[0] = floatHelp::fmaxnm_3ops(operands); + results[0] = fmaxnm_3ops(operands); break; } case Opcode::AArch64_FMAXNMv2f64: { // fmaxnm vd.2d, vn.2d, vm.2d - results[0] = neonHelp::vecLogicOp_3vecs( + results[0] = vecLogicOp_3vecs( operands, [](double x, double y) -> double { return std::fmax(x, y); }); break; } case Opcode::AArch64_FMINNMDrr: { // fminnm dd, dn, dm - results[0] = floatHelp::fminnm_3ops(operands); + results[0] = fminnm_3ops(operands); break; } case Opcode::AArch64_FMINNMPv2i64p: { // fminnmp dd, vd.2d - results[0] = neonHelp::vecMinv_2ops(operands); + results[0] = vecMinv_2ops(operands); break; } case Opcode::AArch64_FMINNMSrr: { // fminnm sd, sn, sm - results[0] = floatHelp::fminnm_3ops(operands); + results[0] = fminnm_3ops(operands); break; } case Opcode::AArch64_FMINNMv2f64: { // fminnm vd.2d, vn.2d, vm.2d - results[0] = neonHelp::vecLogicOp_3vecs( + results[0] = vecLogicOp_3vecs( operands, [](double x, double y) -> double { return std::fmin(x, y); }); break; } case Opcode::AArch64_FMLA_ZPmZZ_D: { // fmla zd.d, pg/m, zn.d, zm.d - results[0] = sveHelp::sveMlaPredicated_vecs(operands, VL_bits); + results[0] = sveMlaPredicated_vecs(operands, VL_bits); break; } case Opcode::AArch64_FMLA_ZPmZZ_S: { // fmla zd.s, pg/m, zn.s, zm.s - results[0] = sveHelp::sveMlaPredicated_vecs(operands, VL_bits); + results[0] = sveMlaPredicated_vecs(operands, VL_bits); break; } case Opcode::AArch64_FMLAv2f32: { // fmla vd.2s, vn.2s, vm.2s - results[0] = neonHelp::vecFmla_3vecs(operands); + results[0] = vecFmla_3vecs(operands); break; } case Opcode::AArch64_FMLA_ZZZI_D: { // fmla zda.d, zn.d, zm.d[index] - results[0] = - sveHelp::sveMlaIndexed_vecs(operands, 
metadata, VL_bits); + results[0] = sveMlaIndexed_vecs(operands, metadata, VL_bits); break; } case Opcode::AArch64_FMLA_ZZZI_S: { // fmla zda.s, zn.s, zm.s[index] - results[0] = - sveHelp::sveMlaIndexed_vecs(operands, metadata, VL_bits); + results[0] = sveMlaIndexed_vecs(operands, metadata, VL_bits); break; } case Opcode::AArch64_FMLAv2f64: { // fmla vd.2d, vn.2d, vm.2d - results[0] = neonHelp::vecFmla_3vecs(operands); + results[0] = vecFmla_3vecs(operands); break; } case Opcode::AArch64_FMLAv2i32_indexed: { // fmla vd.2s, vn.2s, // vm.2s[index] - results[0] = - neonHelp::vecFmlaIndexed_3vecs(operands, metadata); + results[0] = vecFmlaIndexed_3vecs(operands, metadata); break; } case Opcode::AArch64_FMLAv2i64_indexed: { // fmla vd.2d, vn.2d, // vm.d[index] - results[0] = - neonHelp::vecFmlaIndexed_3vecs(operands, metadata); + results[0] = vecFmlaIndexed_3vecs(operands, metadata); break; } case Opcode::AArch64_FMLAv4f32: { // fmla vd.4s, vn.4s, vm.4s - results[0] = neonHelp::vecFmla_3vecs(operands); + results[0] = vecFmla_3vecs(operands); break; } case Opcode::AArch64_FMLAv4i32_indexed: { // fmla vd.4s, vn.4s, // vm.s[index] - results[0] = - neonHelp::vecFmlaIndexed_3vecs(operands, metadata); + results[0] = vecFmlaIndexed_3vecs(operands, metadata); break; } case Opcode::AArch64_FMLS_ZPmZZ_D: { // fmls zd.d, pg/m, zn.d, zm.d - results[0] = sveHelp::sveFmlsPredicated_vecs(operands, VL_bits); + results[0] = sveFmlsPredicated_vecs(operands, VL_bits); break; } case Opcode::AArch64_FMLS_ZPmZZ_S: { // fmls zd.s, pg/m, zn.s, zm.s - results[0] = sveHelp::sveFmlsPredicated_vecs(operands, VL_bits); + results[0] = sveFmlsPredicated_vecs(operands, VL_bits); break; } case Opcode::AArch64_FMLSv2f64: { // fmls vd.2d, vn.2d, vm.2d - results[0] = neonHelp::vecFmls_3vecs(operands); + results[0] = vecFmls_3vecs(operands); break; } case Opcode::AArch64_FMLSv2i64_indexed: { - results[0] = - neonHelp::vecFmlsIndexed_3vecs(operands, metadata); + results[0] = vecFmlsIndexed_3vecs(operands, 
metadata); break; } case Opcode::AArch64_FMLSv4f32: { // fmls vd.4s, vn.4s, vm.4s - results[0] = neonHelp::vecFmls_3vecs(operands); + results[0] = vecFmls_3vecs(operands); break; } case Opcode::AArch64_FMLSv4i32_indexed: { // fmls vd.4s, vn.4s, // vm.s[index] - results[0] = - neonHelp::vecFmlsIndexed_3vecs(operands, metadata); + results[0] = vecFmlsIndexed_3vecs(operands, metadata); break; } case Opcode::AArch64_FMOPA_MPPZZ_D: { // fmopa zada.d, pn/m, pm/m, zn.d, @@ -1974,110 +1912,104 @@ void Instruction::execute() { break; } case Opcode::AArch64_FMOVv2f32_ns: { // fmov vd.2s, #imm - results[0] = neonHelp::vecMovi_imm(metadata); + results[0] = vecMovi_imm(metadata); break; } case Opcode::AArch64_FMOVv2f64_ns: { // fmov vd.2d, #imm - results[0] = neonHelp::vecMovi_imm(metadata); + results[0] = vecMovi_imm(metadata); break; } case Opcode::AArch64_FMOVv4f32_ns: { // fmov vd.4s, #imm - results[0] = neonHelp::vecMovi_imm(metadata); + results[0] = vecMovi_imm(metadata); break; } case Opcode::AArch64_FMSB_ZPmZZ_D: { // fmsb zd.d, pg/m, zn.d, zm.d - results[0] = sveHelp::sveFmsbPredicated_vecs(operands, VL_bits); + results[0] = sveFmsbPredicated_vecs(operands, VL_bits); break; } case Opcode::AArch64_FMSB_ZPmZZ_S: { // fmsb zd.s, pg/m, zn.s, zm.s - results[0] = sveHelp::sveFmsbPredicated_vecs(operands, VL_bits); + results[0] = sveFmsbPredicated_vecs(operands, VL_bits); break; } case Opcode::AArch64_FMSUBDrrr: { // fmsub dn, dm, da - results[0] = {multiplyHelp::msub_4ops(operands), 256}; + results[0] = {msub_4ops(operands), 256}; break; } case Opcode::AArch64_FMSUBSrrr: { // fmsub sn, sm, sa - results[0] = {multiplyHelp::msub_4ops(operands), 256}; + results[0] = {msub_4ops(operands), 256}; break; } case Opcode::AArch64_FMULDrr: { // fmul dd, dn, dm - results[0] = {multiplyHelp::mul_3ops(operands), 256}; + results[0] = {mul_3ops(operands), 256}; break; } case Opcode::AArch64_FMULSrr: { // fmul sd, sn, sm - results[0] = {multiplyHelp::mul_3ops(operands), 256}; + results[0] = 
{mul_3ops(operands), 256}; break; } case Opcode::AArch64_FMUL_ZPmI_D: { // fmul zd.d, pg/m, zn.d, #imm - results[0] = sveHelp::sveMulPredicated(operands, metadata, - VL_bits, true); + results[0] = + sveMulPredicated(operands, metadata, VL_bits, true); break; } case Opcode::AArch64_FMUL_ZPmI_S: { // fmul zd.s, pg/m, zn.s, #imm - results[0] = - sveHelp::sveMulPredicated(operands, metadata, VL_bits, true); + results[0] = sveMulPredicated(operands, metadata, VL_bits, true); break; } case Opcode::AArch64_FMUL_ZPmZ_D: { // fmul zdn.d, pg/m, zdn.d, zm.d - results[0] = sveHelp::sveMulPredicated(operands, metadata, - VL_bits, false); + results[0] = + sveMulPredicated(operands, metadata, VL_bits, false); break; } case Opcode::AArch64_FMUL_ZPmZ_S: { // fmul zdn.s, pg/m, zdn.s, zm.s - results[0] = sveHelp::sveMulPredicated(operands, metadata, - VL_bits, false); + results[0] = + sveMulPredicated(operands, metadata, VL_bits, false); break; } case Opcode::AArch64_FMUL_ZZZ_D: { // fmul zd.d, zn.d, zm.d - results[0] = sveHelp::sveFmul_3ops(operands, VL_bits); + results[0] = sveFmul_3ops(operands, VL_bits); break; } case Opcode::AArch64_FMUL_ZZZ_S: { // fmul zd.s, zn.s, zm.s - results[0] = sveHelp::sveFmul_3ops(operands, VL_bits); + results[0] = sveFmul_3ops(operands, VL_bits); break; } case Opcode::AArch64_FMULv1i32_indexed: { // fmul sd, sn, vm.s[index] - results[0] = - neonHelp::vecFmulIndexed_vecs(operands, metadata); + results[0] = vecFmulIndexed_vecs(operands, metadata); break; } case Opcode::AArch64_FMULv1i64_indexed: { // fmul dd, dn, vm.d[index] - results[0] = - neonHelp::vecFmulIndexed_vecs(operands, metadata); + results[0] = vecFmulIndexed_vecs(operands, metadata); break; } case Opcode::AArch64_FMULv2f32: { // fmul vd.2s, vn.2s, vm.2s - results[0] = neonHelp::vecLogicOp_3vecs( + results[0] = vecLogicOp_3vecs( operands, [](float x, float y) -> float { return x * y; }); break; } case Opcode::AArch64_FMULv2f64: { // fmul vd.2d, vn.2d, vm.2d - results[0] = 
neonHelp::vecLogicOp_3vecs( + results[0] = vecLogicOp_3vecs( operands, [](double x, double y) -> double { return x * y; }); break; } case Opcode::AArch64_FMULv2i32_indexed: { // fmul vd.2s, vn.2s, // vm.s[index] - results[0] = - neonHelp::vecFmulIndexed_vecs(operands, metadata); + results[0] = vecFmulIndexed_vecs(operands, metadata); break; } case Opcode::AArch64_FMULv2i64_indexed: { // fmul vd.2d, vn.2d, // vm.d[index] - results[0] = - neonHelp::vecFmulIndexed_vecs(operands, metadata); + results[0] = vecFmulIndexed_vecs(operands, metadata); break; } case Opcode::AArch64_FMULv4f32: { // fmul vd.4s, vn.4s, vm.4s - results[0] = neonHelp::vecLogicOp_3vecs( + results[0] = vecLogicOp_3vecs( operands, [](float x, float y) -> float { return x * y; }); break; } case Opcode::AArch64_FMULv4i32_indexed: { // fmul vd.4s, vn.4s, // vm.s[index] - results[0] = - neonHelp::vecFmulIndexed_vecs(operands, metadata); + results[0] = vecFmulIndexed_vecs(operands, metadata); break; } case Opcode::AArch64_FNEGDr: { // fneg dd, dn @@ -2089,60 +2021,60 @@ void Instruction::execute() { break; } case Opcode::AArch64_FNEG_ZPmZ_D: { // fneg zd.d, pg/m, zn.d - results[0] = sveHelp::sveFnegPredicated(operands, VL_bits); + results[0] = sveFnegPredicated(operands, VL_bits); break; } case Opcode::AArch64_FNEG_ZPmZ_S: { // fneg zd.s, pg/m, zn.s - results[0] = sveHelp::sveFnegPredicated(operands, VL_bits); + results[0] = sveFnegPredicated(operands, VL_bits); break; } case Opcode::AArch64_FNEGv2f64: { // fneg vd.2d, vn.2d - results[0] = neonHelp::vecFneg_2ops(operands); + results[0] = vecFneg_2ops(operands); break; } case Opcode::AArch64_FNEGv4f32: { // fneg vd.4s, vn.4s - results[0] = neonHelp::vecFneg_2ops(operands); + results[0] = vecFneg_2ops(operands); break; } case Opcode::AArch64_FNMADDDrrr: { // fnmadd dd, dn, dm, da - results[0] = floatHelp::fnmadd_4ops(operands); + results[0] = fnmadd_4ops(operands); break; } case Opcode::AArch64_FNMADDSrrr: { // fnmadd sd, sn, sm, sa - results[0] = 
floatHelp::fnmadd_4ops(operands); + results[0] = fnmadd_4ops(operands); break; } case Opcode::AArch64_FNMLS_ZPmZZ_D: { // fnmls zd.d, pg/m, zn.d, zm.d - results[0] = sveHelp::sveFnmlsPredicated(operands, VL_bits); + results[0] = sveFnmlsPredicated(operands, VL_bits); break; } case Opcode::AArch64_FNMLS_ZPmZZ_S: { // fnmls zd.s, pg/m, zn.s, zm.s - results[0] = sveHelp::sveFnmlsPredicated(operands, VL_bits); + results[0] = sveFnmlsPredicated(operands, VL_bits); break; } case Opcode::AArch64_FNMSB_ZPmZZ_D: { // fnmsb zdn.d, pg/m, zm.d, za.d - results[0] = sveHelp::sveFnmsbPredicated(operands, VL_bits); + results[0] = sveFnmsbPredicated(operands, VL_bits); break; } case Opcode::AArch64_FNMSB_ZPmZZ_S: { // fnmsb zdn.s, pg/m, zm.s, za.s - results[0] = sveHelp::sveFnmsbPredicated(operands, VL_bits); + results[0] = sveFnmsbPredicated(operands, VL_bits); break; } case Opcode::AArch64_FNMSUBDrrr: { // fnmsub dd, dn, dm, da - results[0] = floatHelp::fnmsub_4ops(operands); + results[0] = fnmsub_4ops(operands); break; } case Opcode::AArch64_FNMSUBSrrr: { // fnmsub sd, sn, sm, sa - results[0] = floatHelp::fnmsub_4ops(operands); + results[0] = fnmsub_4ops(operands); break; } case Opcode::AArch64_FNMULDrr: { // fnmul dd, dn, dm - results[0] = neonHelp::vecLogicOp_3vecs( + results[0] = vecLogicOp_3vecs( operands, [](double x, double y) -> double { return -(x * y); }); break; } case Opcode::AArch64_FNMULSrr: { // fnmul sd, sn, sm - results[0] = neonHelp::vecLogicOp_3vecs( + results[0] = vecLogicOp_3vecs( operands, [](float x, float y) -> float { return -(x * y); }); break; } @@ -2151,61 +2083,59 @@ void Instruction::execute() { break; } case Opcode::AArch64_FRINTN_ZPmZ_D: { // frintn zd.d, pg/m, zn.d - results[0] = - sveHelp::sveFrintnPredicated(operands, VL_bits); + results[0] = sveFrintnPredicated(operands, VL_bits); break; } case Opcode::AArch64_FRINTN_ZPmZ_S: { // frintn zd.s, pg/m, zn.s - results[0] = - sveHelp::sveFrintnPredicated(operands, VL_bits); + results[0] = 
sveFrintnPredicated(operands, VL_bits); break; } case Opcode::AArch64_FRINTPDr: { // frintp dd, dn - results[0] = floatHelp::frintpScalar_2ops(operands); + results[0] = frintpScalar_2ops(operands); break; } case Opcode::AArch64_FRINTPSr: { // frintp sd, sn - results[0] = floatHelp::frintpScalar_2ops(operands); + results[0] = frintpScalar_2ops(operands); break; } case Opcode::AArch64_FRSQRTEv1i32: { // frsqrte sd, sn - results[0] = neonHelp::vecFrsqrte_2ops(operands); + results[0] = vecFrsqrte_2ops(operands); break; } case Opcode::AArch64_FRSQRTEv1i64: { // frsqrte dd, dn - results[0] = neonHelp::vecFrsqrte_2ops(operands); + results[0] = vecFrsqrte_2ops(operands); break; } case Opcode::AArch64_FRSQRTEv2f32: { // frsqrte vd.2s, vn.2s - results[0] = neonHelp::vecFrsqrte_2ops(operands); + results[0] = vecFrsqrte_2ops(operands); break; } case Opcode::AArch64_FRSQRTEv2f64: { // frsqrte vd.2d, vn.2d - results[0] = neonHelp::vecFrsqrte_2ops(operands); + results[0] = vecFrsqrte_2ops(operands); break; } case Opcode::AArch64_FRSQRTEv4f32: { // frsqrte vd.4s, vn.4s - results[0] = neonHelp::vecFrsqrte_2ops(operands); + results[0] = vecFrsqrte_2ops(operands); break; } case Opcode::AArch64_FRSQRTS32: { // frsqrts sd, sn, sm - results[0] = neonHelp::vecFrsqrts_3ops(operands); + results[0] = vecFrsqrts_3ops(operands); break; } case Opcode::AArch64_FRSQRTS64: { // frsqrts dd, dn, dm - results[0] = neonHelp::vecFrsqrts_3ops(operands); + results[0] = vecFrsqrts_3ops(operands); break; } case Opcode::AArch64_FRSQRTSv2f32: { // frsqrts vd.2s, vn.2s, vn.2s - results[0] = neonHelp::vecFrsqrts_3ops(operands); + results[0] = vecFrsqrts_3ops(operands); break; } case Opcode::AArch64_FRSQRTSv2f64: { // frsqrts vd.2d, vn.2d, vm.2d - results[0] = neonHelp::vecFrsqrts_3ops(operands); + results[0] = vecFrsqrts_3ops(operands); break; } case Opcode::AArch64_FRSQRTSv4f32: { // frsqrts vd.4s, vn.4s, vm.4s - results[0] = neonHelp::vecFrsqrts_3ops(operands); + results[0] = vecFrsqrts_3ops(operands); 
break; } case Opcode::AArch64_FSQRTDr: { // fsqrt dd, dn @@ -2217,83 +2147,78 @@ void Instruction::execute() { break; } case Opcode::AArch64_FSQRT_ZPmZ_D: { // fsqrt zd.d, pg/m, zn.d - results[0] = - sveHelp::sveFsqrtPredicated_2vecs(operands, VL_bits); + results[0] = sveFsqrtPredicated_2vecs(operands, VL_bits); break; } case Opcode::AArch64_FSQRT_ZPmZ_S: { // fsqrt zd.s, pg/m, zn.s - results[0] = - sveHelp::sveFsqrtPredicated_2vecs(operands, VL_bits); + results[0] = sveFsqrtPredicated_2vecs(operands, VL_bits); break; } case Opcode::AArch64_FSQRTv2f64: { // fsqrt vd.2d, vn.2d - results[0] = neonHelp::vecFsqrt_2ops(operands); + results[0] = vecFsqrt_2ops(operands); break; } case Opcode::AArch64_FSQRTv4f32: { // fsqrt vd.4s, vn.4s - results[0] = neonHelp::vecFsqrt_2ops(operands); + results[0] = vecFsqrt_2ops(operands); break; } case Opcode::AArch64_FSUBDrr: { // fsub dd, dn, dm - results[0] = neonHelp::vecLogicOp_3vecs( + results[0] = vecLogicOp_3vecs( operands, [](double x, double y) -> double { return x - y; }); break; } case Opcode::AArch64_FSUBR_ZPmZ_D: { // fsubr zdn.d, pg/m, zdn.d, zm.d - results[0] = - sveHelp::sveSubrPredicated_3vecs(operands, VL_bits); + results[0] = sveSubrPredicated_3vecs(operands, VL_bits); break; } case Opcode::AArch64_FSUBR_ZPmZ_S: { // fsubr zdn.s, pg/m, zdn.s, zm.s - results[0] = sveHelp::sveSubrPredicated_3vecs(operands, VL_bits); + results[0] = sveSubrPredicated_3vecs(operands, VL_bits); break; } case Opcode::AArch64_FSUBSrr: { // fsub ss, sn, sm - results[0] = neonHelp::vecLogicOp_3vecs( + results[0] = vecLogicOp_3vecs( operands, [](double x, double y) -> double { return x - y; }); break; } case Opcode::AArch64_FSUB_ZPmI_D: { // fsub zdn.d, pg/m, zdn.d, #imm - results[0] = - sveHelp::sveSubPredicated_imm(operands, metadata, VL_bits); + results[0] = sveSubPredicated_imm(operands, metadata, VL_bits); break; } case Opcode::AArch64_FSUB_ZPmI_S: { // fsub zdn.s, pg/m, zdn.s, #imm - results[0] = - sveHelp::sveSubPredicated_imm(operands, 
metadata, VL_bits); + results[0] = sveSubPredicated_imm(operands, metadata, VL_bits); break; } case Opcode::AArch64_FSUB_ZPmZ_D: { // fsub zdn.d, pg/m, zdn.d, zm.d - results[0] = sveHelp::sveLogicOpPredicated_3vecs( + results[0] = sveLogicOpPredicated_3vecs( operands, VL_bits, [](double x, double y) -> double { return x - y; }); break; } case Opcode::AArch64_FSUB_ZPmZ_S: { // fsub zdn.s, pg/m, zdn.s, zm.s - results[0] = sveHelp::sveLogicOpPredicated_3vecs( + results[0] = sveLogicOpPredicated_3vecs( operands, VL_bits, [](float x, float y) -> float { return x - y; }); break; } case Opcode::AArch64_FSUB_ZZZ_D: { // fsub zd.d, zn.d, zm.d - results[0] = sveHelp::sveSub_3vecs(operands, VL_bits); + results[0] = sveSub_3vecs(operands, VL_bits); break; } case Opcode::AArch64_FSUB_ZZZ_S: { // fsub zd.s, zn.s, zm.s - results[0] = sveHelp::sveSub_3vecs(operands, VL_bits); + results[0] = sveSub_3vecs(operands, VL_bits); break; } case Opcode::AArch64_FSUBv2f32: { - results[0] = neonHelp::vecLogicOp_3vecs( + results[0] = vecLogicOp_3vecs( operands, [](float x, float y) -> float { return x - y; }); break; } case Opcode::AArch64_FSUBv2f64: { // fsub vd.2d, vn.2d, vm.2d - results[0] = neonHelp::vecLogicOp_3vecs( + results[0] = vecLogicOp_3vecs( operands, [](double x, double y) -> double { return x - y; }); break; } case Opcode::AArch64_FSUBv4f32: { // fsub vd.4s, vn.4s, vm.4s - results[0] = neonHelp::vecLogicOp_3vecs( + results[0] = vecLogicOp_3vecs( operands, [](float x, float y) -> float { return x - y; }); break; } @@ -2397,159 +2322,147 @@ void Instruction::execute() { break; } case Opcode::AArch64_INCB_XPiI: { // incb xdn{, pattern{, #imm}} - results[0] = - sveHelp::sveInc_gprImm(operands, metadata, VL_bits); + results[0] = sveInc_gprImm(operands, metadata, VL_bits); break; } case Opcode::AArch64_INCD_XPiI: { // incd xdn{, pattern{, #imm}} - results[0] = - sveHelp::sveInc_gprImm(operands, metadata, VL_bits); + results[0] = sveInc_gprImm(operands, metadata, VL_bits); break; } 
case Opcode::AArch64_INCD_ZPiI: { // incd zdn.d{, pattern{, #imm}} - results[0] = sveHelp::sveInc_imm(operands, metadata, VL_bits); + results[0] = sveInc_imm(operands, metadata, VL_bits); break; } case Opcode::AArch64_INCH_XPiI: { // inch xdn{, pattern{, #imm}} - results[0] = - sveHelp::sveInc_gprImm(operands, metadata, VL_bits); + results[0] = sveInc_gprImm(operands, metadata, VL_bits); break; } case Opcode::AArch64_INCH_ZPiI: { // inch zdn.h{, pattern{, #imm}} - results[0] = sveHelp::sveInc_imm(operands, metadata, VL_bits); + results[0] = sveInc_imm(operands, metadata, VL_bits); break; } case Opcode::AArch64_INCP_XP_B: { // incp xdn, pm.b - results[0] = sveHelp::sveIncp_gpr(operands, VL_bits); + results[0] = sveIncp_gpr(operands, VL_bits); break; } case Opcode::AArch64_INCP_XP_D: { // incp xdn, pm.d - results[0] = sveHelp::sveIncp_gpr(operands, VL_bits); + results[0] = sveIncp_gpr(operands, VL_bits); break; } case Opcode::AArch64_INCP_XP_H: { // incp xdn, pm.h - results[0] = sveHelp::sveIncp_gpr(operands, VL_bits); + results[0] = sveIncp_gpr(operands, VL_bits); break; } case Opcode::AArch64_INCP_XP_S: { // incp xdn, pm.s - results[0] = sveHelp::sveIncp_gpr(operands, VL_bits); + results[0] = sveIncp_gpr(operands, VL_bits); break; } case Opcode::AArch64_INCW_XPiI: { // incw xdn{, pattern{, #imm}} - results[0] = - sveHelp::sveInc_gprImm(operands, metadata, VL_bits); + results[0] = sveInc_gprImm(operands, metadata, VL_bits); break; } case Opcode::AArch64_INCW_ZPiI: { // incw zdn.s{, pattern{, #imm}} - results[0] = sveHelp::sveInc_imm(operands, metadata, VL_bits); + results[0] = sveInc_imm(operands, metadata, VL_bits); break; } case Opcode::AArch64_INDEX_II_B: { // index zd.b, #imm, #imm - results[0] = - sveHelp::sveIndex(operands, metadata, VL_bits, true, true); + results[0] = sveIndex(operands, metadata, VL_bits, true, true); break; } case Opcode::AArch64_INDEX_II_D: { // index zd.d, #imm, #imm - results[0] = - sveHelp::sveIndex(operands, metadata, VL_bits, true, 
true); + results[0] = sveIndex(operands, metadata, VL_bits, true, true); break; } case Opcode::AArch64_INDEX_II_H: { // index zd.h, #imm, #imm - results[0] = - sveHelp::sveIndex(operands, metadata, VL_bits, true, true); + results[0] = sveIndex(operands, metadata, VL_bits, true, true); break; } case Opcode::AArch64_INDEX_II_S: { // index zd.s, #imm, #imm - results[0] = - sveHelp::sveIndex(operands, metadata, VL_bits, true, true); + results[0] = sveIndex(operands, metadata, VL_bits, true, true); break; } case Opcode::AArch64_INDEX_IR_B: { // index zd.b, #imm, wn - results[0] = sveHelp::sveIndex(operands, metadata, - VL_bits, true, false); + results[0] = + sveIndex(operands, metadata, VL_bits, true, false); break; } case Opcode::AArch64_INDEX_IR_D: { // index zd.d, #imm, xn - results[0] = sveHelp::sveIndex(operands, metadata, - VL_bits, true, false); + results[0] = sveIndex(operands, metadata, VL_bits, + true, false); break; } case Opcode::AArch64_INDEX_IR_H: { // index zd.h, #imm, wn - results[0] = sveHelp::sveIndex(operands, metadata, - VL_bits, true, false); + results[0] = sveIndex(operands, metadata, VL_bits, + true, false); break; } case Opcode::AArch64_INDEX_IR_S: { // index zd.s, #imm, wn - results[0] = sveHelp::sveIndex(operands, metadata, - VL_bits, true, false); + results[0] = sveIndex(operands, metadata, VL_bits, + true, false); break; } case Opcode::AArch64_INDEX_RI_B: { // index zd.b, wn, #imm - results[0] = sveHelp::sveIndex(operands, metadata, - VL_bits, false, true); + results[0] = + sveIndex(operands, metadata, VL_bits, false, true); break; } case Opcode::AArch64_INDEX_RI_D: { // index zd.d, xn, #imm - results[0] = sveHelp::sveIndex(operands, metadata, - VL_bits, false, true); + results[0] = sveIndex(operands, metadata, VL_bits, + false, true); break; } case Opcode::AArch64_INDEX_RI_H: { // index zd.h, wn, #imm - results[0] = sveHelp::sveIndex(operands, metadata, - VL_bits, false, true); + results[0] = sveIndex(operands, metadata, VL_bits, + false, 
true); break; } case Opcode::AArch64_INDEX_RI_S: { // index zd.s, wn, #imm - results[0] = sveHelp::sveIndex(operands, metadata, - VL_bits, false, true); + results[0] = sveIndex(operands, metadata, VL_bits, + false, true); break; } case Opcode::AArch64_INDEX_RR_B: { // index zd.b, wn, wm - results[0] = sveHelp::sveIndex(operands, metadata, - VL_bits, false, false); + results[0] = sveIndex(operands, metadata, VL_bits, + false, false); break; } case Opcode::AArch64_INDEX_RR_D: { // index zd.d, xn, xm - results[0] = sveHelp::sveIndex(operands, metadata, - VL_bits, false, false); + results[0] = sveIndex(operands, metadata, VL_bits, + false, false); break; } case Opcode::AArch64_INDEX_RR_H: { // index zd.h, wn, wm - results[0] = sveHelp::sveIndex(operands, metadata, - VL_bits, false, false); + results[0] = sveIndex(operands, metadata, VL_bits, + false, false); break; } case Opcode::AArch64_INDEX_RR_S: { // index zd.s, wn, wm - results[0] = sveHelp::sveIndex(operands, metadata, - VL_bits, false, false); + results[0] = sveIndex(operands, metadata, VL_bits, + false, false); break; } case Opcode::AArch64_INSvi16gpr: { // ins vd.h[index], wn - results[0] = neonHelp::vecInsIndex_gpr(operands, - metadata); + results[0] = vecInsIndex_gpr(operands, metadata); break; } case Opcode::AArch64_INSvi32gpr: { // ins vd.s[index], wn - results[0] = neonHelp::vecInsIndex_gpr(operands, - metadata); + results[0] = vecInsIndex_gpr(operands, metadata); break; } case Opcode::AArch64_INSvi32lane: { // ins vd.s[index1], vn.s[index2] - results[0] = neonHelp::vecIns_2Index(operands, metadata); + results[0] = vecIns_2Index(operands, metadata); break; } case Opcode::AArch64_INSvi64gpr: { // ins vd.d[index], xn - results[0] = neonHelp::vecInsIndex_gpr(operands, - metadata); + results[0] = vecInsIndex_gpr(operands, metadata); break; } case Opcode::AArch64_INSvi64lane: { // ins vd.d[index1], vn.d[index2] - results[0] = neonHelp::vecIns_2Index(operands, metadata); + results[0] = vecIns_2Index(operands, 
metadata); break; } case Opcode::AArch64_INSvi8gpr: { // ins vd.b[index], wn - results[0] = neonHelp::vecInsIndex_gpr(operands, - metadata); + results[0] = vecInsIndex_gpr(operands, metadata); break; } case Opcode::AArch64_LD1_MXIPXX_H_D: { // ld1d {zath.d[ws, #imm]}, pg/z, @@ -3682,53 +3595,47 @@ void Instruction::execute() { break; } case Opcode::AArch64_LSLVWr: { // lslv wd, wn, wm - results[0] = { - logicalHelp::logicalShiftLR_3ops(operands, true), 8}; + results[0] = {logicalShiftLR_3ops(operands, true), 8}; break; } case Opcode::AArch64_LSLVXr: { // lslv xd, xn, xm - results[0] = logicalHelp::logicalShiftLR_3ops(operands, true); + results[0] = logicalShiftLR_3ops(operands, true); break; } case Opcode::AArch64_LSL_ZZI_S: { // lsl zd.s, zn.s, #imm - results[0] = sveHelp::sveLsl_imm(operands, metadata, VL_bits); + results[0] = sveLsl_imm(operands, metadata, VL_bits); break; } case Opcode::AArch64_LSRVWr: { // lsrv wd, wn, wm - results[0] = { - logicalHelp::logicalShiftLR_3ops(operands, false), 8}; + results[0] = {logicalShiftLR_3ops(operands, false), 8}; break; } case Opcode::AArch64_LSRVXr: { // lsrv xd, xn, xm - results[0] = - logicalHelp::logicalShiftLR_3ops(operands, false); + results[0] = logicalShiftLR_3ops(operands, false); break; } case Opcode::AArch64_MADDWrrr: { // madd wd, wn, wm, wa - results[0] = {multiplyHelp::madd_4ops(operands), 8}; + results[0] = {madd_4ops(operands), 8}; break; } case Opcode::AArch64_MADDXrrr: { // madd xd, xn, xm, xa - results[0] = multiplyHelp::madd_4ops(operands); + results[0] = madd_4ops(operands); break; } case Opcode::AArch64_MLA_ZPmZZ_B: { // mla zda.b, pg/m, zn.b, zm.b - results[0] = sveHelp::sveMlaPredicated_vecs(operands, VL_bits); + results[0] = sveMlaPredicated_vecs(operands, VL_bits); break; } case Opcode::AArch64_MLA_ZPmZZ_D: { // mla zda.d, pg/m, zn.d, zm.d - results[0] = - sveHelp::sveMlaPredicated_vecs(operands, VL_bits); + results[0] = sveMlaPredicated_vecs(operands, VL_bits); break; } case 
Opcode::AArch64_MLA_ZPmZZ_H: { // mla zda.h, pg/m, zn.h, zm.h - results[0] = - sveHelp::sveMlaPredicated_vecs(operands, VL_bits); + results[0] = sveMlaPredicated_vecs(operands, VL_bits); break; } case Opcode::AArch64_MLA_ZPmZZ_S: { // mla zda.s, pg/m, zn.s, zm.s - results[0] = - sveHelp::sveMlaPredicated_vecs(operands, VL_bits); + results[0] = sveMlaPredicated_vecs(operands, VL_bits); break; } case Opcode::AArch64_MOVID: { // movi dd, #imm @@ -3736,59 +3643,57 @@ void Instruction::execute() { break; } case Opcode::AArch64_MOVIv16b_ns: { // movi vd.16b, #imm - results[0] = neonHelp::vecMovi_imm(metadata); + results[0] = vecMovi_imm(metadata); break; } case Opcode::AArch64_MOVIv2d_ns: { // movi vd.2d, #imm - results[0] = neonHelp::vecMovi_imm(metadata); + results[0] = vecMovi_imm(metadata); break; } case Opcode::AArch64_MOVIv2i32: { // movi vd.2s, #imm{, lsl #shift} - results[0] = neonHelp::vecMoviShift_imm(metadata, false); + results[0] = vecMoviShift_imm(metadata, false); break; } case Opcode::AArch64_MOVIv4i32: { // movi vd.4s, #imm{, LSL #shift} - results[0] = neonHelp::vecMoviShift_imm(metadata, false); + results[0] = vecMoviShift_imm(metadata, false); break; } case Opcode::AArch64_MOVIv8b_ns: { // movi vd.8b, #imm - results[0] = neonHelp::vecMovi_imm(metadata); + results[0] = vecMovi_imm(metadata); break; } case Opcode::AArch64_MOVKWi: { // movk wd, #imm - results[0] = { - arithmeticHelp::movkShift_imm(operands, metadata), 8}; + results[0] = {movkShift_imm(operands, metadata), 8}; break; } case Opcode::AArch64_MOVKXi: { // movk xd, #imm - results[0] = - arithmeticHelp::movkShift_imm(operands, metadata); + results[0] = movkShift_imm(operands, metadata); break; } case Opcode::AArch64_MOVNWi: { // movn wd, #imm{, LSL #shift} - results[0] = {arithmeticHelp::movnShift_imm( + results[0] = {movnShift_imm( metadata, [](uint64_t x) -> uint32_t { return ~x; }), 8}; break; } case Opcode::AArch64_MOVNXi: { // movn xd, #imm{, LSL #shift} - results[0] = 
arithmeticHelp::movnShift_imm( + results[0] = movnShift_imm( metadata, [](uint64_t x) -> uint64_t { return ~x; }); break; } case Opcode::AArch64_MOVPRFX_ZPmZ_D: { // movprfx zd.d, pg/m, zn.d - results[0] = sveHelp::sveMovprfxPredicated_destUnchanged( - operands, VL_bits); + results[0] = + sveMovprfxPredicated_destUnchanged(operands, VL_bits); break; } case Opcode::AArch64_MOVPRFX_ZPzZ_D: { // movprfx zd.d, pg/z, zn.d - results[0] = sveHelp::sveMovprfxPredicated_destToZero( - operands, VL_bits); + results[0] = + sveMovprfxPredicated_destToZero(operands, VL_bits); break; } case Opcode::AArch64_MOVPRFX_ZPzZ_S: { // movprfx zd.s, pg/z, zn.s - results[0] = sveHelp::sveMovprfxPredicated_destToZero( - operands, VL_bits); + results[0] = + sveMovprfxPredicated_destToZero(operands, VL_bits); break; } case Opcode::AArch64_MOVPRFX_ZZ: { // movprfx zd, zn @@ -3797,13 +3702,13 @@ void Instruction::execute() { break; } case Opcode::AArch64_MOVZWi: { // movz wd, #imm - results[0] = {arithmeticHelp::movnShift_imm( + results[0] = {movnShift_imm( metadata, [](uint64_t x) -> uint32_t { return x; }), 8}; break; } case Opcode::AArch64_MOVZXi: { // movz xd, #imm - results[0] = arithmeticHelp::movnShift_imm( + results[0] = movnShift_imm( metadata, [](uint64_t x) -> uint64_t { return x; }); break; } @@ -3823,11 +3728,11 @@ void Instruction::execute() { break; } case Opcode::AArch64_MSUBWrrr: { // msub wd, wn, wm, wa - results[0] = {multiplyHelp::msub_4ops(operands), 8}; + results[0] = {msub_4ops(operands), 8}; break; } case Opcode::AArch64_MSUBXrrr: { // msub xd, xn, xm, xa - results[0] = multiplyHelp::msub_4ops(operands); + results[0] = msub_4ops(operands); break; } case Opcode::AArch64_MSRpstatesvcrImm1: { // msr svcr, #imm @@ -3850,118 +3755,116 @@ void Instruction::execute() { } } case Opcode::AArch64_MUL_ZPmZ_B: { // mul zdn.b, pg/m, zdn.b, zm.b - results[0] = sveHelp::sveMulPredicated(operands, metadata, - VL_bits, false); + results[0] = + sveMulPredicated(operands, metadata, 
VL_bits, false); break; } case Opcode::AArch64_MUL_ZPmZ_D: { // mul zdn.d, pg/m, zdn.d, zm.d - results[0] = sveHelp::sveMulPredicated(operands, metadata, - VL_bits, false); + results[0] = + sveMulPredicated(operands, metadata, VL_bits, false); break; } case Opcode::AArch64_MUL_ZPmZ_H: { // mul zdn.h, pg/m, zdn.h, zm.h - results[0] = sveHelp::sveMulPredicated(operands, metadata, - VL_bits, false); + results[0] = + sveMulPredicated(operands, metadata, VL_bits, false); break; } case Opcode::AArch64_MUL_ZPmZ_S: { // mul zdn.s, pg/m, zdn.s, zm.s - results[0] = sveHelp::sveMulPredicated(operands, metadata, - VL_bits, false); + results[0] = + sveMulPredicated(operands, metadata, VL_bits, false); break; } case Opcode::AArch64_MVNIv2i32: { // mvni vd.2s, #imm{, lsl #shift} - results[0] = neonHelp::vecMoviShift_imm(metadata, true); + results[0] = vecMoviShift_imm(metadata, true); break; } case Opcode::AArch64_MVNIv2s_msl: { // mvni vd.2s, #imm, msl #amount - results[0] = neonHelp::vecMoviShift_imm(metadata, true); + results[0] = vecMoviShift_imm(metadata, true); break; } case Opcode::AArch64_MVNIv4i16: { // mvni vd.4h, #imm{, lsl #shift} - results[0] = neonHelp::vecMoviShift_imm(metadata, true); + results[0] = vecMoviShift_imm(metadata, true); break; } case Opcode::AArch64_MVNIv4i32: { // mvni vd.4s, #imm{, lsl #shift} - results[0] = neonHelp::vecMoviShift_imm(metadata, true); + results[0] = vecMoviShift_imm(metadata, true); break; } case Opcode::AArch64_MVNIv4s_msl: { // mvni vd.4s #imm, msl #amount - results[0] = neonHelp::vecMoviShift_imm(metadata, true); + results[0] = vecMoviShift_imm(metadata, true); break; } case Opcode::AArch64_MVNIv8i16: { // mvni vd.8h, #imm{, lsl #shift} - results[0] = neonHelp::vecMoviShift_imm(metadata, true); + results[0] = vecMoviShift_imm(metadata, true); break; } case Opcode::AArch64_NEGv2i64: { // neg vd.2d, vn.2d - results[0] = neonHelp::vecFneg_2ops(operands); + results[0] = vecFneg_2ops(operands); break; } case Opcode::AArch64_NOTv16i8: 
{ // not vd.16b, vn.16b - results[0] = neonHelp::vecLogicOp_2vecs( + results[0] = vecLogicOp_2vecs( operands, [](uint8_t x) -> uint8_t { return ~x; }); break; } case Opcode::AArch64_NOTv8i8: { // not vd.8b, vn.8b - results[0] = neonHelp::vecLogicOp_2vecs( + results[0] = vecLogicOp_2vecs( operands, [](uint8_t x) -> uint8_t { return ~x; }); break; } case Opcode::AArch64_ORNWrs: { // orn wd, wn, wm{, shift{ #amount}} - auto [result, nzcv] = logicalHelp::logicOpShift_3ops( + auto [result, nzcv] = logicOpShift_3ops( operands, metadata, false, [](uint32_t x, uint32_t y) -> uint32_t { return x | (~y); }); results[0] = {result, 8}; break; } case Opcode::AArch64_ORNXrs: { // orn xd, xn, xm{, shift{ #amount}} - auto [result, nzcv] = logicalHelp::logicOpShift_3ops( + auto [result, nzcv] = logicOpShift_3ops( operands, metadata, false, [](uint64_t x, uint64_t y) -> uint64_t { return x | (~y); }); results[0] = result; break; } case Opcode::AArch64_ORRWri: { // orr wd, wn, #imm - auto [result, nzcv] = logicalHelp::logicOp_imm( + auto [result, nzcv] = logicOp_imm( operands, metadata, false, [](uint32_t x, uint32_t y) -> uint32_t { return x | y; }); results[0] = {result, 8}; break; } case Opcode::AArch64_ORRWrs: { // orr wd, wn, wm{, shift{ #amount}} - results[0] = { - comparisonHelp::orrShift_3ops(operands, metadata), 8}; + results[0] = {orrShift_3ops(operands, metadata), 8}; break; } case Opcode::AArch64_ORRXri: { // orr xd, xn, #imm - auto [result, nzcv] = logicalHelp::logicOp_imm( + auto [result, nzcv] = logicOp_imm( operands, metadata, false, [](uint64_t x, uint64_t y) -> uint64_t { return x | y; }); results[0] = {result, 8}; break; } case Opcode::AArch64_ORRXrs: { // orr xd, xn, xm{, shift{ #amount}} - results[0] = - comparisonHelp::orrShift_3ops(operands, metadata); + results[0] = orrShift_3ops(operands, metadata); break; } case Opcode::AArch64_ORR_PPzPP: { // orr pd.b, pg/z, pn.b, pm.b - results[0] = sveHelp::sveLogicOp_preds( + results[0] = sveLogicOp_preds( operands, 
VL_bits, [](uint64_t x, uint64_t y) -> uint64_t { return x | y; }); break; } case Opcode::AArch64_ORR_ZZZ: { // orr zd.d, zn.d, zm.d - results[0] = sveHelp::sveOrr_3vecs(operands, VL_bits); + results[0] = sveOrr_3vecs(operands, VL_bits); break; } case Opcode::AArch64_ORRv16i8: { // orr vd.16b, Vn.16b, Vm.16b - results[0] = neonHelp::vecLogicOp_3vecs( + results[0] = vecLogicOp_3vecs( operands, [](uint8_t x, uint8_t y) -> uint8_t { return x | y; }); break; } case Opcode::AArch64_ORRv8i8: { // orr vd.8b, vn.8b, vm.8b - results[0] = neonHelp::vecLogicOp_3vecs( + results[0] = vecLogicOp_3vecs( operands, [](uint8_t x, uint8_t y) -> uint8_t { return x | y; }); break; } @@ -3974,19 +3877,19 @@ void Instruction::execute() { break; } case Opcode::AArch64_PSEL_PPPRI_B: { // psel pd, pn, pm.b[wa, #imm] - results[0] = sveHelp::svePsel(operands, metadata, VL_bits); + results[0] = svePsel(operands, metadata, VL_bits); break; } case Opcode::AArch64_PSEL_PPPRI_D: { // psel pd, pn, pm.d[wa, #imm] - results[0] = sveHelp::svePsel(operands, metadata, VL_bits); + results[0] = svePsel(operands, metadata, VL_bits); break; } case Opcode::AArch64_PSEL_PPPRI_H: { // psel pd, pn, pm.h[wa, #imm] - results[0] = sveHelp::svePsel(operands, metadata, VL_bits); + results[0] = svePsel(operands, metadata, VL_bits); break; } case Opcode::AArch64_PSEL_PPPRI_S: { // psel pd, pn, pm.s[wa, #imm] - results[0] = sveHelp::svePsel(operands, metadata, VL_bits); + results[0] = svePsel(operands, metadata, VL_bits); break; } case Opcode::AArch64_PTEST_PP: { // ptest pg, pn.b @@ -3995,39 +3898,39 @@ void Instruction::execute() { std::array masked_n = {(g[0] & s[0]), (g[1] & s[1]), (g[2] & s[2]), (g[3] & s[3])}; // Byte count = 1 as destination predicate is regarding single bytes. 
- results[0] = AuxFunc::getNZCVfromPred(masked_n, VL_bits, 1); + results[0] = getNZCVfromPred(masked_n, VL_bits, 1); break; } case Opcode::AArch64_PTRUE_B: { // ptrue pd.b{, pattern} - results[0] = sveHelp::svePtrue(metadata, VL_bits); + results[0] = svePtrue(metadata, VL_bits); break; } case Opcode::AArch64_PTRUE_D: { // ptrue pd.d{, pattern} - results[0] = sveHelp::svePtrue(metadata, VL_bits); + results[0] = svePtrue(metadata, VL_bits); break; } case Opcode::AArch64_PTRUE_H: { // ptrue pd.h{, pattern} - results[0] = sveHelp::svePtrue(metadata, VL_bits); + results[0] = svePtrue(metadata, VL_bits); break; } case Opcode::AArch64_PTRUE_S: { // ptrue pd.s{, pattern} - results[0] = sveHelp::svePtrue(metadata, VL_bits); + results[0] = svePtrue(metadata, VL_bits); break; } case Opcode::AArch64_PUNPKHI_PP: { // punpkhi pd.h, pn.b - results[0] = sveHelp::svePunpk(operands, VL_bits, true); + results[0] = svePunpk(operands, VL_bits, true); break; } case Opcode::AArch64_PUNPKLO_PP: { // punpklo pd.h, pn.b - results[0] = sveHelp::svePunpk(operands, VL_bits, false); + results[0] = svePunpk(operands, VL_bits, false); break; } case Opcode::AArch64_RBITWr: { // rbit wd, wn - results[0] = {bitmanipHelp::rbit(operands, metadata), 8}; + results[0] = {rbit(operands, metadata), 8}; break; } case Opcode::AArch64_RBITXr: { // rbit xd, xn - results[0] = bitmanipHelp::rbit(operands, metadata); + results[0] = rbit(operands, metadata); break; } case Opcode::AArch64_RDVLI_XI: { // rdvl xd, #imm @@ -4041,129 +3944,123 @@ void Instruction::execute() { break; } case Opcode::AArch64_REV16v16i8: { // rev16 Vd.16b, Vn.16b - results[0] = neonHelp::vecRev(operands); + results[0] = vecRev(operands); break; } case Opcode::AArch64_REV16v8i8: { // rev16 Vd.8b, Vn.8b - results[0] = neonHelp::vecRev(operands); + results[0] = vecRev(operands); break; } case Opcode::AArch64_REV32v16i8: { // rev32 Vd.16b, Vn.16b - results[0] = neonHelp::vecRev(operands); + results[0] = vecRev(operands); break; } case 
Opcode::AArch64_REV32v4i16: { // rev32 Vd.4h, Vn.4h - results[0] = neonHelp::vecRev(operands); + results[0] = vecRev(operands); break; } case Opcode::AArch64_REV32v8i16: { // rev32 Vd.8h, Vn.8h - results[0] = neonHelp::vecRev(operands); + results[0] = vecRev(operands); break; } case Opcode::AArch64_REV32v8i8: { // rev32 Vd.8b, Vn.8b - results[0] = neonHelp::vecRev(operands); + results[0] = vecRev(operands); break; } case Opcode::AArch64_REV64v16i8: { // rev64 Vd.16b, Vn.16b - results[0] = neonHelp::vecRev(operands); + results[0] = vecRev(operands); break; } case Opcode::AArch64_REV64v2i32: { // rev64 Vd.2s, Vn.2s - results[0] = neonHelp::vecRev(operands); + results[0] = vecRev(operands); break; } case Opcode::AArch64_REV64v4i16: { // rev64 Vd.4h, Vn.4h - results[0] = neonHelp::vecRev(operands); + results[0] = vecRev(operands); break; } case Opcode::AArch64_REV64v4i32: { // rev64 Vd.4s, Vn.4s - results[0] = neonHelp::vecRev(operands); + results[0] = vecRev(operands); break; } case Opcode::AArch64_REV64v8i16: { // rev64 Vd.8h, Vn.8h - results[0] = neonHelp::vecRev(operands); + results[0] = vecRev(operands); break; } case Opcode::AArch64_REV64v8i8: { // rev64 Vd.8b Vn.8b - results[0] = neonHelp::vecRev(operands); + results[0] = vecRev(operands); break; } case Opcode::AArch64_REVXr: { // rev xd, xn - results[0] = bitmanipHelp::rev(operands); + results[0] = rev(operands); break; } case Opcode::AArch64_REV_PP_B: { // rev pd.b, pn.b - results[0] = sveHelp::sveRev_predicates(operands, VL_bits); + results[0] = sveRev_predicates(operands, VL_bits); break; } case Opcode::AArch64_REV_PP_D: { // rev pd.d, pn.d - results[0] = sveHelp::sveRev_predicates(operands, VL_bits); + results[0] = sveRev_predicates(operands, VL_bits); break; } case Opcode::AArch64_REV_PP_H: { // rev pd.h, pn.h - results[0] = sveHelp::sveRev_predicates(operands, VL_bits); + results[0] = sveRev_predicates(operands, VL_bits); break; } case Opcode::AArch64_REV_PP_S: { // rev pd.s, pn.s - results[0] = 
sveHelp::sveRev_predicates(operands, VL_bits); + results[0] = sveRev_predicates(operands, VL_bits); break; } case Opcode::AArch64_REV_ZZ_B: { // rev zd.b, zn.b - results[0] = sveHelp::sveRev_vecs(operands, VL_bits); + results[0] = sveRev_vecs(operands, VL_bits); break; } case Opcode::AArch64_REV_ZZ_D: { // rev zd.d, zn.d - results[0] = sveHelp::sveRev_vecs(operands, VL_bits); + results[0] = sveRev_vecs(operands, VL_bits); break; } case Opcode::AArch64_REV_ZZ_H: { // rev zd.h, zn.h - results[0] = sveHelp::sveRev_vecs(operands, VL_bits); + results[0] = sveRev_vecs(operands, VL_bits); break; } case Opcode::AArch64_REV_ZZ_S: { // rev zd.s, zn.s - results[0] = sveHelp::sveRev_vecs(operands, VL_bits); + results[0] = sveRev_vecs(operands, VL_bits); break; } case Opcode::AArch64_RORVWr: { // rorv wd, wn, wm - results[0] = {logicalHelp::rorv_3ops(operands), 8}; + results[0] = {rorv_3ops(operands), 8}; break; } case Opcode::AArch64_RORVXr: { // rorv xd, xn, xm - results[0] = logicalHelp::rorv_3ops(operands); + results[0] = rorv_3ops(operands); break; } case Opcode::AArch64_SBCWr: { // sbc wd, wn, wm - results[0] = {arithmeticHelp::sbc(operands), 8}; + results[0] = {sbc(operands), 8}; break; } case Opcode::AArch64_SBCXr: { // sbc xd, xn, xm - results[0] = arithmeticHelp::sbc(operands); + results[0] = sbc(operands); break; } case Opcode::AArch64_SBFMWri: { // sbfm wd, wn, #immr, #imms - results[0] = { - bitmanipHelp::bfm_2imms(operands, metadata, true, true), - 8}; + results[0] = {bfm_2imms(operands, metadata, true, true), 8}; break; } case Opcode::AArch64_SBFMXri: { // sbfm xd, xn, #immr, #imms - results[0] = - bitmanipHelp::bfm_2imms(operands, metadata, true, true); + results[0] = bfm_2imms(operands, metadata, true, true); break; } case Opcode::AArch64_SCVTFSWSri: { // scvtf sd, wn, #fbits - results[0] = - floatHelp::scvtf_FixedPoint(operands, metadata); + results[0] = scvtf_FixedPoint(operands, metadata); break; } case Opcode::AArch64_SCVTFSXDri: { // scvtf dd, xn, #fbits - 
results[0] = - floatHelp::scvtf_FixedPoint(operands, metadata); + results[0] = scvtf_FixedPoint(operands, metadata); break; } case Opcode::AArch64_SCVTFSXSri: { // scvtf sd, xn, #fbits - results[0] = - floatHelp::scvtf_FixedPoint(operands, metadata); + results[0] = scvtf_FixedPoint(operands, metadata); break; } case Opcode::AArch64_SCVTFUWDri: { // scvtf dd, wn @@ -4183,23 +4080,19 @@ void Instruction::execute() { break; } case Opcode::AArch64_SCVTF_ZPmZ_DtoD: { // scvtf zd.d, pg/m, zn.d - results[0] = - sveHelp::sveFcvtPredicated(operands, VL_bits); + results[0] = sveFcvtPredicated(operands, VL_bits); break; } case Opcode::AArch64_SCVTF_ZPmZ_DtoS: { // scvtf zd.s, pg/m, zn.d - results[0] = - sveHelp::sveFcvtPredicated(operands, VL_bits); + results[0] = sveFcvtPredicated(operands, VL_bits); break; } case Opcode::AArch64_SCVTF_ZPmZ_StoD: { // scvtf zd.d, pg/m, zn.s - results[0] = - sveHelp::sveFcvtPredicated(operands, VL_bits); + results[0] = sveFcvtPredicated(operands, VL_bits); break; } case Opcode::AArch64_SCVTF_ZPmZ_StoS: { // scvtf zd.s, pg/m, zn.s - results[0] = - sveHelp::sveFcvtPredicated(operands, VL_bits); + results[0] = sveFcvtPredicated(operands, VL_bits); break; } case Opcode::AArch64_SCVTFv1i32: { // scvtf sd, sn @@ -4211,128 +4104,121 @@ void Instruction::execute() { break; } case Opcode::AArch64_SCVTFv2f32: { // scvtf vd.2s, vn.2s - results[0] = neonHelp::vecScvtf_2vecs( + results[0] = vecScvtf_2vecs( operands, [](int32_t x) -> float { return static_cast(x); }); break; } case Opcode::AArch64_SCVTFv2f64: { // scvtf vd.2d, vn.2d - results[0] = neonHelp::vecScvtf_2vecs( + results[0] = vecScvtf_2vecs( operands, [](int64_t x) -> double { return static_cast(x); }); break; } case Opcode::AArch64_SCVTFv4f32: { // scvtf vd.4s, vn.4s - results[0] = neonHelp::vecScvtf_2vecs( + results[0] = vecScvtf_2vecs( operands, [](int32_t x) -> float { return static_cast(x); }); break; } case Opcode::AArch64_SDIVWr: { // sdiv wd, wn, wm - results[0] = 
{divideHelp::div_3ops(operands), 8}; + results[0] = {div_3ops(operands), 8}; break; } case Opcode::AArch64_SDIVXr: { // sdiv xd, xn, xm - results[0] = {divideHelp::div_3ops(operands), 8}; + results[0] = {div_3ops(operands), 8}; break; } case Opcode::AArch64_SEL_ZPZZ_D: { // sel zd.d, pg, zn.d, zm.d - results[0] = sveHelp::sveSel_zpzz(operands, VL_bits); + results[0] = sveSel_zpzz(operands, VL_bits); break; } case Opcode::AArch64_SEL_ZPZZ_S: { // sel zd.s, pg, zn.s, zm.s - results[0] = sveHelp::sveSel_zpzz(operands, VL_bits); + results[0] = sveSel_zpzz(operands, VL_bits); break; } case Opcode::AArch64_SHLd: { // shl dd, dn #imm - results[0] = - neonHelp::vecShlShift_vecImm(operands, metadata); + results[0] = vecShlShift_vecImm(operands, metadata); break; } case Opcode::AArch64_SHLv4i32_shift: { // shl vd.4s, vn.4s, #imm - results[0] = - neonHelp::vecShlShift_vecImm(operands, metadata); + results[0] = vecShlShift_vecImm(operands, metadata); break; } case Opcode::AArch64_SMADDLrrr: { // smaddl xd, wn, wm, xa - results[0] = multiplyHelp::maddl_4ops(operands); + results[0] = maddl_4ops(operands); break; } case Opcode::AArch64_SMAX_ZI_S: { // smax zdn.s, zdn.s, #imm - results[0] = - sveHelp::sveMax_vecImm(operands, metadata, VL_bits); + results[0] = sveMax_vecImm(operands, metadata, VL_bits); break; } case Opcode::AArch64_SMAX_ZPmZ_S: { // smax zd.s, pg/m, zn.s, zm.s - results[0] = sveHelp::sveMaxPredicated_vecs(operands, VL_bits); + results[0] = sveMaxPredicated_vecs(operands, VL_bits); break; } case Opcode::AArch64_SMAXv4i32: { // smax vd.4s, vn.4s, vm.4s - results[0] = neonHelp::vecLogicOp_3vecs( + results[0] = vecLogicOp_3vecs( operands, [](int32_t x, int32_t y) -> int32_t { return std::max(x, y); }); break; } case Opcode::AArch64_SMINV_VPZ_S: { // sminv sd, pg, zn.s - results[0] = sveHelp::sveSminv(operands, VL_bits); + results[0] = sveSminv(operands, VL_bits); break; } case Opcode::AArch64_SMINVv4i32v: { // sminv sd, vn.4s - results[0] = 
neonHelp::vecMinv_2ops(operands); + results[0] = vecMinv_2ops(operands); break; } case Opcode::AArch64_SMIN_ZPmZ_S: { // smin zd.s, pg/m, zn.s, zm.s - results[0] = sveHelp::sveLogicOpPredicated_3vecs( + results[0] = sveLogicOpPredicated_3vecs( operands, VL_bits, [](int32_t x, int32_t y) -> int32_t { return std::min(x, y); }); break; } case Opcode::AArch64_SMINv4i32: { // smin vd.4s, vn.4s, vm.4s - results[0] = neonHelp::vecLogicOp_3vecs( + results[0] = vecLogicOp_3vecs( operands, [](int32_t x, int32_t y) -> int32_t { return std::min(x, y); }); break; } case Opcode::AArch64_SMSUBLrrr: { // smsubl xd, wn, wm, xa - results[0] = arithmeticHelp::msubl_4ops(operands); + results[0] = msubl_4ops(operands); break; } case Opcode::AArch64_SMULH_ZPmZ_B: { // smulh zdn.b, pg/m, zdn.b, zm.b - results[0] = - sveHelp::sveMulhPredicated(operands, VL_bits); + results[0] = sveMulhPredicated(operands, VL_bits); break; } case Opcode::AArch64_SMULH_ZPmZ_H: { // smulh zdn.h, pg/m, zdn.h, zm.h - results[0] = - sveHelp::sveMulhPredicated(operands, VL_bits); + results[0] = sveMulhPredicated(operands, VL_bits); break; } case Opcode::AArch64_SMULH_ZPmZ_S: { // smulh zdn.s, pg/m, zdn.s, zm.s - results[0] = - sveHelp::sveMulhPredicated(operands, VL_bits); + results[0] = sveMulhPredicated(operands, VL_bits); break; } case Opcode::AArch64_SMULHrr: { // smulh xd, xn, xm // TODO: signed - results[0] = AuxFunc::mulhi(operands[0].get(), - operands[1].get()); + results[0] = + mulhi(operands[0].get(), operands[1].get()); break; } case Opcode::AArch64_SSHLLv2i32_shift: { // sshll vd.2d, vn.2s, #imm - results[0] = neonHelp::vecShllShift_vecImm( - operands, metadata, false); + results[0] = + vecShllShift_vecImm(operands, metadata, false); break; } case Opcode::AArch64_SSHLLv4i32_shift: { // sshll2 vd.2d, vn.4s, #imm - results[0] = neonHelp::vecShllShift_vecImm( - operands, metadata, true); + results[0] = + vecShllShift_vecImm(operands, metadata, true); break; } case Opcode::AArch64_SHRNv8i8_shift: { // 
shrn vd.8b, vn.8h, #imm - results[0] = neonHelp::vecShrnShift_imm(operands, - metadata); + results[0] = vecShrnShift_imm(operands, metadata); break; } case Opcode::AArch64_SSHRv4i32_shift: { // sshr vd.4s, vn.4s, #imm - results[0] = neonHelp::vecSshrShift_imm(operands, metadata); + results[0] = vecSshrShift_imm(operands, metadata); break; } case Opcode::AArch64_SST1B_D_REAL: { // st1b {zd.d}, pg, [xn, zm.d] @@ -4416,8 +4302,7 @@ void Instruction::execute() { (ws + metadata.operands[0].sme_index.disp) % partition_num; const uint64_t* tileSlice = operands[sliceNum].getAsVector(); - memoryData = - sveHelp::sve_merge_store_data(tileSlice, pg, VL_bits); + memoryData = sve_merge_store_data(tileSlice, pg, VL_bits); break; } @@ -4470,8 +4355,7 @@ void Instruction::execute() { (ws + metadata.operands[0].sme_index.disp) % partition_num; const uint32_t* tileSlice = operands[sliceNum].getAsVector(); - memoryData = - sveHelp::sve_merge_store_data(tileSlice, pg, VL_bits); + memoryData = sve_merge_store_data(tileSlice, pg, VL_bits); break; } @@ -4547,7 +4431,7 @@ void Instruction::execute() { const uint8_t* d = operands[0].getAsVector(); const uint64_t* p = operands[1].getAsVector(); - memoryData = sveHelp::sve_merge_store_data(d, p, VL_bits); + memoryData = sve_merge_store_data(d, p, VL_bits); break; } case Opcode::AArch64_ST1B_IMM: { // st1b {zt.b}, pg, [xn{, #imm, mul vl}] @@ -4555,7 +4439,7 @@ void Instruction::execute() { const uint8_t* d = operands[0].getAsVector(); const uint64_t* p = operands[1].getAsVector(); - memoryData = sveHelp::sve_merge_store_data(d, p, VL_bits); + memoryData = sve_merge_store_data(d, p, VL_bits); break; } case Opcode::AArch64_ST1D: { // st1d {zt.d}, pg, [xn, xm, lsl #3] @@ -4563,7 +4447,7 @@ void Instruction::execute() { const uint64_t* d = operands[0].getAsVector(); const uint64_t* p = operands[1].getAsVector(); - memoryData = sveHelp::sve_merge_store_data(d, p, VL_bits); + memoryData = sve_merge_store_data(d, p, VL_bits); break; } case 
Opcode::AArch64_ST1D_IMM: { // st1d {zt.d}, pg, [xn{, #imm, mul vl}] @@ -4571,7 +4455,7 @@ void Instruction::execute() { const uint64_t* d = operands[0].getAsVector(); const uint64_t* p = operands[1].getAsVector(); - memoryData = sveHelp::sve_merge_store_data(d, p, VL_bits); + memoryData = sve_merge_store_data(d, p, VL_bits); break; } case Opcode::AArch64_ST1Fourv16b: { // st1 {vt.16b, vt2.16b, vt3.16b, @@ -4732,7 +4616,7 @@ void Instruction::execute() { const uint32_t* d = operands[0].getAsVector(); const uint64_t* p = operands[1].getAsVector(); - memoryData = sveHelp::sve_merge_store_data(d, p, VL_bits); + memoryData = sve_merge_store_data(d, p, VL_bits); break; } case Opcode::AArch64_ST1W_D: { // st1w {zt.d}, pg, [xn, xm, lsl #2] @@ -4740,8 +4624,7 @@ void Instruction::execute() { const uint64_t* d = operands[0].getAsVector(); const uint64_t* p = operands[1].getAsVector(); - memoryData = - sveHelp::sve_merge_store_data(d, p, VL_bits); + memoryData = sve_merge_store_data(d, p, VL_bits); break; } case Opcode::AArch64_ST1W_IMM: { // st1w {zt.s}, pg, [xn{, #imm, mul vl}] @@ -4749,7 +4632,7 @@ void Instruction::execute() { const uint32_t* d = operands[0].getAsVector(); const uint64_t* p = operands[1].getAsVector(); - memoryData = sveHelp::sve_merge_store_data(d, p, VL_bits); + memoryData = sve_merge_store_data(d, p, VL_bits); break; } case Opcode::AArch64_ST1i16: { // st1 {vt.h}[index], [xn] @@ -5113,36 +4996,32 @@ void Instruction::execute() { break; } case Opcode::AArch64_SUBSWri: { // subs wd, wn, #imm - auto [result, nzcv] = - arithmeticHelp::subShift_imm(operands, metadata, true); + auto [result, nzcv] = subShift_imm(operands, metadata, true); results[0] = nzcv; results[1] = {result, 8}; break; } case Opcode::AArch64_SUBSWrs: { // subs wd, wn, wm{, shift #amount} - auto [result, nzcv] = - arithmeticHelp::subShift_3ops(operands, metadata, true); + auto [result, nzcv] = subShift_3ops(operands, metadata, true); results[0] = nzcv; results[1] = {result, 8}; break; } 
case Opcode::AArch64_SUBSWrx: { // subs wd, wn, wm{, extend #amount} auto [result, nzcv] = - arithmeticHelp::subExtend_3ops(operands, metadata, true); + subExtend_3ops(operands, metadata, true); results[0] = nzcv; results[1] = {result, 8}; break; } case Opcode::AArch64_SUBSXri: { // subs xd, xn, #imm - auto [result, nzcv] = - arithmeticHelp::subShift_imm(operands, metadata, true); + auto [result, nzcv] = subShift_imm(operands, metadata, true); results[0] = nzcv; results[1] = result; break; } case Opcode::AArch64_SUBSXrs: { // subs xd, xn, xm{, shift #amount} - auto [result, nzcv] = - arithmeticHelp::subShift_3ops(operands, metadata, true); + auto [result, nzcv] = subShift_3ops(operands, metadata, true); results[0] = nzcv; results[1] = result; break; @@ -5150,95 +5029,93 @@ void Instruction::execute() { case Opcode::AArch64_SUBSXrx: // subs xd, xn, wm{, extend #amount} case Opcode::AArch64_SUBSXrx64: { // subs xd, xn, xm{, extend #amount} auto [result, nzcv] = - arithmeticHelp::subExtend_3ops(operands, metadata, true); + subExtend_3ops(operands, metadata, true); results[0] = nzcv; results[1] = result; break; } case Opcode::AArch64_SUBWri: { // sub wd, wn, #imm{, } - auto [result, nzcv] = - arithmeticHelp::subShift_imm(operands, metadata, false); + auto [result, nzcv] = subShift_imm(operands, metadata, false); results[0] = {result, 8}; break; } case Opcode::AArch64_SUBWrs: { // sub wd, wn, wm{, shift #amount} auto [result, nzcv] = - arithmeticHelp::subShift_3ops(operands, metadata, false); + subShift_3ops(operands, metadata, false); results[0] = {result, 8}; break; } case Opcode::AArch64_SUBXri: { // sub xd, xn, #imm{, } - auto [result, nzcv] = - arithmeticHelp::subShift_imm(operands, metadata, false); + auto [result, nzcv] = subShift_imm(operands, metadata, false); results[0] = result; break; } case Opcode::AArch64_SUBXrs: { // sub xd, xn, xm{, shift #amount} auto [result, nzcv] = - arithmeticHelp::subShift_3ops(operands, metadata, false); + subShift_3ops(operands, 
metadata, false); results[0] = result; break; } case Opcode::AArch64_SUBXrx: // sub xd, xn, wm{, extend #amount} case Opcode::AArch64_SUBXrx64: { // sub xd, xn, xm{, extend #amount} auto [result, nzcv] = - arithmeticHelp::subExtend_3ops(operands, metadata, false); + subExtend_3ops(operands, metadata, false); results[0] = result; break; } case Opcode::AArch64_SUB_ZZZ_B: { // sub zd.b, zn.b, zm.b - results[0] = sveHelp::sveSub_3vecs(operands, VL_bits); + results[0] = sveSub_3vecs(operands, VL_bits); break; } case Opcode::AArch64_SUB_ZZZ_D: { // sub zd.d, zn.d, zm.d - results[0] = sveHelp::sveSub_3vecs(operands, VL_bits); + results[0] = sveSub_3vecs(operands, VL_bits); break; } case Opcode::AArch64_SUB_ZZZ_H: { // sub zd.h, zn.h, zm.h - results[0] = sveHelp::sveSub_3vecs(operands, VL_bits); + results[0] = sveSub_3vecs(operands, VL_bits); break; } case Opcode::AArch64_SUB_ZZZ_S: { // sub zd.s, zn.s, zm.s - results[0] = sveHelp::sveSub_3vecs(operands, VL_bits); + results[0] = sveSub_3vecs(operands, VL_bits); break; } case Opcode::AArch64_SUBv16i8: { // sub vd.16b, vn.16b, vm.16b - results[0] = neonHelp::vecLogicOp_3vecs( + results[0] = vecLogicOp_3vecs( operands, [](uint8_t x, uint8_t y) -> uint8_t { return x - y; }); break; } case Opcode::AArch64_SUBv1i64: { // sub dd, dn, dm - results[0] = neonHelp::vecLogicOp_3vecs( + results[0] = vecLogicOp_3vecs( operands, [](uint64_t x, uint64_t y) -> uint64_t { return x - y; }); break; } case Opcode::AArch64_SUBv2i32: { // sub vd.2s, vn.2s, vm.2s - results[0] = neonHelp::vecLogicOp_3vecs( + results[0] = vecLogicOp_3vecs( operands, [](uint32_t x, uint32_t y) -> uint32_t { return x - y; }); break; } case Opcode::AArch64_SUBv2i64: { // sub vd.2d, vn.2d, vm.2d - results[0] = neonHelp::vecLogicOp_3vecs( + results[0] = vecLogicOp_3vecs( operands, [](uint64_t x, uint64_t y) -> uint64_t { return x - y; }); break; } case Opcode::AArch64_SUBv4i16: { // sub vd.4h, vn.4h, vm.4h - results[0] = neonHelp::vecLogicOp_3vecs( + results[0] = 
vecLogicOp_3vecs( operands, [](uint64_t x, uint16_t y) -> uint16_t { return x - y; }); break; } case Opcode::AArch64_SUBv4i32: { // sub vd.4s, vn.4s, vm.4s - results[0] = neonHelp::vecLogicOp_3vecs( + results[0] = vecLogicOp_3vecs( operands, [](uint32_t x, uint32_t y) -> uint32_t { return x - y; }); break; } case Opcode::AArch64_SUBv8i16: { // sub vd.8h, vn.8h, vm.8h - results[0] = neonHelp::vecLogicOp_3vecs( + results[0] = vecLogicOp_3vecs( operands, [](uint16_t x, uint16_t y) -> uint16_t { return x - y; }); break; } case Opcode::AArch64_SUBv8i8: { // sub vd.8b, vn.8b, vm.8b - results[0] = neonHelp::vecLogicOp_3vecs( + results[0] = vecLogicOp_3vecs( operands, [](uint8_t x, uint8_t y) -> uint8_t { return x - y; }); break; } @@ -5248,8 +5125,7 @@ void Instruction::execute() { break; } case Opcode::AArch64_SXTW_ZPmZ_D: { // sxtw zd.d, pg/m, zn.d - results[0] = - sveHelp::sveSxtPredicated(operands, VL_bits); + results[0] = sveSxtPredicated(operands, VL_bits); break; } case Opcode::AArch64_SYSxt: { // sys #, cn, cm, #{, xt} @@ -5267,182 +5143,179 @@ void Instruction::execute() { } case Opcode::AArch64_TBLv16i8Four: { // tbl Vd.16b {Vn.16b, Vn+1.16b, // Vn+2.16b,Vn+3.16b } Vm.16b - results[0] = neonHelp::vecTbl<16>(operands, metadata); + results[0] = vecTbl<16>(operands, metadata); break; } case Opcode::AArch64_TBLv16i8One: { // tbl Vd.16b {Vn.16b} Vm.16b - results[0] = neonHelp::vecTbl<16>(operands, metadata); + results[0] = vecTbl<16>(operands, metadata); break; } case Opcode::AArch64_TBLv16i8Three: { // tbl Vd.16b {Vn.16b, Vn+1.16b, // Vn+2.16b } Vm.16b - results[0] = neonHelp::vecTbl<16>(operands, metadata); + results[0] = vecTbl<16>(operands, metadata); break; } case Opcode::AArch64_TBLv16i8Two: { // tbl Vd.16b {Vn.16b, Vn+1.16b } // Vm.16b - results[0] = neonHelp::vecTbl<16>(operands, metadata); + results[0] = vecTbl<16>(operands, metadata); break; } case Opcode::AArch64_TBLv8i8Four: { // tbl Vd.8b {Vn.16b, Vn+1.16b, // Vn+2.16b,Vn+3.16b } Vm.8b - results[0] = 
neonHelp::vecTbl<8>(operands, metadata); + results[0] = vecTbl<8>(operands, metadata); break; } case Opcode::AArch64_TBLv8i8One: { // tbl Vd.8b {Vn.16b} Vm.8b - results[0] = neonHelp::vecTbl<8>(operands, metadata); + results[0] = vecTbl<8>(operands, metadata); break; } case Opcode::AArch64_TBLv8i8Three: { // tbl Vd.8b {Vn.16b, Vn+1.16b, // Vn+2.16b } Vm.8b - results[0] = neonHelp::vecTbl<8>(operands, metadata); + results[0] = vecTbl<8>(operands, metadata); break; } case Opcode::AArch64_TBLv8i8Two: { // tbl Vd.8b {Vn.16b, Vn+1.16b } Vm.8b - results[0] = neonHelp::vecTbl<8>(operands, metadata); + results[0] = vecTbl<8>(operands, metadata); break; } case Opcode::AArch64_TBNZW: { // tbnz wn, #imm, label - auto [taken, addr] = conditionalHelp::tbnz_tbz( - operands, metadata, instructionAddress_, true); + auto [taken, addr] = + tbnz_tbz(operands, metadata, instructionAddress_, true); branchTaken_ = taken; branchAddress_ = addr; break; } case Opcode::AArch64_TBNZX: { // tbnz xn, #imm, label - auto [taken, addr] = conditionalHelp::tbnz_tbz( - operands, metadata, instructionAddress_, true); + auto [taken, addr] = + tbnz_tbz(operands, metadata, instructionAddress_, true); branchTaken_ = taken; branchAddress_ = addr; break; } case Opcode::AArch64_TBZW: { // tbz wn, #imm, label - auto [taken, addr] = conditionalHelp::tbnz_tbz( - operands, metadata, instructionAddress_, false); + auto [taken, addr] = + tbnz_tbz(operands, metadata, instructionAddress_, false); branchTaken_ = taken; branchAddress_ = addr; break; } case Opcode::AArch64_TBZX: { // tbz xn, #imm, label - auto [taken, addr] = conditionalHelp::tbnz_tbz( - operands, metadata, instructionAddress_, false); + auto [taken, addr] = + tbnz_tbz(operands, metadata, instructionAddress_, false); branchTaken_ = taken; branchAddress_ = addr; break; } case Opcode::AArch64_TRN1_ZZZ_B: { // trn1 zd.b, zn.b, zm.b - results[0] = sveHelp::sveTrn1_3vecs(operands, VL_bits); + results[0] = sveTrn1_3vecs(operands, VL_bits); break; } case 
Opcode::AArch64_TRN1_ZZZ_D: { // trn1 zd.d, zn.d, zm.d - results[0] = sveHelp::sveTrn1_3vecs(operands, VL_bits); + results[0] = sveTrn1_3vecs(operands, VL_bits); break; } case Opcode::AArch64_TRN1_ZZZ_H: { // trn1 zd.h, zn.h, zm.h - results[0] = sveHelp::sveTrn1_3vecs(operands, VL_bits); + results[0] = sveTrn1_3vecs(operands, VL_bits); break; } case Opcode::AArch64_TRN1_ZZZ_S: { // trn1 zd.s, zn.s, zm.s - results[0] = sveHelp::sveTrn1_3vecs(operands, VL_bits); + results[0] = sveTrn1_3vecs(operands, VL_bits); break; } case Opcode::AArch64_TRN1v16i8: { // trn1 vd.16b, vn.16b, vm.16b - results[0] = neonHelp::vecTrn1(operands); + results[0] = vecTrn1(operands); break; } case Opcode::AArch64_TRN1v2i32: { // trn1 vd.2s, vn.2s, vm.2s - results[0] = neonHelp::vecTrn1(operands); + results[0] = vecTrn1(operands); break; } case Opcode::AArch64_TRN1v2i64: { // trn1 vd.2d, vn.2d, vm.2d - results[0] = neonHelp::vecTrn1(operands); + results[0] = vecTrn1(operands); break; } case Opcode::AArch64_TRN1v4i16: { // trn1 vd.4h, vn.4h, vm.4h - results[0] = neonHelp::vecTrn1(operands); + results[0] = vecTrn1(operands); break; } case Opcode::AArch64_TRN1v4i32: { // trn1 vd.4s, vn.4s, vm.4s - results[0] = neonHelp::vecTrn1(operands); + results[0] = vecTrn1(operands); break; } case Opcode::AArch64_TRN1v8i16: { // trn1 vd.8h, vn.8h, vm.8h - results[0] = neonHelp::vecTrn1(operands); + results[0] = vecTrn1(operands); break; } case Opcode::AArch64_TRN1v8i8: { // trn1 vd.8b, vn.8b, vm.8b - results[0] = neonHelp::vecTrn1(operands); + results[0] = vecTrn1(operands); break; } case Opcode::AArch64_TRN2_ZZZ_B: { // trn2 zd.b, zn.b, zm.b - results[0] = sveHelp::sveTrn2_3vecs(operands, VL_bits); + results[0] = sveTrn2_3vecs(operands, VL_bits); break; } case Opcode::AArch64_TRN2_ZZZ_D: { // trn2 zd.d, zn.d, zm.d - results[0] = sveHelp::sveTrn2_3vecs(operands, VL_bits); + results[0] = sveTrn2_3vecs(operands, VL_bits); break; } case Opcode::AArch64_TRN2_ZZZ_H: { // trn2 zd.h, zn.h, zm.h - results[0] = 
sveHelp::sveTrn2_3vecs(operands, VL_bits); + results[0] = sveTrn2_3vecs(operands, VL_bits); break; } case Opcode::AArch64_TRN2_ZZZ_S: { // trn2 zd.s, zn.s, zm.s - results[0] = sveHelp::sveTrn2_3vecs(operands, VL_bits); + results[0] = sveTrn2_3vecs(operands, VL_bits); break; } case Opcode::AArch64_TRN2v16i8: { // trn2 vd.16b, vn.16b, vm.16b - results[0] = neonHelp::vecTrn2(operands); + results[0] = vecTrn2(operands); break; } case Opcode::AArch64_TRN2v2i32: { // trn2 vd.2s, vn.2s, vm.2s - results[0] = neonHelp::vecTrn2(operands); + results[0] = vecTrn2(operands); break; } case Opcode::AArch64_TRN2v2i64: { // trn2 vd.2d, vn.2d, vm.2d - results[0] = neonHelp::vecTrn2(operands); + results[0] = vecTrn2(operands); break; } case Opcode::AArch64_TRN2v4i16: { // trn2 vd.4h, vn.4h, vm.4h - results[0] = neonHelp::vecTrn2(operands); + results[0] = vecTrn2(operands); break; } case Opcode::AArch64_TRN2v4i32: { // trn2 vd.4s, vn.4s, vm.4s - results[0] = neonHelp::vecTrn2(operands); + results[0] = vecTrn2(operands); break; } case Opcode::AArch64_TRN2v8i16: { // trn2 vd.8h, vn.8h, vm.8h - results[0] = neonHelp::vecTrn2(operands); + results[0] = vecTrn2(operands); break; } case Opcode::AArch64_TRN2v8i8: { // trn2 vd.8b, vn.8b, vm.8b - results[0] = neonHelp::vecTrn2(operands); + results[0] = vecTrn2(operands); break; } case Opcode::AArch64_UADDV_VPZ_B: { // uaddv dd, pg, zn.b - results[0] = sveHelp::sveAddvPredicated(operands, VL_bits); + results[0] = sveAddvPredicated(operands, VL_bits); break; } case Opcode::AArch64_UADDV_VPZ_D: { // uaddv dd, pg, zn.d - results[0] = sveHelp::sveAddvPredicated(operands, VL_bits); + results[0] = sveAddvPredicated(operands, VL_bits); break; } case Opcode::AArch64_UADDV_VPZ_H: { // uaddv dd, pg, zn.h - results[0] = sveHelp::sveAddvPredicated(operands, VL_bits); + results[0] = sveAddvPredicated(operands, VL_bits); break; } case Opcode::AArch64_UADDV_VPZ_S: { // uaddv dd, pg, zn.s - results[0] = sveHelp::sveAddvPredicated(operands, VL_bits); + 
results[0] = sveAddvPredicated(operands, VL_bits); break; } case Opcode::AArch64_UBFMWri: { // ubfm wd, wn, #immr, #imms - results[0] = { - bitmanipHelp::bfm_2imms(operands, metadata, false, true), - 8}; + results[0] = {bfm_2imms(operands, metadata, false, true), 8}; break; } case Opcode::AArch64_UBFMXri: { // ubfm xd, xn, #immr, #imms - results[0] = - bitmanipHelp::bfm_2imms(operands, metadata, false, true); + results[0] = bfm_2imms(operands, metadata, false, true); break; } case Opcode::AArch64_UCVTFUWDri: { // ucvtf dd, wn @@ -5470,23 +5343,23 @@ void Instruction::execute() { break; } case Opcode::AArch64_UDIVWr: { // udiv wd, wn, wm - results[0] = {divideHelp::div_3ops(operands), 8}; + results[0] = {div_3ops(operands), 8}; break; } case Opcode::AArch64_UDIVXr: { // udiv xd, xn, xm - results[0] = {divideHelp::div_3ops(operands), 8}; + results[0] = {div_3ops(operands), 8}; break; } case Opcode::AArch64_UMADDLrrr: { // umaddl xd, wn, wm, xa - results[0] = multiplyHelp::maddl_4ops(operands); + results[0] = maddl_4ops(operands); break; } case Opcode::AArch64_UMAXPv16i8: { // umaxp vd.16b, vn.16b, vm.16b - results[0] = neonHelp::vecUMaxP(operands); + results[0] = vecUMaxP(operands); break; } case Opcode::AArch64_UMINPv16i8: { // uminp vd.16b, vn.16b, vm.16b - results[0] = neonHelp::vecUMinP(operands); + results[0] = vecUMinP(operands); break; } case Opcode::AArch64_UMOVvi32_idx0: // umov wd, vn.s[0] @@ -5508,224 +5381,214 @@ void Instruction::execute() { break; } case Opcode::AArch64_UMSUBLrrr: { // umsubl xd, wn, wm, xa - results[0] = arithmeticHelp::msubl_4ops(operands); + results[0] = msubl_4ops(operands); break; } case Opcode::AArch64_UMULHrr: { // umulh xd, xn, xm - results[0] = AuxFunc::mulhi(operands[0].get(), - operands[1].get()); + results[0] = + mulhi(operands[0].get(), operands[1].get()); break; } case Opcode::AArch64_UQDECD_WPiI: { // uqdecd wd{, pattern{, MUL #imm}} - results[0] = - sveHelp::sveUqdec(operands, metadata, VL_bits); + results[0] = 
sveUqdec(operands, metadata, VL_bits); break; } case Opcode::AArch64_UQDECD_XPiI: { // uqdecd xd{, pattern{, MUL #imm}} - results[0] = - sveHelp::sveUqdec(operands, metadata, VL_bits); + results[0] = sveUqdec(operands, metadata, VL_bits); break; } case Opcode::AArch64_UQDECH_XPiI: { // uqdech xd{, pattern{, MUL #imm}} - results[0] = - sveHelp::sveUqdec(operands, metadata, VL_bits); + results[0] = sveUqdec(operands, metadata, VL_bits); break; } case Opcode::AArch64_UQDECW_XPiI: { // uqdecw xd{, pattern{, MUL #imm}} - results[0] = - sveHelp::sveUqdec(operands, metadata, VL_bits); + results[0] = sveUqdec(operands, metadata, VL_bits); break; } case Opcode::AArch64_USHLLv16i8_shift: { // ushll2 vd.8h, vn.16b, #imm - results[0] = neonHelp::vecShllShift_vecImm( - operands, metadata, true); + results[0] = + vecShllShift_vecImm(operands, metadata, true); break; } case Opcode::AArch64_USHLLv4i16_shift: { // ushll vd.4s, vn.4h, #imm - results[0] = neonHelp::vecShllShift_vecImm( + results[0] = vecShllShift_vecImm( operands, metadata, false); break; } case Opcode::AArch64_USHLLv8i16_shift: { // ushll2 vd.4s, vn.8h, #imm - results[0] = neonHelp::vecShllShift_vecImm( - operands, metadata, true); + results[0] = vecShllShift_vecImm(operands, + metadata, true); break; } case Opcode::AArch64_USHLLv8i8_shift: { // ushll vd.8h, vn.8b, #imm - results[0] = neonHelp::vecShllShift_vecImm( - operands, metadata, false); + results[0] = vecShllShift_vecImm(operands, + metadata, false); break; } case Opcode::AArch64_UUNPKHI_ZZ_D: { // uunpkhi zd.d, zn.s - results[0] = - sveHelp::sveUnpk_vecs(operands, VL_bits, true); + results[0] = sveUnpk_vecs(operands, VL_bits, true); break; } case Opcode::AArch64_UUNPKHI_ZZ_H: { // uunpkhi zd.h, zn.b - results[0] = - sveHelp::sveUnpk_vecs(operands, VL_bits, true); + results[0] = sveUnpk_vecs(operands, VL_bits, true); break; } case Opcode::AArch64_UUNPKHI_ZZ_S: { // uunpkhi zd.s, zn.h - results[0] = - sveHelp::sveUnpk_vecs(operands, VL_bits, true); + 
results[0] = sveUnpk_vecs(operands, VL_bits, true); break; } case Opcode::AArch64_UUNPKLO_ZZ_D: { // uunpklo zd.d, zn.s - results[0] = - sveHelp::sveUnpk_vecs(operands, VL_bits, false); + results[0] = sveUnpk_vecs(operands, VL_bits, false); break; } case Opcode::AArch64_UUNPKLO_ZZ_H: { // uunpklo zd.h, zn.b - results[0] = - sveHelp::sveUnpk_vecs(operands, VL_bits, false); + results[0] = sveUnpk_vecs(operands, VL_bits, false); break; } case Opcode::AArch64_UUNPKLO_ZZ_S: { // uunpklo zd.s, zn.h - results[0] = - sveHelp::sveUnpk_vecs(operands, VL_bits, false); + results[0] = sveUnpk_vecs(operands, VL_bits, false); break; } case Opcode::AArch64_UZP1_ZZZ_S: { // uzp1 zd.s, zn.s, zm.s - results[0] = sveHelp::sveUzp_vecs(operands, VL_bits, true); + results[0] = sveUzp_vecs(operands, VL_bits, true); break; } case Opcode::AArch64_UZP1v16i8: { // uzp1 vd.16b, vn.16b, vm.16b - results[0] = neonHelp::vecUzp(operands, true); + results[0] = vecUzp(operands, true); break; } case Opcode::AArch64_UZP1v2i32: { // uzp1 vd.2s, vn.2s, vm.2s - results[0] = neonHelp::vecUzp(operands, true); + results[0] = vecUzp(operands, true); break; } case Opcode::AArch64_UZP1v2i64: { // uzp1 vd.2d, vn.2d, vm.2d - results[0] = neonHelp::vecUzp(operands, true); + results[0] = vecUzp(operands, true); break; } case Opcode::AArch64_UZP1v4i16: { // uzp1 vd.4h, vn.4h, vm.4h - results[0] = neonHelp::vecUzp(operands, true); + results[0] = vecUzp(operands, true); break; } case Opcode::AArch64_UZP1v4i32: { // uzp1 vd.4s, vn.4s, vm.4s - results[0] = neonHelp::vecUzp(operands, true); + results[0] = vecUzp(operands, true); break; } case Opcode::AArch64_UZP1v8i16: { // uzp1 vd.8h, vn.8h, vm.8h - results[0] = neonHelp::vecUzp(operands, true); + results[0] = vecUzp(operands, true); break; } case Opcode::AArch64_UZP1v8i8: { // uzp1 vd.8b, vn.8b, vm.8b - results[0] = neonHelp::vecUzp(operands, true); + results[0] = vecUzp(operands, true); break; } case Opcode::AArch64_UZP2v16i8: { // uzp2 vd.16b, vn.16b, vm.16b - 
results[0] = neonHelp::vecUzp(operands, false); + results[0] = vecUzp(operands, false); break; } case Opcode::AArch64_UZP2v2i32: { // uzp2 vd.2s, vn.2s, vm.2s - results[0] = neonHelp::vecUzp(operands, false); + results[0] = vecUzp(operands, false); break; } case Opcode::AArch64_UZP2v2i64: { // uzp2 vd.2d, vn.2d, vm.2d - results[0] = neonHelp::vecUzp(operands, false); + results[0] = vecUzp(operands, false); break; } case Opcode::AArch64_UZP2v4i16: { // uzp2 vd.4h, vn.4h, vm.4h - results[0] = neonHelp::vecUzp(operands, false); + results[0] = vecUzp(operands, false); break; } case Opcode::AArch64_UZP2v4i32: { // uzp2 vd.4s, vn.4s, vm.4s - results[0] = neonHelp::vecUzp(operands, false); + results[0] = vecUzp(operands, false); break; } case Opcode::AArch64_UZP2v8i16: { // uzp2 vd.8h, vn.8h, vm.8h - results[0] = neonHelp::vecUzp(operands, false); + results[0] = vecUzp(operands, false); break; } case Opcode::AArch64_UZP2v8i8: { // uzp2 vd.8b, vn.8b, vm.8b - results[0] = neonHelp::vecUzp(operands, false); + results[0] = vecUzp(operands, false); break; } case Opcode::AArch64_WHILELO_PWW_B: { // whilelo pd.b, wn, wm auto [output, nzcv] = - sveHelp::sveWhilelo(operands, VL_bits, true); + sveWhilelo(operands, VL_bits, true); results[0] = nzcv; results[1] = output; break; } case Opcode::AArch64_WHILELO_PWW_D: { // whilelo pd.d, wn, wm auto [output, nzcv] = - sveHelp::sveWhilelo(operands, VL_bits, true); + sveWhilelo(operands, VL_bits, true); results[0] = nzcv; results[1] = output; break; } case Opcode::AArch64_WHILELO_PWW_H: { // whilelo pd.h, wn, wm auto [output, nzcv] = - sveHelp::sveWhilelo(operands, VL_bits, true); + sveWhilelo(operands, VL_bits, true); results[0] = nzcv; results[1] = output; break; } case Opcode::AArch64_WHILELO_PWW_S: { // whilelo pd.s, wn, wm auto [output, nzcv] = - sveHelp::sveWhilelo(operands, VL_bits, true); + sveWhilelo(operands, VL_bits, true); results[0] = nzcv; results[1] = output; break; } case Opcode::AArch64_WHILELO_PXX_B: { // whilelo pd.b, 
xn, xm auto [output, nzcv] = - sveHelp::sveWhilelo(operands, VL_bits, true); + sveWhilelo(operands, VL_bits, true); results[0] = nzcv; results[1] = output; break; } case Opcode::AArch64_WHILELO_PXX_D: { // whilelo pd.d, xn, xm auto [output, nzcv] = - sveHelp::sveWhilelo(operands, VL_bits, true); + sveWhilelo(operands, VL_bits, true); results[0] = nzcv; results[1] = output; break; } case Opcode::AArch64_WHILELO_PXX_H: { // whilelo pd.h, xn, xm auto [output, nzcv] = - sveHelp::sveWhilelo(operands, VL_bits, true); + sveWhilelo(operands, VL_bits, true); results[0] = nzcv; results[1] = output; break; } case Opcode::AArch64_WHILELO_PXX_S: { // whilelo pd.s, xn, xm auto [output, nzcv] = - sveHelp::sveWhilelo(operands, VL_bits, true); + sveWhilelo(operands, VL_bits, true); results[0] = nzcv; results[1] = output; break; } case Opcode::AArch64_WHILELT_PXX_B: { // whilelt pd.b, xn, xm auto [output, nzcv] = - sveHelp::sveWhilelo(operands, VL_bits, true); + sveWhilelo(operands, VL_bits, true); results[0] = nzcv; results[1] = output; break; } case Opcode::AArch64_WHILELT_PXX_D: { // whilelt pd.d, xn, xm auto [output, nzcv] = - sveHelp::sveWhilelo(operands, VL_bits, true); + sveWhilelo(operands, VL_bits, true); results[0] = nzcv; results[1] = output; break; } case Opcode::AArch64_WHILELT_PXX_H: { // whilelt pd.h, xn, xm auto [output, nzcv] = - sveHelp::sveWhilelo(operands, VL_bits, true); + sveWhilelo(operands, VL_bits, true); results[0] = nzcv; results[1] = output; break; } case Opcode::AArch64_WHILELT_PXX_S: { // whilelt pd.s, xn, xm auto [output, nzcv] = - sveHelp::sveWhilelo(operands, VL_bits, true); + sveWhilelo(operands, VL_bits, true); results[0] = nzcv; results[1] = output; break; @@ -5735,119 +5598,119 @@ void Instruction::execute() { break; } case Opcode::AArch64_XTNv2i32: { // xtn vd.2s, vn.2d - results[0] = neonHelp::vecXtn(operands, false); + results[0] = vecXtn(operands, false); break; } case Opcode::AArch64_XTNv4i16: { // xtn vd.4h, vn.4s - results[0] = 
neonHelp::vecXtn(operands, false); + results[0] = vecXtn(operands, false); break; } case Opcode::AArch64_XTNv4i32: { // xtn2 vd.4s, vn.2d - results[0] = neonHelp::vecXtn(operands, true); + results[0] = vecXtn(operands, true); break; } case Opcode::AArch64_ZIP1_PPP_B: { // zip1 pd.b, pn.b, pm.b - results[0] = sveHelp::sveZip_preds(operands, VL_bits, false); + results[0] = sveZip_preds(operands, VL_bits, false); break; } case Opcode::AArch64_ZIP1_PPP_D: { // zip1 pd.d, pn.d, pm.d - results[0] = sveHelp::sveZip_preds(operands, VL_bits, false); + results[0] = sveZip_preds(operands, VL_bits, false); break; } case Opcode::AArch64_ZIP1_PPP_H: { // zip1 pd.h, pn.h, pm.h - results[0] = sveHelp::sveZip_preds(operands, VL_bits, false); + results[0] = sveZip_preds(operands, VL_bits, false); break; } case Opcode::AArch64_ZIP1_PPP_S: { // zip1 pd.s, pn.s, pm.s - results[0] = sveHelp::sveZip_preds(operands, VL_bits, false); + results[0] = sveZip_preds(operands, VL_bits, false); break; } case Opcode::AArch64_ZIP1_ZZZ_D: { // zip1 zd.d, zn.d, zm.d - results[0] = sveHelp::sveZip_vecs(operands, VL_bits, false); + results[0] = sveZip_vecs(operands, VL_bits, false); break; } case Opcode::AArch64_ZIP1_ZZZ_S: { // zip1 zd.s, zn.s, zm.s - results[0] = sveHelp::sveZip_vecs(operands, VL_bits, false); + results[0] = sveZip_vecs(operands, VL_bits, false); break; } case Opcode::AArch64_ZIP1v16i8: { // zip1 vd.16b, vn.16b, vm.16b - results[0] = neonHelp::vecZip(operands, false); + results[0] = vecZip(operands, false); break; } case Opcode::AArch64_ZIP1v2i32: { // zip1 vd.2s, vn.2s, vm.2s - results[0] = neonHelp::vecZip(operands, false); + results[0] = vecZip(operands, false); break; } case Opcode::AArch64_ZIP1v2i64: { // zip1 vd.2d, vn.2d, vm.2d - results[0] = neonHelp::vecZip(operands, false); + results[0] = vecZip(operands, false); break; } case Opcode::AArch64_ZIP1v4i16: { // zip1 vd.4h, vn.4h, vm.4h - results[0] = neonHelp::vecZip(operands, false); + results[0] = vecZip(operands, false); 
break; } case Opcode::AArch64_ZIP1v4i32: { // zip1 vd.4s, vn.4s, vm.4s - results[0] = neonHelp::vecZip(operands, false); + results[0] = vecZip(operands, false); break; } case Opcode::AArch64_ZIP1v8i16: { // zip1 vd.8h, vn.8h, vm.8h - results[0] = neonHelp::vecZip(operands, false); + results[0] = vecZip(operands, false); break; } case Opcode::AArch64_ZIP1v8i8: { // zip1 vd.8b, vn.8b, vm.8b - results[0] = neonHelp::vecZip(operands, false); + results[0] = vecZip(operands, false); break; } case Opcode::AArch64_ZIP2_PPP_B: { // zip2 pd.b, pn.b, pm.b - results[0] = sveHelp::sveZip_preds(operands, VL_bits, true); + results[0] = sveZip_preds(operands, VL_bits, true); break; } case Opcode::AArch64_ZIP2_PPP_D: { // zip2 pd.d, pn.d, pm.d - results[0] = sveHelp::sveZip_preds(operands, VL_bits, true); + results[0] = sveZip_preds(operands, VL_bits, true); break; } case Opcode::AArch64_ZIP2_PPP_H: { // zip2 pd.h, pn.h, pm.h - results[0] = sveHelp::sveZip_preds(operands, VL_bits, true); + results[0] = sveZip_preds(operands, VL_bits, true); break; } case Opcode::AArch64_ZIP2_PPP_S: { // zip2 pd.s, pn.s, pm.s - results[0] = sveHelp::sveZip_preds(operands, VL_bits, true); + results[0] = sveZip_preds(operands, VL_bits, true); break; } case Opcode::AArch64_ZIP2_ZZZ_D: { // zip2 zd.d, zn.d, zm.d - results[0] = sveHelp::sveZip_vecs(operands, VL_bits, true); + results[0] = sveZip_vecs(operands, VL_bits, true); break; } case Opcode::AArch64_ZIP2_ZZZ_S: { // zip2 zd.s, zn.s, zm.s - results[0] = sveHelp::sveZip_vecs(operands, VL_bits, true); + results[0] = sveZip_vecs(operands, VL_bits, true); break; } case Opcode::AArch64_ZIP2v16i8: { // zip2 vd.16b, vn.16b, vm.16b - results[0] = neonHelp::vecZip(operands, true); + results[0] = vecZip(operands, true); break; } case Opcode::AArch64_ZIP2v2i32: { // zip2 vd.2s, vn.2s, vm.2s - results[0] = neonHelp::vecZip(operands, true); + results[0] = vecZip(operands, true); break; } case Opcode::AArch64_ZIP2v2i64: { // zip2 vd.2d, vn.2d, vm.2d - results[0] 
= neonHelp::vecZip(operands, true); + results[0] = vecZip(operands, true); break; } case Opcode::AArch64_ZIP2v4i16: { // zip2 vd.4h, vn.4h, vm.4h - results[0] = neonHelp::vecZip(operands, true); + results[0] = vecZip(operands, true); break; } case Opcode::AArch64_ZIP2v4i32: { // zip2 vd.4s, vn.4s, vm.4s - results[0] = neonHelp::vecZip(operands, true); + results[0] = vecZip(operands, true); break; } case Opcode::AArch64_ZIP2v8i16: { // zip2 vd.8h, vn.8h, vm.8h - results[0] = neonHelp::vecZip(operands, true); + results[0] = vecZip(operands, true); break; } case Opcode::AArch64_ZIP2v8i8: { // zip2 vd.8b, vn.8b, vm.8b - results[0] = neonHelp::vecZip(operands, true); + results[0] = vecZip(operands, true); break; } case Opcode::AArch64_ZERO_M: { // zero {mask} diff --git a/src/lib/arch/riscv/Architecture.cc b/src/lib/arch/riscv/Architecture.cc index 8765e3dee8..b641f8fbaa 100644 --- a/src/lib/arch/riscv/Architecture.cc +++ b/src/lib/arch/riscv/Architecture.cc @@ -235,7 +235,7 @@ int32_t Architecture::getSystemRegisterTag(uint16_t reg) const { // Check below is done for speculative instructions that may be passed into // the function but will not be executed. If such invalid speculative // instructions get through they can cause an out-of-range error. 
- if (!systemRegisterMap_.count(reg)) return 0; + if (!systemRegisterMap_.count(reg)) return -1; return systemRegisterMap_.at(reg); } diff --git a/src/lib/arch/riscv/ExceptionHandler.cc b/src/lib/arch/riscv/ExceptionHandler.cc index 07b1e5aac3..b6ebcbb924 100644 --- a/src/lib/arch/riscv/ExceptionHandler.cc +++ b/src/lib/arch/riscv/ExceptionHandler.cc @@ -874,7 +874,7 @@ void ExceptionHandler::printException(const Instruction& insn) const { std::cout << "execution not-yet-implemented"; break; case InstructionException::AliasNotYetImplemented: - std::cout << "alias not-yet-implemented" << std::endl; + std::cout << "alias not-yet-implemented"; break; case InstructionException::MisalignedPC: std::cout << "misaligned program counter"; diff --git a/src/lib/arch/riscv/Instruction.cc b/src/lib/arch/riscv/Instruction.cc index 1af879f577..2c2eaf78cb 100644 --- a/src/lib/arch/riscv/Instruction.cc +++ b/src/lib/arch/riscv/Instruction.cc @@ -10,8 +10,6 @@ namespace simeng { namespace arch { namespace riscv { -const Register Instruction::ZERO_REGISTER = {RegisterType::GENERAL, 0}; - Instruction::Instruction(const Architecture& architecture, const InstructionMetadata& metadata) : architecture_(architecture), @@ -36,7 +34,7 @@ const span Instruction::getSourceRegisters() const { } const span Instruction::getSourceOperands() const { - return {const_cast(operands.data()), operands.size()}; + return {const_cast(operands.data()), sourceRegisterCount}; } const span Instruction::getDestinationRegisters() const { diff --git a/src/lib/arch/riscv/Instruction_decode.cc b/src/lib/arch/riscv/Instruction_decode.cc index 18c024eded..d997ff5b7d 100644 --- a/src/lib/arch/riscv/Instruction_decode.cc +++ b/src/lib/arch/riscv/Instruction_decode.cc @@ -52,7 +52,7 @@ Register csRegToRegister(unsigned int reg) { if (reg == RISCV_REG_X0) { // Zero register - return Instruction::ZERO_REGISTER; + return RegisterType::ZERO_REGISTER; } assert(false && "Decoding failed due to unknown register identifier"); 
@@ -148,7 +148,7 @@ void Instruction::decode() { sourceRegisters[sourceRegisterCount] = csRegToRegister(op.reg); if (sourceRegisters[sourceRegisterCount] == - Instruction::ZERO_REGISTER) { + RegisterType::ZERO_REGISTER) { // Catch zero register references and pre-complete those operands operands[sourceRegisterCount] = RegisterValue(0, 8); } else { @@ -157,7 +157,7 @@ void Instruction::decode() { sourceRegisterCount++; } else { - if (csRegToRegister(op.reg) != Instruction::ZERO_REGISTER) { + if (csRegToRegister(op.reg) != RegisterType::ZERO_REGISTER) { destinationRegisters[destinationRegisterCount] = csRegToRegister(op.reg); @@ -190,7 +190,7 @@ void Instruction::decode() { // Second or third operand sourceRegisters[sourceRegisterCount] = csRegToRegister(op.reg); - if (sourceRegisters[sourceRegisterCount] == Instruction::ZERO_REGISTER) { + if (sourceRegisters[sourceRegisterCount] == RegisterType::ZERO_REGISTER) { // Catch zero register references and pre-complete those operands operands[sourceRegisterCount] = RegisterValue(0, 8); } else { diff --git a/src/lib/config/ModelConfig.cc b/src/lib/config/ModelConfig.cc index f04dc4677c..697c14c7c4 100644 --- a/src/lib/config/ModelConfig.cc +++ b/src/lib/config/ModelConfig.cc @@ -649,10 +649,14 @@ void ModelConfig::setExpectations(bool isDefault) { expectations_.addChild(ExpectationNode::createExpectation("CPU-Info")); expectations_["CPU-Info"].addChild(ExpectationNode::createExpectation( - false, "Generate-Special-Dir", true)); + true, "Generate-Special-Dir", true)); expectations_["CPU-Info"]["Generate-Special-Dir"].setValueSet( std::vector{false, true}); + expectations_["CPU-Info"].addChild( + ExpectationNode::createExpectation( + defaultSpecialFilePath_, "Special-File-Dir-Path", true)); + expectations_["CPU-Info"].addChild( ExpectationNode::createExpectation(1, "Core-Count", true)); expectations_["CPU-Info"]["Core-Count"].setValueBounds(1, @@ -887,6 +891,18 @@ void ModelConfig::postValidation() { for (const auto& prt : 
portnames) invalid_ << "\t- " << prt << " has no associated reservation station\n"; + // Ensure that given special file directory exists iff auto-generation is + // False + if (!configTree_["CPU-Info"]["Generate-Special-Dir"].as() && + !std::ifstream( + configTree_["CPU-Info"]["Special-File-Dir-Path"].as()) + .good()) { + invalid_ + << "\t- Special File Directory '" + << configTree_["CPU-Info"]["Special-File-Dir-Path"].as() + << "' does not exist\n"; + } + // Ensure the L1-[Data|Instruction]-Memory:Interface-Type restrictions are // enforced std::string simMode = diff --git a/src/lib/models/emulation/Core.cc b/src/lib/models/emulation/Core.cc index a37b8e038a..bfd0c6c6f6 100644 --- a/src/lib/models/emulation/Core.cc +++ b/src/lib/models/emulation/Core.cc @@ -171,8 +171,6 @@ void Core::execute(std::shared_ptr& uop) { } if (uop->isStoreData()) { - auto results = uop->getResults(); - auto destinations = uop->getDestinationRegisters(); auto data = uop->getData(); for (size_t i = 0; i < previousAddresses_.size(); i++) { dataMemory_.requestWrite(previousAddresses_[i], data[i]); diff --git a/src/lib/models/outoforder/Core.cc b/src/lib/models/outoforder/Core.cc index ca4d5ce564..83a9dd9cda 100644 --- a/src/lib/models/outoforder/Core.cc +++ b/src/lib/models/outoforder/Core.cc @@ -168,11 +168,11 @@ void Core::flushIfNeeded() { // Check for flush bool euFlush = false; uint64_t targetAddress = 0; - uint64_t lowestSeqId = 0; + uint64_t lowestInsnId = 0; for (const auto& eu : executionUnits_) { - if (eu.shouldFlush() && (!euFlush || eu.getFlushSeqId() < lowestSeqId)) { + if (eu.shouldFlush() && (!euFlush || eu.getFlushInsnId() < lowestInsnId)) { euFlush = true; - lowestSeqId = eu.getFlushSeqId(); + lowestInsnId = eu.getFlushInsnId(); targetAddress = eu.getFlushAddress(); } } @@ -182,10 +182,10 @@ void Core::flushIfNeeded() { // Rename/Dispatch) if (reorderBuffer_.shouldFlush() && - (!euFlush || reorderBuffer_.getFlushSeqId() < lowestSeqId)) { + (!euFlush || 
reorderBuffer_.getFlushInsnId() < lowestInsnId)) { // If the reorder buffer found an older instruction to flush up to, do // that instead - lowestSeqId = reorderBuffer_.getFlushSeqId(); + lowestInsnId = reorderBuffer_.getFlushInsnId(); targetAddress = reorderBuffer_.getFlushAddress(); } @@ -201,7 +201,7 @@ void Core::flushIfNeeded() { renameToDispatchBuffer_.stall(false); // Flush everything younger than the bad instruction from the ROB - reorderBuffer_.flush(lowestSeqId); + reorderBuffer_.flush(lowestInsnId); decodeUnit_.purgeFlushed(); dispatchIssueUnit_.purgeFlushed(); loadStoreQueue_.purgeFlushed(); diff --git a/src/lib/pipeline/A64FXPortAllocator.cc b/src/lib/pipeline/A64FXPortAllocator.cc index d7cb9626ee..95a55cdb9b 100644 --- a/src/lib/pipeline/A64FXPortAllocator.cc +++ b/src/lib/pipeline/A64FXPortAllocator.cc @@ -145,6 +145,7 @@ uint16_t A64FXPortAllocator::allocate(const std::vector& ports) { } void A64FXPortAllocator::issued(uint16_t port) {} + void A64FXPortAllocator::deallocate(uint16_t port) { issued(port); }; uint8_t A64FXPortAllocator::attributeMapping( diff --git a/src/lib/pipeline/DispatchIssueUnit.cc b/src/lib/pipeline/DispatchIssueUnit.cc index e8019986bf..afd7390215 100644 --- a/src/lib/pipeline/DispatchIssueUnit.cc +++ b/src/lib/pipeline/DispatchIssueUnit.cc @@ -199,10 +199,6 @@ void DispatchIssueUnit::forwardOperands(const span& registers, } } -void DispatchIssueUnit::setRegisterReady(Register reg) { - scoreboard_[reg.type][reg.tag] = true; -} - void DispatchIssueUnit::purgeFlushed() { for (size_t i = 0; i < reservationStations_.size(); i++) { // Search the ready queues for flushed instructions and remove them diff --git a/src/lib/pipeline/ExecuteUnit.cc b/src/lib/pipeline/ExecuteUnit.cc index 7d789b34bd..c87c2e1845 100644 --- a/src/lib/pipeline/ExecuteUnit.cc +++ b/src/lib/pipeline/ExecuteUnit.cc @@ -165,7 +165,7 @@ void ExecuteUnit::execute(std::shared_ptr& uop) { bool ExecuteUnit::shouldFlush() const { return shouldFlush_; } uint64_t 
ExecuteUnit::getFlushAddress() const { return pc_; } -uint64_t ExecuteUnit::getFlushSeqId() const { return flushAfter_; } +uint64_t ExecuteUnit::getFlushInsnId() const { return flushAfter_; } void ExecuteUnit::purgeFlushed() { if (pipeline_.size() == 0) { diff --git a/src/lib/pipeline/RegisterAliasTable.cc b/src/lib/pipeline/RegisterAliasTable.cc index 4aba7852f9..23bcac3980 100644 --- a/src/lib/pipeline/RegisterAliasTable.cc +++ b/src/lib/pipeline/RegisterAliasTable.cc @@ -93,6 +93,7 @@ void RegisterAliasTable::commit(Register physical) { auto oldTag = historyTable_[physical.type][physical.tag]; freeQueues_[physical.type].push(oldTag); } + void RegisterAliasTable::rewind(Register physical) { assert(physical.renamed && "Attempted to rewind a physical register which hasn't been subject to " @@ -105,9 +106,6 @@ void RegisterAliasTable::rewind(Register physical) { // Add the rewound physical tag back to the free queue freeQueues_[physical.type].push(physical.tag); } -void RegisterAliasTable::free(Register physical) { - freeQueues_[physical.type].push(physical.tag); -} } // namespace pipeline } // namespace simeng diff --git a/src/lib/pipeline/ReorderBuffer.cc b/src/lib/pipeline/ReorderBuffer.cc index c2b17909b5..a706bab866 100644 --- a/src/lib/pipeline/ReorderBuffer.cc +++ b/src/lib/pipeline/ReorderBuffer.cc @@ -155,12 +155,12 @@ unsigned int ReorderBuffer::commit(unsigned int maxCommitSize) { return n; } -void ReorderBuffer::flush(uint64_t afterSeqId) { +void ReorderBuffer::flush(uint64_t afterInsnId) { // Iterate backwards from the tail of the queue to find and remove ops newer - // than `afterSeqId` + // than `afterInsnId` while (!buffer_.empty()) { auto& uop = buffer_.back(); - if (uop->getInstructionId() <= afterSeqId) { + if (uop->getInstructionId() <= afterInsnId) { break; } @@ -193,7 +193,7 @@ unsigned int ReorderBuffer::getFreeSpace() const { bool ReorderBuffer::shouldFlush() const { return shouldFlush_; } uint64_t ReorderBuffer::getFlushAddress() const { 
return pc_; } -uint64_t ReorderBuffer::getFlushSeqId() const { return flushAfter_; } +uint64_t ReorderBuffer::getFlushInsnId() const { return flushAfter_; } uint64_t ReorderBuffer::getInstructionsCommittedCount() const { return instructionsCommitted_; diff --git a/src/tools/simeng/main.cc b/src/tools/simeng/main.cc index 9e6bd05eef..32d1a11629 100644 --- a/src/tools/simeng/main.cc +++ b/src/tools/simeng/main.cc @@ -99,6 +99,11 @@ int main(int argc, char** argv) { else std::cout << "False"; std::cout << std::endl; + std::cout << "[SimEng] Special File directory used: " + << simeng::config::SimInfo::getConfig()["CPU-Info"] + ["Special-File-Dir-Path"] + .as() + << std::endl; std::cout << "[SimEng] Number of Cores: " << simeng::config::SimInfo::getConfig()["CPU-Info"]["Core-Count"] .as() diff --git a/sst/SimEngCoreWrapper.cc b/sst/SimEngCoreWrapper.cc index 338af9e002..b33af204e4 100644 --- a/sst/SimEngCoreWrapper.cc +++ b/sst/SimEngCoreWrapper.cc @@ -354,6 +354,11 @@ void SimEngCoreWrapper::fabricateSimEngCore() { else std::cout << "False"; std::cout << std::endl; + std::cout << "[SimEng] Special File directory used: " + << simeng::config::SimInfo::getConfig()["CPU-Info"] + ["Special-File-Dir-Path"] + .as() + << std::endl; std::cout << "[SimEng] Number of Cores: " << simeng::config::SimInfo::getConfig()["CPU-Info"]["Core-Count"] .as() diff --git a/test/integration/ConfigTest.cc b/test/integration/ConfigTest.cc index f121c0fb07..ed0f4124de 100644 --- a/test/integration/ConfigTest.cc +++ b/test/integration/ConfigTest.cc @@ -3,6 +3,7 @@ #include "gtest/gtest.h" #include "simeng/config/SimInfo.hh" +#include "simeng/version.hh" namespace { @@ -11,6 +12,7 @@ TEST(ConfigTest, Default) { // Test key default values exposed in SimInfo EXPECT_EQ(simeng::config::SimInfo::getConfigPath(), "Default"); EXPECT_EQ(simeng::config::SimInfo::getISA(), simeng::config::ISA::AArch64); + EXPECT_EQ(simeng::config::SimInfo::getISAString(), "AArch64"); 
EXPECT_EQ(simeng::config::SimInfo::getSimMode(), simeng::config::SimulationMode::Emulation); EXPECT_EQ(simeng::config::SimInfo::getSimModeStr(), "Emulation"); @@ -66,7 +68,8 @@ TEST(ConfigTest, Default) { " 'Instruction-Groups':\n - NONE\n 'Instruction-Opcodes':\n " " - 6343\n 'Execution-Latency': 1\n 'Execution-Throughput': 1\n " " 'Instruction-Group-Nums':\n - 87\n'CPU-Info':\n " - "'Generate-Special-Dir': 0\n 'Core-Count': 1\n 'Socket-Count': 1\n " + "'Generate-Special-Dir': 1\n 'Special-File-Dir-Path': " SIMENG_BUILD_DIR + "/specialFiles/\n 'Core-Count': 1\n 'Socket-Count': 1\n " "SMT: 1\n BogoMIPS: 0\n Features: ''\n 'CPU-Implementer': 0x0\n " "'CPU-Architecture': 0\n 'CPU-Variant': 0x0\n 'CPU-Part': 0x0\n " "'CPU-Revision': 0\n 'Package-Count': 1\n"; @@ -77,6 +80,7 @@ TEST(ConfigTest, Default) { // Test SimInfo exposed have correctly changed EXPECT_EQ(simeng::config::SimInfo::getISA(), simeng::config::ISA::RV64); + EXPECT_EQ(simeng::config::SimInfo::getISAString(), "rv64"); sysRegisterEnums = {simeng::arch::riscv::riscv_sysreg::RISCV_SYSREG_FFLAGS, simeng::arch::riscv::riscv_sysreg::RISCV_SYSREG_FRM, simeng::arch::riscv::riscv_sysreg::RISCV_SYSREG_FCSR, @@ -119,7 +123,8 @@ TEST(ConfigTest, Default) { " 'Instruction-Groups':\n - NONE\n 'Instruction-Opcodes':\n " " - 450\n 'Execution-Latency': 1\n 'Execution-Throughput': 1\n " "'Instruction-Group-Nums':\n - 24\n'CPU-Info':\n " - "'Generate-Special-Dir': 0\n 'Core-Count': 1\n 'Socket-Count': 1\n " + "'Generate-Special-Dir': 1\n 'Special-File-Dir-Path': " SIMENG_BUILD_DIR + "/specialFiles/\n 'Core-Count': 1\n 'Socket-Count': 1\n " "SMT: 1\n BogoMIPS: 0\n Features: ''\n 'CPU-Implementer': 0x0\n " "'CPU-Architecture': 0\n 'CPU-Variant': 0x0\n 'CPU-Part': 0x0\n " "'CPU-Revision': 0\n 'Package-Count': 1\n"; @@ -359,4 +364,47 @@ TEST(ConfigTest, multipleWildNodes) { "instance of key HEAD"); } +// Test that, using a file path, a config can be set from a yaml file +TEST(ConfigTest, configFromFile) { + std::string 
filePath = SIMENG_SOURCE_DIR "/configs/a64fx.yaml"; + simeng::config::SimInfo::setConfig(filePath); + EXPECT_EQ(simeng::config::SimInfo::getConfigPath(), filePath); + EXPECT_EQ(simeng::config::SimInfo::getISA(), simeng::config::ISA::AArch64); + EXPECT_EQ(simeng::config::SimInfo::getISAString(), "AArch64"); + EXPECT_EQ(simeng::config::SimInfo::getSimMode(), + simeng::config::SimulationMode::Outoforder); + EXPECT_EQ(simeng::config::SimInfo::getSimModeStr(), "Out-of-Order"); + std::vector sysRegisterEnums = { + arm64_sysreg::ARM64_SYSREG_DCZID_EL0, + arm64_sysreg::ARM64_SYSREG_FPCR, + arm64_sysreg::ARM64_SYSREG_FPSR, + arm64_sysreg::ARM64_SYSREG_TPIDR_EL0, + arm64_sysreg::ARM64_SYSREG_MIDR_EL1, + arm64_sysreg::ARM64_SYSREG_CNTVCT_EL0, + arm64_sysreg::ARM64_SYSREG_PMCCNTR_EL0, + arm64_sysreg::ARM64_SYSREG_SVCR}; + EXPECT_EQ(simeng::config::SimInfo::getSysRegVec(), sysRegisterEnums); + std::vector archRegStruct = { + {8, 32}, + {256, 32}, + {32, 17}, + {1, 1}, + {8, static_cast(sysRegisterEnums.size())}, + {256, 16}}; + EXPECT_EQ(simeng::config::SimInfo::getArchRegStruct(), archRegStruct); + std::vector physRegStruct = { + {8, 96}, + {256, 128}, + {32, 48}, + {1, 128}, + {8, static_cast(sysRegisterEnums.size())}, + {256, 16}}; + EXPECT_EQ(simeng::config::SimInfo::getPhysRegStruct(), physRegStruct); + std::vector physRegQuants = { + 96, 128, 48, 128, static_cast(sysRegisterEnums.size()), 16}; + EXPECT_EQ(simeng::config::SimInfo::getPhysRegQuantities(), physRegQuants); +} +// getPhysRegStruct() +// getPhysRegQuantities() + } // namespace diff --git a/test/regression/RegressionTest.cc b/test/regression/RegressionTest.cc index 37655d01e3..5b43ac6cd0 100644 --- a/test/regression/RegressionTest.cc +++ b/test/regression/RegressionTest.cc @@ -75,7 +75,9 @@ void RegressionTest::run(const char* source, const char* triple, std::unique_ptr dataMemory; // Create the OS kernel and the process - simeng::kernel::Linux kernel; + simeng::kernel::Linux kernel( + 
simeng::config::SimInfo::getConfig()["CPU-Info"]["Special-File-Dir-Path"] + .as()); kernel.createProcess(*process_); // Populate the heap with initial data (specified by the test being run). diff --git a/test/regression/aarch64/instructions/float.cc b/test/regression/aarch64/instructions/float.cc index 10d3150e1c..23c174407f 100644 --- a/test/regression/aarch64/instructions/float.cc +++ b/test/regression/aarch64/instructions/float.cc @@ -533,12 +533,18 @@ TEST_P(InstFloat, fcvt) { } TEST_P(InstFloat, fcvtzu) { - initialHeapData_.resize(32); + initialHeapData_.resize(80); double* dheap = reinterpret_cast(initialHeapData_.data()); dheap[0] = 1.0; dheap[1] = -42.76; dheap[2] = -0.125; dheap[3] = 321.5; + dheap[4] = std::nan(0); + dheap[5] = -std::nan(0); + dheap[6] = INFINITY; + dheap[7] = -INFINITY; + dheap[8] = 4294967296.0; // uint32_max + 1 + dheap[9] = 18446744073709551616.0; // uint64_max + 1 // Double to uint32 RUN_AARCH64(R"( @@ -549,15 +555,30 @@ TEST_P(InstFloat, fcvtzu) { ldp d0, d1, [x0] ldp d2, d3, [x0, #16] + ldp d4, d5, [x0, #32] + ldp d6, d7, [x0, #48] + ldp d8, d9, [x0, #64] fcvtzu w0, d0 fcvtzu w1, d1 fcvtzu w2, d2 fcvtzu w3, d3 + fcvtzu w4, d4 + fcvtzu w5, d5 + fcvtzu w6, d6 + fcvtzu w7, d7 + fcvtzu w8, d8 + fcvtzu w9, d9 )"); EXPECT_EQ((getGeneralRegister(0)), 1); - EXPECT_EQ((getGeneralRegister(1)), -42); + EXPECT_EQ((getGeneralRegister(1)), 0); EXPECT_EQ((getGeneralRegister(2)), 0); EXPECT_EQ((getGeneralRegister(3)), 321); + EXPECT_EQ((getGeneralRegister(4)), 0); + EXPECT_EQ((getGeneralRegister(5)), 0); + EXPECT_EQ((getGeneralRegister(6)), UINT32_MAX); + EXPECT_EQ((getGeneralRegister(7)), 0); + EXPECT_EQ((getGeneralRegister(8)), UINT32_MAX); + EXPECT_EQ((getGeneralRegister(9)), UINT32_MAX); // Double to uint64 RUN_AARCH64(R"( @@ -568,17 +589,32 @@ TEST_P(InstFloat, fcvtzu) { ldp d0, d1, [x0] ldp d2, d3, [x0, #16] + ldp d4, d5, [x0, #32] + ldp d6, d7, [x0, #48] + ldp d8, d9, [x0, #64] fcvtzu x0, d0 fcvtzu x1, d1 fcvtzu x2, d2 fcvtzu x3, d3 + 
fcvtzu x4, d4 + fcvtzu x5, d5 + fcvtzu x6, d6 + fcvtzu x7, d7 + fcvtzu x8, d8 + fcvtzu x9, d9 )"); EXPECT_EQ((getGeneralRegister(0)), 1); - EXPECT_EQ((getGeneralRegister(1)), -42); + EXPECT_EQ((getGeneralRegister(1)), 0); EXPECT_EQ((getGeneralRegister(2)), 0); EXPECT_EQ((getGeneralRegister(3)), 321); + EXPECT_EQ((getGeneralRegister(4)), 0); + EXPECT_EQ((getGeneralRegister(5)), 0); + EXPECT_EQ((getGeneralRegister(6)), UINT64_MAX); + EXPECT_EQ((getGeneralRegister(7)), 0); + EXPECT_EQ((getGeneralRegister(8)), 4294967296); + EXPECT_EQ((getGeneralRegister(9)), UINT64_MAX); - // Double to implicit_cast(uint64) + // Double to scalar uint64 RUN_AARCH64(R"( # Get heap address mov x0, 0 @@ -587,25 +623,44 @@ TEST_P(InstFloat, fcvtzu) { ldp d0, d1, [x0] ldp d2, d3, [x0, #16] + ldp d4, d5, [x0, #32] + ldp d6, d7, [x0, #48] + ldp d8, d9, [x0, #64] fcvtzu d10, d0 fcvtzu d11, d1 fcvtzu d12, d2 fcvtzu d13, d3 + fcvtzu d14, d4 + fcvtzu d15, d5 + fcvtzu d16, d6 + fcvtzu d17, d7 + fcvtzu d18, d8 + fcvtzu d19, d9 )"); // Values verified on A64FX via simple assembly test kernel - double a = 4.9406564584124654e-324; - double b = 0.0; - double c = 1.5859507231504014e-321; - CHECK_NEON(10, double, {a, 0.0}); - CHECK_NEON(11, double, {b, 0.0}); - CHECK_NEON(12, double, {b, 0.0}); - CHECK_NEON(13, double, {c, 0.0}); + CHECK_NEON(10, uint64_t, {1, 0}); + CHECK_NEON(11, uint64_t, {0, 0}); + CHECK_NEON(12, uint64_t, {0, 0}); + CHECK_NEON(13, uint64_t, {321, 0}); + CHECK_NEON(14, uint64_t, {0, 0}); + CHECK_NEON(15, uint64_t, {0, 0}); + CHECK_NEON(16, uint64_t, {UINT64_MAX, 0}); + CHECK_NEON(17, uint64_t, {0, 0}); + CHECK_NEON(18, uint64_t, {4294967296, 0}); + CHECK_NEON(19, uint64_t, {UINT64_MAX, 0}); float* fheap = reinterpret_cast(initialHeapData_.data()); - fheap[0] = 1.0; - fheap[1] = -42.76; - fheap[2] = -0.125; - fheap[3] = 321.5; + fheap[0] = 1.0f; + fheap[1] = -42.76f; + fheap[2] = -0.125f; + fheap[3] = 321.5f; + fheap[4] = std::nanf(0); + fheap[5] = -std::nanf(0); + fheap[6] = 
INFINITY; + fheap[7] = -INFINITY; + fheap[8] = 4294967296.0; // uint32_max + 1 + fheap[9] = 18446744073709551616.0; // uint64_max + 1 + // Float to uint32 RUN_AARCH64(R"( # Get heap address @@ -615,15 +670,30 @@ TEST_P(InstFloat, fcvtzu) { ldp s0, s1, [x0] ldp s2, s3, [x0, #8] + ldp s4, s5, [x0, #16] + ldp s6, s7, [x0, #24] + ldp s8, s9, [x0, #32] fcvtzu w0, s0 fcvtzu w1, s1 fcvtzu w2, s2 fcvtzu w3, s3 + fcvtzu w4, s4 + fcvtzu w5, s5 + fcvtzu w6, s6 + fcvtzu w7, s7 + fcvtzu w8, s8 + fcvtzu w9, s9 )"); EXPECT_EQ((getGeneralRegister(0)), 1); - EXPECT_EQ((getGeneralRegister(1)), -42); + EXPECT_EQ((getGeneralRegister(1)), 0); EXPECT_EQ((getGeneralRegister(2)), 0); EXPECT_EQ((getGeneralRegister(3)), 321); + EXPECT_EQ((getGeneralRegister(4)), 0); + EXPECT_EQ((getGeneralRegister(5)), 0); + EXPECT_EQ((getGeneralRegister(6)), UINT32_MAX); + EXPECT_EQ((getGeneralRegister(7)), 0); + EXPECT_EQ((getGeneralRegister(8)), UINT32_MAX); + EXPECT_EQ((getGeneralRegister(9)), UINT32_MAX); // Float to uint64 RUN_AARCH64(R"( @@ -634,15 +704,30 @@ TEST_P(InstFloat, fcvtzu) { ldp s0, s1, [x0] ldp s2, s3, [x0, #8] + ldp s4, s5, [x0, #16] + ldp s6, s7, [x0, #24] + ldp s8, s9, [x0, #32] fcvtzu x0, s0 fcvtzu x1, s1 fcvtzu x2, s2 fcvtzu x3, s3 + fcvtzu x4, s4 + fcvtzu x5, s5 + fcvtzu x6, s6 + fcvtzu x7, s7 + fcvtzu x8, s8 + fcvtzu x9, s9 )"); EXPECT_EQ((getGeneralRegister(0)), 1); - EXPECT_EQ((getGeneralRegister(1)), -42); + EXPECT_EQ((getGeneralRegister(1)), 0); EXPECT_EQ((getGeneralRegister(2)), 0); EXPECT_EQ((getGeneralRegister(3)), 321); + EXPECT_EQ((getGeneralRegister(4)), 0); + EXPECT_EQ((getGeneralRegister(5)), 0); + EXPECT_EQ((getGeneralRegister(6)), UINT64_MAX); + EXPECT_EQ((getGeneralRegister(7)), 0); + EXPECT_EQ((getGeneralRegister(8)), 4294967296); + EXPECT_EQ((getGeneralRegister(9)), UINT64_MAX); } TEST_P(InstFloat, fdiv) { diff --git a/test/regression/aarch64/instructions/sve.cc b/test/regression/aarch64/instructions/sve.cc index 6982fb8c9e..39a95ee4a3 100644 --- 
a/test/regression/aarch64/instructions/sve.cc +++ b/test/regression/aarch64/instructions/sve.cc @@ -4065,7 +4065,7 @@ TEST_P(InstSve, frintn) { initialHeapData_.resize(VL / 8); float* fheap = reinterpret_cast(initialHeapData_.data()); std::vector fsrcA = {1.0f, -42.5f, -0.125f, 0.0f, - 40.5f, -684.72f, -0.15f, 107.86f}; + 41.5f, -684.72f, -0.15f, 107.86f}; std::vector fsrcB = {-34.5f, -0.917f, 0.0f, 80.72f, -125.5f, -0.01f, 701.90f, 7.5f}; fillHeapCombined(fheap, fsrcA, fsrcB, VL / 32); @@ -4084,18 +4084,19 @@ TEST_P(InstSve, frintn) { ptrue p0.s whilelo p1.s, xzr, x2 - dup z0.s, #15 - dup z1.s, #13 + fdup z0.s, #2.0 + fdup z1.s, #3.0 ld1w {z2.s}, p0/z, [x0, x1, lsl #2] frintn z0.s, p0/m, z2.s frintn z1.s, p1/m, z2.s )"); - std::vector results32A = {1, -42, 0, 0, 40, -685, 0, 108}; - std::vector results32B = {-34, -1, 0, 81, -126, 0, 702, 8}; - CHECK_NEON(0, int32_t, - fillNeonCombined(results32A, results32B, VL / 8)); - CHECK_NEON(1, int32_t, fillNeonCombined(results32A, {13}, VL / 8)); + std::vector results32A = {1.0f, -42.0f, 0.0f, 0.0f, + 42.0f, -685.0f, 0.0f, 108.0f}; + std::vector results32B = {-34.0f, -1.0f, 0.0f, 81.0f, + -126.0f, 0.0f, 702.0f, 8.0f}; + CHECK_NEON(0, float, fillNeonCombined(results32A, results32B, VL / 8)); + CHECK_NEON(1, float, fillNeonCombined(results32A, {3.0}, VL / 8)); // 64-bit initialHeapData_.resize(VL / 8); @@ -4118,18 +4119,18 @@ TEST_P(InstSve, frintn) { ptrue p0.d whilelo p1.d, xzr, x2 - dup z0.d, #15 - dup z1.d, #13 + fdup z0.d, #2.0 + fdup z1.d, #3.0 ld1d {z2.d}, p0/z, [x0, x1, lsl #3] frintn z0.d, p0/m, z2.d frintn z1.d, p1/m, z2.d )"); - std::vector results64A = {1, -42, 0, 0}; - std::vector results64B = {40, -685, -4, 108}; - CHECK_NEON(0, int64_t, - fillNeonCombined(results64A, results64B, VL / 8)); - CHECK_NEON(1, int64_t, fillNeonCombined(results64A, {13}, VL / 8)); + std::vector results64A = {1.0, -42.0, 0.0, 0.0}; + std::vector results64B = {40.0, -685.0, -4.0, 108.0}; + CHECK_NEON(0, double, + 
fillNeonCombined(results64A, results64B, VL / 8)); + CHECK_NEON(1, double, fillNeonCombined(results64A, {3.0}, VL / 8)); } TEST_P(InstSve, fsqrt) { diff --git a/test/unit/ArchitecturalRegisterFileSetTest.cc b/test/unit/ArchitecturalRegisterFileSetTest.cc new file mode 100644 index 0000000000..1529ef1cea --- /dev/null +++ b/test/unit/ArchitecturalRegisterFileSetTest.cc @@ -0,0 +1,41 @@ +#include "gtest/gtest.h" +#include "simeng/ArchitecturalRegisterFileSet.hh" + +namespace simeng { +namespace pipeline { + +class ArchitecturalRegisterFileSetTest : public ::testing::Test { + public: + ArchitecturalRegisterFileSetTest() + : physRegFileSet(regFileStruct), archRegFileSet(physRegFileSet) {} + + protected: + const std::vector regFileStruct = { + {8, 10}, {24, 15}, {256, 31}}; + + RegisterFileSet physRegFileSet; + + ArchitecturalRegisterFileSet archRegFileSet; +}; + +// Ensure we can read and write values to the architectural register file +TEST_F(ArchitecturalRegisterFileSetTest, readWrite) { + for (uint8_t i = 0; i < regFileStruct.size(); i++) { + const uint16_t regSize = regFileStruct[i].bytes; + const uint16_t maxRegTag = regFileStruct[i].quantity - 1; + const Register r0 = {i, 0}; + const Register rMax = {i, maxRegTag}; + + EXPECT_EQ(archRegFileSet.get(r0), RegisterValue(0, regSize)); + EXPECT_EQ(archRegFileSet.get(rMax), RegisterValue(0, regSize)); + + archRegFileSet.set(r0, RegisterValue(20, regSize)); + archRegFileSet.set(rMax, RegisterValue(40, regSize)); + + EXPECT_EQ(archRegFileSet.get(r0), RegisterValue(20, regSize)); + EXPECT_EQ(archRegFileSet.get(rMax), RegisterValue(40, regSize)); + } +} + +} // namespace pipeline +} // namespace simeng \ No newline at end of file diff --git a/test/unit/CMakeLists.txt b/test/unit/CMakeLists.txt index a3e400aad2..fd1e4f9882 100644 --- a/test/unit/CMakeLists.txt +++ b/test/unit/CMakeLists.txt @@ -1,25 +1,45 @@ set(TEST_SOURCES + aarch64/ArchInfoTest.cc + aarch64/ArchitectureTest.cc + aarch64/AuxiliaryFunctionsTest.cc + 
aarch64/ExceptionHandlerTest.cc + aarch64/InstructionTest.cc + riscv/ArchInfoTest.cc + riscv/ArchitectureTest.cc + riscv/ExceptionHandlerTest.cc + riscv/InstructionTest.cc pipeline/A64FXPortAllocatorTest.cc pipeline/BalancedPortAllocatorTest.cc - pipeline/ExecuteUnitTest.cc pipeline/DecodeUnitTest.cc + pipeline/DispatchIssueUnitTest.cc pipeline/ExecuteUnitTest.cc pipeline/FetchUnitTest.cc pipeline/LoadStoreQueueTest.cc + pipeline/M1PortAllocatorTest.cc + pipeline/MappedRegisterFileSetTest.cc pipeline/PipelineBufferTest.cc pipeline/RegisterAliasTableTest.cc + pipeline/RenameUnitTest.cc pipeline/ReorderBufferTest.cc pipeline/WritebackUnitTest.cc + ArchitecturalRegisterFileSetTest.cc + ElfTest.cc + FixedLatencyMemoryInterfaceTest.cc + FlatMemoryInterfaceTest.cc GenericPredictorTest.cc - ISATest.cc - RegisterValueTest.cc + OSTest.cc PoolTest.cc - ShiftValueTest.cc - LatencyMemoryInterfaceTest.cc + ProcessTest.cc + RegisterFileSetTest.cc + RegisterValueTest.cc + SpecialFileDirGenTest.cc ) add_executable(unittests ${TEST_SOURCES}) +configure_file(${capstone_SOURCE_DIR}/arch/AArch64/AArch64GenInstrInfo.inc AArch64GenInstrInfo.inc COPYONLY) +configure_file(${capstone_SOURCE_DIR}/arch/RISCV/RISCVGenInstrInfo.inc RISCVGenInstrInfo.inc COPYONLY) + target_include_directories(unittests PUBLIC ${CMAKE_CURRENT_BINARY_DIR}) target_include_directories(unittests PUBLIC ${PROJECT_SOURCE_DIR}/src/lib) target_link_libraries(unittests libsimeng) diff --git a/test/unit/ConfigInit.hh b/test/unit/ConfigInit.hh new file mode 100644 index 0000000000..32b3c6ef6a --- /dev/null +++ b/test/unit/ConfigInit.hh @@ -0,0 +1,18 @@ +#pragma once + +#include "simeng/config/SimInfo.hh" +#include "simeng/version.hh" + +namespace simeng { + +// This small class' purpose is to initialise the SimInfo config before the +// initialisation of a test class +class ConfigInit { + public: + ConfigInit(config::ISA isa, std::string configAdditions) { + config::SimInfo::generateDefault(isa, true); + 
config::SimInfo::addToConfig(configAdditions); + } +}; + +} // namespace simeng \ No newline at end of file diff --git a/test/unit/ElfTest.cc b/test/unit/ElfTest.cc new file mode 100644 index 0000000000..9635304bf3 --- /dev/null +++ b/test/unit/ElfTest.cc @@ -0,0 +1,67 @@ +#include "gmock/gmock.h" +#include "simeng/Elf.hh" +#include "simeng/version.hh" + +using ::testing::_; +using ::testing::HasSubstr; +using ::testing::Return; + +namespace simeng { + +class ElfTest : public testing::Test { + public: + ElfTest() {} + + protected: + const std::string knownElfFilePath = + SIMENG_SOURCE_DIR "/test/unit/data/stream-aarch64.elf"; + + const uint64_t known_entryPoint = 4206008; + const uint16_t known_e_phentsize = 56; + const uint16_t known_e_phnum = 6; + const uint64_t known_phdrTableAddress = 4194368; + const uint64_t known_processImageSize = 5040480; + + char* unwrappedProcImgPtr; +}; + +// Test that a valid ELF file can be created +TEST_F(ElfTest, validElf) { + Elf elf(knownElfFilePath, &unwrappedProcImgPtr); + + EXPECT_TRUE(elf.isValid()); + EXPECT_EQ(elf.getEntryPoint(), known_entryPoint); + EXPECT_EQ(elf.getPhdrEntrySize(), known_e_phentsize); + EXPECT_EQ(elf.getNumPhdr(), known_e_phnum); + EXPECT_EQ(elf.getPhdrTableAddress(), known_phdrTableAddress); + EXPECT_EQ(elf.getProcessImageSize(), known_processImageSize); +} + +// Test that wrong filepath results in invalid ELF +TEST_F(ElfTest, invalidElf) { + Elf elf(SIMENG_SOURCE_DIR "/test/bogus_file_path___--__--__", + &unwrappedProcImgPtr); + EXPECT_FALSE(elf.isValid()); +} + +// Test that non-ELF file is not accepted +TEST_F(ElfTest, nonElf) { + testing::internal::CaptureStderr(); + Elf elf(SIMENG_SOURCE_DIR "/test/unit/ElfTest.cc", &unwrappedProcImgPtr); + EXPECT_FALSE(elf.isValid()); + EXPECT_THAT(testing::internal::GetCapturedStderr(), + HasSubstr("[SimEng:Elf] Elf magic does not match")); +} + +// Check that 32-bit ELF is not accepted +TEST_F(ElfTest, format32Elf) { + testing::internal::CaptureStderr(); + Elf 
elf(SIMENG_SOURCE_DIR "/test/unit/data/stream.rv32ima.elf", + &unwrappedProcImgPtr); + EXPECT_FALSE(elf.isValid()); + EXPECT_THAT( + testing::internal::GetCapturedStderr(), + HasSubstr("[SimEng:Elf] Unsupported architecture detected in Elf")); +} + +} // namespace simeng \ No newline at end of file diff --git a/test/unit/FixedLatencyMemoryInterfaceTest.cc b/test/unit/FixedLatencyMemoryInterfaceTest.cc new file mode 100644 index 0000000000..f5ea0d7581 --- /dev/null +++ b/test/unit/FixedLatencyMemoryInterfaceTest.cc @@ -0,0 +1,141 @@ +#include "gtest/gtest.h" +#include "simeng/FixedLatencyMemoryInterface.hh" + +namespace { + +class FixedLatencyMemoryInterfaceTest + : public testing::TestWithParam { + public: + FixedLatencyMemoryInterfaceTest() + : memory(memoryData.data(), memorySize, GetParam()) {} + + protected: + static constexpr uint16_t memorySize = 4; + std::array memoryData = {(char)0xFE, (char)0xCA, (char)0xBA, + (char)0xAB}; + + simeng::RegisterValue value = {0xDEADBEEF, 4}; + simeng::RegisterValue value_oversized = {0xDEADBEEFDEADBEEF, 8}; + simeng::MemoryAccessTarget target = {0, 4}; + simeng::MemoryAccessTarget target_OutOfBound1 = {1000, 4}; + simeng::MemoryAccessTarget target_OutOfBound2 = {0, 8}; + + const std::string writeOverflowStr = + "Attempted to write beyond memory limit."; + + simeng::FixedLatencyMemoryInterface memory; +}; + +// Test that we can read data and it completes after n cycles. 
+TEST_P(FixedLatencyMemoryInterfaceTest, FixedReadData) { + // Read a 32-bit value + memory.requestRead(target, 1); + EXPECT_TRUE(memory.hasPendingRequests()); + + // Tick n-1 times - request should still be pending + uint16_t latency = GetParam(); + for (int n = 0; n < latency - 1; n++) { + memory.tick(); + EXPECT_TRUE(memory.hasPendingRequests()); + } + + // Tick again - request should have completed + memory.tick(); + EXPECT_FALSE(memory.hasPendingRequests()); + + auto entries = memory.getCompletedReads(); + EXPECT_EQ(entries.size(), 1); + EXPECT_EQ(entries[0].requestId, 1); + EXPECT_EQ(entries[0].data, simeng::RegisterValue(0xABBACAFE, 4)); + EXPECT_EQ(entries[0].target, target); +} + +// Test that we can write data and it completes after n cycles. +TEST_P(FixedLatencyMemoryInterfaceTest, FixedWriteData) { + // Write a 32-bit value to memory + memory.requestWrite(target, value); + EXPECT_TRUE(memory.hasPendingRequests()); + + // Tick n-1 times - request should still be pending + uint16_t latency = GetParam(); + for (int n = 0; n < latency - 1; n++) { + memory.tick(); + EXPECT_TRUE(memory.hasPendingRequests()); + } + + // Tick again - request should have completed + memory.tick(); + EXPECT_FALSE(memory.hasPendingRequests()); + EXPECT_EQ(reinterpret_cast(memoryData.data())[0], 0xDEADBEEF); +} + +// Test that out-of-bounds memory reads are correctly handled. 
+TEST_P(FixedLatencyMemoryInterfaceTest, OutofBoundsRead) { + // Create a target such that address + size will overflow + memory.requestRead(target_OutOfBound1, 1); + + // Create a regular out-of-bounds target + memory.requestRead(target_OutOfBound2, 2); + + // Tick n-1 times - request shouldn't have completed + uint16_t latency = GetParam(); + for (int n = 0; n < latency - 1; n++) { + memory.tick(); + EXPECT_TRUE(memory.hasPendingRequests()); + } + + // Tick again - request should have completed + memory.tick(); + EXPECT_FALSE(memory.hasPendingRequests()); + + auto entries = memory.getCompletedReads(); + EXPECT_EQ(entries.size(), 2); + + auto overflowResult = entries[0]; + EXPECT_EQ(overflowResult.requestId, 1); + EXPECT_FALSE(overflowResult.data); + EXPECT_EQ(overflowResult.target, target_OutOfBound1); + + overflowResult = entries[1]; + EXPECT_EQ(overflowResult.requestId, 2); + EXPECT_FALSE(overflowResult.data); + EXPECT_EQ(overflowResult.target, target_OutOfBound2); +} + +// Test that out-of-bounds memory writes are correctly handled. +TEST_P(FixedLatencyMemoryInterfaceTest, OutofBoundsWrite_1) { + // Create a target such that address + size will overflow + memory.requestWrite(target_OutOfBound1, value); + + // Tick n-1 times - request shouldn't have completed + uint16_t latency = GetParam(); + for (int n = 0; n < latency - 1; n++) { + memory.tick(); + EXPECT_TRUE(memory.hasPendingRequests()); + } + + // Tick again - simulation should have come to a stop + ASSERT_DEATH(memory.tick(), writeOverflowStr); +} + +// Test that out-of-bounds memory writes are correctly handled. 
+TEST_P(FixedLatencyMemoryInterfaceTest, OutofBoundsWrite_2) { + // Create a regular out-of-bounds target + memory.requestWrite(target_OutOfBound2, value_oversized); + + // Tick n-1 times - request shouldn't have completed + uint16_t latency = GetParam(); + for (int n = 0; n < latency - 1; n++) { + memory.tick(); + EXPECT_TRUE(memory.hasPendingRequests()); + } + + // Tick again - simulation should have come to a stop + ASSERT_DEATH(memory.tick(), writeOverflowStr); +} + +INSTANTIATE_TEST_SUITE_P(FixedLatencyMemoryInterfaceTests, + FixedLatencyMemoryInterfaceTest, + ::testing::Values(2, 4)); + +} // namespace diff --git a/test/unit/FlatMemoryInterfaceTest.cc b/test/unit/FlatMemoryInterfaceTest.cc new file mode 100644 index 0000000000..2348588ec5 --- /dev/null +++ b/test/unit/FlatMemoryInterfaceTest.cc @@ -0,0 +1,81 @@ +#include "gtest/gtest.h" +#include "simeng/FlatMemoryInterface.hh" + +namespace { + +class FlatMemoryInterfaceTest : public testing::Test { + public: + FlatMemoryInterfaceTest() : memory(memoryData.data(), memorySize) {} + + protected: + static constexpr uint16_t memorySize = 4; + std::array memoryData = {(char)0xFE, (char)0xCA, (char)0xBA, + (char)0xAB}; + + simeng::RegisterValue value = {0xDEADBEEF, 4}; + simeng::RegisterValue value_oversized = {0xDEADBEEFDEADBEEF, 8}; + simeng::MemoryAccessTarget target = {0, 4}; + simeng::MemoryAccessTarget target_OutOfBound1 = {1000, 4}; + simeng::MemoryAccessTarget target_OutOfBound2 = {0, 8}; + + const std::string writeOverflowStr = + "Attempted to write beyond memory limit."; + + simeng::FlatMemoryInterface memory; +}; + +// Test that we can read data and it completes after zero cycles. 
+TEST_F(FlatMemoryInterfaceTest, FixedReadData) { + // Read a 32-bit value + memory.requestRead(target, 1); + auto entries = memory.getCompletedReads(); + EXPECT_EQ(entries.size(), 1); + EXPECT_EQ(entries[0].requestId, 1); + EXPECT_EQ(entries[0].data, simeng::RegisterValue(0xABBACAFE, 4)); + EXPECT_EQ(entries[0].target, target); +} + +// Test that we can write data and it completes after zero cycles. +TEST_F(FlatMemoryInterfaceTest, FixedWriteData) { + // Write a 32-bit value to memory + memory.requestWrite(target, value); + EXPECT_EQ(reinterpret_cast(memoryData.data())[0], 0xDEADBEEF); +} + +// Test that out-of-bounds memory reads are correctly handled. +TEST_F(FlatMemoryInterfaceTest, OutofBoundsRead) { + // Create a target such that address + size will overflow + memory.requestRead(target_OutOfBound1, 1); + + // Create a regular out-of-bounds target + memory.requestRead(target_OutOfBound2, 2); + + auto entries = memory.getCompletedReads(); + EXPECT_EQ(entries.size(), 2); + + auto overflowResult = entries[0]; + EXPECT_EQ(overflowResult.requestId, 1); + EXPECT_FALSE(overflowResult.data); + EXPECT_EQ(overflowResult.target, target_OutOfBound1); + + overflowResult = entries[1]; + EXPECT_EQ(overflowResult.requestId, 2); + EXPECT_FALSE(overflowResult.data); + EXPECT_EQ(overflowResult.target, target_OutOfBound2); +} + +// Test that out-of-bounds memory writes are correctly handled. +TEST_F(FlatMemoryInterfaceTest, OutofBoundsWrite_1) { + // Create a target such that address + size will overflow + ASSERT_DEATH(memory.requestWrite(target_OutOfBound1, value), + writeOverflowStr); +} + +// Test that out-of-bounds memory writes are correctly handled. 
+TEST_F(FlatMemoryInterfaceTest, OutofBoundsWrite_2) { + // Create a regular out-of-bounds target + ASSERT_DEATH(memory.requestWrite(target_OutOfBound2, value_oversized), + writeOverflowStr); +} + +} // namespace diff --git a/test/unit/GenericPredictorTest.cc b/test/unit/GenericPredictorTest.cc index e5ba2a665e..898e7e93e2 100644 --- a/test/unit/GenericPredictorTest.cc +++ b/test/unit/GenericPredictorTest.cc @@ -157,4 +157,42 @@ TEST_F(GenericPredictorTest, GlobalIndexing) { predictor.update(0x1F, true, 0xBA, BranchType::Conditional); } +// Test Flush of RAS functionality +TEST_F(GenericPredictorTest, flush) { + simeng::config::SimInfo::addToConfig( + "{Branch-Predictor: {BTB-Tag-Bits: 11, Saturating-Count-Bits: 2, " + "Global-History-Length: 10, RAS-entries: 10, Fallback-Static-Predictor: " + "Always-Taken}}"); + auto predictor = simeng::GenericPredictor(); + // Add some entries to the RAS + auto prediction = predictor.predict(8, BranchType::SubroutineCall, 8); + EXPECT_TRUE(prediction.taken); + EXPECT_EQ(prediction.target, 16); + prediction = predictor.predict(24, BranchType::SubroutineCall, 8); + EXPECT_TRUE(prediction.taken); + EXPECT_EQ(prediction.target, 32); + prediction = predictor.predict(40, BranchType::SubroutineCall, 8); + EXPECT_TRUE(prediction.taken); + EXPECT_EQ(prediction.target, 48); + + // Start getting entries from RAS + prediction = predictor.predict(52, BranchType::Return, 0); + EXPECT_TRUE(prediction.taken); + EXPECT_EQ(prediction.target, 44); + prediction = predictor.predict(36, BranchType::Return, 0); + EXPECT_TRUE(prediction.taken); + EXPECT_EQ(prediction.target, 28); + + // Flush address + predictor.flush(36); + + // Continue getting entries from RAS + prediction = predictor.predict(20, BranchType::Return, 0); + EXPECT_TRUE(prediction.taken); + EXPECT_EQ(prediction.target, 28); + prediction = predictor.predict(16, BranchType::Return, 0); + EXPECT_TRUE(prediction.taken); + EXPECT_EQ(prediction.target, 12); +} + } // namespace simeng diff 
--git a/test/unit/ISATest.cc b/test/unit/ISATest.cc deleted file mode 100644 index e3fe5584a8..0000000000 --- a/test/unit/ISATest.cc +++ /dev/null @@ -1,27 +0,0 @@ -#include "gtest/gtest.h" -#include "simeng/RegisterFileSet.hh" -#include "simeng/arch/aarch64/Architecture.hh" -#include "simeng/arch/aarch64/Instruction.hh" - -namespace { - -// Test that we can create an AArch64 Architecture object -TEST(ISATest, CreateAArch64) { - simeng::kernel::Linux kernel; - simeng::config::SimInfo::addToConfig("{Core: {Micro-Operations: True}}"); - - std::unique_ptr isa = - std::make_unique(kernel); -} - -// Test that we can set a value in a register file set -TEST(ISATest, CreateRegisterFileSet) { - auto registerFileSet = simeng::RegisterFileSet({{8, 32}, {16, 32}, {1, 1}}); - auto reg = simeng::Register{simeng::arch::aarch64::RegisterType::GENERAL, 0}; - - registerFileSet.set(reg, static_cast(42)); - - EXPECT_TRUE(registerFileSet.get(reg)); -} - -} // namespace diff --git a/test/unit/LatencyMemoryInterfaceTest.cc b/test/unit/LatencyMemoryInterfaceTest.cc deleted file mode 100644 index e728b9bf76..0000000000 --- a/test/unit/LatencyMemoryInterfaceTest.cc +++ /dev/null @@ -1,63 +0,0 @@ -#include "gtest/gtest.h" -#include "simeng/FixedLatencyMemoryInterface.hh" - -namespace { - -// Test that we can write data and it completes after a number of cycles. 
-TEST(LatencyMemoryInterfaceTest, FixedWriteData) { - // Create a memory interface with a two cycle latency - uint32_t memoryData = 0; - simeng::FixedLatencyMemoryInterface memory( - reinterpret_cast(&memoryData), 4, 2); - EXPECT_FALSE(memory.hasPendingRequests()); - - // Write a 32-bit value to memory - // Should ignore the 7 cycle latency and opt for the interface defined latency - simeng::MemoryAccessTarget target = {0, 4}; - simeng::RegisterValue value = (uint32_t)0xDEADBEEF; - memory.requestWrite(target, value); - EXPECT_TRUE(memory.hasPendingRequests()); - - // Tick once - request should still be pending - memory.tick(); - EXPECT_TRUE(memory.hasPendingRequests()); - - // Tick again - request should have completed - memory.tick(); - EXPECT_FALSE(memory.hasPendingRequests()); - EXPECT_EQ(memoryData, 0xDEADBEEF); -} - -// Test that out-of-bounds memory reads are correctly handled. -TEST(LatencyMemoryInterfaceTest, OutofBoundsRead) { - uint32_t memoryData = 0; - simeng::FixedLatencyMemoryInterface memory( - reinterpret_cast(&memoryData), 4, 1); - - // Create a target such that address + size will overflow - simeng::MemoryAccessTarget overflowTarget = {UINT64_MAX, 4}; - memory.requestRead(overflowTarget, 1); - - // Create a regular out-of-bounds target - simeng::MemoryAccessTarget target = {0, 8}; - memory.requestRead(target, 2); - - // Tick once - request should have completed - memory.tick(); - EXPECT_FALSE(memory.hasPendingRequests()); - - auto entries = memory.getCompletedReads(); - EXPECT_EQ(entries.size(), 2); - - auto overflowResult = entries[0]; - EXPECT_EQ(overflowResult.requestId, 1); - EXPECT_EQ(overflowResult.data, simeng::RegisterValue()); - EXPECT_EQ(overflowResult.target, overflowTarget); - - auto result = entries[1]; - EXPECT_EQ(result.requestId, 2); - EXPECT_EQ(result.data, simeng::RegisterValue()); - EXPECT_EQ(result.target, target); -} - -} // namespace diff --git a/test/unit/MockCore.hh b/test/unit/MockCore.hh new file mode 100644 index 
0000000000..524f9f5d20 --- /dev/null +++ b/test/unit/MockCore.hh @@ -0,0 +1,20 @@ +#pragma once + +#include "gmock/gmock.h" +#include "simeng/Core.hh" + +namespace simeng { + +/** Mock implementation of the `Core` interface. */ +class MockCore : public Core { + public: + MOCK_METHOD0(tick, void()); + MOCK_CONST_METHOD0(hasHalted, bool()); + MOCK_CONST_METHOD0(getArchitecturalRegisterFileSet, + const ArchitecturalRegisterFileSet&()); + MOCK_CONST_METHOD0(getInstructionsRetiredCount, uint64_t()); + MOCK_CONST_METHOD0(getSystemTimer, uint64_t()); + MOCK_CONST_METHOD0(getStats, std::map()); +}; + +} // namespace simeng diff --git a/test/unit/MockInstruction.hh b/test/unit/MockInstruction.hh index 4d9757c24f..883c753413 100644 --- a/test/unit/MockInstruction.hh +++ b/test/unit/MockInstruction.hh @@ -8,7 +8,6 @@ namespace simeng { /** Mock implementation of the `Instruction` interface. */ class MockInstruction : public Instruction { public: - MOCK_CONST_METHOD0(getException, InstructionException()); MOCK_CONST_METHOD0(getSourceRegisters, const span()); MOCK_CONST_METHOD0(getSourceOperands, const span()); MOCK_CONST_METHOD0(getDestinationRegisters, const span()); @@ -22,6 +21,7 @@ class MockInstruction : public Instruction { MOCK_METHOD0(generateAddresses, span()); MOCK_METHOD2(supplyData, void(uint64_t address, const RegisterValue& data)); MOCK_CONST_METHOD0(getGeneratedAddresses, span()); + MOCK_CONST_METHOD0(hasAllData, bool()); MOCK_CONST_METHOD0(getData, span()); MOCK_CONST_METHOD0(checkEarlyBranchMisprediction, @@ -33,10 +33,10 @@ class MockInstruction : public Instruction { MOCK_CONST_METHOD0(isStoreData, bool()); MOCK_CONST_METHOD0(isLoad, bool()); MOCK_CONST_METHOD0(isBranch, bool()); - MOCK_CONST_METHOD0(isASIMD, bool()); - MOCK_CONST_METHOD0(isPredicate, bool()); MOCK_CONST_METHOD0(getGroup, uint16_t()); + MOCK_CONST_METHOD0(getLSQLatency, uint16_t()); + MOCK_METHOD0(getSupportedPorts, const std::vector&()); void setBranchResults(bool wasTaken, uint64_t 
targetAddress) { @@ -54,7 +54,13 @@ class MockInstruction : public Instruction { void setLatency(uint16_t cycles) { latency_ = cycles; } + void setLSQLatency(uint16_t cycles) { lsqExecutionLatency_ = cycles; } + void setStallCycles(uint16_t cycles) { stallCycles_ = cycles; } + + void setIsMicroOp(bool isMicroOp) { isMicroOp_ = isMicroOp; } + + void setIsLastMicroOp(bool isLastOp) { isLastMicroOp_ = isLastOp; } }; } // namespace simeng diff --git a/test/unit/MockPortAllocator.hh b/test/unit/MockPortAllocator.hh new file mode 100644 index 0000000000..19d7142b74 --- /dev/null +++ b/test/unit/MockPortAllocator.hh @@ -0,0 +1,21 @@ +#pragma once + +#include "gmock/gmock.h" +#include "simeng/pipeline/PortAllocator.hh" + +namespace simeng { +namespace pipeline { + +/** Mock implementation of the `PortAllocator` interface. */ +class MockPortAllocator : public pipeline::PortAllocator { + public: + MOCK_METHOD1(allocate, uint16_t(const std::vector& ports)); + MOCK_METHOD1(issued, void(uint16_t port)); + MOCK_METHOD1(deallocate, void(uint16_t port)); + MOCK_METHOD1(setRSSizeGetter, + void(std::function&)> rsSizes)); + MOCK_METHOD0(tick, void()); +}; + +} // namespace pipeline +} // namespace simeng diff --git a/test/unit/OSTest.cc b/test/unit/OSTest.cc new file mode 100644 index 0000000000..1b2f7803eb --- /dev/null +++ b/test/unit/OSTest.cc @@ -0,0 +1,77 @@ +#include "ConfigInit.hh" +#include "gtest/gtest.h" +#include "simeng/kernel/Linux.hh" +#include "simeng/kernel/LinuxProcess.hh" +#include "simeng/span.hh" + +namespace simeng { + +class OSTest : public testing::Test { + public: + OSTest() + : os(config::SimInfo::getConfig()["CPU-Info"]["Special-File-Dir-Path"] + .as()), + proc_elf(simeng::kernel::LinuxProcess(cmdLine)), + proc_hex(simeng::span(reinterpret_cast(demoHex), + sizeof(demoHex))) {} + + protected: + ConfigInit configInit = ConfigInit( + config::ISA::AArch64, + R"YAML({Process-Image: {Heap-Size: 1073741824, Stack-Size: 1048576}})YAML"); + + const std::vector 
cmdLine = { + SIMENG_SOURCE_DIR "/test/unit/data/stream-aarch64.elf"}; + + simeng::kernel::Linux os; + simeng::kernel::LinuxProcess proc_elf; + simeng::kernel::LinuxProcess proc_hex; + + // A simple program used to test the functionality of creating a process with + // a stream of hex instructions. + uint32_t demoHex[7] = { + 0x320C03E0, // orr w0, wzr, #1048576 + 0x320003E1, // orr w0, wzr, #1 + 0x71000400, // subs w0, w0, #1 + 0x54FFFFC1, // b.ne -8 + // .exit: + 0xD2800000, // mov x0, #0 + 0xD2800BC8, // mov x8, #94 + 0xD4000001, // svc #0 + }; +}; + +// These tests verify the functionality of both the `createProcess()` and +// `getInitialStackPointer()` functions. All other functions for this class are +// syscalls and are tested in the Regression suite. +TEST_F(OSTest, processElf_stackPointer) { + os.createProcess(proc_elf); + // cmdLine[0] length will change depending on the host system so final stack + // pointer needs to be calculated manually + // cmdLineSize + 1 for null separator + const uint64_t cmdLineSize = cmdLine[0].size() + 1; + // "OMP_NUM_THREADS=1" + 1 for null separator + const uint64_t envStringsSize = 18; + // Size of initial stack frame as per LinuxProcess.cc:createStack() + // - (17 push_backs) * 8 + // https://www.win.tue.nl/~aeb/linux/hh/stack-layout.html + const uint64_t stackFrameSize = 17 * 8; + // cmd + Env needs +1 for null separator + const uint64_t stackPointer = + proc_elf.getStackStart() - + kernel::alignToBoundary(cmdLineSize + envStringsSize + 1, 32) - + kernel::alignToBoundary(stackFrameSize, 32); + EXPECT_EQ(os.getInitialStackPointer(), stackPointer); + EXPECT_EQ(os.getInitialStackPointer(), proc_elf.getStackPointer()); +} + +TEST_F(OSTest, processHex_stackPointer) { + os.createProcess(proc_hex); + EXPECT_EQ(os.getInitialStackPointer(), 1074790240); + EXPECT_EQ(os.getInitialStackPointer(), proc_hex.getStackPointer()); +} + +// createProcess +// getInitialStackPointer + +} // namespace simeng diff --git 
a/test/unit/ProcessTest.cc b/test/unit/ProcessTest.cc new file mode 100644 index 0000000000..26858164e5 --- /dev/null +++ b/test/unit/ProcessTest.cc @@ -0,0 +1,119 @@ +#include "ConfigInit.hh" +#include "gtest/gtest.h" +#include "simeng/kernel/LinuxProcess.hh" +#include "simeng/version.hh" + +namespace simeng { + +class ProcessTest : public testing::Test { + public: + ProcessTest() {} + + protected: + ConfigInit configInit = ConfigInit( + config::ISA::AArch64, + R"YAML({Process-Image: {Heap-Size: 1073741824, Stack-Size: 1048576}})YAML"); + + const std::vector cmdLine = { + SIMENG_SOURCE_DIR "/test/unit/data/stream-aarch64.elf"}; + + // Program used when no executable is provided; counts down from + // 1024*1024, with an independent `orr` at the start of each branch. + uint32_t demoHex[7] = { + 0x320C03E0, // orr w0, wzr, #1048576 + 0x320003E1, // orr w0, wzr, #1 + 0x71000400, // subs w0, w0, #1 + 0x54FFFFC1, // b.ne -8 + // .exit: + 0xD2800000, // mov x0, #0 + 0xD2800BC8, // mov x8, #94 + 0xD4000001, // svc #0 + }; +}; + +TEST_F(ProcessTest, alignToBoundary) { + EXPECT_EQ(kernel::alignToBoundary(63, 64), 64); + EXPECT_EQ(kernel::alignToBoundary(1, 64), 64); + EXPECT_EQ(kernel::alignToBoundary(65, 64), 128); +} + +// Tests createProcess(), isValid(), and getPath() functions. +TEST_F(ProcessTest, createProcess_elf) { + kernel::LinuxProcess proc = kernel::LinuxProcess(cmdLine); + EXPECT_TRUE(proc.isValid()); + EXPECT_EQ(proc.getPath(), + SIMENG_SOURCE_DIR "/test/unit/data/stream-aarch64.elf"); +} + +// Tests createProcess(), isValid(), and getPath() functions. 
+TEST_F(ProcessTest, createProcess_hex) { + kernel::LinuxProcess proc = kernel::LinuxProcess( + span(reinterpret_cast(demoHex), sizeof(demoHex))); + EXPECT_TRUE(proc.isValid()); + EXPECT_EQ(proc.getPath(), "\0"); +} + +// Tests get{Heap, Stack, Mmap}Start() functions +TEST_F(ProcessTest, get_x_Start) { + kernel::LinuxProcess proc = kernel::LinuxProcess(cmdLine); + EXPECT_TRUE(proc.isValid()); + const uint64_t heapStart = 5040480; + uint64_t heapSize = + config::SimInfo::getConfig()["Process-Image"]["Heap-Size"].as(); + uint64_t stackSize = + config::SimInfo::getConfig()["Process-Image"]["Stack-Size"] + .as(); + EXPECT_EQ(proc.getHeapStart(), heapStart); + EXPECT_EQ(proc.getMmapStart(), + kernel::alignToBoundary(heapStart + ((heapSize + stackSize) / 2), + proc.getPageSize())); + EXPECT_EQ(proc.getStackStart(), heapStart + heapSize + stackSize); +} + +TEST_F(ProcessTest, getPageSize) { + kernel::LinuxProcess proc = kernel::LinuxProcess(cmdLine); + EXPECT_TRUE(proc.isValid()); + EXPECT_EQ(proc.getPageSize(), 4096); +} + +TEST_F(ProcessTest, getProcessImage) { + kernel::LinuxProcess proc = kernel::LinuxProcess(cmdLine); + EXPECT_TRUE(proc.isValid()); + EXPECT_NE(proc.getProcessImage(), nullptr); +} + +TEST_F(ProcessTest, getProcessImageSize) { + kernel::LinuxProcess proc = kernel::LinuxProcess(cmdLine); + EXPECT_TRUE(proc.isValid()); + EXPECT_EQ(proc.getProcessImageSize(), 1079830880); +} + +TEST_F(ProcessTest, getEntryPoint) { + kernel::LinuxProcess proc = kernel::LinuxProcess(cmdLine); + EXPECT_TRUE(proc.isValid()); + EXPECT_EQ(proc.getEntryPoint(), 4206008); +} + +TEST_F(ProcessTest, getStackPointer) { + kernel::LinuxProcess proc = kernel::LinuxProcess(cmdLine); + EXPECT_TRUE(proc.isValid()); + // cmdLine[0] length will change depending on the host system so final stack + // pointer needs to be calculated manually + // cmdLineSize + 1 for null separator + const uint64_t cmdLineSize = cmdLine[0].size() + 1; + // "OMP_NUM_THREADS=1" + 1 for null separator + const 
uint64_t envStringsSize = 18; + // Size of initial stack frame (17 push_backs) * 8 + const uint64_t stackFrameSize = 17 * 8; + // cmd + Env needs +1 for null separator + const uint64_t stackPointer = + proc.getStackStart() - + kernel::alignToBoundary(cmdLineSize + envStringsSize + 1, 32) - + kernel::alignToBoundary(stackFrameSize, 32); + EXPECT_EQ(proc.getStackPointer(), stackPointer); +} + +} // namespace simeng + +// getEntryPoint +// getStackPointer \ No newline at end of file diff --git a/test/unit/RegisterFileSetTest.cc b/test/unit/RegisterFileSetTest.cc new file mode 100644 index 0000000000..ed8485eb61 --- /dev/null +++ b/test/unit/RegisterFileSetTest.cc @@ -0,0 +1,48 @@ +#include "gtest/gtest.h" +#include "simeng/RegisterFileSet.hh" + +namespace simeng { +namespace pipeline { + +class RegisterFileSetTest : public ::testing::Test { + public: + RegisterFileSetTest() : regFileSet(regFileStruct) {} + + protected: + const std::vector regFileStruct = { + {8, 10}, {24, 15}, {256, 31}}; + + RegisterFileSet regFileSet; +}; + +// Ensure RegisterFileSet is constructed correctly +TEST_F(RegisterFileSetTest, validConstruction) { + for (uint8_t i = 0; i < regFileStruct.size(); i++) { + for (uint16_t j = 0; j < regFileStruct[i].quantity; j++) { + const Register reg = {i, j}; + EXPECT_EQ(regFileSet.get(reg), RegisterValue(0, regFileStruct[i].bytes)); + } + } +} + +// Ensure we can read and write values to the register file +TEST_F(RegisterFileSetTest, readWrite) { + for (uint8_t i = 0; i < regFileStruct.size(); i++) { + const uint16_t regSize = regFileStruct[i].bytes; + const uint16_t maxRegTag = regFileStruct[i].quantity - 1; + const Register r0 = {i, 0}; + const Register rMax = {i, maxRegTag}; + + EXPECT_EQ(regFileSet.get(r0), RegisterValue(0, regSize)); + EXPECT_EQ(regFileSet.get(rMax), RegisterValue(0, regSize)); + + regFileSet.set(r0, RegisterValue(20, regSize)); + regFileSet.set(rMax, RegisterValue(40, regSize)); + + EXPECT_EQ(regFileSet.get(r0), RegisterValue(20, 
regSize)); + EXPECT_EQ(regFileSet.get(rMax), RegisterValue(40, regSize)); + } +} + +} // namespace pipeline +} // namespace simeng \ No newline at end of file diff --git a/test/unit/ShiftValueTest.cc b/test/unit/ShiftValueTest.cc deleted file mode 100644 index 8cfe74e731..0000000000 --- a/test/unit/ShiftValueTest.cc +++ /dev/null @@ -1,33 +0,0 @@ -#include "gtest/gtest.h" -#include "simeng/arch/aarch64/Instruction.hh" - -namespace { - -TEST(ShiftValueTest, ROR) { - const auto ARM64_SFT_ROR = 5; - - // 32-bit - const uint32_t a = 0x0000FFFF; - EXPECT_EQ(simeng::arch::aarch64::shiftValue(a, ARM64_SFT_ROR, 16), - 0xFFFF0000); - - const uint32_t b = 0xFFFF0000; - EXPECT_EQ(simeng::arch::aarch64::shiftValue(b, ARM64_SFT_ROR, 31), - 0xFFFE0001); - - EXPECT_EQ(simeng::arch::aarch64::shiftValue(b, ARM64_SFT_ROR, 0), 0xFFFF0000); - - // 64-bit - const uint64_t c = 0x00000000FFFFFFFF; - EXPECT_EQ(simeng::arch::aarch64::shiftValue(c, ARM64_SFT_ROR, 32), - 0xFFFFFFFF00000000); - - const uint64_t d = 0xFFFFFFFF00000000; - EXPECT_EQ(simeng::arch::aarch64::shiftValue(d, ARM64_SFT_ROR, 63), - 0xFFFFFFFE00000001); - - EXPECT_EQ(simeng::arch::aarch64::shiftValue(d, ARM64_SFT_ROR, 0), - 0xFFFFFFFF00000000); -} - -} // namespace \ No newline at end of file diff --git a/test/unit/SpecialFileDirGenTest.cc b/test/unit/SpecialFileDirGenTest.cc new file mode 100644 index 0000000000..745750b90f --- /dev/null +++ b/test/unit/SpecialFileDirGenTest.cc @@ -0,0 +1,136 @@ +#include "ConfigInit.hh" +#include "gmock/gmock.h" +#include "simeng/SpecialFileDirGen.hh" +#include "simeng/version.hh" + +namespace simeng { + +#define TEST_SPEC_FILE_DIR SIMENG_SOURCE_DIR "/test/unit/specialFiles/" + +class SpecialFileDirGenTest : public testing::Test { + public: + SpecialFileDirGenTest() {} + + protected: + ConfigInit configInit = ConfigInit(config::ISA::AArch64, + R"YAML({ + CPU-Info: { + Generate-Special-Dir: True, + Special-File-Dir-Path: )YAML" TEST_SPEC_FILE_DIR R"YAML(, + Core-Count: 1, + 
Socket-Count: 1, + SMT: 1, + BogoMIPS: 200.00, + Features: fp asimd evtstrm sha1 sha2 crc32 atomics fphp asimdhp cpuid asimdrdm fcma dcpop sve, + CPU-Implementer: 0x46, + CPU-Architecture: 8, + CPU-Variant: 0x1, + CPU-Part: 0x001, + CPU-Revision: 0, + Package-Count: 1 + } + })YAML"); + + SpecialFileDirGen specFile; + + const std::vector>> + allFiles_names_Lines = { + std::pair>( + "proc/cpuinfo", + {"processor : 0", "BogoMIPS : 200.00", + "Features : fp asimd evtstrm sha1 sha2 " + "crc32 atomics fphp asimdhp cpuid " + "asimdrdm fcma dcpop sve", + "CPU implementer : 0x46", "CPU architecture: 8", + "CPU variant : 0x1", "CPU part : 0x001", + "CPU revision : 0", ""}), + std::pair>( + "proc/stat", + {"cpu 0 0 0 0 0 0 0 0 0 0", "cpu0 0 0 0 0 0 0 0 0 0 0", + "intr 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 " + "0 0 0 0 " + "0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 " + "0 0 0 0 0 " + "0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 " + "0 0 0 0 0 " + "0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 " + "0 0 0 0 0 " + "0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 " + "0 0 0 0 0 " + "0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0", + "ctxt 0", "btime 0", "processes 0", "procs_running 1", + "procs_blocked 0", "softirq 0 0 0 0 0 0 0 0 0 0 0"}), + std::pair>( + "sys/devices/system/cpu/cpu0/topology/core_id", {"0"}), + std::pair>( + "sys/devices/system/cpu/cpu0/topology/physical_package_id", + {"0"}), + std::pair>( + "sys/devices/system/cpu/online", {"0-0"})}; +}; + +// Test that we can generate and delete special files to a custom directory +// (i.e. 
the one defined in the YAML string above) +TEST_F(SpecialFileDirGenTest, genAndDelete) { + // Make sure files currently do not exist + for (int i = 0; i < allFiles_names_Lines.size(); i++) { + EXPECT_FALSE( + std::ifstream(TEST_SPEC_FILE_DIR + std::get<0>(allFiles_names_Lines[i])) + .good()); + } + + // Generate files + specFile.GenerateSFDir(); + + // Validate files exist and are correct + for (int i = 0; i < allFiles_names_Lines.size(); i++) { + EXPECT_TRUE( + std::ifstream(TEST_SPEC_FILE_DIR + std::get<0>(allFiles_names_Lines[i])) + .good()); + std::ifstream file(TEST_SPEC_FILE_DIR + + std::get<0>(allFiles_names_Lines[i])); + const std::vector& knownLines = + std::get<1>(allFiles_names_Lines[i]); + std::string line; + int numOfLines = 0; + while (std::getline(file, line)) { + if (numOfLines > knownLines.size()) { + break; + } + EXPECT_EQ(line, knownLines[numOfLines]); + numOfLines++; + } + EXPECT_EQ(numOfLines, knownLines.size()); + } + + // Delete files + specFile.RemoveExistingSFDir(); + + // Make sure files don't exist + for (int i = 0; i < allFiles_names_Lines.size(); i++) { + EXPECT_FALSE( + std::ifstream(TEST_SPEC_FILE_DIR + std::get<0>(allFiles_names_Lines[i])) + .good()); + } +} + +// Test that a non-existent non-default special file directory causes the user +// to be notified when generation is set to False +TEST_F(SpecialFileDirGenTest, doesntExist) { + // Reset SimInfo Config + ASSERT_DEATH( + config::SimInfo::addToConfig( + "CPU-Info: {Generate-Special-Dir: False, " + "Special-File-Dir-Path: " SIMENG_BUILD_DIR "/thisDoesntExistDir/" + ", Core-Count: 1, Socket-Count: 1, SMT: 1, BogoMIPS: 200.00, " + "Features: " + "fp asimd evtstrm sha1 sha2 crc32 atomics fphp asimdhp cpuid " + "asimdrdm " + "fcma dcpop sve, CPU-Implementer: 0x46, CPU-Architecture: 8, " + "CPU-Variant: 0x1, CPU-Part: 0x001, CPU-Revision: 0, Package-Count: " + "1}}"), + "- Special File Directory '" SIMENG_BUILD_DIR + "/thisDoesntExistDir/' does not exist"); +} + +} // namespace simeng 
\ No newline at end of file diff --git a/test/unit/aarch64/ArchInfoTest.cc b/test/unit/aarch64/ArchInfoTest.cc new file mode 100644 index 0000000000..13978639e5 --- /dev/null +++ b/test/unit/aarch64/ArchInfoTest.cc @@ -0,0 +1,74 @@ +#include "gtest/gtest.h" +#include "simeng/arch/aarch64/ArchInfo.hh" +#include "simeng/config/SimInfo.hh" +#include "simeng/version.hh" + +namespace simeng { +namespace arch { +namespace aarch64 { + +class AArch64ArchInfoTest : public ::testing::Test { + public: + AArch64ArchInfoTest() { + simeng::config::SimInfo::setConfig(SIMENG_SOURCE_DIR + "/configs/a64fx_SME.yaml"); + } + + protected: + const std::vector sysRegisterEnums = { + arm64_sysreg::ARM64_SYSREG_DCZID_EL0, + arm64_sysreg::ARM64_SYSREG_FPCR, + arm64_sysreg::ARM64_SYSREG_FPSR, + arm64_sysreg::ARM64_SYSREG_TPIDR_EL0, + arm64_sysreg::ARM64_SYSREG_MIDR_EL1, + arm64_sysreg::ARM64_SYSREG_CNTVCT_EL0, + arm64_sysreg::ARM64_SYSREG_PMCCNTR_EL0, + arm64_sysreg::ARM64_SYSREG_SVCR}; + + const std::vector archRegStruct = { + {8, 32}, + {256, 32}, + {32, 17}, + {1, 1}, + {8, static_cast(sysRegisterEnums.size())}, + {256, 64}}; + + const std::vector physRegStruct = { + {8, 96}, + {256, 128}, + {32, 48}, + {1, 128}, + {8, static_cast(sysRegisterEnums.size())}, + {256, 128}}; + + const std::vector physRegQuants = { + 96, 128, 48, 128, static_cast(sysRegisterEnums.size()), 128}; +}; + +// Test for the getSysRegEnums() function +TEST_F(AArch64ArchInfoTest, getSysRegEnums) { + ArchInfo info = ArchInfo(config::SimInfo::getConfig()); + EXPECT_EQ(info.getSysRegEnums(), sysRegisterEnums); +} + +// Test for the getArchRegStruct() function +TEST_F(AArch64ArchInfoTest, getArchRegStruct) { + ArchInfo info = ArchInfo(config::SimInfo::getConfig()); + EXPECT_EQ(info.getArchRegStruct(), archRegStruct); +} + +// Test for the getPhysRegStruct() function +TEST_F(AArch64ArchInfoTest, getPhysRegStruct) { + ArchInfo info = ArchInfo(config::SimInfo::getConfig()); + EXPECT_EQ(info.getPhysRegStruct(), 
physRegStruct); +} + +// Test for the getPhysRegQuantities() function +TEST_F(AArch64ArchInfoTest, getPhysRegQuantities) { + ArchInfo info = ArchInfo(config::SimInfo::getConfig()); + EXPECT_EQ(info.getPhysRegQuantities(), physRegQuants); +} + +} // namespace aarch64 +} // namespace arch +} // namespace simeng \ No newline at end of file diff --git a/test/unit/aarch64/ArchitectureTest.cc b/test/unit/aarch64/ArchitectureTest.cc new file mode 100644 index 0000000000..63b2805ce3 --- /dev/null +++ b/test/unit/aarch64/ArchitectureTest.cc @@ -0,0 +1,240 @@ +#include + +#include "../ConfigInit.hh" +#include "gtest/gtest.h" +#include "simeng/CoreInstance.hh" +#include "simeng/RegisterFileSet.hh" +#include "simeng/arch/aarch64/Architecture.hh" +#include "simeng/arch/riscv/Architecture.hh" +#include "simeng/span.hh" +#include "simeng/version.hh" + +namespace simeng { +namespace arch { +namespace aarch64 { + +// AArch64 Tests +class AArch64ArchitectureTest : public testing::Test { + public: + AArch64ArchitectureTest() + : kernel(config::SimInfo::getConfig()["CPU-Info"]["Special-File-Dir-Path"] + .as()) { + arch = std::make_unique(kernel); + kernel.createProcess(process); + } + + protected: + // Setting core model to complex OoO model to more verbosely test the + // Architecture class. 
+ ConfigInit configInit = ConfigInit(config::ISA::AArch64, R"YAML({ + Core: { + Simulation-Mode: outoforder, + Vector-Length: 512, + Streaming-Vector-Length: 128 + }, + Ports: { + '0': {Portname: Port 0, Instruction-Group-Support: [FP, SVE]}, + '1': {Portname: Port 1, Instruction-Group-Support: [PREDICATE]}, + '2': {Portname: Port 2, Instruction-Group-Support: [INT_SIMPLE, INT_MUL, STORE_DATA]}, + '3': {Portname: Port 3, Instruction-Group-Support: [FP_SIMPLE, FP_MUL, SVE_SIMPLE, SVE_MUL]}, + '4': {Portname: Port 4, Instruction-Group-Support: [INT_SIMPLE, INT_DIV_OR_SQRT]}, + '5': {Portname: Port 5, Instruction-Group-Support: [LOAD, STORE_ADDRESS, INT_SIMPLE_ARTH_NOSHIFT, INT_SIMPLE_LOGICAL_NOSHIFT, INT_SIMPLE_CMP]}, + '6': {Portname: Port 6, Instruction-Group-Support: [LOAD, STORE_ADDRESS, INT_SIMPLE_ARTH_NOSHIFT, INT_SIMPLE_LOGICAL_NOSHIFT, INT_SIMPLE_CMP]}, + '7': {Portname: Port 7, Instruction-Group-Support: [BRANCH]} + }, + Reservation-Stations: { + '0': {Size: 20, Dispatch-Rate: 2, Ports: [Port 0, Port 1, Port 2]}, + '1': {Size: 20, Dispatch-Rate: 2, Ports: [Port 3, Port 4]}, + '2': {Size: 10, Dispatch-Rate: 1, Ports: [Port 5]}, + '3': {Size: 10, Dispatch-Rate: 1, Ports: [Port 6]}, + '4': {Size: 19, Dispatch-Rate: 1, Ports: [Port 7]}, + }, + Execution-Units: { + '0': {Pipelined: True, Blocking-Groups: [INT_DIV_OR_SQRT, FP_DIV_OR_SQRT, SVE_DIV_OR_SQRT]}, + '1': {Pipelined: True, Blocking-Groups: [INT_DIV_OR_SQRT, FP_DIV_OR_SQRT, SVE_DIV_OR_SQRT]}, + '2': {Pipelined: True, Blocking-Groups: [INT_DIV_OR_SQRT, FP_DIV_OR_SQRT, SVE_DIV_OR_SQRT]}, + '3': {Pipelined: True, Blocking-Groups: [INT_DIV_OR_SQRT, FP_DIV_OR_SQRT, SVE_DIV_OR_SQRT]}, + '4': {Pipelined: True, Blocking-Groups: [INT_DIV_OR_SQRT, FP_DIV_OR_SQRT, SVE_DIV_OR_SQRT]}, + '5': {Pipelined: True, Blocking-Groups: [INT_DIV_OR_SQRT, FP_DIV_OR_SQRT, SVE_DIV_OR_SQRT]}, + '6': {Pipelined: True, Blocking-Groups: [INT_DIV_OR_SQRT, FP_DIV_OR_SQRT, SVE_DIV_OR_SQRT]}, + '7': {Pipelined: True, Blocking-Groups: 
[INT_DIV_OR_SQRT, FP_DIV_OR_SQRT, SVE_DIV_OR_SQRT]} + }, + Latencies: { + '0': {Instruction-Groups: [INT], Execution-Latency: 2, Execution-Throughput: 2}, + '1': {Instruction-Groups: [INT_SIMPLE_ARTH_NOSHIFT, INT_SIMPLE_LOGICAL_NOSHIFT, INT_SIMPLE_CVT], Execution-Latency: 1, Execution-Throughput: 1}, + '2': {Instruction-Groups: [INT_MUL], Execution-Latency: 5, Execution-Throughput: 1}, + '3': {Instruction-Groups: [INT_DIV_OR_SQRT], Execution-Latency: 41, Execution-Throughput: 41}, + '4': {Instruction-Groups: [SCALAR_SIMPLE, VECTOR_SIMPLE_LOGICAL, SVE_SIMPLE_LOGICAL, VECTOR_SIMPLE_CMP, SVE_SIMPLE_CMP], Execution-Latency: 4, Execution-Throughput: 1}, + '5': {Instruction-Groups: [FP_DIV_OR_SQRT], Execution-Latency: 29, Execution-Throughput: 29}, + '6': {Instruction-Groups: [VECTOR_SIMPLE, SVE_SIMPLE, SCALAR_SIMPLE_CVT, FP_MUL, SVE_MUL], Execution-Latency: 9, Execution-Throughput: 1}, + '7': {Instruction-Groups: [SVE_DIV_OR_SQRT], Execution-Latency: 98, Execution-Throughput: 98}, + '8': {Instruction-Groups: [PREDICATE], Execution-Latency: 3, Execution-Throughput: 1}, + '9': {Instruction-Groups: [LOAD_SCALAR, LOAD_VECTOR, STORE_ADDRESS_SCALAR, STORE_ADDRESS_VECTOR], Execution-Latency: 3, Execution-Throughput: 1}, + '10': {Instruction-Groups: [LOAD_SVE, STORE_ADDRESS_SVE], Execution-Latency: 6, Execution-Throughput: 1} + } + })YAML"); + + // fdivr z1.s, p0/m, z1.s, z0.s + std::array validInstrBytes = {0x01, 0x80, 0x8c, 0x65}; + std::array invalidInstrBytes = {0x20, 0x00, 0x02, 0x8c}; + + std::unique_ptr arch; + kernel::Linux kernel; + kernel::LinuxProcess process = kernel::LinuxProcess( + span((char*)validInstrBytes.data(), validInstrBytes.size())); +}; + +TEST_F(AArch64ArchitectureTest, predecode) { + // Test that mis-aligned instruction address results in error + MacroOp output; + uint8_t result = arch->predecode(validInstrBytes.data(), + validInstrBytes.size(), 0x7, output); + Instruction* aarch64Insn = reinterpret_cast(output[0].get()); + EXPECT_EQ(result, 1); + 
EXPECT_EQ(aarch64Insn->getInstructionAddress(), 0x7); + EXPECT_EQ(aarch64Insn->exceptionEncountered(), true); + EXPECT_EQ(aarch64Insn->getException(), InstructionException::MisalignedPC); + + // Test that an invalid instruction returns instruction with an exception + output = MacroOp(); + result = arch->predecode(invalidInstrBytes.data(), invalidInstrBytes.size(), + 0x8, output); + aarch64Insn = reinterpret_cast(output[0].get()); + EXPECT_EQ(result, 4); + EXPECT_EQ(aarch64Insn->getInstructionAddress(), 0x8); + EXPECT_EQ(aarch64Insn->exceptionEncountered(), true); + EXPECT_EQ(aarch64Insn->getException(), + InstructionException::EncodingUnallocated); + + // Test that an instruction can be properly decoded + output = MacroOp(); + result = arch->predecode(validInstrBytes.data(), validInstrBytes.size(), 0x4, + output); + EXPECT_EQ(result, 4); + EXPECT_EQ(output[0]->getInstructionAddress(), 0x4); + EXPECT_EQ(output[0]->exceptionEncountered(), false); +} + +TEST_F(AArch64ArchitectureTest, getSystemRegisterTag) { + // Test incorrect system register will fail + int32_t output = arch->getSystemRegisterTag(-1); + EXPECT_EQ(output, -1); + + // Test for correct behaviour + output = arch->getSystemRegisterTag(ARM64_SYSREG_DCZID_EL0); + EXPECT_EQ(output, 0); +} + +TEST_F(AArch64ArchitectureTest, handleException) { + // Get Instruction + MacroOp insn; + uint8_t bytes = arch->predecode(invalidInstrBytes.data(), + invalidInstrBytes.size(), 0x4, insn); + Instruction* aarch64Insn = reinterpret_cast(insn[0].get()); + EXPECT_EQ(bytes, 4); + EXPECT_EQ(aarch64Insn->getInstructionAddress(), 0x4); + EXPECT_EQ(aarch64Insn->exceptionEncountered(), true); + EXPECT_EQ(aarch64Insn->getException(), + InstructionException::EncodingUnallocated); + + // Get Core + std::string executablePath = ""; + std::vector executableArgs = {}; + std::unique_ptr coreInstance = + std::make_unique(executablePath, executableArgs); + const Core& core = *coreInstance->getCore(); + MemoryInterface& memInt = 
*coreInstance->getDataMemory(); + auto exceptionHandler = arch->handleException(insn[0], core, memInt); + + bool tickRes = exceptionHandler->tick(); + auto result = exceptionHandler->getResult(); + EXPECT_TRUE(tickRes); + EXPECT_TRUE(result.fatal); + // Instruction address for fatal exception is always 0. + EXPECT_EQ(result.instructionAddress, 0x0); +} + +TEST_F(AArch64ArchitectureTest, getInitialState) { + std::vector regs = { + {RegisterType::GENERAL, 31}, + {RegisterType::SYSTEM, + (uint16_t)arch->getSystemRegisterTag(ARM64_SYSREG_DCZID_EL0)}}; + std::vector regVals = {{kernel.getInitialStackPointer(), 8}, + {20, 8}}; + + arch::ProcessStateChange changes = arch->getInitialState(); + EXPECT_EQ(changes.type, arch::ChangeType::REPLACEMENT); + EXPECT_EQ(changes.modifiedRegisters, regs); + EXPECT_EQ(changes.modifiedRegisterValues, regVals); +} + +TEST_F(AArch64ArchitectureTest, getMaxInstructionSize) { + EXPECT_EQ(arch->getMaxInstructionSize(), 4); +} + +TEST_F(AArch64ArchitectureTest, getVectorLength) { + EXPECT_EQ(arch->getVectorLength(), 512); +} + +TEST_F(AArch64ArchitectureTest, getStreamingVectorLength) { + // Default SVL value is 128 + EXPECT_EQ(arch->getStreamingVectorLength(), 128); +} + +TEST_F(AArch64ArchitectureTest, updateSystemTimerRegisters) { + RegisterFileSet regFile = config::SimInfo::getArchRegStruct(); + + uint8_t vctCount = 0; + // In A64FX, Timer frequency = (2.5 * 1e9) / (100 * 1e6) = 18 + uint64_t vctModulo = + (config::SimInfo::getConfig()["Core"]["Clock-Frequency-GHz"].as() * + 1e9) / + (config::SimInfo::getConfig()["Core"]["Timer-Frequency-MHz"] + .as() * + 1e6); + for (int i = 0; i < 30; i++) { + vctCount += (i % vctModulo) == 0 ? 
1 : 0; + arch->updateSystemTimerRegisters(®File, i); + EXPECT_EQ( + regFile + .get({RegisterType::SYSTEM, (uint16_t)arch->getSystemRegisterTag( + ARM64_SYSREG_PMCCNTR_EL0)}) + .get(), + i); + EXPECT_EQ( + regFile + .get({RegisterType::SYSTEM, (uint16_t)arch->getSystemRegisterTag( + ARM64_SYSREG_CNTVCT_EL0)}) + .get(), + vctCount); + } +} + +TEST_F(AArch64ArchitectureTest, getExecutionInfo) { + MacroOp insn; + uint64_t bytes = arch->predecode(validInstrBytes.data(), + validInstrBytes.size(), 0x4, insn); + // Insn[0] = fdivr z1.s, p0/m, z1.s, z0.s + Instruction* aarch64Insn = reinterpret_cast(insn[0].get()); + EXPECT_EQ(bytes, 4); + EXPECT_EQ(aarch64Insn->getInstructionAddress(), 0x4); + EXPECT_EQ(aarch64Insn->exceptionEncountered(), false); + + ExecutionInfo info = arch->getExecutionInfo(*aarch64Insn); + + // Latencies and Port numbers from a64fx.yaml + EXPECT_EQ(info.latency, 98); + EXPECT_EQ(info.stallCycles, 98); + std::vector ports = {0}; + EXPECT_EQ(info.ports, ports); +} + +TEST_F(AArch64ArchitectureTest, get_set_SVCRVal) { + EXPECT_EQ(arch->getSVCRval(), 0); + arch->setSVCRval(3); + EXPECT_EQ(arch->getSVCRval(), 3); +} + +} // namespace aarch64 +} // namespace arch +} // namespace simeng diff --git a/test/unit/aarch64/AuxiliaryFunctionsTest.cc b/test/unit/aarch64/AuxiliaryFunctionsTest.cc new file mode 100644 index 0000000000..554280f944 --- /dev/null +++ b/test/unit/aarch64/AuxiliaryFunctionsTest.cc @@ -0,0 +1,632 @@ +#include "gtest/gtest.h" +#include "simeng/arch/aarch64/helpers/auxiliaryFunctions.hh" + +namespace simeng { +namespace arch { +namespace aarch64 { + +/** `nzcv` Tests */ +TEST(AArch64AuxiliaryFunctionTest, NzcvTest) { + EXPECT_EQ(nzcv(true, true, true, true), 0b00001111); + EXPECT_EQ(nzcv(false, false, false, false), 0b00000000); + EXPECT_EQ(nzcv(true, false, false, true), 0b00001001); + EXPECT_EQ(nzcv(false, true, false, false), 0b00000100); +} + +/** `addWithCarry` Tests */ +TEST(AArch64AuxiliaryFunctionTest, AddWithCarry) { + std::tuple 
u8Res = {111, 0b0010}; + EXPECT_EQ(addWithCarry(123, 244, false), u8Res); + + std::tuple u16Res = {0xFFFD, 0b1000}; + EXPECT_EQ(addWithCarry(0xFFF0, 0x000C, true), u16Res); + + std::tuple u32Res = {2147483649, 0b1001}; + EXPECT_EQ(addWithCarry(1, 2147483647, true), u32Res); + + std::tuple u64Res = {0, 0b0110}; + EXPECT_EQ(addWithCarry(0xFFFFFFFFFFFFFFFF, 1, false), u64Res); +} + +/** `bitfieldManipulate` Tests */ +TEST(AArch64AuxiliaryFunctionTest, BitfieldManipulate) { + // uint8 + EXPECT_EQ(bitfieldManipulate(0xFF, 12, 2, 1, false), 204); + EXPECT_EQ(bitfieldManipulate(16, 3, 0xFF, 24, false), 3); + EXPECT_EQ(bitfieldManipulate(0, 64, 4, 8, false), 64); + EXPECT_EQ(bitfieldManipulate(64, 0, 8, 4, false), 0); + + EXPECT_EQ(bitfieldManipulate(0xFF, 12, 2, 1, true), 204); + EXPECT_EQ(bitfieldManipulate(16, 3, 0xFF, 24, true), 3); + EXPECT_EQ(bitfieldManipulate(0, 64, 4, 8, true), 0); + EXPECT_EQ(bitfieldManipulate(64, 8, 8, 4, true), 0); + + // uint16 + EXPECT_EQ(bitfieldManipulate(0xFFFF, 12, 2, 1, false), 49164); + EXPECT_EQ(bitfieldManipulate(16, 3, 0xFF, 24, false), 3); + EXPECT_EQ(bitfieldManipulate(0, 64, 4, 8, false), 64); + EXPECT_EQ(bitfieldManipulate(64, 0, 8, 4, false), 0); + + EXPECT_EQ(bitfieldManipulate(0xFFFF, 12, 2, 1, true), 49164); + EXPECT_EQ(bitfieldManipulate(16, 3, 0xFF, 24, true), 3); + EXPECT_EQ(bitfieldManipulate(0, 64, 4, 8, true), 0); + EXPECT_EQ(bitfieldManipulate(64, 8, 8, 4, true), 8); + + // uint32 + EXPECT_EQ(bitfieldManipulate(0xFFFFFFFF, 12, 2, 1, false), + 3221225484); + EXPECT_EQ(bitfieldManipulate(16, 3, 0xFF, 24, false), 33); + EXPECT_EQ(bitfieldManipulate(0, 64, 4, 8, false), 64); + EXPECT_EQ(bitfieldManipulate(64, 0, 8, 4, false), 0); + + EXPECT_EQ(bitfieldManipulate(0xFFFFFFFF, 12, 2, 1, true), + 3221225484); + EXPECT_EQ(bitfieldManipulate(16, 3, 0xFF, 24, true), 33); + EXPECT_EQ(bitfieldManipulate(0, 64, 4, 8, true), 0); + EXPECT_EQ(bitfieldManipulate(64, 8, 8, 4, true), 8); + + // uint64 + 
EXPECT_EQ(bitfieldManipulate(0xFFFFFFFFFFFFFFFF, 12, 2, 1, false), + 13835058055282163724u); + EXPECT_EQ(bitfieldManipulate(16, 3, 0xFF, 24, false), 33); + EXPECT_EQ(bitfieldManipulate(0, 64, 4, 8, false), 64); + EXPECT_EQ(bitfieldManipulate(64, 0, 8, 4, false), 0); + + EXPECT_EQ(bitfieldManipulate(0xFFFFFFFFFFFFFFFF, 12, 2, 1, true), + 13835058055282163724u); + EXPECT_EQ(bitfieldManipulate(16, 3, 0xFF, 24, true), 33); + EXPECT_EQ(bitfieldManipulate(0, 64, 4, 8, true), 0); + EXPECT_EQ(bitfieldManipulate(64, 8, 8, 4, true), 8); +} + +/** `conditionHolds` Tests */ +TEST(AArch64AuxiliaryFunctionTest, ConditionHolds) { + // Run each condition at least twice, one which we expect to be true, one we + // expect to be false + + // Inverse False + // EQ/NE + EXPECT_TRUE(conditionHolds(0b0000, 0b0100)); + EXPECT_FALSE(conditionHolds(0b0000, 0b1011)); + + // CS/CC + EXPECT_TRUE(conditionHolds(0b0010, 0b0010)); + EXPECT_FALSE(conditionHolds(0b0010, 0b1101)); + + // MI/PL + EXPECT_TRUE(conditionHolds(0b0100, 0b1000)); + EXPECT_FALSE(conditionHolds(0b0100, 0b0111)); + + // VS/VC + EXPECT_TRUE(conditionHolds(0b0110, 0b0001)); + EXPECT_FALSE(conditionHolds(0b0110, 0b1110)); + + // HI/LS + EXPECT_TRUE(conditionHolds(0b1000, 0b1010)); + EXPECT_FALSE(conditionHolds(0b1000, 0b1111)); + EXPECT_FALSE(conditionHolds(0b1000, 0b1001)); + + // GE/LT + EXPECT_TRUE(conditionHolds(0b1010, 0b1001)); + EXPECT_TRUE(conditionHolds(0b1010, 0b0000)); + EXPECT_FALSE(conditionHolds(0b1010, 0b1000)); + + // GT/LE + EXPECT_TRUE(conditionHolds(0b1100, 0b1001)); + EXPECT_TRUE(conditionHolds(0b1100, 0b0000)); + EXPECT_FALSE(conditionHolds(0b1100, 0b0001)); + EXPECT_FALSE(conditionHolds(0b1100, 0b1000)); + EXPECT_FALSE(conditionHolds(0b1100, 0b1101)); + + // Condition of 0b111 always returns `true` + // AL + EXPECT_TRUE(conditionHolds(0b1110, 0b1111)); + EXPECT_TRUE(conditionHolds(0b1110, 0b0000)); + + // Inverse True + // EQ/NE + EXPECT_FALSE(conditionHolds(0b0001, 0b0100)); + 
EXPECT_TRUE(conditionHolds(0b0001, 0b1011)); + + // CS/CC + EXPECT_FALSE(conditionHolds(0b0011, 0b0010)); + EXPECT_TRUE(conditionHolds(0b0011, 0b1101)); + + // MI/PL + EXPECT_FALSE(conditionHolds(0b0101, 0b1000)); + EXPECT_TRUE(conditionHolds(0b0101, 0b0111)); + + // VS/VC + EXPECT_FALSE(conditionHolds(0b0111, 0b0001)); + EXPECT_TRUE(conditionHolds(0b0111, 0b1110)); + + // HI/LS + EXPECT_FALSE(conditionHolds(0b1001, 0b1010)); + EXPECT_TRUE(conditionHolds(0b1001, 0b1111)); + EXPECT_TRUE(conditionHolds(0b1001, 0b1001)); + + // GE/LT + EXPECT_FALSE(conditionHolds(0b1011, 0b1001)); + EXPECT_FALSE(conditionHolds(0b1011, 0b0000)); + EXPECT_TRUE(conditionHolds(0b1011, 0b1000)); + + // GT/LE + EXPECT_FALSE(conditionHolds(0b1101, 0b1001)); + EXPECT_FALSE(conditionHolds(0b1101, 0b0000)); + EXPECT_TRUE(conditionHolds(0b1101, 0b0001)); + EXPECT_TRUE(conditionHolds(0b1101, 0b1000)); + EXPECT_TRUE(conditionHolds(0b1101, 0b1101)); + + // AL + // Cond=0b111 and inverse of 1 always returns `true` + EXPECT_TRUE(conditionHolds(0b1111, 0b1111)); + EXPECT_TRUE(conditionHolds(0b1111, 0b0000)); +} + +/** `extendValue` Tests */ +TEST(AArch64AuxiliaryFunctionTest, ExtendValue) { + // Test special case + EXPECT_EQ(extendValue(123, ARM64_EXT_INVALID, 0), 123); + + // Results validated on XCI and A64FX hardware + EXPECT_EQ(extendValue(270, ARM64_EXT_UXTB, 3), 112); + EXPECT_EQ(extendValue(65560, ARM64_EXT_UXTH, 3), 192); + EXPECT_EQ(extendValue(0xFFFFFFFF, ARM64_EXT_UXTW, 3), 34359738360); + EXPECT_EQ(extendValue(0x0F0F0F0F0F0F0F01, ARM64_EXT_UXTX, 4), + 0xF0F0F0F0F0F0F010); + + EXPECT_EQ(extendValue(133, ARM64_EXT_SXTB, 3), -984); + EXPECT_EQ(extendValue(32768, ARM64_EXT_SXTH, 3), -262144); + EXPECT_EQ(extendValue(2147483648, ARM64_EXT_SXTW, 3), -17179869184); + EXPECT_EQ(extendValue(0x8000000000000000, ARM64_EXT_SXTX, 3), 0); +} + +/** `getNZCVfromPred` Tests */ +TEST(AArch64AuxiliaryFunctionTest, getNZCVfromPred) { + uint64_t vl = 128; + // VL 128 will only use array[0] + 
EXPECT_EQ(getNZCVfromPred( + {0, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF}, + vl, 1), + 0b0110); + EXPECT_EQ( + getNZCVfromPred({0xFFFFFFFFFFFFFFFF, 0, 0, 0x300000000000000C}, vl, 2), + 0b1000); + EXPECT_EQ(getNZCVfromPred( + {0xE000000000000000, 0xE000000000000000, 0xE000000000000000, 0}, + vl, 4), + 0b0010); + EXPECT_EQ(getNZCVfromPred({0, 0x8000000000000001, 0, 0}, vl, 8), 0b0110); + + vl = 256; + // VL 256 will only use array[0] + EXPECT_EQ(getNZCVfromPred( + {0, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF}, + vl, 1), + 0b0110); + EXPECT_EQ( + getNZCVfromPred({0xFFFFFFFFFFFFFFFF, 0, 0, 0x300000000000000C}, vl, 2), + 0b1000); + EXPECT_EQ(getNZCVfromPred( + {0xE000000000000000, 0xE000000000000000, 0xE000000000000000, 0}, + vl, 4), + 0b0010); + EXPECT_EQ(getNZCVfromPred({0, 0x8000000000000001, 0, 0}, vl, 8), 0b0110); + + vl = 512; + // VL 512 will only use array[0] + EXPECT_EQ(getNZCVfromPred( + {0, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF}, + vl, 1), + 0b0110); + EXPECT_EQ( + getNZCVfromPred({0xFFFFFFFFFFFFFFFF, 0, 0, 0x300000000000000C}, vl, 2), + 0b1000); + EXPECT_EQ(getNZCVfromPred( + {0xE000000000000000, 0xE000000000000000, 0xE000000000000000, 0}, + vl, 4), + 0b0010); + EXPECT_EQ(getNZCVfromPred({0, 0x8000000000000001, 0, 0}, vl, 8), 0b0110); + + vl = 1024; + // VL 1024 will only use array[0, 1] + EXPECT_EQ(getNZCVfromPred( + {0, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF}, + vl, 1), + 0b0000); + EXPECT_EQ( + getNZCVfromPred({0xFFFFFFFFFFFFFFFF, 0, 0, 0x300000000000000C}, vl, 2), + 0b1010); + EXPECT_EQ(getNZCVfromPred( + {0xE000000000000000, 0xE000000000000000, 0xE000000000000000, 0}, + vl, 4), + 0b0010); + EXPECT_EQ(getNZCVfromPred({0, 0x8000000000000000, 0, 0}, vl, 8), 0b0010); + + vl = 2048; + // VL 2048 will only use array[0, 1, 2, 3] + EXPECT_EQ(getNZCVfromPred( + {0, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF}, + vl, 1), + 0b0000); + EXPECT_EQ( + 
getNZCVfromPred({0xFFFFFFFFFFFFFFFF, 0, 0, 0x300000000000000C}, vl, 2), + 0b1010); + EXPECT_EQ(getNZCVfromPred( + {0xE000000000000000, 0xE000000000000000, 0xE000000000000000, 0}, + vl, 4), + 0b0010); + EXPECT_EQ(getNZCVfromPred({0, 0x8000000000000001, 0, 0}, vl, 8), 0b0010); +} + +/** `mulhi` Tests */ +TEST(AArch64AuxiliaryFunctionTest, Mulhi) { + EXPECT_EQ(mulhi(0xFFFFFFFFFFFFFFFF, 2), 1); + EXPECT_EQ(mulhi(1, 245), 0); + + EXPECT_EQ(mulhi(0xF000000000000000, 4), 3); + EXPECT_EQ(mulhi(0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF), 0xFFFFFFFFFFFFFFFE); + EXPECT_EQ(mulhi(0xFFFFFFFFFFFFFFFF, 0xFFFFFFFF00000000), 0xFFFFFFFEFFFFFFFF); + EXPECT_EQ(mulhi(0xFFFFFFFFFFFFFFFF, 0xFFFFFFFF), 0xFFFFFFFE); +} + +/** `sveGetPattern` Tests */ +TEST(AArch64AuxiliaryFunctionTest, sveGetPattern) { + uint16_t vl = 128; + EXPECT_EQ(sveGetPattern("", 64, vl), 2); + EXPECT_EQ(sveGetPattern("", 16, vl), 8); + EXPECT_EQ(sveGetPattern("all", 64, vl), 2); + EXPECT_EQ(sveGetPattern("all", 16, vl), 8); + EXPECT_EQ(sveGetPattern("notValid", 64, vl), 2); + EXPECT_EQ(sveGetPattern("notValid", 16, vl), 8); + + EXPECT_EQ(sveGetPattern("vl1", 64, vl), 1); + EXPECT_EQ(sveGetPattern("vl2", 64, vl), 2); + EXPECT_EQ(sveGetPattern("vl3", 64, vl), 0); + EXPECT_EQ(sveGetPattern("vl4", 64, vl), 0); + EXPECT_EQ(sveGetPattern("vl5", 64, vl), 0); + EXPECT_EQ(sveGetPattern("vl6", 64, vl), 0); + EXPECT_EQ(sveGetPattern("vl7", 64, vl), 0); + EXPECT_EQ(sveGetPattern("vl8", 64, vl), 0); + EXPECT_EQ(sveGetPattern("vl16", 64, vl), 0); + EXPECT_EQ(sveGetPattern("vl32", 64, vl), 0); + EXPECT_EQ(sveGetPattern("vl64", 64, vl), 0); + EXPECT_EQ(sveGetPattern("vl128", 64, vl), 0); + EXPECT_EQ(sveGetPattern("vl256", 64, vl), 0); + + EXPECT_EQ(sveGetPattern("mul4", 8, vl), 16); + EXPECT_EQ(sveGetPattern("mul3", 8, vl), 15); + + vl = 256; + EXPECT_EQ(sveGetPattern("", 64, vl), 4); + EXPECT_EQ(sveGetPattern("", 16, vl), 16); + EXPECT_EQ(sveGetPattern("all", 64, vl), 4); + EXPECT_EQ(sveGetPattern("all", 16, vl), 16); + 
EXPECT_EQ(sveGetPattern("notValid", 64, vl), 4); + EXPECT_EQ(sveGetPattern("notValid", 16, vl), 16); + + EXPECT_EQ(sveGetPattern("vl1", 64, vl), 1); + EXPECT_EQ(sveGetPattern("vl2", 64, vl), 2); + EXPECT_EQ(sveGetPattern("vl3", 64, vl), 3); + EXPECT_EQ(sveGetPattern("vl4", 64, vl), 4); + EXPECT_EQ(sveGetPattern("vl5", 64, vl), 0); + EXPECT_EQ(sveGetPattern("vl6", 64, vl), 0); + EXPECT_EQ(sveGetPattern("vl7", 64, vl), 0); + EXPECT_EQ(sveGetPattern("vl8", 64, vl), 0); + EXPECT_EQ(sveGetPattern("vl16", 64, vl), 0); + EXPECT_EQ(sveGetPattern("vl32", 64, vl), 0); + EXPECT_EQ(sveGetPattern("vl64", 64, vl), 0); + EXPECT_EQ(sveGetPattern("vl128", 64, vl), 0); + EXPECT_EQ(sveGetPattern("vl256", 64, vl), 0); + + EXPECT_EQ(sveGetPattern("mul4", 8, vl), 32); + EXPECT_EQ(sveGetPattern("mul3", 8, vl), 30); + + vl = 512; + EXPECT_EQ(sveGetPattern("", 64, vl), 8); + EXPECT_EQ(sveGetPattern("", 16, vl), 32); + EXPECT_EQ(sveGetPattern("all", 64, vl), 8); + EXPECT_EQ(sveGetPattern("all", 16, vl), 32); + EXPECT_EQ(sveGetPattern("notValid", 64, vl), 8); + EXPECT_EQ(sveGetPattern("notValid", 16, vl), 32); + + EXPECT_EQ(sveGetPattern("vl1", 64, vl), 1); + EXPECT_EQ(sveGetPattern("vl2", 64, vl), 2); + EXPECT_EQ(sveGetPattern("vl3", 64, vl), 3); + EXPECT_EQ(sveGetPattern("vl4", 64, vl), 4); + EXPECT_EQ(sveGetPattern("vl5", 64, vl), 5); + EXPECT_EQ(sveGetPattern("vl6", 64, vl), 6); + EXPECT_EQ(sveGetPattern("vl7", 64, vl), 7); + EXPECT_EQ(sveGetPattern("vl8", 64, vl), 8); + EXPECT_EQ(sveGetPattern("vl16", 64, vl), 0); + EXPECT_EQ(sveGetPattern("vl32", 64, vl), 0); + EXPECT_EQ(sveGetPattern("vl64", 64, vl), 0); + EXPECT_EQ(sveGetPattern("vl128", 64, vl), 0); + EXPECT_EQ(sveGetPattern("vl256", 64, vl), 0); + + EXPECT_EQ(sveGetPattern("mul4", 8, vl), 64); + EXPECT_EQ(sveGetPattern("mul3", 8, vl), 63); + + vl = 1024; + EXPECT_EQ(sveGetPattern("", 64, vl), 16); + EXPECT_EQ(sveGetPattern("", 16, vl), 64); + EXPECT_EQ(sveGetPattern("all", 64, vl), 16); + EXPECT_EQ(sveGetPattern("all", 16, vl), 
64); + EXPECT_EQ(sveGetPattern("notValid", 64, vl), 16); + EXPECT_EQ(sveGetPattern("notValid", 16, vl), 64); + + EXPECT_EQ(sveGetPattern("vl1", 64, vl), 1); + EXPECT_EQ(sveGetPattern("vl2", 64, vl), 2); + EXPECT_EQ(sveGetPattern("vl3", 64, vl), 3); + EXPECT_EQ(sveGetPattern("vl4", 64, vl), 4); + EXPECT_EQ(sveGetPattern("vl5", 64, vl), 5); + EXPECT_EQ(sveGetPattern("vl6", 64, vl), 6); + EXPECT_EQ(sveGetPattern("vl7", 64, vl), 7); + EXPECT_EQ(sveGetPattern("vl8", 64, vl), 8); + EXPECT_EQ(sveGetPattern("vl16", 64, vl), 16); + EXPECT_EQ(sveGetPattern("vl32", 64, vl), 0); + EXPECT_EQ(sveGetPattern("vl64", 64, vl), 0); + EXPECT_EQ(sveGetPattern("vl128", 64, vl), 0); + EXPECT_EQ(sveGetPattern("vl256", 64, vl), 0); + + EXPECT_EQ(sveGetPattern("mul4", 8, vl), 128); + EXPECT_EQ(sveGetPattern("mul3", 8, vl), 126); + + vl = 2048; + EXPECT_EQ(sveGetPattern("", 64, vl), 32); + EXPECT_EQ(sveGetPattern("", 16, vl), 128); + EXPECT_EQ(sveGetPattern("all", 64, vl), 32); + EXPECT_EQ(sveGetPattern("all", 16, vl), 128); + EXPECT_EQ(sveGetPattern("notValid", 64, vl), 32); + EXPECT_EQ(sveGetPattern("notValid", 16, vl), 128); + + EXPECT_EQ(sveGetPattern("vl1", 64, vl), 1); + EXPECT_EQ(sveGetPattern("vl2", 64, vl), 2); + EXPECT_EQ(sveGetPattern("vl3", 64, vl), 3); + EXPECT_EQ(sveGetPattern("vl4", 64, vl), 4); + EXPECT_EQ(sveGetPattern("vl5", 64, vl), 5); + EXPECT_EQ(sveGetPattern("vl6", 64, vl), 6); + EXPECT_EQ(sveGetPattern("vl7", 64, vl), 7); + EXPECT_EQ(sveGetPattern("vl8", 64, vl), 8); + EXPECT_EQ(sveGetPattern("vl16", 64, vl), 16); + EXPECT_EQ(sveGetPattern("vl32", 64, vl), 32); + EXPECT_EQ(sveGetPattern("vl64", 64, vl), 0); + EXPECT_EQ(sveGetPattern("vl128", 64, vl), 0); + EXPECT_EQ(sveGetPattern("vl256", 64, vl), 0); + + EXPECT_EQ(sveGetPattern("mul4", 8, vl), 256); + EXPECT_EQ(sveGetPattern("mul3", 8, vl), 255); +} + +/** `ShiftValue` Tests */ +TEST(AArch64AuxiliaryFunctionTest, ShiftValueTest_LSL) { + // 8-bit + const uint8_t a = 0x0F; + EXPECT_EQ(shiftValue(a, ARM64_SFT_LSL, 4), 
0xF0); + + const uint8_t b = 0xF0; + EXPECT_EQ(shiftValue(b, ARM64_SFT_LSL, 7), 0x00); + + EXPECT_EQ(shiftValue(b, ARM64_SFT_LSL, 0), b); + + // 16-bit + const uint16_t c = 0x00FF; + EXPECT_EQ(shiftValue(c, ARM64_SFT_LSL, 8), 0xFF00); + + const uint16_t d = 0xFF00; + EXPECT_EQ(shiftValue(d, ARM64_SFT_LSL, 15), 0x0000); + + EXPECT_EQ(shiftValue(d, ARM64_SFT_LSL, 0), d); + + // 32-bit + const uint32_t e = 0x0000FFFF; + EXPECT_EQ(shiftValue(e, ARM64_SFT_LSL, 16), 0xFFFF0000); + + const uint32_t f = 0xFFFF0000; + EXPECT_EQ(shiftValue(f, ARM64_SFT_LSL, 31), 0x00000000); + + EXPECT_EQ(shiftValue(f, ARM64_SFT_LSL, 0), f); + + // 64-bit + const uint64_t g = 0x00000000FFFFFFFF; + EXPECT_EQ(shiftValue(g, ARM64_SFT_LSL, 32), 0xFFFFFFFF00000000); + + const uint64_t h = 0xFFFFFFFF00000000; + EXPECT_EQ(shiftValue(h, ARM64_SFT_LSL, 63), 0x0000000000000000); + + EXPECT_EQ(shiftValue(h, ARM64_SFT_LSL, 0), h); +} + +TEST(AArch64AuxiliaryFunctionTest, ShiftValueTest_LSR) { + // 8-bit + const uint8_t a = 0x0F; + EXPECT_EQ(shiftValue(a, ARM64_SFT_LSR, 4), 0x00); + + const uint8_t b = 0xF0; + EXPECT_EQ(shiftValue(b, ARM64_SFT_LSR, 7), 0x01); + + EXPECT_EQ(shiftValue(b, ARM64_SFT_LSR, 0), b); + + // 16-bit + const uint16_t c = 0x00FF; + EXPECT_EQ(shiftValue(c, ARM64_SFT_LSR, 8), 0x0); + + const uint16_t d = 0xFF00; + EXPECT_EQ(shiftValue(d, ARM64_SFT_LSR, 15), 0x0001); + + EXPECT_EQ(shiftValue(d, ARM64_SFT_LSR, 0), d); + + // 32-bit + const uint32_t e = 0x0000FFFF; + EXPECT_EQ(shiftValue(e, ARM64_SFT_LSR, 16), 0x00000000); + + const uint32_t f = 0xFFFF0000; + EXPECT_EQ(shiftValue(f, ARM64_SFT_LSR, 31), 0x00000001); + + EXPECT_EQ(shiftValue(f, ARM64_SFT_LSR, 0), f); + + // 64-bit + const uint64_t g = 0x00000000FFFFFFFF; + EXPECT_EQ(shiftValue(g, ARM64_SFT_LSR, 32), 0x0000000000000000); + + const uint64_t h = 0xFFFFFFFF00000000; + EXPECT_EQ(shiftValue(h, ARM64_SFT_LSR, 63), 0x0000000000000001); + + EXPECT_EQ(shiftValue(h, ARM64_SFT_LSR, 0), h); +} + +TEST(AArch64AuxiliaryFunctionTest, 
ShiftValueTest_ASR) { + // 8-bit + const uint8_t a = 0x0F; + EXPECT_EQ(shiftValue(a, ARM64_SFT_ASR, 4), 0x00); + + const uint8_t b = 0xF0; + EXPECT_EQ(shiftValue(b, ARM64_SFT_ASR, 7), 0xFF); + + EXPECT_EQ(shiftValue(b, ARM64_SFT_ASR, 0), b); + + // 16-bit + const uint16_t c = 0x00FF; + EXPECT_EQ(shiftValue(c, ARM64_SFT_ASR, 8), 0x0000); + + const uint16_t d = 0xFF00; + EXPECT_EQ(shiftValue(d, ARM64_SFT_ASR, 15), 0xFFFF); + + EXPECT_EQ(shiftValue(d, ARM64_SFT_ASR, 0), d); + + // 32-bit + const uint32_t e = 0x0000FFFF; + EXPECT_EQ(shiftValue(e, ARM64_SFT_ASR, 16), 0x00000000); + + const uint32_t f = 0xFFFF0000; + EXPECT_EQ(shiftValue(f, ARM64_SFT_ASR, 31), 0xFFFFFFFF); + + EXPECT_EQ(shiftValue(f, ARM64_SFT_ASR, 0), f); + + // 64-bit + const uint64_t g = 0x00000000FFFFFFFF; + EXPECT_EQ(shiftValue(g, ARM64_SFT_ASR, 32), 0x0000000000000000); + + const uint64_t h = 0xFFFFFFFF00000000; + EXPECT_EQ(shiftValue(h, ARM64_SFT_ASR, 63), 0xFFFFFFFFFFFFFFFF); + + EXPECT_EQ(shiftValue(h, ARM64_SFT_ASR, 0), h); +} + +TEST(AArch64AuxiliaryFunctionTest, ShiftValueTest_ROR) { + // 8-bit + const uint8_t a = 0x0F; + EXPECT_EQ(shiftValue(a, ARM64_SFT_ROR, 4), 0xF0); + + const uint8_t b = 0xF0; + EXPECT_EQ(shiftValue(b, ARM64_SFT_ROR, 7), 0xE1); + + EXPECT_EQ(shiftValue(b, ARM64_SFT_ROR, 0), b); + + // 16-bit + const uint16_t c = 0x00FF; + EXPECT_EQ(shiftValue(c, ARM64_SFT_ROR, 8), 0xFF00); + + const uint16_t d = 0xFF00; + EXPECT_EQ(shiftValue(d, ARM64_SFT_ROR, 15), 0xFE01); + + EXPECT_EQ(shiftValue(d, ARM64_SFT_ROR, 0), d); + + // 32-bit + const uint32_t e = 0x0000FFFF; + EXPECT_EQ(shiftValue(e, ARM64_SFT_ROR, 16), 0xFFFF0000); + + const uint32_t f = 0xFFFF0000; + EXPECT_EQ(shiftValue(f, ARM64_SFT_ROR, 31), 0xFFFE0001); + + EXPECT_EQ(shiftValue(f, ARM64_SFT_ROR, 0), f); + + // 64-bit + const uint64_t g = 0x00000000FFFFFFFF; + EXPECT_EQ(shiftValue(g, ARM64_SFT_ROR, 32), 0xFFFFFFFF00000000); + + const uint64_t h = 0xFFFFFFFF00000000; + EXPECT_EQ(shiftValue(h, ARM64_SFT_ROR, 63), 
0xFFFFFFFE00000001); + + EXPECT_EQ(shiftValue(h, ARM64_SFT_ROR, 0), h); +} + +TEST(AArch64AuxiliaryFunctionTest, ShiftValueTest_MSL) { + // 8-bit + const uint8_t a = 0x0F; + EXPECT_EQ(shiftValue(a, ARM64_SFT_MSL, 4), 0xFF); + + const uint8_t b = 0xF0; + EXPECT_EQ(shiftValue(b, ARM64_SFT_MSL, 7), 0x7F); + + EXPECT_EQ(shiftValue(b, ARM64_SFT_MSL, 0), b); + + // 16-bit + const uint16_t c = 0x00FF; + EXPECT_EQ(shiftValue(c, ARM64_SFT_MSL, 8), 0xFFFF); + + const uint16_t d = 0xFF00; + EXPECT_EQ(shiftValue(d, ARM64_SFT_MSL, 15), 0x7FFF); + + EXPECT_EQ(shiftValue(d, ARM64_SFT_MSL, 0), d); + + // 32-bit + const uint32_t e = 0x0000FFFF; + EXPECT_EQ(shiftValue(e, ARM64_SFT_MSL, 16), 0xFFFFFFFF); + + const uint32_t f = 0xFFFF0000; + EXPECT_EQ(shiftValue(f, ARM64_SFT_MSL, 31), 0x7FFFFFFF); + + EXPECT_EQ(shiftValue(f, ARM64_SFT_MSL, 0), f); + + // 64-bit + const uint64_t g = 0x00000000FFFFFFFF; + EXPECT_EQ(shiftValue(g, ARM64_SFT_MSL, 32), 0xFFFFFFFFFFFFFFFF); + + const uint64_t h = 0xFFFFFFFF00000000; + EXPECT_EQ(shiftValue(h, ARM64_SFT_MSL, 63), 0x7FFFFFFFFFFFFFFF); + + EXPECT_EQ(shiftValue(h, ARM64_SFT_MSL, 0), h); +} + +TEST(AArch64AuxiliaryFunctionTest, ShiftValueTest_INVALID) { + // 8-bit + const uint8_t a = 0x0F; + EXPECT_EQ(shiftValue(a, ARM64_SFT_INVALID, 4), a); + + const uint8_t b = 0xF0; + EXPECT_EQ(shiftValue(b, ARM64_SFT_INVALID, 7), b); + + EXPECT_EQ(shiftValue(b, ARM64_SFT_INVALID, 0), b); + + // 16-bit + const uint16_t c = 0x00FF; + EXPECT_EQ(shiftValue(c, ARM64_SFT_INVALID, 8), c); + + const uint16_t d = 0xFF00; + EXPECT_EQ(shiftValue(d, ARM64_SFT_INVALID, 15), d); + + EXPECT_EQ(shiftValue(d, ARM64_SFT_INVALID, 0), d); + + // 32-bit + const uint32_t e = 0x0000FFFF; + EXPECT_EQ(shiftValue(e, ARM64_SFT_INVALID, 16), e); + + const uint32_t f = 0xFFFF0000; + EXPECT_EQ(shiftValue(f, ARM64_SFT_INVALID, 31), f); + + EXPECT_EQ(shiftValue(f, ARM64_SFT_INVALID, 0), f); + + // 64-bit + const uint64_t g = 0x00000000FFFFFFFF; + EXPECT_EQ(shiftValue(g, ARM64_SFT_INVALID, 
32), g); + + const uint64_t h = 0xFFFFFFFF00000000; + EXPECT_EQ(shiftValue(h, ARM64_SFT_INVALID, 63), h); + + EXPECT_EQ(shiftValue(h, ARM64_SFT_INVALID, 0), h); +} + +} // namespace aarch64 +} // namespace arch +} // namespace simeng \ No newline at end of file diff --git a/test/unit/aarch64/ExceptionHandlerTest.cc b/test/unit/aarch64/ExceptionHandlerTest.cc new file mode 100644 index 0000000000..72ce6f1a22 --- /dev/null +++ b/test/unit/aarch64/ExceptionHandlerTest.cc @@ -0,0 +1,708 @@ +#include "../ConfigInit.hh" +#include "../MockCore.hh" +#include "../MockInstruction.hh" +#include "../MockMemoryInterface.hh" +#include "gmock/gmock.h" +#include "simeng/ArchitecturalRegisterFileSet.hh" +#include "simeng/arch/aarch64/Architecture.hh" +#include "simeng/arch/aarch64/ExceptionHandler.hh" +#include "simeng/arch/aarch64/Instruction.hh" + +namespace simeng { +namespace arch { +namespace aarch64 { + +using ::testing::HasSubstr; +using ::testing::Return; +using ::testing::ReturnRef; + +class AArch64ExceptionHandlerTest : public ::testing::Test { + public: + AArch64ExceptionHandlerTest() + : kernel(config::SimInfo::getConfig()["CPU-Info"]["Special-File-Dir-Path"] + .as()), + arch(kernel), + physRegFileSet(config::SimInfo::getArchRegStruct()), + archRegFileSet(physRegFileSet) {} + + protected: + ConfigInit configInit = ConfigInit(config::ISA::AArch64, ""); + + MockCore core; + MockMemoryInterface memory; + kernel::Linux kernel; + Architecture arch; + + RegisterFileSet physRegFileSet; + ArchitecturalRegisterFileSet archRegFileSet; + + // fdivr z1.s, p0/m, z1.s, z0.s --- Just need a valid instruction to hijack + const std::array validInstrBytes = {0x01, 0x80, 0x8c, 0x65}; + + /** Helper constants for AArch64 general-purpose registers. 
*/ + static constexpr Register R0 = {RegisterType::GENERAL, 0}; + static constexpr Register R1 = {RegisterType::GENERAL, 1}; + static constexpr Register R2 = {RegisterType::GENERAL, 2}; + static constexpr Register R3 = {RegisterType::GENERAL, 3}; + static constexpr Register R4 = {RegisterType::GENERAL, 4}; + static constexpr Register R5 = {RegisterType::GENERAL, 5}; + static constexpr Register R8 = {RegisterType::GENERAL, 8}; +}; + +// The following exceptions are tested in /test/regression/aarch64/Exception.cc +// - InstructionException::StreamingModeUpdate, +// - InstructionException::ZAregisterStatusUpdate, +// - InstructionException::SMZAUpdate +// All system calls are tested in /test/regression/aarch64/Syscall.cc + +// Test that a syscall is processed sucessfully +TEST_F(AArch64ExceptionHandlerTest, testSyscall) { + // Create "syscall" instruction + uint64_t insnAddr = 0x4; + MacroOp uops; + arch.predecode(validInstrBytes.data(), validInstrBytes.size(), insnAddr, + uops); + InstructionException exception = InstructionException::SupervisorCall; + std::shared_ptr insn = std::make_shared( + arch, static_cast(uops[0].get())->getMetadata(), exception); + insn->setInstructionAddress(insnAddr); + + // Setup register file for `uname` syscall (chosen as minimal functionality) + archRegFileSet.set(R0, RegisterValue(1234, 8)); + archRegFileSet.set(R8, RegisterValue(160, 8)); + + // Create ExceptionHandler + ExceptionHandler handler(insn, core, memory, kernel); + + // Tick exceptionHandler + ON_CALL(core, getArchitecturalRegisterFileSet()) + .WillByDefault(ReturnRef(archRegFileSet)); + EXPECT_CALL(core, getArchitecturalRegisterFileSet()).Times(1); + bool retVal = handler.tick(); + ExceptionResult result = handler.getResult(); + + EXPECT_TRUE(retVal); + EXPECT_FALSE(result.fatal); + EXPECT_EQ(result.instructionAddress, insnAddr + 4); + EXPECT_EQ(result.stateChange.type, ChangeType::REPLACEMENT); + std::vector modRegs = {R0}; + 
EXPECT_EQ(result.stateChange.modifiedRegisters, modRegs); + std::vector modRegVals = {{0ull, 8}}; + EXPECT_EQ(result.stateChange.modifiedRegisterValues, modRegVals); + std::vector modMemTargets = {{1234, 6}, + {1234 + 65, 25}, + {1234 + (65 * 2), 7}, + {1234 + (65 * 3), 39}, + {1234 + (65 * 4), 8}}; + EXPECT_EQ(result.stateChange.memoryAddresses, modMemTargets); + std::vector modMemVals = { + RegisterValue("Linux"), RegisterValue("simeng.hpc.cs.bris.ac.uk"), + RegisterValue("4.14.0"), + RegisterValue("#1 SimEng Mon Apr 29 16:28:37 UTC 2019"), + RegisterValue("aarch64")}; + EXPECT_EQ(result.stateChange.memoryAddressValues, modMemVals); +} + +// Test that `readStringThen()` operates as expected +TEST_F(AArch64ExceptionHandlerTest, readStringThen) { + // Create new mock instruction and ExceptionHandler + std::shared_ptr uopPtr(new MockInstruction); + ExceptionHandler handler(uopPtr, core, memory, kernel); + + // Initialise variables + size_t retVal = 0; + char* buffer; + buffer = (char*)malloc(256); + for (int i = 0; i < 256; i++) { + buffer[i] = 'q'; + } + uint64_t addr = 1024; + int maxLen = kernel::Linux::LINUX_PATH_MAX; + + MemoryAccessTarget target1 = {addr, 1}; + MemoryReadResult res1 = {target1, RegisterValue(0xAB, 1), 1}; + span res1Span = span(&res1, 1); + + MemoryAccessTarget target2 = {addr + 1, 1}; + MemoryReadResult res2 = {target2, RegisterValue(static_cast('\0'), 1), + 1}; + span res2Span = span(&res2, 1); + + // On first call to readStringThen, expect return of false and retVal to still + // be 0, and buffer to be filled with `q` + MemoryAccessTarget tar = {addr, 1}; + EXPECT_CALL(memory, requestRead(tar, 0)).Times(1); + bool outcome = + handler.readStringThen(buffer, addr, maxLen, [&retVal](auto length) { + retVal = length; + return true; + }); + EXPECT_FALSE(outcome); + EXPECT_EQ(retVal, 0); + for (int i = 0; i < 256; i++) { + EXPECT_EQ(buffer[i], 'q'); + } + + // ResumeHandling (called on tick()) should now be set to `readStringThen()` + // so call 
this for our second pass. + ON_CALL(memory, getCompletedReads()) + .WillByDefault(Return(span())); + EXPECT_CALL(memory, getCompletedReads()).Times(1); + outcome = handler.tick(); + // No memory reads completed yet so again expect to return false and no change + // to `retval` or buffer + EXPECT_FALSE(outcome); + EXPECT_EQ(retVal, 0); + for (int i = 0; i < 256; i++) { + EXPECT_EQ(buffer[i], 'q'); + } + + // Call tick() again, but mimic a memory read completing + tar = {addr + 1, 1}; + ON_CALL(memory, getCompletedReads()).WillByDefault(Return(res1Span)); + EXPECT_CALL(memory, getCompletedReads()).Times(1); + EXPECT_CALL(memory, requestRead(tar, 0)).Times(1); + outcome = handler.tick(); + // Completed read but still not complete, so outcome should be false, retVal + // unchanged, but some data in the buffer + EXPECT_FALSE(outcome); + EXPECT_EQ(retVal, 0); + for (int i = 0; i < 256; i++) { + if (i == 0) { + EXPECT_EQ(buffer[i], (char)0xAB); + } else { + EXPECT_EQ(buffer[i], 'q'); + } + } + + // Call tick() for a final time, getting the final read result + ON_CALL(memory, getCompletedReads()).WillByDefault(Return(res2Span)); + EXPECT_CALL(memory, getCompletedReads()).Times(1); + outcome = handler.tick(); + // End of string '\0' found so expect `then()` to have been called, the + // outcome to be true, and the buffer again to have updated + EXPECT_TRUE(outcome); + EXPECT_EQ(retVal, 1); + for (int i = 0; i < 256; i++) { + if (i == 0) { + EXPECT_EQ(buffer[i], (char)0xAB); + } else if (i == 1) { + EXPECT_EQ(buffer[i], '\0'); + } else { + EXPECT_EQ(buffer[i], 'q'); + } + } +} + +// Test that in `readStringThen()` if max length is 0, then is called straight +// away +TEST_F(AArch64ExceptionHandlerTest, readStringThen_maxLen0) { + // Create new mock instruction and ExceptionHandler + std::shared_ptr uopPtr(new MockInstruction); + ExceptionHandler handler(uopPtr, core, memory, kernel); + size_t retVal = 100; + char* buffer; + buffer = (char*)malloc(256); + for (int i = 0; i < 
256; i++) { + buffer[i] = 'q'; + } + uint64_t addr = 1024; + int maxLen = 0; + + bool outcome = + handler.readStringThen(buffer, addr, maxLen, [&retVal](auto length) { + retVal = length; + return true; + }); + EXPECT_TRUE(outcome); + EXPECT_EQ(retVal, -1); + for (int i = 0; i < 256; i++) { + EXPECT_EQ(buffer[i], 'q'); + } +} + +// Test that in `readStringThen()` if max length has been met, then() is called +// and no more string is fetched +TEST_F(AArch64ExceptionHandlerTest, readStringThen_maxLenReached) { + // Create new mock instruction and ExceptionHandler + std::shared_ptr uopPtr(new MockInstruction); + ExceptionHandler handler(uopPtr, core, memory, kernel); + + // Initialise variables + size_t retVal = 100; + char* buffer; + buffer = (char*)malloc(256); + for (int i = 0; i < 256; i++) { + buffer[i] = 'q'; + } + uint64_t addr = 1024; + int maxLen = 1; + + MemoryAccessTarget target1 = {addr, 1}; + MemoryReadResult res1 = {target1, RegisterValue(0xAB, 1), 1}; + span res1Span = span(&res1, 1); + + // On first call to readStringThen, expect return of false and retVal to still + // be 0, and buffer to be filled with `q` + MemoryAccessTarget tar = {addr, 1}; + EXPECT_CALL(memory, requestRead(tar, 0)).Times(1); + bool outcome = + handler.readStringThen(buffer, addr, maxLen, [&retVal](auto length) { + retVal = length; + return true; + }); + EXPECT_FALSE(outcome); + EXPECT_EQ(retVal, 100); + for (int i = 0; i < 256; i++) { + EXPECT_EQ(buffer[i], 'q'); + } + + // ResumeHandling (called on tick()) should now be set to `readStringThen()` + // so call this for our second pass. 
+ ON_CALL(memory, getCompletedReads()) + .WillByDefault(Return(span())); + EXPECT_CALL(memory, getCompletedReads()).Times(1); + outcome = handler.tick(); + // No memory reads completed yet so again expect to return false and no change + // to `retval` or buffer + EXPECT_FALSE(outcome); + EXPECT_EQ(retVal, 100); + for (int i = 0; i < 256; i++) { + EXPECT_EQ(buffer[i], 'q'); + } + + // Call tick() again, but mimic a memory read completing + ON_CALL(memory, getCompletedReads()).WillByDefault(Return(res1Span)); + EXPECT_CALL(memory, getCompletedReads()).Times(1); + outcome = handler.tick(); + // Completed read and maxLength reached. Expect then() to have been called, + // the outcome to be true, and the buffer to have updated. RetVal should be + // maxLength + EXPECT_TRUE(outcome); + EXPECT_EQ(retVal, 1); + for (int i = 0; i < 256; i++) { + if (i == 0) { + EXPECT_EQ(buffer[i], (char)0xAB); + } else { + EXPECT_EQ(buffer[i], 'q'); + } + } +} + +// Test that `readBufferThen()` operates as expected +TEST_F(AArch64ExceptionHandlerTest, readBufferThen) { + // Create new mock instruction and ExceptionHandler + std::shared_ptr uopPtr(new MockInstruction); + uopPtr->setSequenceId(5); + ExceptionHandler handler(uopPtr, core, memory, kernel); + + // Initialise needed values for function + uint64_t retVal = 0; + uint64_t ptr = 0; + uint64_t length = 192; + + // Initialise data to "read" from MockMemory + std::vector dataVec(length, 'q'); + std::vector dataVec2(length, 'q'); + // Initialise the two required targets (128-bytes per read request in + // readBufferThen()) + MemoryAccessTarget tar1 = {ptr, 128}; + MemoryAccessTarget tar2 = {ptr + 128, static_cast(length - 128)}; + // Initialise "responses" from the MockMemory + MemoryReadResult res1 = {tar1, RegisterValue(dataVec.data() + ptr, 128), + uopPtr->getSequenceId()}; + MemoryReadResult res2 = { + tar2, RegisterValue(dataVec.data() + ptr + 128, length - 128), + uopPtr->getSequenceId()}; + + // Confirm that internal dataBuffer 
is empty + EXPECT_EQ(handler.dataBuffer.size(), 0); + + // Initial call to readBufferThen - expect resumeHandling to be updated to + // readBufferThen and a memory read request to have occurred + EXPECT_CALL(memory, requestRead(tar1, uopPtr->getSequenceId())).Times(1); + EXPECT_CALL(memory, getCompletedReads()).Times(1); + bool outcome = handler.readBufferThen(ptr, length, [&retVal]() { + retVal = 10; + return true; + }); + EXPECT_FALSE(outcome); + EXPECT_EQ(retVal, 0); + EXPECT_EQ(handler.dataBuffer.size(), 0); + + // Can now call tick() - on call, emulate no reads completed + ON_CALL(memory, getCompletedReads()) + .WillByDefault(Return(span())); + EXPECT_CALL(memory, getCompletedReads()).Times(1); + outcome = handler.tick(); + EXPECT_FALSE(outcome); + EXPECT_EQ(retVal, 0); + EXPECT_EQ(handler.dataBuffer.size(), 0); + + // Call tick() again, simulating completed read + new read requested as still + // data to fetch + ON_CALL(memory, getCompletedReads()) + .WillByDefault(Return(span(&res1, 1))); + // Make sure clearCompletedReads() alters functionality of getCompletedReads() + ON_CALL(memory, clearCompletedReads()) + .WillByDefault(::testing::InvokeWithoutArgs([&]() { + ON_CALL(memory, getCompletedReads()) + .WillByDefault(Return(span())); + })); + EXPECT_CALL(memory, getCompletedReads()).Times(2); + EXPECT_CALL(memory, clearCompletedReads()).Times(1); + EXPECT_CALL(memory, requestRead(tar2, uopPtr->getSequenceId())).Times(1); + outcome = handler.tick(); + EXPECT_FALSE(outcome); + EXPECT_EQ(retVal, 0); + EXPECT_EQ(handler.dataBuffer.size(), 128); + for (int i = 0; i < handler.dataBuffer.size(); i++) { + EXPECT_EQ(handler.dataBuffer[i], 'q'); + } + + // One final call to tick() to get last bits of data from memory and call + // then() + ON_CALL(memory, getCompletedReads()) + .WillByDefault(Return(span(&res2, 1))); + EXPECT_CALL(memory, getCompletedReads()).Times(1); + EXPECT_CALL(memory, clearCompletedReads()).Times(1); + outcome = handler.tick(); + 
EXPECT_TRUE(outcome); + EXPECT_EQ(retVal, 10); + EXPECT_EQ(handler.dataBuffer.size(), length); + for (int i = 0; i < length; i++) { + EXPECT_EQ(handler.dataBuffer[i], static_cast('q')); + } +} + +// Test that `readBufferThen()` calls then if length is 0 +TEST_F(AArch64ExceptionHandlerTest, readBufferThen_length0) { + // Create new mock instruction and ExceptionHandler + std::shared_ptr uopPtr(new MockInstruction); + ExceptionHandler handler(uopPtr, core, memory, kernel); + + const size_t expectedVal = 10; + uint64_t retVal = 0; + uint64_t ptr = 0; + uint64_t length = 0; + + bool outcome = handler.readBufferThen(ptr, length, [&retVal]() { + retVal = 10; + return true; + }); + EXPECT_TRUE(outcome); + EXPECT_EQ(retVal, expectedVal); +} + +// Test that all AArch64 exception types print as expected +TEST_F(AArch64ExceptionHandlerTest, printException) { + ON_CALL(core, getArchitecturalRegisterFileSet()) + .WillByDefault(ReturnRef(archRegFileSet)); + uint64_t insnAddr = 0x4; + MacroOp uops; + + // Create instruction for EncodingUnallocated + arch.predecode(validInstrBytes.data(), validInstrBytes.size(), insnAddr, + uops); + InstructionException exception = InstructionException::EncodingUnallocated; + std::shared_ptr insn = std::make_shared( + arch, static_cast(uops[0].get())->getMetadata(), exception); + // Create ExceptionHandler + ExceptionHandler handler_0(insn, core, memory, kernel); + // Capture std::cout and tick exceptionHandler + std::stringstream buffer; + std::streambuf* sbuf = std::cout.rdbuf(); // Save cout's buffer + std::cout.rdbuf(buffer.rdbuf()); // Redirect cout to buffer + handler_0.printException(*static_cast(insn.get())); + std::cout.rdbuf(sbuf); // Restore cout + EXPECT_THAT(buffer.str(), + HasSubstr("[SimEng:ExceptionHandler] Encountered illegal " + "instruction exception")); + buffer.str(std::string()); + uops.clear(); + + // Create instruction for ExecutionNotYetImplemented + arch.predecode(validInstrBytes.data(), validInstrBytes.size(), insnAddr, 
+ uops); + exception = InstructionException::ExecutionNotYetImplemented; + insn = std::make_shared( + arch, static_cast(uops[0].get())->getMetadata(), exception); + // Create ExceptionHandler + ExceptionHandler handler_1(insn, core, memory, kernel); + // Capture std::cout and tick exceptionHandler + sbuf = std::cout.rdbuf(); // Save cout's buffer + std::cout.rdbuf(buffer.rdbuf()); // Redirect cout to buffer + handler_1.printException(*static_cast(insn.get())); + std::cout.rdbuf(sbuf); // Restore cout + EXPECT_THAT(buffer.str(), + HasSubstr("[SimEng:ExceptionHandler] Encountered execution " + "not-yet-implemented exception")); + buffer.str(std::string()); + uops.clear(); + + // Create instruction for AliasNotYetImplemented + arch.predecode(validInstrBytes.data(), validInstrBytes.size(), insnAddr, + uops); + exception = InstructionException::AliasNotYetImplemented; + insn = std::make_shared( + arch, static_cast(uops[0].get())->getMetadata(), exception); + // Create ExceptionHandler + ExceptionHandler handler_2(insn, core, memory, kernel); + // Capture std::cout and tick exceptionHandler + sbuf = std::cout.rdbuf(); // Save cout's buffer + std::cout.rdbuf(buffer.rdbuf()); // Redirect cout to buffer + handler_2.printException(*static_cast(insn.get())); + std::cout.rdbuf(sbuf); // Restore cout + EXPECT_THAT(buffer.str(), HasSubstr("[SimEng:ExceptionHandler] Encountered " + "alias not-yet-implemented exception")); + buffer.str(std::string()); + uops.clear(); + + // Create instruction for MisalignedPC + arch.predecode(validInstrBytes.data(), validInstrBytes.size(), insnAddr, + uops); + exception = InstructionException::MisalignedPC; + insn = std::make_shared( + arch, static_cast(uops[0].get())->getMetadata(), exception); + // Create ExceptionHandler + ExceptionHandler handler_3(insn, core, memory, kernel); + // Capture std::cout and tick exceptionHandler + sbuf = std::cout.rdbuf(); // Save cout's buffer + std::cout.rdbuf(buffer.rdbuf()); // Redirect cout to buffer + 
handler_3.printException(*static_cast(insn.get())); + std::cout.rdbuf(sbuf); // Restore cout + EXPECT_THAT(buffer.str(), + HasSubstr("[SimEng:ExceptionHandler] Encountered misaligned " + "program counter exception")); + buffer.str(std::string()); + uops.clear(); + + // Create instruction for DataAbort + arch.predecode(validInstrBytes.data(), validInstrBytes.size(), insnAddr, + uops); + exception = InstructionException::DataAbort; + insn = std::make_shared( + arch, static_cast(uops[0].get())->getMetadata(), exception); + // Create ExceptionHandler + ExceptionHandler handler_4(insn, core, memory, kernel); + // Capture std::cout and tick exceptionHandler + sbuf = std::cout.rdbuf(); // Save cout's buffer + std::cout.rdbuf(buffer.rdbuf()); // Redirect cout to buffer + handler_4.printException(*static_cast(insn.get())); + std::cout.rdbuf(sbuf); // Restore cout + EXPECT_THAT( + buffer.str(), + HasSubstr("[SimEng:ExceptionHandler] Encountered data abort exception")); + buffer.str(std::string()); + uops.clear(); + + // Create instruction for SupervisorCall + arch.predecode(validInstrBytes.data(), validInstrBytes.size(), insnAddr, + uops); + exception = InstructionException::SupervisorCall; + insn = std::make_shared( + arch, static_cast(uops[0].get())->getMetadata(), exception); + // Create ExceptionHandler + ExceptionHandler handler_5(insn, core, memory, kernel); + // Capture std::cout and tick exceptionHandler + sbuf = std::cout.rdbuf(); // Save cout's buffer + std::cout.rdbuf(buffer.rdbuf()); // Redirect cout to buffer + handler_5.printException(*static_cast(insn.get())); + std::cout.rdbuf(sbuf); // Restore cout + EXPECT_THAT( + buffer.str(), + HasSubstr( + "[SimEng:ExceptionHandler] Encountered supervisor call exception")); + buffer.str(std::string()); + uops.clear(); + + // Create instruction for HypervisorCall + arch.predecode(validInstrBytes.data(), validInstrBytes.size(), insnAddr, + uops); + exception = InstructionException::HypervisorCall; + insn = 
std::make_shared( + arch, static_cast(uops[0].get())->getMetadata(), exception); + // Create ExceptionHandler + ExceptionHandler handler_6(insn, core, memory, kernel); + // Capture std::cout and tick exceptionHandler + sbuf = std::cout.rdbuf(); // Save cout's buffer + std::cout.rdbuf(buffer.rdbuf()); // Redirect cout to buffer + handler_6.printException(*static_cast(insn.get())); + std::cout.rdbuf(sbuf); // Restore cout + EXPECT_THAT( + buffer.str(), + HasSubstr( + "[SimEng:ExceptionHandler] Encountered hypervisor call exception")); + buffer.str(std::string()); + uops.clear(); + + // Create instruction for SecureMonitorCall + arch.predecode(validInstrBytes.data(), validInstrBytes.size(), insnAddr, + uops); + exception = InstructionException::SecureMonitorCall; + insn = std::make_shared( + arch, static_cast(uops[0].get())->getMetadata(), exception); + // Create ExceptionHandler + ExceptionHandler handler_7(insn, core, memory, kernel); + // Capture std::cout and tick exceptionHandler + sbuf = std::cout.rdbuf(); // Save cout's buffer + std::cout.rdbuf(buffer.rdbuf()); // Redirect cout to buffer + handler_7.printException(*static_cast(insn.get())); + std::cout.rdbuf(sbuf); // Restore cout + EXPECT_THAT(buffer.str(), HasSubstr("[SimEng:ExceptionHandler] Encountered " + "secure monitor call exception")); + buffer.str(std::string()); + uops.clear(); + + // Create instruction for NoAvailablePort + arch.predecode(validInstrBytes.data(), validInstrBytes.size(), insnAddr, + uops); + exception = InstructionException::NoAvailablePort; + insn = std::make_shared( + arch, static_cast(uops[0].get())->getMetadata(), exception); + // Create ExceptionHandler + ExceptionHandler handler_8(insn, core, memory, kernel); + // Capture std::cout and tick exceptionHandler + sbuf = std::cout.rdbuf(); // Save cout's buffer + std::cout.rdbuf(buffer.rdbuf()); // Redirect cout to buffer + handler_8.printException(*static_cast(insn.get())); + std::cout.rdbuf(sbuf); // Restore cout + 
EXPECT_THAT(buffer.str(), HasSubstr("[SimEng:ExceptionHandler] Encountered " + "unsupported execution port exception")); + buffer.str(std::string()); + uops.clear(); + + // Create instruction for UnmappedSysReg + arch.predecode(validInstrBytes.data(), validInstrBytes.size(), insnAddr, + uops); + exception = InstructionException::UnmappedSysReg; + insn = std::make_shared( + arch, static_cast(uops[0].get())->getMetadata(), exception); + // Create ExceptionHandler + ExceptionHandler handler_9(insn, core, memory, kernel); + // Capture std::cout and tick exceptionHandler + sbuf = std::cout.rdbuf(); // Save cout's buffer + std::cout.rdbuf(buffer.rdbuf()); // Redirect cout to buffer + handler_9.printException(*static_cast(insn.get())); + std::cout.rdbuf(sbuf); // Restore cout + EXPECT_THAT(buffer.str(), HasSubstr("[SimEng:ExceptionHandler] Encountered " + "unmapped system register exception")); + buffer.str(std::string()); + uops.clear(); + + // Create instruction for StreamingModeUpdate + arch.predecode(validInstrBytes.data(), validInstrBytes.size(), insnAddr, + uops); + exception = InstructionException::StreamingModeUpdate; + insn = std::make_shared( + arch, static_cast(uops[0].get())->getMetadata(), exception); + // Create ExceptionHandler + ExceptionHandler handler_10(insn, core, memory, kernel); + // Capture std::cout and tick exceptionHandler + sbuf = std::cout.rdbuf(); // Save cout's buffer + std::cout.rdbuf(buffer.rdbuf()); // Redirect cout to buffer + handler_10.printException(*static_cast(insn.get())); + std::cout.rdbuf(sbuf); // Restore cout + EXPECT_THAT(buffer.str(), HasSubstr("[SimEng:ExceptionHandler] Encountered " + "streaming mode update exception")); + buffer.str(std::string()); + uops.clear(); + + // Create instruction for ZAregisterStatusUpdate + arch.predecode(validInstrBytes.data(), validInstrBytes.size(), insnAddr, + uops); + exception = InstructionException::ZAregisterStatusUpdate; + insn = std::make_shared( + arch, 
static_cast(uops[0].get())->getMetadata(), exception); + // Create ExceptionHandler + ExceptionHandler handler_11(insn, core, memory, kernel); + // Capture std::cout and tick exceptionHandler + sbuf = std::cout.rdbuf(); // Save cout's buffer + std::cout.rdbuf(buffer.rdbuf()); // Redirect cout to buffer + handler_11.printException(*static_cast(insn.get())); + std::cout.rdbuf(sbuf); // Restore cout + EXPECT_THAT(buffer.str(), HasSubstr("[SimEng:ExceptionHandler] Encountered " + "ZA register status update exception")); + buffer.str(std::string()); + uops.clear(); + + // Create instruction for SMZAUpdate + arch.predecode(validInstrBytes.data(), validInstrBytes.size(), insnAddr, + uops); + exception = InstructionException::SMZAUpdate; + insn = std::make_shared( + arch, static_cast(uops[0].get())->getMetadata(), exception); + // Create ExceptionHandler + ExceptionHandler handler_12(insn, core, memory, kernel); + // Capture std::cout and tick exceptionHandler + sbuf = std::cout.rdbuf(); // Save cout's buffer + std::cout.rdbuf(buffer.rdbuf()); // Redirect cout to buffer + handler_12.printException(*static_cast(insn.get())); + std::cout.rdbuf(sbuf); // Restore cout + EXPECT_THAT(buffer.str(), + HasSubstr("[SimEng:ExceptionHandler] Encountered streaming mode " + "& ZA register status update exception")); + buffer.str(std::string()); + uops.clear(); + + // Create instruction for ZAdisabled + arch.predecode(validInstrBytes.data(), validInstrBytes.size(), insnAddr, + uops); + exception = InstructionException::ZAdisabled; + insn = std::make_shared( + arch, static_cast(uops[0].get())->getMetadata(), exception); + // Create ExceptionHandler + ExceptionHandler handler_13(insn, core, memory, kernel); + // Capture std::cout and tick exceptionHandler + sbuf = std::cout.rdbuf(); // Save cout's buffer + std::cout.rdbuf(buffer.rdbuf()); // Redirect cout to buffer + handler_13.printException(*static_cast(insn.get())); + std::cout.rdbuf(sbuf); // Restore cout + EXPECT_THAT(buffer.str(), + 
HasSubstr("[SimEng:ExceptionHandler] Encountered ZA register " + "access attempt when disabled exception")); + buffer.str(std::string()); + uops.clear(); + + // Create instruction for SMdisabled + arch.predecode(validInstrBytes.data(), validInstrBytes.size(), insnAddr, + uops); + exception = InstructionException::SMdisabled; + insn = std::make_shared( + arch, static_cast(uops[0].get())->getMetadata(), exception); + // Create ExceptionHandler + ExceptionHandler handler_14(insn, core, memory, kernel); + // Capture std::cout and tick exceptionHandler + sbuf = std::cout.rdbuf(); // Save cout's buffer + std::cout.rdbuf(buffer.rdbuf()); // Redirect cout to buffer + handler_14.printException(*static_cast(insn.get())); + std::cout.rdbuf(sbuf); // Restore cout + EXPECT_THAT(buffer.str(), + HasSubstr("[SimEng:ExceptionHandler] Encountered SME execution " + "attempt when streaming mode disabled exception")); + buffer.str(std::string()); + uops.clear(); + + // Create instruction for default case + arch.predecode(validInstrBytes.data(), validInstrBytes.size(), insnAddr, + uops); + exception = InstructionException::None; + insn = std::make_shared( + arch, static_cast(uops[0].get())->getMetadata(), exception); + // Create ExceptionHandler + ExceptionHandler handler_15(insn, core, memory, kernel); + // Capture std::cout and tick exceptionHandler + sbuf = std::cout.rdbuf(); // Save cout's buffer + std::cout.rdbuf(buffer.rdbuf()); // Redirect cout to buffer + handler_15.printException(*static_cast(insn.get())); + std::cout.rdbuf(sbuf); // Restore cout + EXPECT_THAT(buffer.str(), + HasSubstr("[SimEng:ExceptionHandler] Encountered unknown (id: " + "0) exception")); + buffer.str(std::string()); + uops.clear(); +} + +} // namespace aarch64 +} // namespace arch +} // namespace simeng \ No newline at end of file diff --git a/test/unit/aarch64/InstructionTest.cc b/test/unit/aarch64/InstructionTest.cc new file mode 100644 index 0000000000..91c2e6946c --- /dev/null +++ 
b/test/unit/aarch64/InstructionTest.cc @@ -0,0 +1,633 @@ +#include "../ConfigInit.hh" +#include "../MockArchitecture.hh" +#include "arch/aarch64/InstructionMetadata.hh" +#include "gmock/gmock.h" +#include "simeng/arch/aarch64/Instruction.hh" +#include "simeng/version.hh" + +namespace simeng { +namespace arch { +namespace aarch64 { + +// AArch64 Instruction Tests +class AArch64InstructionTest : public testing::Test { + public: + AArch64InstructionTest() + : os(config::SimInfo::getConfig()["CPU-Info"]["Special-File-Dir-Path"] + .as()), + arch(os) { + // Create InstructionMetadata objects + cs_open(CS_ARCH_ARM64, CS_MODE_ARM, &capstoneHandle); + cs_option(capstoneHandle, CS_OPT_DETAIL, CS_OPT_ON); + + // Create instructions which cover the 3 main types: Arithmetic, Memory, + // Branch. This allows for full testing of the Instruction class. + + // fdiv + cs_insn rawInsn_fdiv; + cs_detail rawDetail_fdiv; + rawInsn_fdiv.detail = &rawDetail_fdiv; + size_t size_fdiv = 4; + uint64_t address_fdiv = 0; + const uint8_t* encoding_fdiv = + reinterpret_cast(fdivInstrBytes.data()); + cs_disasm_iter(capstoneHandle, &encoding_fdiv, &size_fdiv, &address_fdiv, + &rawInsn_fdiv); + fdivMetadata = std::make_unique(rawInsn_fdiv); + + // ldp + cs_insn rawInsn_ldp; + cs_detail rawDetail_ldp; + rawInsn_ldp.detail = &rawDetail_ldp; + size_t size_ldp = 4; + uint64_t address_ldp = 0; + const uint8_t* encoding_ldp = + reinterpret_cast(ldpInstrBytes.data()); + cs_disasm_iter(capstoneHandle, &encoding_ldp, &size_ldp, &address_ldp, + &rawInsn_ldp); + ldpMetadata = std::make_unique(rawInsn_ldp); + + // cbz + cs_insn rawInsn_cbz; + cs_detail rawDetail_cbz; + rawInsn_cbz.detail = &rawDetail_cbz; + size_t size_cbz = 4; + uint64_t address_cbz = 0; + const uint8_t* encoding_cbz = + reinterpret_cast(cbzInstrBytes.data()); + cs_disasm_iter(capstoneHandle, &encoding_cbz, &size_cbz, &address_cbz, + &rawInsn_cbz); + cbzMetadata = std::make_unique(rawInsn_cbz); + + const uint8_t* badEncoding = + 
reinterpret_cast(invalidInstrBytes.data()); + invalidMetadata = std::make_unique(badEncoding); + } + + ~AArch64InstructionTest() { cs_close(&capstoneHandle); } + + protected: + ConfigInit configInit = ConfigInit(config::ISA::AArch64, ""); + + // fdivr z1.s, p0/m, z1.s, z0.s + std::array fdivInstrBytes = {0x01, 0x80, 0x8c, 0x65}; + // ldp x1, x2, [x3] + std::array ldpInstrBytes = {0x61, 0x08, 0x40, 0xA9}; + // cbz x2, #0x28 + std::array cbzInstrBytes = {0x42, 0x01, 0x00, 0xB4}; + std::array invalidInstrBytes = {0x20, 0x00, 0x02, 0x8c}; + + // A Capstone decoding library handle, for decoding instructions. + csh capstoneHandle; + + kernel::Linux os; + Architecture arch; + + std::unique_ptr fdivMetadata; + std::unique_ptr ldpMetadata; + std::unique_ptr cbzMetadata; + std::unique_ptr invalidMetadata; + std::unique_ptr uopInfo; + InstructionException exception; +}; + +// Test that a valid instruction is created correctly +TEST_F(AArch64InstructionTest, validInsn) { + // Insn is `fdivr z1.s, p0/m, z1.s, z0.s` + Instruction insn = Instruction(arch, *fdivMetadata.get(), MicroOpInfo()); + // Define instruction's registers + std::vector destRegs = {{RegisterType::VECTOR, 1}}; + std::vector srcRegs = {{RegisterType::PREDICATE, 0}, + {RegisterType::VECTOR, 1}, + {RegisterType::VECTOR, 0}}; + const std::vector ports = {1, 2, 3}; + insn.setExecutionInfo({3, 4, ports}); + insn.setInstructionAddress(0x48); + insn.setInstructionId(11); + insn.setSequenceId(12); + + // Ensure that all instruction values are as expected after creation + BranchPrediction pred = {false, 0}; + bool matchingPred = (insn.getBranchPrediction() == pred) ? 
true : false; + EXPECT_EQ(&insn.getArchitecture(), &arch); + EXPECT_EQ(insn.getBranchAddress(), 0); + EXPECT_TRUE(matchingPred); + EXPECT_EQ(insn.getBranchType(), BranchType::Unknown); + EXPECT_EQ(insn.getData().size(), 0); + EXPECT_EQ(insn.getDestinationRegisters().size(), destRegs.size()); + for (int i = 0; i < destRegs.size(); i++) { + EXPECT_EQ(insn.getDestinationRegisters()[i], destRegs[i]); + } + EXPECT_EQ(insn.getException(), InstructionException::None); + EXPECT_EQ(insn.getGeneratedAddresses().size(), 0); + EXPECT_EQ(insn.getGroup(), InstructionGroups::SVE_DIV_OR_SQRT); + EXPECT_EQ(insn.getInstructionAddress(), 0x48); + EXPECT_EQ(insn.getInstructionId(), 11); + EXPECT_EQ(insn.getKnownOffset(), 0); + EXPECT_EQ(insn.getLatency(), 3); + EXPECT_EQ(insn.getLSQLatency(), 1); + EXPECT_EQ(&insn.getMetadata(), fdivMetadata.get()); + EXPECT_EQ(insn.getMicroOpIndex(), 0); + // Results vector resized at decode + EXPECT_EQ(insn.getResults().size(), 1); + EXPECT_EQ(insn.getSequenceId(), 12); + // Operands vector resized at decode + EXPECT_EQ(insn.getSourceOperands().size(), 3); + EXPECT_EQ(insn.getSourceRegisters().size(), srcRegs.size()); + for (int i = 0; i < srcRegs.size(); i++) { + EXPECT_EQ(insn.getSourceRegisters()[i], srcRegs[i]); + EXPECT_FALSE(insn.isOperandReady(i)); + } + EXPECT_EQ(insn.getStallCycles(), 4); + EXPECT_EQ(insn.getSupportedPorts(), ports); + + EXPECT_FALSE(insn.canExecute()); + EXPECT_FALSE(insn.isStoreAddress()); + EXPECT_FALSE(insn.isStoreData()); + EXPECT_FALSE(insn.isLoad()); + EXPECT_FALSE(insn.isBranch()); + EXPECT_FALSE(insn.exceptionEncountered()); + EXPECT_FALSE(insn.hasExecuted()); + EXPECT_FALSE(insn.canCommit()); + EXPECT_TRUE(insn.hasAllData()); + EXPECT_FALSE(insn.wasBranchTaken()); + EXPECT_FALSE(insn.isFlushed()); + EXPECT_FALSE(insn.isMicroOp()); + EXPECT_TRUE(insn.isLastMicroOp()); + EXPECT_FALSE(insn.isWaitingCommit()); +} + +// Test that an invalid instruction can be created - invalid due to byte stream 
+TEST_F(AArch64InstructionTest, invalidInsn_1) { + Instruction insn = Instruction(arch, *invalidMetadata.get(), MicroOpInfo()); + // Define instruction's registers + std::vector destRegs = {}; + std::vector srcRegs = {}; + const std::vector ports = {}; + insn.setExecutionInfo({1, 1, ports}); + insn.setInstructionAddress(0x44); + insn.setInstructionId(13); + insn.setSequenceId(14); + + // Ensure that all instruction values are as expected after creation + BranchPrediction pred = {false, 0}; + bool matchingPred = (insn.getBranchPrediction() == pred) ? true : false; + EXPECT_EQ(&insn.getArchitecture(), &arch); + EXPECT_EQ(insn.getBranchAddress(), 0); + EXPECT_TRUE(matchingPred); + EXPECT_EQ(insn.getBranchType(), BranchType::Unknown); + EXPECT_EQ(insn.getData().size(), 0); + EXPECT_EQ(insn.getDestinationRegisters().size(), destRegs.size()); + for (int i = 0; i < destRegs.size(); i++) { + EXPECT_EQ(insn.getDestinationRegisters()[i], destRegs[i]); + } + EXPECT_EQ(insn.getException(), InstructionException::EncodingUnallocated); + EXPECT_EQ(insn.getGeneratedAddresses().size(), 0); + // Default Group + EXPECT_EQ(insn.getGroup(), InstructionGroups::INT_SIMPLE_ARTH_NOSHIFT); + EXPECT_EQ(insn.getInstructionAddress(), 0x44); + EXPECT_EQ(insn.getInstructionId(), 13); + EXPECT_EQ(insn.getKnownOffset(), 0); + EXPECT_EQ(insn.getLatency(), 1); + EXPECT_EQ(insn.getLSQLatency(), 1); + EXPECT_EQ(&insn.getMetadata(), invalidMetadata.get()); + EXPECT_EQ(insn.getMicroOpIndex(), 0); + // Results vector resized at decode + EXPECT_EQ(insn.getResults().size(), 0); + EXPECT_EQ(insn.getSequenceId(), 14); + // Operands vector resized at decode + EXPECT_EQ(insn.getSourceOperands().size(), 0); + EXPECT_EQ(insn.getSourceRegisters().size(), srcRegs.size()); + for (int i = 0; i < srcRegs.size(); i++) { + EXPECT_EQ(insn.getSourceRegisters()[i], srcRegs[i]); + EXPECT_FALSE(insn.isOperandReady(i)); + } + EXPECT_EQ(insn.getStallCycles(), 1); + EXPECT_EQ(insn.getSupportedPorts(), ports); + + 
EXPECT_TRUE(insn.canExecute()); + EXPECT_FALSE(insn.isStoreAddress()); + EXPECT_FALSE(insn.isStoreData()); + EXPECT_FALSE(insn.isLoad()); + EXPECT_FALSE(insn.isBranch()); + EXPECT_TRUE(insn.exceptionEncountered()); + EXPECT_FALSE(insn.hasExecuted()); + EXPECT_FALSE(insn.canCommit()); + EXPECT_TRUE(insn.hasAllData()); + EXPECT_FALSE(insn.wasBranchTaken()); + EXPECT_FALSE(insn.isFlushed()); + EXPECT_FALSE(insn.isMicroOp()); + EXPECT_TRUE(insn.isLastMicroOp()); + EXPECT_FALSE(insn.isWaitingCommit()); +} + +// Test that an invalid instruction can be created - invalid due to exception +// provided +TEST_F(AArch64InstructionTest, invalidInsn_2) { + Instruction insn = Instruction(arch, *invalidMetadata.get(), + InstructionException::HypervisorCall); + // Define instruction's registers + std::vector destRegs = {}; + std::vector srcRegs = {}; + const std::vector ports = {}; + insn.setExecutionInfo({1, 1, ports}); + insn.setInstructionAddress(0x43); + insn.setInstructionId(15); + insn.setSequenceId(16); + + // Ensure that all instruction values are as expected after creation + BranchPrediction pred = {false, 0}; + bool matchingPred = (insn.getBranchPrediction() == pred) ? 
true : false; + EXPECT_EQ(&insn.getArchitecture(), &arch); + EXPECT_EQ(insn.getBranchAddress(), 0); + EXPECT_TRUE(matchingPred); + EXPECT_EQ(insn.getBranchType(), BranchType::Unknown); + EXPECT_EQ(insn.getData().size(), 0); + EXPECT_EQ(insn.getDestinationRegisters().size(), destRegs.size()); + for (int i = 0; i < destRegs.size(); i++) { + EXPECT_EQ(insn.getDestinationRegisters()[i], destRegs[i]); + } + EXPECT_EQ(insn.getException(), InstructionException::HypervisorCall); + EXPECT_EQ(insn.getGeneratedAddresses().size(), 0); + // Default Group + EXPECT_EQ(insn.getGroup(), InstructionGroups::INT_SIMPLE_ARTH_NOSHIFT); + EXPECT_EQ(insn.getInstructionAddress(), 0x43); + EXPECT_EQ(insn.getInstructionId(), 15); + EXPECT_EQ(insn.getKnownOffset(), 0); + EXPECT_EQ(insn.getLatency(), 1); + EXPECT_EQ(insn.getLSQLatency(), 1); + EXPECT_EQ(&insn.getMetadata(), invalidMetadata.get()); + EXPECT_EQ(insn.getMicroOpIndex(), 0); + // Results vector resized at decode + EXPECT_EQ(insn.getResults().size(), 0); + EXPECT_EQ(insn.getSequenceId(), 16); + // Operands vector resized at decode + EXPECT_EQ(insn.getSourceOperands().size(), 0); + EXPECT_EQ(insn.getSourceRegisters().size(), srcRegs.size()); + for (int i = 0; i < srcRegs.size(); i++) { + EXPECT_EQ(insn.getSourceRegisters()[i], srcRegs[i]); + EXPECT_FALSE(insn.isOperandReady(i)); + } + EXPECT_EQ(insn.getStallCycles(), 1); + EXPECT_EQ(insn.getSupportedPorts(), ports); + + EXPECT_TRUE(insn.canExecute()); + EXPECT_FALSE(insn.isStoreAddress()); + EXPECT_FALSE(insn.isStoreData()); + EXPECT_FALSE(insn.isLoad()); + EXPECT_FALSE(insn.isBranch()); + EXPECT_TRUE(insn.exceptionEncountered()); + EXPECT_FALSE(insn.hasExecuted()); + EXPECT_FALSE(insn.canCommit()); + EXPECT_TRUE(insn.hasAllData()); + EXPECT_FALSE(insn.wasBranchTaken()); + EXPECT_FALSE(insn.isFlushed()); + EXPECT_FALSE(insn.isMicroOp()); + EXPECT_TRUE(insn.isLastMicroOp()); + EXPECT_FALSE(insn.isWaitingCommit()); +} + +// Test to ensure that source and operand registers can be 
renamed correctly +TEST_F(AArch64InstructionTest, renameRegs) { + // Insn is `fdivr z1.s, p0/m, z1.s, z0.s` + Instruction insn = Instruction(arch, *fdivMetadata.get(), MicroOpInfo()); + // Define instruction's registers + std::vector destRegs = {{RegisterType::VECTOR, 1}}; + std::vector srcRegs = {{RegisterType::PREDICATE, 0}, + {RegisterType::VECTOR, 1}, + {RegisterType::VECTOR, 0}}; + // Ensure registers decoded correctly + EXPECT_EQ(insn.getSourceRegisters().size(), srcRegs.size()); + for (int i = 0; i < srcRegs.size(); i++) { + EXPECT_EQ(insn.getSourceRegisters()[i], srcRegs[i]); + } + EXPECT_EQ(insn.getDestinationRegisters().size(), destRegs.size()); + for (int i = 0; i < destRegs.size(); i++) { + EXPECT_EQ(insn.getDestinationRegisters()[i], destRegs[i]); + } + + // Define renamed registers + std::vector destRegs_new = {{RegisterType::VECTOR, 24}}; + std::vector srcRegs_new = {{RegisterType::PREDICATE, 0}, + {RegisterType::VECTOR, 97}, + {RegisterType::VECTOR, 0}}; + insn.renameDestination(0, destRegs_new[0]); + insn.renameSource(1, srcRegs_new[1]); + // Ensure renaming functionality works as expected + EXPECT_EQ(insn.getSourceRegisters().size(), srcRegs_new.size()); + for (int i = 0; i < srcRegs_new.size(); i++) { + EXPECT_EQ(insn.getSourceRegisters()[i], srcRegs_new[i]); + } + EXPECT_EQ(insn.getDestinationRegisters().size(), destRegs_new.size()); + for (int i = 0; i < destRegs_new.size(); i++) { + EXPECT_EQ(insn.getDestinationRegisters()[i], destRegs_new[i]); + } +} + +// Test that operand values can be properly supplied and change the state of +// `canExecute` +TEST_F(AArch64InstructionTest, supplyOperand) { + // Insn is `fdivr z1.s, p0/m, z1.s, z0.s` + Instruction insn = Instruction(arch, *fdivMetadata.get(), MicroOpInfo()); + // Define instruction's registers + std::vector destRegs = {{RegisterType::VECTOR, 1}}; + std::vector srcRegs = {{RegisterType::PREDICATE, 0}, + {RegisterType::VECTOR, 1}, + {RegisterType::VECTOR, 0}}; + // Check initial state is as 
expected + EXPECT_FALSE(insn.canExecute()); + EXPECT_FALSE(insn.isOperandReady(0)); + EXPECT_FALSE(insn.isOperandReady(1)); + EXPECT_FALSE(insn.isOperandReady(2)); + + // Define mock register values for source registers + RegisterValue vec = {0xABBACAFE01234567, 256}; + uint64_t pred_vals[4] = {0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF}; + RegisterValue pred = {pred_vals, 32}; + // Supply values for all source registers + insn.supplyOperand(0, pred); + insn.supplyOperand(1, vec); + insn.supplyOperand(2, vec); + // Ensure Instruction state has updated as expected + EXPECT_TRUE(insn.canExecute()); + EXPECT_TRUE(insn.isOperandReady(0)); + EXPECT_TRUE(insn.isOperandReady(1)); + EXPECT_TRUE(insn.isOperandReady(2)); + auto sourceVals = insn.getSourceOperands(); + EXPECT_EQ(sourceVals.size(), 3); + EXPECT_EQ(sourceVals[0], pred); + EXPECT_EQ(sourceVals[1], vec); + EXPECT_EQ(sourceVals[2], vec); + + // Ensure instruction execute updates instruction state as expected, and + // produces the expected result. 
+ EXPECT_FALSE(insn.hasExecuted()); + insn.execute(); + EXPECT_TRUE(insn.hasExecuted()); + auto results = insn.getResults(); + uint64_t vals[2] = {0x3f8000003f800000, 0x7fc000007fc00000}; + RegisterValue refRes = {vals, 256}; + EXPECT_EQ(results.size(), 1); + EXPECT_EQ(results[0], refRes); +} + +// Test that data can be supplied successfully +TEST_F(AArch64InstructionTest, supplyData) { + // Insn is `ldp x1, x2, [x3]` + Instruction insn = Instruction(arch, *ldpMetadata.get(), MicroOpInfo()); + // Define instruction's registers + std::vector destRegs = {{RegisterType::GENERAL, 1}, + {RegisterType::GENERAL, 2}}; + std::vector srcRegs = {{RegisterType::GENERAL, 3}}; + + // Check instruction created correctly + EXPECT_FALSE(insn.exceptionEncountered()); + EXPECT_EQ(&insn.getMetadata(), ldpMetadata.get()); + EXPECT_EQ(insn.getGroup(), InstructionGroups::LOAD_INT); + + // Check source and destination registers extracted correctly + EXPECT_EQ(insn.getSourceRegisters().size(), srcRegs.size()); + for (int i = 0; i < srcRegs.size(); i++) { + EXPECT_EQ(insn.getSourceRegisters()[i], srcRegs[i]); + } + EXPECT_EQ(insn.getDestinationRegisters().size(), destRegs.size()); + for (int i = 0; i < destRegs.size(); i++) { + EXPECT_EQ(insn.getDestinationRegisters()[i], destRegs[i]); + } + + // Supply needed operands + EXPECT_FALSE(insn.isOperandReady(0)); + RegisterValue addr = {0x480, 8}; + insn.supplyOperand(0, addr); + EXPECT_TRUE(insn.isOperandReady(0)); + + // Generate memory addresses + EXPECT_EQ(insn.getGeneratedAddresses().size(), 0); + insn.generateAddresses(); + auto generatedAddresses = insn.getGeneratedAddresses(); + EXPECT_EQ(generatedAddresses.size(), 2); + for (int i = 0; i < generatedAddresses.size(); i++) { + EXPECT_EQ(generatedAddresses[i].address, 0x480 + (i * 0x8)); + EXPECT_EQ(generatedAddresses[i].size, 8); + } + + // Supply required data + EXPECT_FALSE(insn.hasAllData()); + std::vector data = {{123, 8}, {456, 8}}; + EXPECT_EQ(generatedAddresses.size(), 
data.size()); + for (int i = 0; i < generatedAddresses.size(); i++) { + insn.supplyData(generatedAddresses[i].address, data[i]); + } + // Ensure data was supplied correctly + auto retrievedData = insn.getData(); + for (int i = 0; i < retrievedData.size(); i++) { + EXPECT_EQ(retrievedData[i], data[i]); + } + EXPECT_TRUE(insn.hasAllData()); +} + +// Test DataAbort Exception is triggered correctly when supplying data +TEST_F(AArch64InstructionTest, supplyData_dataAbort) { + // Insn is `ldp x1, x2, [x3]` + Instruction insn = Instruction(arch, *ldpMetadata.get(), MicroOpInfo()); + // Define instruction's registers + std::vector destRegs = {{RegisterType::GENERAL, 1}, + {RegisterType::GENERAL, 2}}; + std::vector srcRegs = {{RegisterType::GENERAL, 3}}; + + // Check instruction created correctly + EXPECT_EQ(&insn.getMetadata(), ldpMetadata.get()); + EXPECT_EQ(insn.getGroup(), InstructionGroups::LOAD_INT); + + // Supply needed operands + EXPECT_FALSE(insn.isOperandReady(0)); + RegisterValue addr = {0x480, 8}; + insn.supplyOperand(0, addr); + EXPECT_TRUE(insn.isOperandReady(0)); + + // Generate memory addresses + EXPECT_EQ(insn.getGeneratedAddresses().size(), 0); + insn.generateAddresses(); + auto generatedAddresses = insn.getGeneratedAddresses(); + EXPECT_EQ(generatedAddresses.size(), 2); + for (int i = 0; i < generatedAddresses.size(); i++) { + EXPECT_EQ(generatedAddresses[i].address, 0x480 + (i * 0x8)); + EXPECT_EQ(generatedAddresses[i].size, 8); + } + + // Trigger data abort + EXPECT_FALSE(insn.exceptionEncountered()); + insn.supplyData(generatedAddresses[0].address, RegisterValue()); + EXPECT_TRUE(insn.exceptionEncountered()); + EXPECT_EQ(insn.getException(), InstructionException::DataAbort); +} + +// Test to check logic around early branch misprediction logic +TEST_F(AArch64InstructionTest, earlyBranchMisprediction) { + // Insn is `fdivr z1.s, p0/m, z1.s, z0.s` + Instruction insn = Instruction(arch, *fdivMetadata.get(), MicroOpInfo()); + insn.setInstructionAddress(64); 
+ + // Check initial state of an instruction's branch related options + BranchPrediction pred = {false, 0}; + bool matchingPred = (insn.getBranchPrediction() == pred); + EXPECT_TRUE(matchingPred); + EXPECT_FALSE(insn.wasBranchTaken()); + EXPECT_EQ(insn.getBranchAddress(), 0); + EXPECT_EQ(insn.getBranchType(), BranchType::Unknown); + EXPECT_FALSE(insn.isBranch()); + std::tuple tup = {false, insn.getInstructionAddress() + 4}; + EXPECT_EQ(insn.checkEarlyBranchMisprediction(), tup); + + // Set prediction and ensure expected state changes / outcomes are seen + pred = {true, 0x4848}; + insn.setBranchPrediction(pred); + matchingPred = (insn.getBranchPrediction() == pred); + EXPECT_TRUE(matchingPred); + EXPECT_FALSE(insn.wasBranchTaken()); + EXPECT_EQ(insn.getBranchAddress(), 0); + EXPECT_EQ(insn.getBranchType(), BranchType::Unknown); + // Check logic of `checkEarlyBranchMisprediction` which is different for + // non-branch instructions + EXPECT_FALSE(insn.isBranch()); + tup = {true, insn.getInstructionAddress() + 4}; + EXPECT_EQ(insn.checkEarlyBranchMisprediction(), tup); +} + +// Test that a correct prediction (branch taken) is handled correctly +TEST_F(AArch64InstructionTest, correctPred_taken) { + // insn is `cbz x2, #0x28` + Instruction insn = Instruction(arch, *cbzMetadata.get(), MicroOpInfo()); + insn.setInstructionAddress(80); + + // Check initial state of an instruction's branch related options + BranchPrediction pred = {false, 0}; + bool matchingPred = (insn.getBranchPrediction() == pred); + EXPECT_TRUE(matchingPred); + EXPECT_FALSE(insn.wasBranchTaken()); + EXPECT_EQ(insn.getBranchAddress(), 0); + EXPECT_EQ(insn.getBranchType(), BranchType::Conditional); + EXPECT_TRUE(insn.isBranch()); + std::tuple tup = {false, 0}; + EXPECT_EQ(insn.checkEarlyBranchMisprediction(), tup); + + // Test a correct prediction where branch is taken is handled correctly + pred = {true, 80 + 0x28}; + insn.setBranchPrediction(pred); + matchingPred = (insn.getBranchPrediction() == pred); + 
insn.supplyOperand(0, RegisterValue(0, 8)); + insn.execute(); + EXPECT_TRUE(matchingPred); + EXPECT_TRUE(insn.wasBranchTaken()); + EXPECT_FALSE(insn.wasBranchMispredicted()); + EXPECT_EQ(insn.getBranchAddress(), pred.target); +} + +// Test that a correct prediction (branch not taken) is handled correctly +TEST_F(AArch64InstructionTest, correctPred_notTaken) { + // insn is `cbz x2, #0x28` + Instruction insn = Instruction(arch, *cbzMetadata.get(), MicroOpInfo()); + insn.setInstructionAddress(80); + + // Check initial state of an instruction's branch related options + BranchPrediction pred = {false, 0}; + bool matchingPred = (insn.getBranchPrediction() == pred); + EXPECT_TRUE(matchingPred); + EXPECT_FALSE(insn.wasBranchTaken()); + EXPECT_EQ(insn.getBranchAddress(), 0); + EXPECT_EQ(insn.getBranchType(), BranchType::Conditional); + EXPECT_TRUE(insn.isBranch()); + std::tuple tup = {false, 0}; + EXPECT_EQ(insn.checkEarlyBranchMisprediction(), tup); + + // Test a correct prediction where a branch isn't taken is handled correctly + pred = {false, 80 + 4}; + insn.setBranchPrediction(pred); + matchingPred = (insn.getBranchPrediction() == pred); + insn.supplyOperand(0, RegisterValue(1, 8)); + insn.execute(); + EXPECT_TRUE(matchingPred); + EXPECT_FALSE(insn.wasBranchTaken()); + EXPECT_FALSE(insn.wasBranchMispredicted()); + EXPECT_EQ(insn.getBranchAddress(), pred.target); +} + +// Test that an incorrect prediction (wrong target) is handled correctly +TEST_F(AArch64InstructionTest, incorrectPred_target) { + // insn is `cbz x2, #0x28` + Instruction insn = Instruction(arch, *cbzMetadata.get(), MicroOpInfo()); + insn.setInstructionAddress(100); + + // Check initial state of an instruction's branch related options + BranchPrediction pred = {false, 0}; + bool matchingPred = (insn.getBranchPrediction() == pred); + EXPECT_TRUE(matchingPred); + EXPECT_FALSE(insn.wasBranchTaken()); + EXPECT_EQ(insn.getBranchAddress(), 0); + EXPECT_EQ(insn.getBranchType(), BranchType::Conditional); + 
EXPECT_TRUE(insn.isBranch()); + std::tuple tup = {false, 0}; + EXPECT_EQ(insn.checkEarlyBranchMisprediction(), tup); + + // Test an incorrect prediction is handled correctly - target is wrong + pred = {true, 80 + 0x28}; + insn.setBranchPrediction(pred); + matchingPred = (insn.getBranchPrediction() == pred); + insn.supplyOperand(0, RegisterValue(0, 8)); + insn.execute(); + EXPECT_TRUE(matchingPred); + EXPECT_TRUE(insn.wasBranchTaken()); + EXPECT_TRUE(insn.wasBranchMispredicted()); + EXPECT_EQ(insn.getBranchAddress(), 100 + 0x28); +} + +// Test that an incorrect prediction (wrong taken) is handled correctly +TEST_F(AArch64InstructionTest, incorrectPred_taken) { + // insn is `cbz x2, #0x28` + Instruction insn = Instruction(arch, *cbzMetadata.get(), MicroOpInfo()); + insn.setInstructionAddress(100); + + // Check initial state of an instruction's branch related options + BranchPrediction pred = {false, 0}; + bool matchingPred = (insn.getBranchPrediction() == pred); + EXPECT_TRUE(matchingPred); + EXPECT_FALSE(insn.wasBranchTaken()); + EXPECT_EQ(insn.getBranchAddress(), 0); + EXPECT_EQ(insn.getBranchType(), BranchType::Conditional); + EXPECT_TRUE(insn.isBranch()); + std::tuple tup = {false, 0}; + EXPECT_EQ(insn.checkEarlyBranchMisprediction(), tup); + + // Test an incorrect prediction is handled correctly - taken is wrong + pred = {true, 100 + 0x28}; + insn.setBranchPrediction(pred); + matchingPred = (insn.getBranchPrediction() == pred); + insn.supplyOperand(0, RegisterValue(1, 8)); + insn.execute(); + EXPECT_TRUE(matchingPred); + EXPECT_FALSE(insn.wasBranchTaken()); + EXPECT_TRUE(insn.wasBranchMispredicted()); + EXPECT_EQ(insn.getBranchAddress(), 100 + 4); +} + +// Test commit and flush setters such as `setFlushed`, `setCommitReady`, etc. 
+TEST_F(AArch64InstructionTest, setters) { + // Insn is `fdivr z1.s, p0/m, z1.s, z0.s` + Instruction insn = Instruction(arch, *fdivMetadata.get(), MicroOpInfo()); + + EXPECT_FALSE(insn.canCommit()); + insn.setCommitReady(); + EXPECT_TRUE(insn.canCommit()); + + EXPECT_FALSE(insn.isFlushed()); + insn.setFlushed(); + EXPECT_TRUE(insn.isFlushed()); + + EXPECT_FALSE(insn.isWaitingCommit()); + insn.setWaitingCommit(); + EXPECT_TRUE(insn.isWaitingCommit()); +} + +} // namespace aarch64 +} // namespace arch +} // namespace simeng \ No newline at end of file diff --git a/test/unit/data/stream-aarch64.elf b/test/unit/data/stream-aarch64.elf new file mode 100755 index 0000000000..881a5150a0 Binary files /dev/null and b/test/unit/data/stream-aarch64.elf differ diff --git a/test/unit/data/stream.rv32ima.elf b/test/unit/data/stream.rv32ima.elf new file mode 100644 index 0000000000..ded6502b12 Binary files /dev/null and b/test/unit/data/stream.rv32ima.elf differ diff --git a/test/unit/pipeline/A64FXPortAllocatorTest.cc b/test/unit/pipeline/A64FXPortAllocatorTest.cc index 2e7b5df70b..f593788684 100644 --- a/test/unit/pipeline/A64FXPortAllocatorTest.cc +++ b/test/unit/pipeline/A64FXPortAllocatorTest.cc @@ -8,19 +8,29 @@ namespace simeng { namespace pipeline { -std::vector rsFreeEntries = {20, 20, 10, 10, 19}; +class A64FXPortAllocatorTest : public testing::Test { + public: + A64FXPortAllocatorTest() : portAllocator(portArrangement) { + portAllocator.setRSSizeGetter( + [this](std::vector& sizeVec) { rsSizes(sizeVec); }); + } -void rsSizes(std::vector& sizeVec) { sizeVec = rsFreeEntries; } + void rsSizes(std::vector& sizeVec) const { + sizeVec = rsFreeEntries; + } -// Representation of the A64FX port layout -const std::vector> portArrangement = {{0}, {1}, {2}, {3}, - {4}, {5}, {6}, {7}}; + protected: + // Representation of the A64FX reservation station layout + std::vector rsFreeEntries = {20, 20, 10, 10, 19}; + // Representation of the A64FX port layout + const std::vector> 
portArrangement = { + {0}, {1}, {2}, {3}, {4}, {5}, {6}, {7}}; + + A64FXPortAllocator portAllocator; +}; // Tests correct allocation for RSE0/RSE1/BR attribute groups -TEST(A64FXPortAllocatorTest, singlePortAllocation) { - auto portAllocator = A64FXPortAllocator(portArrangement); - portAllocator.setRSSizeGetter( - [](std::vector& sizeVec) { rsSizes(sizeVec); }); +TEST_F(A64FXPortAllocatorTest, singlePortAllocation) { // Allocate in blocks of 4 to simulate dispatch width of 4 and test dispatch // slot logic @@ -57,10 +67,7 @@ TEST(A64FXPortAllocatorTest, singlePortAllocation) { } // Tests correct allocation when for RSX -TEST(A64FXPortAllocatorTest, RSX) { - auto portAllocator = A64FXPortAllocator(portArrangement); - portAllocator.setRSSizeGetter( - [](std::vector& sizeVec) { rsSizes(sizeVec); }); +TEST_F(A64FXPortAllocatorTest, RSX) { rsFreeEntries = {10, 10, 10, 10, 19}; portAllocator.tick(); EXPECT_EQ(portAllocator.allocate({2, 4, 5, 6}), 2); @@ -83,10 +90,7 @@ TEST(A64FXPortAllocatorTest, RSX) { } // Tests correct allocation when for RSE/RSA -TEST(A64FXPortAllocatorTest, RSEA) { - auto portAllocator = A64FXPortAllocator(portArrangement); - portAllocator.setRSSizeGetter( - [](std::vector& sizeVec) { rsSizes(sizeVec); }); +TEST_F(A64FXPortAllocatorTest, RSEA) { rsFreeEntries = {20, 20, 10, 10, 19}; // RSE portAllocator.tick(); @@ -120,10 +124,7 @@ TEST(A64FXPortAllocatorTest, RSEA) { } // Test correct allocation for Table 1 condition -TEST(A64FXPortAllocator, table1) { - auto portAllocator = A64FXPortAllocator(portArrangement); - portAllocator.setRSSizeGetter( - [](std::vector& sizeVec) { rsSizes(sizeVec); }); +TEST_F(A64FXPortAllocatorTest, table1) { rsFreeEntries = {20, 0, 0, 0, 19}; portAllocator.tick(); EXPECT_EQ(portAllocator.allocate({2, 4, 5, 6}), 2); @@ -146,10 +147,7 @@ TEST(A64FXPortAllocator, table1) { } // Test correct allocation for Table 2 condition -TEST(A64FXPortAllocator, table2) { - auto portAllocator = A64FXPortAllocator(portArrangement); - 
portAllocator.setRSSizeGetter( - [](std::vector& sizeVec) { rsSizes(sizeVec); }); +TEST_F(A64FXPortAllocatorTest, table2) { rsFreeEntries = {20, 20, 0, 0, 19}; portAllocator.tick(); EXPECT_EQ(portAllocator.allocate({2, 4, 5, 6}), 2); @@ -172,10 +170,7 @@ TEST(A64FXPortAllocator, table2) { } // Test correct allocation for Table 3 condition -TEST(A64FXPortAllocator, table3) { - auto portAllocator = A64FXPortAllocator(portArrangement); - portAllocator.setRSSizeGetter( - [](std::vector& sizeVec) { rsSizes(sizeVec); }); +TEST_F(A64FXPortAllocatorTest, table3) { rsFreeEntries = {0, 0, 10, 10, 19}; portAllocator.tick(); EXPECT_EQ(portAllocator.allocate({2, 4, 5, 6}), 5); @@ -198,10 +193,7 @@ TEST(A64FXPortAllocator, table3) { } // Test correct allocation for Table 5 condition -TEST(A64FXPortAllocator, table5) { - auto portAllocator = A64FXPortAllocator(portArrangement); - portAllocator.setRSSizeGetter( - [](std::vector& sizeVec) { rsSizes(sizeVec); }); +TEST_F(A64FXPortAllocatorTest, table5) { rsFreeEntries = {9, 9, 10, 9, 19}; portAllocator.tick(); EXPECT_EQ(portAllocator.allocate({2, 4, 5, 6}), 5); @@ -223,11 +215,8 @@ TEST(A64FXPortAllocator, table5) { rsFreeEntries[1]--; } -// Test correct allocation for Table 6 condition -TEST(A64FXPortAllocator, table6) { - auto portAllocator = A64FXPortAllocator(portArrangement); - portAllocator.setRSSizeGetter( - [](std::vector& sizeVec) { rsSizes(sizeVec); }); +// Test correct allocation for Table 6 condition +TEST_F(A64FXPortAllocatorTest, table6) { rsFreeEntries = {20, 0, 10, 0, 19}; portAllocator.tick(); EXPECT_EQ(portAllocator.allocate({2, 4}), 2); @@ -250,10 +239,7 @@ TEST(A64FXPortAllocator, table6) { } // Test adherence to the dispatch slot logic -TEST(A64FXPortAllocator, dispatchSlots) { - auto portAllocator = A64FXPortAllocator(portArrangement); - portAllocator.setRSSizeGetter( - [](std::vector& sizeVec) { rsSizes(sizeVec); }); +TEST_F(A64FXPortAllocatorTest, dispatchSlots) { rsFreeEntries = {10, 10, 10, 10, 19}; // With 
less than 4 instructions dispatched in a cycle, the next cycle should diff --git a/test/unit/pipeline/DecodeUnitTest.cc b/test/unit/pipeline/DecodeUnitTest.cc index 71062f35bc..f86dbc0caf 100644 --- a/test/unit/pipeline/DecodeUnitTest.cc +++ b/test/unit/pipeline/DecodeUnitTest.cc @@ -22,6 +22,8 @@ class PipelineDecodeUnitTest : public testing::Test { decodeUnit(input, output, predictor), uop(new MockInstruction), uopPtr(uop), + uop2(new MockInstruction), + uop2Ptr(uop2), sourceRegisters({{0, 0}}) {} protected: @@ -33,6 +35,8 @@ class PipelineDecodeUnitTest : public testing::Test { MockInstruction* uop; std::shared_ptr uopPtr; + MockInstruction* uop2; + std::shared_ptr uop2Ptr; std::vector sourceRegisters; }; @@ -60,6 +64,7 @@ TEST_F(PipelineDecodeUnitTest, Tick) { // Check no flush was requested EXPECT_EQ(decodeUnit.shouldFlush(), false); + EXPECT_EQ(decodeUnit.getEarlyFlushes(), 0); } // Tests that the decode unit requests a flush when a non-branch is mispredicted @@ -84,6 +89,26 @@ TEST_F(PipelineDecodeUnitTest, Flush) { // Check that a flush was correctly requested EXPECT_EQ(decodeUnit.shouldFlush(), true); EXPECT_EQ(decodeUnit.getFlushAddress(), 1); + EXPECT_EQ(decodeUnit.getEarlyFlushes(), 1); +} + +// Tests that PurgeFlushed empties the microOps queue +TEST_F(PipelineDecodeUnitTest, purgeFlushed) { + input.getHeadSlots()[0] = {uopPtr, uop2Ptr}; + + decodeUnit.tick(); + EXPECT_EQ(output.getTailSlots()[0].get(), uop); + EXPECT_EQ(input.getHeadSlots()[0].size(), 0); + + // Clear micro-ops queue + decodeUnit.purgeFlushed(); + // Swap output head and tail + output.tick(); + + decodeUnit.tick(); + EXPECT_EQ(output.getTailSlots()[0], nullptr); + EXPECT_EQ(output.getHeadSlots()[0].get(), uop); + EXPECT_EQ(input.getHeadSlots()[0].size(), 0); } } // namespace pipeline diff --git a/test/unit/pipeline/DispatchIssueUnitTest.cc b/test/unit/pipeline/DispatchIssueUnitTest.cc new file mode 100644 index 0000000000..9bd953f567 --- /dev/null +++ 
b/test/unit/pipeline/DispatchIssueUnitTest.cc @@ -0,0 +1,573 @@ +#include "../ConfigInit.hh" +#include "../MockInstruction.hh" +#include "../MockPortAllocator.hh" +#include "gmock/gmock.h" +#include "gtest/gtest.h" +#include "simeng/pipeline/DispatchIssueUnit.hh" +#include "simeng/version.hh" + +namespace simeng { +namespace pipeline { + +using ::testing::Return; +using ::testing::ReturnRef; + +class PipelineDispatchIssueUnitTest : public testing::Test { + public: + PipelineDispatchIssueUnitTest() + : regFile(physRegStruct), + input(1, nullptr), + output(config::SimInfo::getConfig()["Execution-Units"].num_children(), + {1, nullptr}), + diUnit(input, output, regFile, portAlloc, physRegQuants), + uop(new MockInstruction), + uopPtr(uop), + uop2(new MockInstruction), + uop2Ptr(uop2) {} + + protected: + // More complex model used to enable better testing of the DispatchIssueUnit + // class. + ConfigInit configInit = ConfigInit(config::ISA::AArch64, R"YAML({ + Ports: { + '0': {Portname: Port 0, Instruction-Group-Support: [FP, SVE]}, + '1': {Portname: Port 1, Instruction-Group-Support: [PREDICATE]}, + '2': {Portname: Port 2, Instruction-Group-Support: [INT_SIMPLE, INT_MUL, STORE_DATA]}, + '3': {Portname: Port 3, Instruction-Group-Support: [FP_SIMPLE, FP_MUL, SVE_SIMPLE, SVE_MUL]}, + '4': {Portname: Port 4, Instruction-Group-Support: [INT_SIMPLE, INT_DIV_OR_SQRT]}, + '5': {Portname: Port 5, Instruction-Group-Support: [LOAD, STORE_ADDRESS, INT_SIMPLE_ARTH_NOSHIFT, INT_SIMPLE_LOGICAL_NOSHIFT, INT_SIMPLE_CMP]}, + '6': {Portname: Port 6, Instruction-Group-Support: [LOAD, STORE_ADDRESS, INT_SIMPLE_ARTH_NOSHIFT, INT_SIMPLE_LOGICAL_NOSHIFT, INT_SIMPLE_CMP]}, + '7': {Portname: Port 7, Instruction-Group-Support: [BRANCH]} + }, + Reservation-Stations: { + '0': {Size: 20, Dispatch-Rate: 2, Ports: [Port 0, Port 1, Port 2]}, + '1': {Size: 20, Dispatch-Rate: 2, Ports: [Port 3, Port 4]}, + '2': {Size: 10, Dispatch-Rate: 1, Ports: [Port 5]}, + '3': {Size: 10, Dispatch-Rate: 1, Ports: 
[Port 6]}, + '4': {Size: 19, Dispatch-Rate: 1, Ports: [Port 7]} + }, + Execution-Units: { + '0': {Pipelined: True, Blocking-Groups: [INT_DIV_OR_SQRT, FP_DIV_OR_SQRT, SVE_DIV_OR_SQRT]}, + '1': {Pipelined: True, Blocking-Groups: [INT_DIV_OR_SQRT, FP_DIV_OR_SQRT, SVE_DIV_OR_SQRT]}, + '2': {Pipelined: True, Blocking-Groups: [INT_DIV_OR_SQRT, FP_DIV_OR_SQRT, SVE_DIV_OR_SQRT]}, + '3': {Pipelined: True, Blocking-Groups: [INT_DIV_OR_SQRT, FP_DIV_OR_SQRT, SVE_DIV_OR_SQRT]}, + '4': {Pipelined: True, Blocking-Groups: [INT_DIV_OR_SQRT, FP_DIV_OR_SQRT, SVE_DIV_OR_SQRT]}, + '5': {Pipelined: True, Blocking-Groups: [INT_DIV_OR_SQRT, FP_DIV_OR_SQRT, SVE_DIV_OR_SQRT]}, + '6': {Pipelined: True, Blocking-Groups: [INT_DIV_OR_SQRT, FP_DIV_OR_SQRT, SVE_DIV_OR_SQRT]}, + '7': {Pipelined: True, Blocking-Groups: [INT_DIV_OR_SQRT, FP_DIV_OR_SQRT, SVE_DIV_OR_SQRT]} + } + })YAML"); + + // Using AArch64 as basis: {GP, FP/SVE, PRED, COND, SYS, SME} + const std::vector physRegQuants = {96, 128, 48, 128, 64, 64}; + const std::vector physRegStruct = { + {8, physRegQuants[0]}, {256, physRegQuants[1]}, {32, physRegQuants[2]}, + {1, physRegQuants[3]}, {8, physRegQuants[4]}, {256, physRegQuants[5]}}; + RegisterFileSet regFile; + + PipelineBuffer> input; + std::vector>> output; + + MockPortAllocator portAlloc; + + simeng::pipeline::DispatchIssueUnit diUnit; + + MockInstruction* uop; + std::shared_ptr uopPtr; + MockInstruction* uop2; + std::shared_ptr uop2Ptr; + + // As per a64fx.yaml + const uint16_t EAGA = 5; // Maps to RS index 2 + const uint8_t RS_EAGA = 2; // RS associated with EAGA in A64FX + const std::vector refRsSizes = {20, 20, 10, 10, 19}; + + const Register r0 = {0, 0}; + const Register r1 = {0, 1}; + const Register r2 = {0, 2}; +}; + +// No instruction issued due to empty input buffer +TEST_F(PipelineDispatchIssueUnitTest, emptyTick) { + // Ensure empty Reservation stations pre tick() + std::vector rsSizes; + diUnit.getRSSizes(rsSizes); + EXPECT_EQ(rsSizes, refRsSizes); + + diUnit.tick(); + 
// Post tick(), ensure RS sizes are still the same + no RS stalls + rsSizes.clear(); + diUnit.getRSSizes(rsSizes); + EXPECT_EQ(rsSizes, refRsSizes); + EXPECT_EQ(diUnit.getRSStalls(), 0); + + diUnit.issue(); + // Post issue(), ensure Reservation stations are empty + rsSizes.clear(); + diUnit.getRSSizes(rsSizes); + EXPECT_EQ(rsSizes, refRsSizes); + // Post issue(), ensure output buffers are empty + for (size_t i = 0; i < output.size(); i++) { + EXPECT_EQ(output[i].getTailSlots()[0], nullptr); + } + // Post issue(), ensure only front-end stall recorded + EXPECT_EQ(diUnit.getFrontendStalls(), 1); + EXPECT_EQ(diUnit.getBackendStalls(), 0); + EXPECT_EQ(diUnit.getPortBusyStalls(), 0); +} + +// Single instruction has no exception, 2 source operands (both ready), 1 +// destination operand +TEST_F(PipelineDispatchIssueUnitTest, singleInstr) { + // Set-up source & destination registers and ports for this instruction + std::array srcRegs = {r1, r2}; + std::array destRegs = {r0}; + const std::vector suppPorts = {EAGA}; + + // All expected calls to instruction during tick() + EXPECT_CALL(*uop, getSupportedPorts()).WillOnce(ReturnRef(suppPorts)); + uop->setExceptionEncountered(false); + EXPECT_CALL(*uop, getSourceRegisters()) + .WillOnce(Return(span(srcRegs))); + EXPECT_CALL(*uop, isOperandReady(0)).WillOnce(Return(false)); + EXPECT_CALL(*uop, supplyOperand(0, RegisterValue(0, 8))); + EXPECT_CALL(*uop, isOperandReady(1)).WillOnce(Return(false)); + EXPECT_CALL(*uop, supplyOperand(1, RegisterValue(0, 8))); + EXPECT_CALL(*uop, getDestinationRegisters()) + .WillOnce(Return(span(destRegs))); + + // Expected call to port allocator during tick() + EXPECT_CALL(portAlloc, allocate(suppPorts)).WillOnce(Return(EAGA)); + + // Ensure empty reservation stations pre tick() + std::vector rsSizes; + diUnit.getRSSizes(rsSizes); + EXPECT_EQ(rsSizes, refRsSizes); + + input.getHeadSlots()[0] = uopPtr; + diUnit.tick(); + // Ensure post tick that EAGA's reservation station size has decreased by 1 + 
rsSizes.clear(); + diUnit.getRSSizes(rsSizes); + EXPECT_EQ(rsSizes.size(), refRsSizes.size()); + EXPECT_EQ(rsSizes[RS_EAGA], refRsSizes[RS_EAGA] - 1); + // Ensure no stalls recorded in tick() + EXPECT_EQ(diUnit.getFrontendStalls(), 0); + EXPECT_EQ(diUnit.getBackendStalls(), 0); + EXPECT_EQ(diUnit.getPortBusyStalls(), 0); + EXPECT_EQ(diUnit.getRSStalls(), 0); + // Ensure empty output buffers post tick() + for (size_t i = 0; i < output.size(); i++) { + EXPECT_EQ(output[i].getTailSlots()[0], nullptr); + } + + // Detail expected call to port allocator during tick() + EXPECT_CALL(portAlloc, issued(EAGA)); + + diUnit.issue(); + // Ensure all reservation stations empty again post issue() + rsSizes.clear(); + diUnit.getRSSizes(rsSizes); + EXPECT_EQ(rsSizes, refRsSizes); + // Ensure no stalls recorded during issue() + EXPECT_EQ(diUnit.getFrontendStalls(), 0); + EXPECT_EQ(diUnit.getBackendStalls(), 0); + EXPECT_EQ(diUnit.getPortBusyStalls(), 0); + EXPECT_EQ(diUnit.getRSStalls(), 0); + // Ensure all output buffers are empty, except the one associated with EAGA + // port which contains the uop + for (size_t i = 0; i < output.size(); i++) { + if (i != EAGA) + EXPECT_EQ(output[i].getTailSlots()[0], nullptr); + else + EXPECT_EQ(output[i].getTailSlots()[0].get(), uop); + } +} + +// Single instruction with exception +TEST_F(PipelineDispatchIssueUnitTest, singleInstr_exception) { + // Setup supported port instruction can use + const std::vector suppPorts = {EAGA}; + + // All expected calls to instruction during tick() + EXPECT_CALL(*uop, getSupportedPorts()).WillOnce(ReturnRef(suppPorts)); + uop->setExceptionEncountered(true); + + input.getHeadSlots()[0] = uopPtr; + diUnit.tick(); + // Check that instruction has encountered an exception and that it is ready to + // commit + EXPECT_TRUE(uop->canCommit()); + EXPECT_TRUE(uop->exceptionEncountered()); + // Ensure all reservation stations are empty post tick() + std::vector rsSizes; + diUnit.getRSSizes(rsSizes); + EXPECT_EQ(rsSizes, 
refRsSizes); + // Ensure input buffer has been emptied + EXPECT_EQ(input.getHeadSlots()[0], nullptr); + + // Perform issue() + diUnit.issue(); + // Ensure RS still empty post issue() + rsSizes.clear(); + diUnit.getRSSizes(rsSizes); + EXPECT_EQ(rsSizes, refRsSizes); + // Ensure all output ports are empty + for (int i = 0; i < output.size(); i++) { + EXPECT_EQ(output[i].getTailSlots()[0], nullptr); + } + // Ensure frontend stall recorded + EXPECT_EQ(diUnit.getFrontendStalls(), 1); + EXPECT_EQ(diUnit.getBackendStalls(), 0); + EXPECT_EQ(diUnit.getPortBusyStalls(), 0); + EXPECT_EQ(diUnit.getRSStalls(), 0); +} + +// Single instruction that can't be issued in 1 cycle as RS is full +TEST_F(PipelineDispatchIssueUnitTest, singleInstr_rsFull) { + // Setup supported port instructions can use + const std::vector suppPorts = {EAGA}; + + // Artificially fill Reservation station with index 2 + std::vector> insns(refRsSizes[RS_EAGA]); + for (int i = 0; i < insns.size(); i++) { + // Initialise instruction + insns[i] = std::make_shared(); + // All expected calls to instruction during tick() + EXPECT_CALL(*insns[i].get(), getSupportedPorts()) + .WillOnce(ReturnRef(suppPorts)); + EXPECT_CALL(*insns[i].get(), getSourceRegisters()) + .WillOnce(Return(span())); + EXPECT_CALL(*insns[i].get(), getDestinationRegisters()) + .WillOnce(Return(span())); + // Expected call to port allocator during tick() + EXPECT_CALL(portAlloc, allocate(suppPorts)).WillOnce(Return(EAGA)); + + input.getHeadSlots()[0] = insns[i]; + diUnit.tick(); + } + // Ensure Reservation station index 2 is full post tick, and all others are + // empty + std::vector rsSizes; + diUnit.getRSSizes(rsSizes); + EXPECT_EQ(rsSizes.size(), refRsSizes.size()); + for (int i = 0; i < refRsSizes.size(); i++) { + if (i != RS_EAGA) { + EXPECT_EQ(rsSizes[i], refRsSizes[i]); + } else { + EXPECT_EQ(rsSizes[i], 0); + EXPECT_NE(rsSizes[i], refRsSizes[i]); + } + } + // Ensure no stalls recorded in tick() + EXPECT_EQ(diUnit.getFrontendStalls(), 0); 
+ EXPECT_EQ(diUnit.getBackendStalls(), 0); + EXPECT_EQ(diUnit.getPortBusyStalls(), 0); + EXPECT_EQ(diUnit.getRSStalls(), 0); + + // Submit new instruction to same port + // All expected calls to instruction during tick() + EXPECT_CALL(*uop, getSupportedPorts()).WillOnce(ReturnRef(suppPorts)); + // All expected calls to portAllocator during tick() + EXPECT_CALL(portAlloc, allocate(suppPorts)).WillOnce(Return(EAGA)); + EXPECT_CALL(portAlloc, deallocate(EAGA)); + input.getHeadSlots()[0] = uopPtr; + diUnit.tick(); + // Ensure Reservation station sizes have stayed the same + rsSizes.clear(); + diUnit.getRSSizes(rsSizes); + EXPECT_EQ(rsSizes.size(), refRsSizes.size()); + for (int i = 0; i < refRsSizes.size(); i++) { + if (i != RS_EAGA) { + EXPECT_EQ(rsSizes[i], refRsSizes[i]); + } else { + EXPECT_EQ(rsSizes[i], 0); + EXPECT_NE(rsSizes[i], refRsSizes[i]); + } + } + // Check input pipelineBuffer stalled + EXPECT_TRUE(input.isStalled()); + // Ensure one rsStall recorded in tick() + EXPECT_EQ(diUnit.getFrontendStalls(), 0); + EXPECT_EQ(diUnit.getBackendStalls(), 0); + EXPECT_EQ(diUnit.getPortBusyStalls(), 0); + EXPECT_EQ(diUnit.getRSStalls(), 1); +} + +// Single instruction not issued in 1 cycle as port is stalled +TEST_F(PipelineDispatchIssueUnitTest, singleInstr_portStall) { + // Setup supported port instructions can use + const std::vector suppPorts = {EAGA}; + + // Submit new instruction to a port + // All expected calls to instruction during tick() + EXPECT_CALL(*uop, getSupportedPorts()).WillOnce(ReturnRef(suppPorts)); + uop->setExceptionEncountered(false); + EXPECT_CALL(*uop, getSourceRegisters()).WillOnce(Return(span())); + EXPECT_CALL(*uop, getDestinationRegisters()) + .WillOnce(Return(span())); + // Expected call to portAllocator during tick() + EXPECT_CALL(portAlloc, allocate(suppPorts)).WillOnce(Return(EAGA)); + + input.getHeadSlots()[0] = uopPtr; + diUnit.tick(); + + // Ensure correct RS sizes post tick() + std::vector rsSizes; + diUnit.getRSSizes(rsSizes); + 
EXPECT_EQ(rsSizes.size(), refRsSizes.size()); + for (int i = 0; i < refRsSizes.size(); i++) { + if (i != RS_EAGA) { + EXPECT_EQ(rsSizes[i], refRsSizes[i]); + } else { + EXPECT_EQ(rsSizes[i], refRsSizes[i] - 1); + } + } + // Ensure no stalls recorded in tick() + EXPECT_EQ(diUnit.getFrontendStalls(), 0); + EXPECT_EQ(diUnit.getBackendStalls(), 0); + EXPECT_EQ(diUnit.getPortBusyStalls(), 0); + EXPECT_EQ(diUnit.getRSStalls(), 0); + + // Stall issue port + output[EAGA].stall(true); + + // Perform issue() + diUnit.issue(); + // Ensure correct RS sizes post issue() + rsSizes.clear(); + diUnit.getRSSizes(rsSizes); + EXPECT_EQ(rsSizes.size(), refRsSizes.size()); + for (int i = 0; i < refRsSizes.size(); i++) { + if (i != RS_EAGA) { + EXPECT_EQ(rsSizes[i], refRsSizes[i]); + } else { + EXPECT_EQ(rsSizes[i], refRsSizes[i] - 1); + } + } + // Ensure all output ports are empty + for (int i = 0; i < output.size(); i++) { + EXPECT_EQ(output[i].getTailSlots()[0], nullptr); + } + // Ensure portBusyStall and backend stall recorded in issue() + EXPECT_EQ(diUnit.getFrontendStalls(), 0); + EXPECT_EQ(diUnit.getBackendStalls(), 1); + EXPECT_EQ(diUnit.getPortBusyStalls(), 1); + EXPECT_EQ(diUnit.getRSStalls(), 0); +} + +// Try dispatch two instructions with RAW hazard after renaming, second should +// not be issued as it is dependant on first. Use forwardOperand() to resolve +// dependency. 
+TEST_F(PipelineDispatchIssueUnitTest, createdependency_raw) { + // Set-up source & destination registers and ports for the instructions + std::array srcRegs_1 = {}; + std::array destRegs_1 = {r0}; + std::array srcRegs_2 = {r0}; + std::array destRegs_2 = {r1}; + const std::vector suppPorts = {EAGA}; + + // All expected calls to instruction 1 during tick() + EXPECT_CALL(*uop, getSupportedPorts()).WillOnce(ReturnRef(suppPorts)); + uop->setExceptionEncountered(false); + EXPECT_CALL(*uop, getSourceRegisters()) + .WillOnce(Return(span(srcRegs_1))); + EXPECT_CALL(*uop, isOperandReady(0)).WillOnce(Return(false)); + EXPECT_CALL(*uop, supplyOperand(0, RegisterValue(0, 8))); + EXPECT_CALL(*uop, getDestinationRegisters()) + .WillOnce(Return(span(destRegs_1))); + // Expected call to port allocator during tick() + EXPECT_CALL(portAlloc, allocate(suppPorts)).WillOnce(Return(EAGA)); + EXPECT_CALL(portAlloc, issued(EAGA)); + + // Process instruction 1 + input.getHeadSlots()[0] = uopPtr; + diUnit.tick(); + diUnit.issue(); + EXPECT_EQ(output[EAGA].getTailSlots()[0], uopPtr); + output[EAGA].tick(); + + // All expected calls to instruction 2 during tick() + EXPECT_CALL(*uop2, getSupportedPorts()).WillOnce(ReturnRef(suppPorts)); + uop->setExceptionEncountered(false); + EXPECT_CALL(*uop2, getSourceRegisters()) + .WillOnce(Return(span(srcRegs_2))); + EXPECT_CALL(*uop2, isOperandReady(0)).WillOnce(Return(false)); + EXPECT_CALL(*uop2, getDestinationRegisters()) + .WillOnce(Return(span(destRegs_2))); + // Expected call to port allocator during tick() + EXPECT_CALL(portAlloc, allocate(suppPorts)).WillOnce(Return(EAGA)); + + // Process instruction 2 + input.getHeadSlots()[0] = uop2Ptr; + diUnit.tick(); + diUnit.issue(); + // Ensure correct RS sizes post tick() & issue() + std::vector rsSizes; + diUnit.getRSSizes(rsSizes); + EXPECT_EQ(rsSizes.size(), refRsSizes.size()); + for (int i = 0; i < refRsSizes.size(); i++) { + if (i != RS_EAGA) { + EXPECT_EQ(rsSizes[i], refRsSizes[i]); + } else { + 
EXPECT_EQ(rsSizes[i], refRsSizes[i] - 1); + } + } + // Ensure all output ports are empty + for (int i = 0; i < output.size(); i++) { + EXPECT_EQ(output[i].getTailSlots()[0], nullptr); + } + // Ensure backend stall recorded in issue() + EXPECT_EQ(diUnit.getFrontendStalls(), 0); + EXPECT_EQ(diUnit.getBackendStalls(), 1); + EXPECT_EQ(diUnit.getPortBusyStalls(), 0); + EXPECT_EQ(diUnit.getRSStalls(), 0); + + // Forward operand for register r0 + std::array vals = {RegisterValue(6)}; + EXPECT_CALL(*uop2, supplyOperand(0, vals[0])); + EXPECT_CALL(*uop2, canExecute()).WillOnce(Return(true)); + diUnit.forwardOperands(span(srcRegs_2), vals); + + // Try issue again for instruction 2 + EXPECT_CALL(portAlloc, issued(EAGA)); + diUnit.issue(); + // Ensure correct RS sizes post issue() + rsSizes.clear(); + diUnit.getRSSizes(rsSizes); + EXPECT_EQ(rsSizes, refRsSizes); + // Ensure all output ports are empty except EAGA + for (int i = 0; i < output.size(); i++) { + if (i != EAGA) + EXPECT_EQ(output[i].getTailSlots()[0], nullptr); + else + EXPECT_EQ(output[i].getTailSlots()[0], uop2Ptr); + } + // Ensure no further stalls recorded in issue() + EXPECT_EQ(diUnit.getFrontendStalls(), 0); + EXPECT_EQ(diUnit.getBackendStalls(), 1); + EXPECT_EQ(diUnit.getPortBusyStalls(), 0); + EXPECT_EQ(diUnit.getRSStalls(), 0); +} + +// Ensure correct instructions are flushed from reservation stations and the +// dependency matrix +TEST_F(PipelineDispatchIssueUnitTest, purgeFlushed) { + // Set-up source & destination registers and ports for the instructions; + // creating a dependency + std::array srcRegs_1 = {}; + std::array destRegs_1 = {r0}; + std::array srcRegs_2 = {r0}; + std::array destRegs_2 = {r1}; + const std::vector suppPorts = {EAGA}; + + // All expected calls to instruction 1 during tick() + EXPECT_CALL(*uop, getSupportedPorts()).WillOnce(ReturnRef(suppPorts)); + uop->setExceptionEncountered(false); + EXPECT_CALL(*uop, getSourceRegisters()) + .WillOnce(Return(span(srcRegs_1))); + 
EXPECT_CALL(*uop, isOperandReady(0)).WillOnce(Return(false)); + EXPECT_CALL(*uop, supplyOperand(0, RegisterValue(0, 8))); + EXPECT_CALL(*uop, getDestinationRegisters()) + .WillOnce(Return(span(destRegs_1))); + // Expected call to port allocator during tick() + EXPECT_CALL(portAlloc, allocate(suppPorts)).WillOnce(Return(EAGA)); + + // Process instruction 1 + input.getHeadSlots()[0] = uopPtr; + diUnit.tick(); + EXPECT_EQ(input.getHeadSlots()[0], nullptr); + + // All expected calls to instruction 2 during tick() + EXPECT_CALL(*uop2, getSupportedPorts()).WillOnce(ReturnRef(suppPorts)); + uop->setExceptionEncountered(false); + EXPECT_CALL(*uop2, getSourceRegisters()) + .WillOnce(Return(span(srcRegs_2))); + EXPECT_CALL(*uop2, isOperandReady(0)).WillOnce(Return(false)); + EXPECT_CALL(*uop2, getDestinationRegisters()) + .WillOnce(Return(span(destRegs_2))); + // Expected call to port allocator during tick() + EXPECT_CALL(portAlloc, allocate(suppPorts)).WillOnce(Return(EAGA)); + + // Process instruction 2 + input.getHeadSlots()[0] = uop2Ptr; + diUnit.tick(); + EXPECT_EQ(input.getHeadSlots()[0], nullptr); + + // Ensure correct RS sizes post tick() + std::vector rsSizes; + diUnit.getRSSizes(rsSizes); + EXPECT_EQ(rsSizes.size(), refRsSizes.size()); + for (int i = 0; i < refRsSizes.size(); i++) { + if (i != RS_EAGA) { + EXPECT_EQ(rsSizes[i], refRsSizes[i]); + } else { + EXPECT_EQ(rsSizes[i], refRsSizes[i] - 2); + } + } + // Ensure all output ports are empty + for (int i = 0; i < output.size(); i++) { + EXPECT_EQ(output[i].getTailSlots()[0], nullptr); + } + // Ensure no stalls recorded + EXPECT_EQ(diUnit.getFrontendStalls(), 0); + EXPECT_EQ(diUnit.getBackendStalls(), 0); + EXPECT_EQ(diUnit.getPortBusyStalls(), 0); + EXPECT_EQ(diUnit.getRSStalls(), 0); + + // Remove flushed uops + EXPECT_CALL(portAlloc, deallocate(EAGA)).Times(2); + uopPtr->setFlushed(); + uop2Ptr->setFlushed(); + diUnit.purgeFlushed(); + + // Check reservation station sizes + rsSizes.clear(); + 
diUnit.getRSSizes(rsSizes); + EXPECT_EQ(rsSizes, refRsSizes); + + // Perform issue to see if `uop` is still present + diUnit.issue(); + // Ensure all output ports are empty + for (int i = 0; i < output.size(); i++) { + EXPECT_EQ(output[i].getTailSlots()[0], nullptr); + } + // Ensure frontend stall recorded in issue() + EXPECT_EQ(diUnit.getFrontendStalls(), 1); + EXPECT_EQ(diUnit.getBackendStalls(), 0); + EXPECT_EQ(diUnit.getPortBusyStalls(), 0); + EXPECT_EQ(diUnit.getRSStalls(), 0); + + // Call forwardOperand() and issue() to release `uop2` (if it were still + // present) + std::array vals = {RegisterValue(6)}; + diUnit.forwardOperands(span(srcRegs_2), vals); + // Check reservation station sizes + rsSizes.clear(); + diUnit.getRSSizes(rsSizes); + EXPECT_EQ(rsSizes, refRsSizes); + + diUnit.issue(); + // Ensure all output ports are empty + for (int i = 0; i < output.size(); i++) { + EXPECT_EQ(output[i].getTailSlots()[0], nullptr); + } + // Ensure frontend stall recorded in issue() + EXPECT_EQ(diUnit.getFrontendStalls(), 2); + EXPECT_EQ(diUnit.getBackendStalls(), 0); + EXPECT_EQ(diUnit.getPortBusyStalls(), 0); + EXPECT_EQ(diUnit.getRSStalls(), 0); +} + +// Test based on a64fx config file reservation staion configuration +TEST_F(PipelineDispatchIssueUnitTest, getRSSizes) { + std::vector rsSizes; + diUnit.getRSSizes(rsSizes); + EXPECT_EQ(rsSizes, refRsSizes); +} + +} // namespace pipeline +} // namespace simeng + +// tick +// issue \ No newline at end of file diff --git a/test/unit/pipeline/ExecuteUnitTest.cc b/test/unit/pipeline/ExecuteUnitTest.cc index eb130f53ad..0f82593ff6 100644 --- a/test/unit/pipeline/ExecuteUnitTest.cc +++ b/test/unit/pipeline/ExecuteUnitTest.cc @@ -56,17 +56,35 @@ class PipelineExecuteUnitTest : public testing::Test { MockInstruction* thirdUop; std::shared_ptr uopPtr; - std::shared_ptr secondUopPtr; - std::shared_ptr thirdUopPtr; + std::shared_ptr secondUopPtr; + std::shared_ptr thirdUopPtr; }; // Tests that the execution unit processes nothing 
if no instruction is present TEST_F(PipelineExecuteUnitTest, TickEmpty) { + EXPECT_TRUE(executeUnit.isEmpty()); executeUnit.tick(); + EXPECT_TRUE(executeUnit.isEmpty()); EXPECT_EQ(output.getTailSlots()[0], nullptr); } +// Tests that a flushed instruction is removed from the input buffer and not +// processed through the EU +TEST_F(PipelineExecuteUnitTest, flushedInputInsn) { + input.getHeadSlots()[0] = uopPtr; + + // Setup instruction + uopPtr->setFlushed(); + ON_CALL(*uop, canExecute()).WillByDefault(Return(true)); + + executeUnit.tick(); + + EXPECT_EQ(input.getHeadSlots()[0], nullptr); + EXPECT_EQ(output.getTailSlots()[0], nullptr); + EXPECT_EQ(executeUnit.getCycles(), 0); +} + // Tests that the execution unit executes an instruction and forwards the // results TEST_F(PipelineExecuteUnitTest, Execute) { @@ -119,7 +137,8 @@ TEST_F(PipelineExecuteUnitTest, ExecuteBranch) { // Check that the branch predictor was updated with the results EXPECT_CALL(*uop, getBranchType()).Times(1); - EXPECT_CALL(predictor, update(2, taken, pc, BranchType::Unconditional)) + EXPECT_CALL(predictor, + update(insnAddress, taken, pc, BranchType::Unconditional)) .Times(1); // Check that empty forwarding call is made @@ -128,8 +147,13 @@ TEST_F(PipelineExecuteUnitTest, ExecuteBranch) { executeUnit.tick(); + EXPECT_EQ(uopPtr->wasBranchMispredicted(), false); + EXPECT_EQ(uopPtr->wasBranchTaken(), taken); + EXPECT_EQ(executeUnit.shouldFlush(), false); EXPECT_EQ(output.getTailSlots()[0].get(), uop); + EXPECT_EQ(executeUnit.getBranchExecutedCount(), 1); + EXPECT_EQ(executeUnit.getBranchMispredictedCount(), 0); } // Test that an instruction that already encountered an exception will raise it @@ -192,7 +216,7 @@ TEST_F(PipelineExecuteUnitTest, PipelineStall) { EXPECT_EQ(input.getHeadSlots()[0].get(), secondUop); EXPECT_EQ(output.getTailSlots()[0], nullptr); executeUnit.tick(); - EXPECT_EQ(input.getHeadSlots()[0].get(), nullptr); + EXPECT_EQ(input.getHeadSlots()[0], nullptr); 
EXPECT_EQ(output.getTailSlots()[0].get(), uop); } @@ -204,13 +228,14 @@ TEST_F(PipelineExecuteUnitTest, OperationStall) { uop->setLatency(5); uop->setStallCycles(5); ON_CALL(*uop, getGroup()).WillByDefault(Return(3)); + ON_CALL(*uop, canExecute()).WillByDefault(Return(true)); ON_CALL(*secondUop, getGroup()).WillByDefault(Return(4)); + ON_CALL(*secondUop, canExecute()).WillByDefault(Return(true)); ON_CALL(*thirdUop, getGroup()).WillByDefault(Return(2)); - - ON_CALL(*uop, canExecute()).WillByDefault(Return(true)); ON_CALL(*thirdUop, canExecute()).WillByDefault(Return(true)); + EXPECT_CALL(*uop, execute()).Times(1); - EXPECT_CALL(*secondUop, execute()).Times(0); + EXPECT_CALL(*secondUop, execute()).Times(1); EXPECT_CALL(*thirdUop, execute()).Times(1); executeUnit.tick(); @@ -218,21 +243,116 @@ TEST_F(PipelineExecuteUnitTest, OperationStall) { EXPECT_EQ(output.getTailSlots()[0], nullptr); input.getHeadSlots()[0] = secondUopPtr; executeUnit.tick(); - EXPECT_EQ(input.getHeadSlots()[0].get(), nullptr); + EXPECT_EQ(input.getHeadSlots()[0], nullptr); EXPECT_EQ(output.getTailSlots()[0], nullptr); input.getHeadSlots()[0] = thirdUopPtr; executeUnit.tick(); - EXPECT_EQ(input.getHeadSlots()[0].get(), nullptr); + EXPECT_EQ(input.getHeadSlots()[0], nullptr); EXPECT_EQ(output.getTailSlots()[0], nullptr); executeUnit.tick(); - EXPECT_EQ(input.getHeadSlots()[0].get(), nullptr); + EXPECT_EQ(input.getHeadSlots()[0], nullptr); EXPECT_EQ(output.getTailSlots()[0], nullptr); executeUnit.tick(); - EXPECT_EQ(input.getHeadSlots()[0].get(), nullptr); + EXPECT_EQ(input.getHeadSlots()[0], nullptr); EXPECT_EQ(output.getTailSlots()[0].get(), uop); executeUnit.tick(); - EXPECT_EQ(input.getHeadSlots()[0].get(), nullptr); + EXPECT_EQ(input.getHeadSlots()[0], nullptr); + EXPECT_EQ(output.getTailSlots()[0].get(), thirdUop); + executeUnit.tick(); + EXPECT_EQ(input.getHeadSlots()[0], nullptr); + EXPECT_EQ(output.getTailSlots()[0].get(), secondUop); +} + +// Test that a mispredicted branch instruction is 
properly handled +TEST_F(PipelineExecuteUnitTest, mispredictedBranch) { + input.getHeadSlots()[0] = uopPtr; + + ON_CALL(*uop, canExecute()).WillByDefault(Return(true)); + // Anticipate testing instruction type; return true for branch + ON_CALL(*uop, isBranch()).WillByDefault(Return(true)); + // Return branch type as unconditional by default + ON_CALL(*uop, getBranchType()).WillByDefault(Return(BranchType::Conditional)); + + const bool takenPred = false; + const bool taken = true; + const uint64_t pc = 4; + const uint64_t insnAddress = 16; + const uint64_t insnID = 5; + + uop->setInstructionAddress(insnAddress); + uop->setInstructionId(insnID); + uop->setBranchPrediction({takenPred, insnAddress + 4}); + + EXPECT_CALL(*uop, execute()).WillOnce(Invoke([&]() { + uop->setExecuted(true); + uop->setBranchResults(taken, pc); + })); + + // Check that the branch predictor was updated with the results + EXPECT_CALL(*uop, getBranchType()).Times(1); + + EXPECT_CALL(predictor, + update(insnAddress, taken, pc, BranchType::Conditional)) + .Times(1); + + // Check that empty forwarding call is made + EXPECT_CALL(executionHandlers, forwardOperands(IsEmpty(), IsEmpty())) + .Times(1); + + executeUnit.tick(); + + EXPECT_EQ(uopPtr->wasBranchMispredicted(), true); + EXPECT_EQ(uopPtr->wasBranchTaken(), taken); + + EXPECT_EQ(executeUnit.shouldFlush(), true); + EXPECT_EQ(output.getTailSlots()[0].get(), uop); + EXPECT_EQ(executeUnit.getBranchExecutedCount(), 1); + EXPECT_EQ(executeUnit.getBranchMispredictedCount(), 1); + EXPECT_EQ(executeUnit.getFlushAddress(), pc); + EXPECT_EQ(executeUnit.getFlushInsnId(), insnID); +} + +// Test that the flushing mechansim works correctly via purgeFlushed() +TEST_F(PipelineExecuteUnitTest, purgeFlushed) { + input.getHeadSlots()[0] = uopPtr; + + uop->setLatency(5); + uop->setStallCycles(5); + // Set up instructions so that only one is in the EU pipeline at a time + ON_CALL(*uop, getGroup()).WillByDefault(Return(3)); + ON_CALL(*uop, 
canExecute()).WillByDefault(Return(true)); + ON_CALL(*secondUop, getGroup()).WillByDefault(Return(4)); + ON_CALL(*secondUop, canExecute()).WillByDefault(Return(true)); + ON_CALL(*thirdUop, getGroup()).WillByDefault(Return(5)); + ON_CALL(*thirdUop, canExecute()).WillByDefault(Return(true)); + + EXPECT_CALL(*uop, execute()).Times(0); + EXPECT_CALL(*secondUop, execute()).Times(0); + EXPECT_CALL(*thirdUop, execute()).Times(1); + + // Stage all three instructions in EU pipeline + executeUnit.tick(); + EXPECT_EQ(input.getHeadSlots()[0], nullptr); + EXPECT_EQ(output.getTailSlots()[0], nullptr); + input.getHeadSlots()[0] = secondUopPtr; + executeUnit.tick(); + EXPECT_EQ(input.getHeadSlots()[0], nullptr); + EXPECT_EQ(output.getTailSlots()[0], nullptr); + input.getHeadSlots()[0] = thirdUopPtr; + executeUnit.tick(); + EXPECT_EQ(input.getHeadSlots()[0], nullptr); + EXPECT_EQ(output.getTailSlots()[0], nullptr); + + // Flush first two instructions + uopPtr->setFlushed(); + secondUopPtr->setFlushed(); + executeUnit.purgeFlushed(); + + // Ensure non-flushed instruction progresses through the pipeline + executeUnit.tick(); + EXPECT_EQ(input.getHeadSlots()[0], nullptr); EXPECT_EQ(output.getTailSlots()[0].get(), thirdUop); + EXPECT_TRUE(executeUnit.isEmpty()); } } // namespace pipeline diff --git a/test/unit/pipeline/FetchUnitTest.cc b/test/unit/pipeline/FetchUnitTest.cc index fef76a9f61..f404029640 100644 --- a/test/unit/pipeline/FetchUnitTest.cc +++ b/test/unit/pipeline/FetchUnitTest.cc @@ -10,8 +10,15 @@ #include "simeng/pipeline/PipelineBuffer.hh" using ::testing::_; +using ::testing::AllOf; +using ::testing::AnyNumber; +using ::testing::AnyOf; +using ::testing::AtLeast; using ::testing::DoAll; using ::testing::Field; +using ::testing::Gt; +using ::testing::Lt; +using ::testing::Ne; using ::testing::Return; using ::testing::SetArgReferee; @@ -24,13 +31,18 @@ class PipelineFetchUnitTest : public testing::Test { : output(1, {}), fetchBuffer({{0, 16}, 0, 0}), 
completedReads(&fetchBuffer, 1), - fetchUnit(output, memory, 1024, 0, 16, isa, predictor), + fetchUnit(output, memory, 1024, 0, blockSize, isa, predictor), uop(new MockInstruction), - uopPtr(uop) { + uopPtr(uop), + uop2(new MockInstruction), + uopPtr2(uop2) { uopPtr->setInstructionAddress(0); } protected: + const uint8_t insnMaxSizeBytes = 4; + const uint8_t blockSize = 16; + PipelineBuffer output; MockMemoryInterface memory; MockArchitecture isa; @@ -43,6 +55,8 @@ class PipelineFetchUnitTest : public testing::Test { MockInstruction* uop; std::shared_ptr uopPtr; + MockInstruction* uop2; + std::shared_ptr uopPtr2; }; // Tests that ticking a fetch unit attempts to predecode from the correct @@ -52,7 +66,7 @@ TEST_F(PipelineFetchUnitTest, Tick) { ON_CALL(memory, getCompletedReads()).WillByDefault(Return(completedReads)); - ON_CALL(isa, getMaxInstructionSize()).WillByDefault(Return(4)); + ON_CALL(isa, getMaxInstructionSize()).WillByDefault(Return(insnMaxSizeBytes)); // Set the output parameter to a 1-wide macro-op EXPECT_CALL(isa, predecode(_, _, 0, _)) @@ -84,8 +98,8 @@ TEST_F(PipelineFetchUnitTest, TickStalled) { // Tests that the fetch unit will handle instructions that straddle fetch block // boundaries by automatically requesting the next block of data. 
TEST_F(PipelineFetchUnitTest, FetchUnaligned) { - MacroOp macroOp = {uopPtr}; - ON_CALL(isa, getMaxInstructionSize()).WillByDefault(Return(4)); + MacroOp mOp = {uopPtr}; + ON_CALL(isa, getMaxInstructionSize()).WillByDefault(Return(insnMaxSizeBytes)); ON_CALL(memory, getCompletedReads()).WillByDefault(Return(completedReads)); // Set PC to 14, so there will not be enough data to start decoding @@ -99,12 +113,328 @@ TEST_F(PipelineFetchUnitTest, FetchUnaligned) { fetchUnit.requestFromPC(); // Tick again, expecting that decoding will now resume - MemoryReadResult nextBlockValue = {{16, 16}, 0, 1}; + MemoryReadResult nextBlockValue = {{16, blockSize}, 0, 1}; span nextBlock = {&nextBlockValue, 1}; - EXPECT_CALL(memory, getCompletedReads()).WillOnce(Return(nextBlock)); - EXPECT_CALL(isa, predecode(_, _, _, _)) - .WillOnce(DoAll(SetArgReferee<3>(macroOp), Return(4))); + ON_CALL(memory, getCompletedReads()).WillByDefault(Return(nextBlock)); + ON_CALL(isa, predecode(_, _, _, _)) + .WillByDefault(DoAll(SetArgReferee<3>(mOp), Return(4))); + EXPECT_CALL(memory, getCompletedReads()).Times(1); + EXPECT_CALL(memory, clearCompletedReads()).Times(4); + EXPECT_CALL(isa, getMaxInstructionSize()).Times(8); + EXPECT_CALL(isa, predecode(_, _, _, _)).Times(4); + + // Tick 4 times to process all 16 bytes of fetched data + for (int i = 0; i < 4; i++) { + fetchUnit.tick(); + } + // Tick a 5th time to ensure all buffered bytes have been used + EXPECT_CALL(memory, getCompletedReads()).Times(1); + EXPECT_CALL(isa, getMaxInstructionSize()).Times(1); + EXPECT_CALL(isa, predecode(_, _, _, _)).Times(0); + fetchUnit.tick(); +} + +// Tests that a properly aligned PC (to the fetch block boundary) is correctly +// fetched +TEST_F(PipelineFetchUnitTest, fetchAligned) { + const uint8_t pc = 16; + + ON_CALL(isa, getMaxInstructionSize()).WillByDefault(Return(insnMaxSizeBytes)); + + MemoryAccessTarget target = {pc, blockSize}; + EXPECT_CALL(isa, getMaxInstructionSize()).Times(1); + EXPECT_CALL(memory, 
requestRead(target, _)).Times(1); + + // Request block from Memory + fetchUnit.updatePC(pc); + fetchUnit.requestFromPC(); + + MacroOp mOp = {uopPtr}; + MemoryReadResult memReadResult = {target, RegisterValue(0xFFFF, blockSize), + 1}; + span nextBlock = {&memReadResult, 1}; + ON_CALL(memory, getCompletedReads()).WillByDefault(Return(nextBlock)); + ON_CALL(isa, predecode(_, _, _, _)) + .WillByDefault(DoAll(SetArgReferee<3>(mOp), Return(4))); + EXPECT_CALL(memory, getCompletedReads()).Times(1); + EXPECT_CALL(memory, clearCompletedReads()).Times(4); + EXPECT_CALL(isa, getMaxInstructionSize()).Times(8); + EXPECT_CALL(isa, predecode(_, _, _, _)).Times(4); + + // Tick 4 times to process all 16 bytes of fetched data + for (int i = 0; i < 4; i++) { + fetchUnit.tick(); + } + // Tick a 5th time to ensure all buffered bytes have been used + EXPECT_CALL(memory, getCompletedReads()).Times(1); + EXPECT_CALL(memory, clearCompletedReads()).Times(0); + EXPECT_CALL(isa, getMaxInstructionSize()).Times(1); + EXPECT_CALL(isa, predecode(_, _, _, _)).Times(0); + fetchUnit.tick(); +} + +// Tests that halting functionality triggers correctly +TEST_F(PipelineFetchUnitTest, halted) { + ON_CALL(isa, getMaxInstructionSize()).WillByDefault(Return(insnMaxSizeBytes)); + EXPECT_FALSE(fetchUnit.hasHalted()); + fetchUnit.tick(); + EXPECT_FALSE(fetchUnit.hasHalted()); + + // Test PC >= programByteLength triggers halting + fetchUnit.updatePC(1024); + EXPECT_TRUE(fetchUnit.hasHalted()); + + // Test PC being incremented to >= programByteLength triggers halting + fetchUnit.updatePC(1008); + EXPECT_FALSE(fetchUnit.hasHalted()); + + MemoryAccessTarget target = {1008, blockSize}; + EXPECT_CALL(isa, getMaxInstructionSize()).Times(1); + EXPECT_CALL(memory, requestRead(target, _)).Times(1); + fetchUnit.requestFromPC(); + + MacroOp mOp = {uopPtr}; + MemoryReadResult memReadResult = {target, RegisterValue(0xFFFF, blockSize), + 1}; + span nextBlock = {&memReadResult, 1}; + ON_CALL(memory, 
getCompletedReads()).WillByDefault(Return(nextBlock)); + ON_CALL(isa, predecode(_, _, _, _)) + .WillByDefault(DoAll(SetArgReferee<3>(mOp), Return(4))); + EXPECT_CALL(memory, getCompletedReads()).Times(1); + EXPECT_CALL(memory, clearCompletedReads()).Times(4); + EXPECT_CALL(isa, getMaxInstructionSize()).Times(8); + EXPECT_CALL(isa, predecode(_, _, _, _)).Times(4); + // Tick 4 times to process all 16 bytes of fetched data + for (int i = 0; i < 4; i++) { + fetchUnit.tick(); + } + EXPECT_TRUE(fetchUnit.hasHalted()); +} + +// Tests that fetching a branch instruction (predicted taken) mid block causes a +// branch stall + discards the remaining fetched instructions +TEST_F(PipelineFetchUnitTest, fetchTakenBranchMidBlock) { + const uint8_t pc = 16; + + ON_CALL(isa, getMaxInstructionSize()).WillByDefault(Return(insnMaxSizeBytes)); + + MemoryAccessTarget target = {pc, blockSize}; + EXPECT_CALL(isa, getMaxInstructionSize()).Times(1); + EXPECT_CALL(memory, requestRead(target, _)).Times(1); + + // Request block from memory + fetchUnit.updatePC(pc); + fetchUnit.requestFromPC(); + + MacroOp mOp = {uopPtr}; + MemoryReadResult memReadResult = {target, RegisterValue(0xFFFF, blockSize), + 1}; + span nextBlock = {&memReadResult, 1}; + ON_CALL(memory, getCompletedReads()).WillByDefault(Return(nextBlock)); + ON_CALL(isa, predecode(_, _, _, _)) + .WillByDefault(DoAll(SetArgReferee<3>(mOp), Return(4))); + EXPECT_CALL(memory, getCompletedReads()).Times(1); + + // For first tick, process instruction as non-branch + EXPECT_CALL(memory, clearCompletedReads()).Times(1); + EXPECT_CALL(isa, getMaxInstructionSize()).Times(2); + EXPECT_CALL(isa, predecode(_, _, _, _)).Times(1); + EXPECT_CALL(*uop, isBranch()).WillOnce(Return(false)); + fetchUnit.tick(); + + // For second tick, process a taken branch meaning rest of block is discarded + // & a new memory block is requested + EXPECT_CALL(memory, getCompletedReads()).Times(0); + EXPECT_CALL(memory, clearCompletedReads()).Times(1); + EXPECT_CALL(isa, 
getMaxInstructionSize()).Times(2); + EXPECT_CALL(isa, predecode(_, _, _, _)).Times(1); + EXPECT_CALL(*uop, isBranch()).WillOnce(Return(true)); + BranchType bType = BranchType::Unconditional; + uint64_t knownOff = 304; + EXPECT_CALL(*uop, getBranchType()).WillOnce(Return(bType)); + EXPECT_CALL(*uop, getKnownOffset()).WillOnce(Return(knownOff)); + BranchPrediction pred = {true, pc + knownOff}; + EXPECT_CALL(predictor, predict(20, bType, knownOff)).WillOnce(Return(pred)); + fetchUnit.tick(); + + // Ensure on next tick, predecode is not called + EXPECT_CALL(memory, getCompletedReads()).Times(1); + EXPECT_CALL(memory, clearCompletedReads()).Times(0); + EXPECT_CALL(isa, getMaxInstructionSize()).Times(1); + EXPECT_CALL(isa, predecode(_, _, _, _)).Times(0); + fetchUnit.tick(); + + // Make sure on next call to `requestFromPC`, target is address 320 + // (pred.target) + target = {pred.target, blockSize}; + EXPECT_CALL(isa, getMaxInstructionSize()).Times(1); + EXPECT_CALL(memory, requestRead(target, _)).Times(1); + fetchUnit.requestFromPC(); +} + +// Tests the functionality of the supplying from the Loop Buffer +TEST_F(PipelineFetchUnitTest, supplyFromLoopBuffer) { + // Set instructions to be fetched from memory + MemoryReadResult memReadResult = { + {0x0, blockSize}, RegisterValue(0xFFFF, blockSize), 1}; + span nextBlock = {&memReadResult, 1}; + ON_CALL(memory, getCompletedReads()).WillByDefault(Return(nextBlock)); + + ON_CALL(isa, getMaxInstructionSize()).WillByDefault(Return(insnMaxSizeBytes)); + + // Register loop boundary + fetchUnit.registerLoopBoundary(0xC); + + // Set the instructions, within the loop body, to be returned from predecode + MacroOp mOp2 = {uopPtr2}; + ON_CALL(isa, predecode(_, _, 0xC, _)) + .WillByDefault(DoAll(SetArgReferee<3>(mOp2), Return(4))); + ON_CALL(*uop2, isBranch()).WillByDefault(Return(true)); + + MacroOp mOp = {uopPtr}; + ON_CALL(isa, predecode(_, _, Ne(0xC), _)) + .WillByDefault(DoAll(SetArgReferee<3>(mOp), Return(4))); + ON_CALL(*uop, 
isBranch()).WillByDefault(Return(false)); + + // Set the expectation from the predictor to be true so a loop body will + // be detected + ON_CALL(predictor, predict(_, _, _)) + .WillByDefault(Return(BranchPrediction({true, 0x0}))); + + // Set Loop Buffer state to be LoopBufferState::FILLING + // Tick 4 times to process all 16 bytes of fetched data + for (int i = 0; i < 4; i++) { + fetchUnit.tick(); + } + + // Fetch the next block of instructions from memory + fetchUnit.requestFromPC(); + + // Fill Loop Buffer and set its state to be LoopBufferState::SUPPLYING + // Tick 4 times to process all 16 bytes of fetched data + for (int i = 0; i < 4; i++) { + fetchUnit.tick(); + } + + // Whilst the Loop Buffer state is LoopBufferState::SUPPLYING, the request + // read should never be called + EXPECT_CALL(memory, requestRead(_, _)).Times(0); + EXPECT_CALL(isa, getMaxInstructionSize()).Times(0); + EXPECT_CALL(memory, getCompletedReads()).Times(0); + fetchUnit.requestFromPC(); + + // Empty output buffer and ensure the correct instructions are supplied from + // the Loop Buffer + output.fill({}); + fetchUnit.tick(); + EXPECT_EQ(output.getTailSlots()[0], mOp); + output.fill({}); + fetchUnit.tick(); + EXPECT_EQ(output.getTailSlots()[0], mOp); + output.fill({}); + fetchUnit.tick(); + EXPECT_EQ(output.getTailSlots()[0], mOp); + output.fill({}); + fetchUnit.tick(); + EXPECT_EQ(output.getTailSlots()[0], mOp2); + + // Flush the Loop Buffer and ensure correct instructions are fetched from + // memory + fetchUnit.flushLoopBuffer(); + fetchUnit.updatePC(0x0); + EXPECT_CALL(memory, requestRead(_, _)).Times(AtLeast(1)); + EXPECT_CALL(isa, getMaxInstructionSize()).Times(AtLeast(1)); + EXPECT_CALL(memory, getCompletedReads()).Times(AtLeast(1)); + fetchUnit.requestFromPC(); + output.fill({}); + fetchUnit.tick(); + EXPECT_EQ(output.getTailSlots()[0], mOp); + output.fill({}); + fetchUnit.tick(); + EXPECT_EQ(output.getTailSlots()[0], mOp); + output.fill({}); + fetchUnit.tick(); + 
EXPECT_EQ(output.getTailSlots()[0], mOp); + output.fill({}); + fetchUnit.tick(); + EXPECT_EQ(output.getTailSlots()[0], mOp2); +} + +// Tests the functionality of idling the supply to the Loop Buffer one of not +// taken branch at the loopBoundaryAddress_ +TEST_F(PipelineFetchUnitTest, idleLoopBufferDueToNotTakenBoundary) { + // Set instructions to be fetched from memory + MemoryReadResult memReadResultA = { + {0x0, blockSize}, RegisterValue(0xFFFF, blockSize), 1}; + span nextBlockA = {&memReadResultA, 1}; + MemoryReadResult memReadResultB = { + {0x10, blockSize}, RegisterValue(0xFFFF, blockSize), 1}; + span nextBlockB = {&memReadResultB, 1}; + EXPECT_CALL(memory, getCompletedReads()).WillRepeatedly(Return(nextBlockA)); + + ON_CALL(isa, getMaxInstructionSize()).WillByDefault(Return(insnMaxSizeBytes)); + + // Register loop boundary + fetchUnit.registerLoopBoundary(0xC); + + // Set the instructions, within the loop body, to be returned from predecode + MacroOp mOp2 = {uopPtr2}; + ON_CALL(isa, predecode(_, _, Gt(0x8), _)) + .WillByDefault(DoAll(SetArgReferee<3>(mOp2), Return(4))); + ON_CALL(*uop2, isBranch()).WillByDefault(Return(true)); + + MacroOp mOp = {uopPtr}; + ON_CALL(isa, predecode(_, _, Lt(0xC), _)) + .WillByDefault(DoAll(SetArgReferee<3>(mOp), Return(4))); + ON_CALL(*uop, isBranch()).WillByDefault(Return(false)); + + // Set the first expectation from the predictor to be true so a loop body will + // be detected + EXPECT_CALL(predictor, predict(_, _, _)) + .WillOnce(Return(BranchPrediction({true, 0x0}))); + + // Set Loop Buffer state to be LoopBufferState::FILLING + // Tick 4 times to process all 16 bytes of fetched data + for (int i = 0; i < 4; i++) { + fetchUnit.tick(); + } + + // Fetch the next block of instructions from memory and change the expected + // outcome of the branch predictor + fetchUnit.requestFromPC(); + EXPECT_CALL(predictor, predict(_, _, _)) + .WillRepeatedly(Return(BranchPrediction({false, 0x0}))); + + // Attempt to fill Loop Buffer but 
prevent it on a not taken outcome at the + // loopBoundaryAddress_ branch + // Tick 4 times to process all 16 bytes of fetched data + for (int i = 0; i < 4; i++) { + fetchUnit.tick(); + } + + // Set the expectation for the next block to be fetched after the Loop Buffer + // state has been reset + const MemoryAccessTarget target = {0x10, blockSize}; + EXPECT_CALL(memory, getCompletedReads()).WillRepeatedly(Return(nextBlockB)); + EXPECT_CALL(memory, requestRead(target, _)).Times(1); + + // Fetch the next block of instructions from memory + fetchUnit.requestFromPC(); + + // Empty output buffer and ensure the correct instructions are fetched from + // memory + output.fill({}); + fetchUnit.tick(); + EXPECT_EQ(output.getTailSlots()[0], mOp2); + output.fill({}); + fetchUnit.tick(); + EXPECT_EQ(output.getTailSlots()[0], mOp2); + output.fill({}); + fetchUnit.tick(); + EXPECT_EQ(output.getTailSlots()[0], mOp2); + output.fill({}); fetchUnit.tick(); + EXPECT_EQ(output.getTailSlots()[0], mOp2); } } // namespace pipeline diff --git a/test/unit/pipeline/LoadStoreQueueTest.cc b/test/unit/pipeline/LoadStoreQueueTest.cc index 414363352c..5ac8e36b04 100644 --- a/test/unit/pipeline/LoadStoreQueueTest.cc +++ b/test/unit/pipeline/LoadStoreQueueTest.cc @@ -17,6 +17,10 @@ const uint8_t MAX_LOADS = 32; const uint8_t MAX_STORES = 32; const uint8_t MAX_COMBINED = 64; +// TODO: When the associated requestWrite(...) gets moved into the LSQ's tick() +// functionality, we need to check the state of requestStoreQueue_ and calling +// of requestWrite(...) 
in a vareity of tests + class MockForwardOperandsHandler { public: MOCK_METHOD2(forwardOperands, @@ -36,10 +40,12 @@ class LoadStoreQueueTest : public ::testing::TestWithParam { loadUop2(new MockInstruction), storeUop(new MockInstruction), storeUop2(new MockInstruction), + loadStoreUop(new MockInstruction), loadUopPtr(loadUop), loadUopPtr2(loadUop2), storeUopPtr(storeUop), - storeUopPtr2(storeUop2) { + storeUopPtr2(storeUop2), + loadStoreUopPtr(loadStoreUop) { // Set up sensible return values for the load uop ON_CALL(*loadUop, isLoad()).WillByDefault(Return(true)); ON_CALL(*loadUop, getGeneratedAddresses()) @@ -54,7 +60,12 @@ class LoadStoreQueueTest : public ::testing::TestWithParam { } protected: - LoadStoreQueue getQueue() { + LoadStoreQueue getQueue(bool exclusive = false, + uint16_t loadBandwidth = UINT16_MAX, + uint16_t storeBandwidth = UINT16_MAX, + uint16_t permittedRequests = UINT16_MAX, + uint16_t permittedLoads = UINT16_MAX, + uint16_t permittedStores = UINT16_MAX) { if (GetParam()) { // Combined queue return LoadStoreQueue( @@ -63,7 +74,8 @@ class LoadStoreQueueTest : public ::testing::TestWithParam { [this](auto registers, auto values) { forwardOperandsHandler.forwardOperands(registers, values); }, - [](auto uop) {}); + [](auto uop) {}, exclusive, loadBandwidth, storeBandwidth, + permittedRequests, permittedLoads, permittedStores); } else { // Split queue return LoadStoreQueue( @@ -72,7 +84,8 @@ class LoadStoreQueueTest : public ::testing::TestWithParam { [this](auto registers, auto values) { forwardOperandsHandler.forwardOperands(registers, values); }, - [](auto uop) {}); + [](auto uop) {}, exclusive, loadBandwidth, storeBandwidth, + permittedRequests, permittedLoads, permittedStores); } } @@ -122,11 +135,13 @@ class LoadStoreQueueTest : public ::testing::TestWithParam { MockInstruction* loadUop2; MockInstruction* storeUop; MockInstruction* storeUop2; + MockInstruction* loadStoreUop; std::shared_ptr loadUopPtr; std::shared_ptr loadUopPtr2; 
std::shared_ptr storeUopPtr; std::shared_ptr storeUopPtr2; + std::shared_ptr loadStoreUopPtr; MockForwardOperandsHandler forwardOperandsHandler; @@ -203,11 +218,49 @@ TEST_P(LoadStoreQueueTest, AddStore) { TEST_P(LoadStoreQueueTest, PurgeFlushedLoad) { auto queue = getQueue(); auto initialLoadSpace = queue.getLoadQueueSpace(); + MemoryReadResult completedRead = {addresses[0], data[0], 1}; + span completedReads = {&completedRead, 1}; + + // Set load instruction attributes + loadUop->setSequenceId(0); + loadUop->setInstructionId(0); + loadUop2->setSequenceId(1); + loadUop2->setInstructionId(1); + + EXPECT_CALL(*loadUop, getGeneratedAddresses()) + .Times(AtLeast(1)) + .WillRepeatedly(Return(addressesSpan)); + EXPECT_CALL(*loadUop2, getGeneratedAddresses()) + .Times(AtLeast(1)) + .WillRepeatedly(Return(addressesSpan)); + + // Add loads to LSQ queue.addLoad(loadUopPtr); + queue.addLoad(loadUopPtr2); + + // Start the first load so that its accesses can be added to + // requestLoadQueue_/requestedLoads_ and expect a memory access to be + // performed + queue.startLoad(loadUopPtr); + EXPECT_CALL(dataMemory, requestRead(addresses[0], 0)).Times(1); + queue.tick(); + // Start the second load so that its accesses can be added to + // requestLoadQueue_/requestedLoads_ but flush it before it can perform a + // memory access + queue.startLoad(loadUopPtr2); loadUop->setFlushed(); + loadUop2->setFlushed(); queue.purgeFlushed(); + // Expect no activity regarding memory accesses or the passing of the load + // instruction to the output buffer + EXPECT_CALL(dataMemory, requestRead(_, _)).Times(0); + EXPECT_CALL(dataMemory, getCompletedReads()) + .WillRepeatedly(Return(completedReads)); + queue.tick(); + + EXPECT_EQ(completionSlots[0].getTailSlots()[0], nullptr); EXPECT_EQ(queue.getLoadQueueSpace(), initialLoadSpace); } @@ -231,11 +284,21 @@ TEST_P(LoadStoreQueueTest, Load) { MemoryReadResult completedRead = {addresses[0], data[0], 1}; span completedReads = {&completedRead, 1}; - 
EXPECT_CALL(*loadUop, getGeneratedAddresses()).Times(AtLeast(1)); - - loadUop->setDataPending(addresses.size()); + // Set load instruction attributes + EXPECT_CALL(*loadUop, getGeneratedAddresses()) + .Times(AtLeast(1)) + .WillRepeatedly(Return(addressesSpan)); + loadUop->setLSQLatency(3); + // Begin load in LSQ queue.addLoad(loadUopPtr); + queue.startLoad(loadUopPtr); + + // Given 3 cycle latency, no requests should occur in the first two ticks of + // the LSQ + EXPECT_CALL(dataMemory, requestRead(_, _)).Times(0); + queue.tick(); + queue.tick(); // Check that a read request is made to the memory interface EXPECT_CALL(dataMemory, requestRead(addresses[0], _)).Times(1); @@ -245,15 +308,40 @@ TEST_P(LoadStoreQueueTest, Load) { .WillRepeatedly(Return(completedReads)); // Check that the LSQ supplies the right data to the instruction - // TODO: Replace with check for call over memory interface in future? EXPECT_CALL(*loadUop, - supplyData(0, Property(&RegisterValue::get, data[0]))) + supplyData(addresses[0].address, + Property(&RegisterValue::get, data[0]))) .Times(1); + // Tick the queue to complete the load + queue.tick(); + + EXPECT_EQ(completionSlots[0].getTailSlots()[0].get(), loadUop); +} + +// Tests that a queue can perform a load with no addresses +TEST_P(LoadStoreQueueTest, LoadWithNoAddresses) { + loadUop->setSequenceId(1); + auto queue = getQueue(); + + span emptyAddressesSpan = {}; + + EXPECT_CALL(*loadUop, getGeneratedAddresses()) + .Times(AtLeast(1)) + .WillRepeatedly(Return(emptyAddressesSpan)); + + // Check that a read request isn't made to the memory interface but the load + // completes in the LSQ + EXPECT_CALL(dataMemory, requestRead(_, _)).Times(0); + EXPECT_CALL(*loadUop, execute()).Times(1); + + queue.addLoad(loadUopPtr); queue.startLoad(loadUopPtr); // Tick the queue to complete the load queue.tick(); + + EXPECT_EQ(completionSlots[0].getTailSlots()[0].get(), loadUop); } // Tests that a queue can commit a load @@ -275,14 +363,18 @@ 
TEST_P(LoadStoreQueueTest, Store) { auto queue = getQueue(); auto initialStoreSpace = queue.getStoreQueueSpace(); - EXPECT_CALL(*storeUop, getGeneratedAddresses()).Times(AtLeast(1)); - EXPECT_CALL(*storeUop, getData()).Times(AtLeast(1)); - + // Set store instruction attributes storeUop->setSequenceId(1); storeUop->setInstructionId(1); + EXPECT_CALL(*storeUop, getGeneratedAddresses()) + .Times(AtLeast(1)) + .WillRepeatedly(Return(addressesSpan)); + EXPECT_CALL(*storeUop, getData()) + .Times(AtLeast(1)) + .WillRepeatedly(Return(dataSpan)); + queue.addStore(storeUopPtr); - storeUopPtr->setCommitReady(); queue.supplyStoreData(storeUopPtr); // Check that a write request is sent to the memory interface @@ -299,6 +391,298 @@ TEST_P(LoadStoreQueueTest, Store) { EXPECT_EQ(queue.getStoreQueueSpace(), initialStoreSpace); } +// Tests that a queue can perform a load-store operation +TEST_P(LoadStoreQueueTest, LoadStore) { + auto queue = getQueue(); + auto initialLoadSpace = queue.getLoadQueueSpace(); + auto initialStoreSpace = queue.getStoreQueueSpace(); + + MemoryReadResult completedRead = {addresses[0], data[0], 1}; + span completedReads = {&completedRead, 1}; + + // Set load-store instruction attributes + loadStoreUop->setSequenceId(1); + loadStoreUop->setInstructionId(1); + + EXPECT_CALL(*loadStoreUop, isLoad()) + .Times(AtLeast(1)) + .WillRepeatedly(Return(true)); + EXPECT_CALL(*loadStoreUop, isStoreData()) + .Times(AtLeast(1)) + .WillRepeatedly(Return(true)); + + EXPECT_CALL(*loadStoreUop, getGeneratedAddresses()) + .Times(AtLeast(1)) + .WillRepeatedly(Return(addressesSpan)); + EXPECT_CALL(*loadStoreUop, getData()) + .Times(AtLeast(1)) + .WillRepeatedly(Return(dataSpan)); + + // Register load-store operation and start load portion + queue.addLoad(loadStoreUopPtr); + queue.addStore(loadStoreUopPtr); + queue.startLoad(loadStoreUopPtr); + + // Check that a read request is made to the memory interface + EXPECT_CALL(dataMemory, requestRead(addresses[0], _)).Times(1); + + // 
Expect a check against finished reads and return the result + EXPECT_CALL(dataMemory, getCompletedReads()) + .WillRepeatedly(Return(completedReads)); + + // Check that the LSQ supplies the right data to the instruction + EXPECT_CALL(*loadStoreUop, + supplyData(addresses[0].address, + Property(&RegisterValue::get, data[0]))) + .Times(1); + + // Tick the queue to complete the load portion of the load-store + queue.tick(); + EXPECT_EQ(completionSlots[0].getTailSlots()[0].get(), loadStoreUop); + + // Check that a write request is sent to the memory interface + EXPECT_CALL(dataMemory, + requestWrite(addresses[0], + Property(&RegisterValue::get, data[0]))) + .Times(1); + + // Commit both potions of the load-store + queue.commitLoad(loadStoreUopPtr); + queue.commitStore(loadStoreUopPtr); + + // Check the load-store was removed + EXPECT_EQ(queue.getLoadQueueSpace(), initialLoadSpace); + EXPECT_EQ(queue.getStoreQueueSpace(), initialStoreSpace); +} + +// Tests that bandwidth restrictions are adhered to in a non-exclusive LSQ +TEST_P(LoadStoreQueueTest, NonExclusiveBandwidthRestriction) { + auto queue = getQueue(false, 3, 3); + + // Set instruction attributes + loadUop->setSequenceId(0); + loadUop->setInstructionId(0); + storeUop->setSequenceId(1); + storeUop->setInstructionId(1); + loadUop2->setSequenceId(2); + loadUop2->setInstructionId(2); + + std::vector multipleAddresses = {{1, 2}, {2, 2}}; + span multipleAddressesSpan = { + multipleAddresses.data(), multipleAddresses.size()}; + std::vector storeData = {static_cast(0x01), + static_cast(0x10)}; + span storeDataSpan = {storeData.data(), + storeData.size()}; + + EXPECT_CALL(*loadUop, getGeneratedAddresses()) + .Times(AtLeast(1)) + .WillRepeatedly(Return(multipleAddressesSpan)); + EXPECT_CALL(*storeUop, getGeneratedAddresses()) + .Times(AtLeast(1)) + .WillRepeatedly(Return(multipleAddressesSpan)); + EXPECT_CALL(*loadUop2, getGeneratedAddresses()) + .Times(AtLeast(1)) + .WillRepeatedly(Return(multipleAddressesSpan)); + 
EXPECT_CALL(*storeUop, getData()) + .Times(AtLeast(1)) + .WillRepeatedly(Return(storeDataSpan)); + + // Add instructions to LSQ and register their accesses to be processed in the + // tick() function + queue.addLoad(loadUopPtr); + queue.addLoad(loadUopPtr2); + queue.startLoad(loadUopPtr); + queue.startLoad(loadUopPtr2); + queue.addStore(storeUopPtr); + queue.supplyStoreData(storeUopPtr); + queue.commitStore(storeUopPtr); + + // Set expectations for tick logic based on set restrictions. Only 2 bytes of + // read and 2 bytes of write accesses should be processed per cycle (in this + // case that translates to one of the two addresses each uop has to handle). + EXPECT_CALL(dataMemory, requestRead(_, 0)).Times(1); + queue.tick(); + EXPECT_CALL(dataMemory, requestRead(_, 0)).Times(1); + queue.tick(); + EXPECT_CALL(dataMemory, requestRead(_, 2)).Times(1); + queue.tick(); + EXPECT_CALL(dataMemory, requestRead(_, 2)).Times(1); + queue.tick(); +} + +// Tests that bandwidth restrictions are adhered to in an exclusive LSQ +TEST_P(LoadStoreQueueTest, ExclusiveBandwidthRestriction) { + auto queue = getQueue(true, 3, 3); + + // Set instruction attributes + loadUop->setSequenceId(0); + loadUop->setInstructionId(0); + storeUop->setSequenceId(1); + storeUop->setInstructionId(1); + loadUop2->setSequenceId(2); + loadUop2->setInstructionId(2); + + std::vector multipleAddresses = {{1, 2}, {2, 2}}; + span multipleAddressesSpan = { + multipleAddresses.data(), multipleAddresses.size()}; + std::vector storeData = {static_cast(0x01), + static_cast(0x10)}; + span storeDataSpan = {storeData.data(), + storeData.size()}; + + EXPECT_CALL(*loadUop, getGeneratedAddresses()) + .Times(AtLeast(1)) + .WillRepeatedly(Return(multipleAddressesSpan)); + EXPECT_CALL(*storeUop, getGeneratedAddresses()) + .Times(AtLeast(1)) + .WillRepeatedly(Return(multipleAddressesSpan)); + EXPECT_CALL(*loadUop2, getGeneratedAddresses()) + .Times(AtLeast(1)) + .WillRepeatedly(Return(multipleAddressesSpan)); + 
EXPECT_CALL(*storeUop, getData()) + .Times(AtLeast(1)) + .WillRepeatedly(Return(storeDataSpan)); + + // Add instructions to LSQ and register their accesses to be processed in the + // tick() function + queue.addLoad(loadUopPtr); + queue.addLoad(loadUopPtr2); + queue.startLoad(loadUopPtr); + queue.startLoad(loadUopPtr2); + queue.addStore(storeUopPtr); + queue.supplyStoreData(storeUopPtr); + queue.commitStore(storeUopPtr); + + // Set expectations for tick logic based on set restrictions. Only 2 bytes of + // read and 2 bytes of write accesses should be processed per cycle (in this + // case that translates to one of the two addresses each uop has to handle). + // However, there cannot be an overlap between load and store bandwidth usage + // per cycle due to the LSQ being exclusive + EXPECT_CALL(dataMemory, requestRead(_, _)).Times(0); + queue.tick(); + EXPECT_CALL(dataMemory, requestRead(_, _)).Times(0); + queue.tick(); + EXPECT_CALL(dataMemory, requestRead(_, 0)).Times(1); + queue.tick(); + EXPECT_CALL(dataMemory, requestRead(_, 0)).Times(1); + queue.tick(); + EXPECT_CALL(dataMemory, requestRead(_, 2)).Times(1); + queue.tick(); + EXPECT_CALL(dataMemory, requestRead(_, 2)).Times(1); + queue.tick(); +} + +// Tests that request restrictions are adhered to in a non-exclusive LSQ +TEST_P(LoadStoreQueueTest, NonExclusiveRequestsRestriction) { + auto queue = getQueue(false, UINT16_MAX, UINT16_MAX, 2, 2, 1); + + // Set instruction attributes + loadUop->setSequenceId(0); + loadUop->setInstructionId(0); + storeUop->setSequenceId(1); + storeUop->setInstructionId(1); + loadUop2->setSequenceId(2); + loadUop2->setInstructionId(2); + + std::vector multipleAddresses = {{1, 2}, {2, 2}}; + span multipleAddressesSpan = { + multipleAddresses.data(), multipleAddresses.size()}; + std::vector storeData = {static_cast(0x01), + static_cast(0x10)}; + span storeDataSpan = {storeData.data(), + storeData.size()}; + + EXPECT_CALL(*loadUop, getGeneratedAddresses()) + .Times(AtLeast(1)) + 
.WillRepeatedly(Return(multipleAddressesSpan)); + EXPECT_CALL(*storeUop, getGeneratedAddresses()) + .Times(AtLeast(1)) + .WillRepeatedly(Return(multipleAddressesSpan)); + EXPECT_CALL(*loadUop2, getGeneratedAddresses()) + .Times(AtLeast(1)) + .WillRepeatedly(Return(multipleAddressesSpan)); + EXPECT_CALL(*storeUop, getData()) + .Times(AtLeast(1)) + .WillRepeatedly(Return(storeDataSpan)); + + // Add instructions to LSQ and register their accesses to be processed in the + // tick() function + queue.addLoad(loadUopPtr); + queue.addLoad(loadUopPtr2); + queue.startLoad(loadUopPtr); + queue.startLoad(loadUopPtr2); + queue.addStore(storeUopPtr); + queue.supplyStoreData(storeUopPtr); + queue.commitStore(storeUopPtr); + + // Set expectations for tick logic based on set restrictions. Either 2 reads + // or 1 read and 1 write should be processed per cycle + EXPECT_CALL(dataMemory, requestRead(_, 0)).Times(1); + queue.tick(); + EXPECT_CALL(dataMemory, requestRead(_, 0)).Times(1); + queue.tick(); + EXPECT_CALL(dataMemory, requestRead(_, 2)).Times(2); + queue.tick(); +} + +// Tests that request restrictions are adhered to in an exclusive LSQ +TEST_P(LoadStoreQueueTest, ExclusiveRequestsRestriction) { + auto queue = getQueue(true, UINT16_MAX, UINT16_MAX, 3, 2, 1); + + // Set instruction attributes + loadUop->setSequenceId(0); + loadUop->setInstructionId(0); + storeUop->setSequenceId(1); + storeUop->setInstructionId(1); + loadUop2->setSequenceId(2); + loadUop2->setInstructionId(2); + + std::vector multipleAddresses = {{1, 2}, {2, 2}}; + span multipleAddressesSpan = { + multipleAddresses.data(), multipleAddresses.size()}; + std::vector storeData = {static_cast(0x01), + static_cast(0x10)}; + span storeDataSpan = {storeData.data(), + storeData.size()}; + + EXPECT_CALL(*loadUop, getGeneratedAddresses()) + .Times(AtLeast(1)) + .WillRepeatedly(Return(multipleAddressesSpan)); + EXPECT_CALL(*storeUop, getGeneratedAddresses()) + .Times(AtLeast(1)) + 
.WillRepeatedly(Return(multipleAddressesSpan)); + EXPECT_CALL(*loadUop2, getGeneratedAddresses()) + .Times(AtLeast(1)) + .WillRepeatedly(Return(multipleAddressesSpan)); + EXPECT_CALL(*storeUop, getData()) + .Times(AtLeast(1)) + .WillRepeatedly(Return(storeDataSpan)); + + // Add instructions to LSQ and register their accesses to be processed in the + // tick() function + queue.addLoad(loadUopPtr); + queue.addLoad(loadUopPtr2); + queue.startLoad(loadUopPtr); + queue.startLoad(loadUopPtr2); + queue.addStore(storeUopPtr); + queue.supplyStoreData(storeUopPtr); + queue.commitStore(storeUopPtr); + + // Set expectations for tick logic based on set restrictions. Only 2 reads and + // 1 write should be processed per cycle. However, there cannot be an overlap + // between load and store requests being processed in a single cycle due to + // the LSQ being exclusive. + EXPECT_CALL(dataMemory, requestRead(_, _)).Times(0); + queue.tick(); + EXPECT_CALL(dataMemory, requestRead(_, _)).Times(0); + queue.tick(); + EXPECT_CALL(dataMemory, requestRead(_, 0)).Times(2); + queue.tick(); + EXPECT_CALL(dataMemory, requestRead(_, 2)).Times(2); + queue.tick(); +} + // Tests that committing a store will correctly detect a direct memory order // violation TEST_P(LoadStoreQueueTest, Violation) { @@ -456,6 +840,66 @@ TEST_P(LoadStoreQueueTest, FlushDuringConfliction) { queue.tick(); } +// Test that a load access exactly conflicting on a store access (matching +// address and access size no larger) gets its data supplied when the store +// commits +TEST_P(LoadStoreQueueTest, SupplyDataToConfliction) { + auto queue = getQueue(); + + // Set instruction attributes + storeUop->setSequenceId(0); + storeUop->setInstructionId(0); + loadUop->setSequenceId(1); + loadUop->setInstructionId(1); + + std::vector storeAddresses = {{1, 1}, {2, 1}}; + span storeAddressesSpan = {storeAddresses.data(), + storeAddresses.size()}; + std::vector storeData = {static_cast(0x01), + static_cast(0x10)}; + span storeDataSpan 
= {storeData.data(), + storeData.size()}; + EXPECT_CALL(*storeUop, getGeneratedAddresses()) + .Times(AtLeast(1)) + .WillRepeatedly(Return(storeAddressesSpan)); + EXPECT_CALL(*storeUop, getData()) + .Times(AtLeast(1)) + .WillRepeatedly(Return(storeDataSpan)); + + // Set load addresses which exactly and partially overlaps on first and second + // store addresses respectively + std::vector loadAddresses = {{1, 1}, {2, 2}, {3, 1}}; + span loadAddressesSpan = {loadAddresses.data(), + loadAddresses.size()}; + EXPECT_CALL(*loadUop, getGeneratedAddresses()) + .Times(AtLeast(1)) + .WillRepeatedly(Return(loadAddressesSpan)); + + // Add instructions to LSQ + queue.addStore(storeUopPtr); + queue.addLoad(loadUopPtr); + + // Supply store data so the store can commit + queue.supplyStoreData(storeUopPtr); + + // Start the load so the confliction can be registered + queue.startLoad(loadUopPtr); + + // Two of the accesses don't exactly conflict so they should generate memory + // accesses + EXPECT_CALL(dataMemory, requestRead(loadAddresses[1], 1)).Times(1); + EXPECT_CALL(dataMemory, requestRead(loadAddresses[2], 1)).Times(1); + queue.tick(); + + // The one access which does exactly conflict with a store access should get + // its data supplied on the store's commitment + EXPECT_CALL(*loadUop, + supplyData(loadAddresses[0].address, + Property(&RegisterValue::get, storeData[0]))) + .Times(1); + queue.commitStore(storeUopPtr); +} + INSTANTIATE_TEST_SUITE_P(LoadStoreQueueTests, LoadStoreQueueTest, ::testing::Values(false, true)); diff --git a/test/unit/pipeline/M1PortAllocatorTest.cc b/test/unit/pipeline/M1PortAllocatorTest.cc new file mode 100644 index 0000000000..bdfe4c6cd7 --- /dev/null +++ b/test/unit/pipeline/M1PortAllocatorTest.cc @@ -0,0 +1,157 @@ +#include "gtest/gtest.h" +#include "simeng/pipeline/M1PortAllocator.hh" + +namespace simeng { +namespace pipeline { + +class M1PortAllocatorTest : public testing::Test { + public: + M1PortAllocatorTest() : 
portAllocator(portArrangement, rsArrangement) { + portAllocator.setRSSizeGetter( + [this](std::vector& sizeVec) { rsSizes(sizeVec); }); + } + + void rsSizes(std::vector& sizeVec) const { + sizeVec = rsFreeEntries; + } + + protected: + // Representation of the M1 Firestorm reservation station layout + std::vector rsFreeEntries = {24, 26, 16, 12, 28, 28, 12, + 12, 12, 12, 36, 36, 36, 36}; + // Representation of the M1 Firestorm port layout + const std::vector> portArrangement = { + {0}, {1}, {2}, {3}, {4}, {5}, {6}, {7}, {8}, {9}, {10}, {11}, {12}, {13}}; + // Representation of the M1 Firestorm Reservation Station Arrangement + // std::pair = + std::vector> rsArrangement = { + {0, 24}, {1, 26}, {2, 16}, {3, 12}, {4, 28}, {5, 28}, {6, 12}, + {7, 12}, {8, 12}, {9, 12}, {10, 36}, {11, 36}, {12, 36}, {13, 36}}; + + M1PortAllocator portAllocator; +}; + +// Tests correct allocation for single port groups (i.e. INT_DIV_OR_SQRT) +TEST_F(M1PortAllocatorTest, singlePortAllocation) { + std::vector ports = {4}; + EXPECT_EQ(portAllocator.allocate(ports), 4); +} + +// Tests correct allocation of multiple INT_SIMPLE instructions +TEST_F(M1PortAllocatorTest, allocationIntSimple) { + std::vector ports = {0, 1, 2, 3, 4, 5}; + EXPECT_EQ(portAllocator.allocate(ports), 0); + rsFreeEntries[0]--; + EXPECT_EQ(portAllocator.allocate(ports), 1); + rsFreeEntries[1]--; + EXPECT_EQ(portAllocator.allocate(ports), 2); + rsFreeEntries[2]--; + EXPECT_EQ(portAllocator.allocate(ports), 3); + rsFreeEntries[3]--; + EXPECT_EQ(portAllocator.allocate(ports), 4); + rsFreeEntries[4]--; + EXPECT_EQ(portAllocator.allocate(ports), 5); + rsFreeEntries[5]--; + EXPECT_EQ(portAllocator.allocate(ports), 0); + rsFreeEntries[0]--; + + // Ensure `issued()` logic works as expected + portAllocator.issued(3); + rsFreeEntries[3]++; + EXPECT_EQ(portAllocator.allocate(ports), 3); + rsFreeEntries[3]--; +} + +// Tests correct allocation of multiple BRANCH instructions +TEST_F(M1PortAllocatorTest, allocationBranch) { + 
std::vector ports = {0, 1}; + EXPECT_EQ(portAllocator.allocate(ports), 0); + rsFreeEntries[0]--; + EXPECT_EQ(portAllocator.allocate(ports), 1); + rsFreeEntries[1]--; + EXPECT_EQ(portAllocator.allocate(ports), 0); + rsFreeEntries[0]--; + + // Ensure `issued()` logic works as expected + portAllocator.issued(0); + rsFreeEntries[0]++; + EXPECT_EQ(portAllocator.allocate(ports), 0); + rsFreeEntries[0]--; +} + +// Tests correct allocation of multiple INT_MUL instructions +TEST_F(M1PortAllocatorTest, allocationIntMul) { + std::vector ports = {4, 5}; + EXPECT_EQ(portAllocator.allocate(ports), 4); + rsFreeEntries[4]--; + EXPECT_EQ(portAllocator.allocate(ports), 5); + rsFreeEntries[5]--; + EXPECT_EQ(portAllocator.allocate(ports), 4); + rsFreeEntries[4]--; + + // Ensure `issued()` logic works as expected + portAllocator.issued(4); + rsFreeEntries[4]++; + EXPECT_EQ(portAllocator.allocate(ports), 4); + rsFreeEntries[4]--; +} + +// Tests correct allocation of multiple LOAD instructions +TEST_F(M1PortAllocatorTest, allocationLoad) { + std::vector ports = {7, 8, 9}; + EXPECT_EQ(portAllocator.allocate(ports), 7); + rsFreeEntries[7]--; + EXPECT_EQ(portAllocator.allocate(ports), 8); + rsFreeEntries[8]--; + EXPECT_EQ(portAllocator.allocate(ports), 9); + rsFreeEntries[9]--; + EXPECT_EQ(portAllocator.allocate(ports), 7); + rsFreeEntries[7]--; + + // Ensure `issued()` logic works as expected + portAllocator.issued(9); + rsFreeEntries[9]++; + EXPECT_EQ(portAllocator.allocate(ports), 9); + rsFreeEntries[9]--; +} + +// Tests correct allocation of multiple STORE instructions +TEST_F(M1PortAllocatorTest, allocationStore) { + std::vector ports = {6, 7}; + EXPECT_EQ(portAllocator.allocate(ports), 6); + rsFreeEntries[6]--; + EXPECT_EQ(portAllocator.allocate(ports), 7); + rsFreeEntries[7]--; + EXPECT_EQ(portAllocator.allocate(ports), 6); + rsFreeEntries[6]--; + + // Ensure `issued()` logic works as expected + portAllocator.issued(6); + rsFreeEntries[6]++; + EXPECT_EQ(portAllocator.allocate(ports), 
6); + rsFreeEntries[6]--; +} + +// Tests correct allocation of multiple FP / VECTOR instructions +TEST_F(M1PortAllocatorTest, allocationFpVec) { + std::vector ports = {10, 11, 12, 13}; + EXPECT_EQ(portAllocator.allocate(ports), 10); + rsFreeEntries[10]--; + EXPECT_EQ(portAllocator.allocate(ports), 11); + rsFreeEntries[11]--; + EXPECT_EQ(portAllocator.allocate(ports), 12); + rsFreeEntries[12]--; + EXPECT_EQ(portAllocator.allocate(ports), 13); + rsFreeEntries[13]--; + EXPECT_EQ(portAllocator.allocate(ports), 10); + rsFreeEntries[10]--; + + // Ensure `issued()` logic works as expected + portAllocator.issued(12); + rsFreeEntries[12]++; + EXPECT_EQ(portAllocator.allocate(ports), 12); + rsFreeEntries[12]--; +} + +} // namespace pipeline +} // namespace simeng \ No newline at end of file diff --git a/test/unit/pipeline/MappedRegisterFileSetTest.cc b/test/unit/pipeline/MappedRegisterFileSetTest.cc new file mode 100644 index 0000000000..fc63657779 --- /dev/null +++ b/test/unit/pipeline/MappedRegisterFileSetTest.cc @@ -0,0 +1,56 @@ +#include "gtest/gtest.h" +#include "simeng/pipeline/MappedRegisterFileSet.hh" + +namespace simeng { +namespace pipeline { + +class MappedRegisterFileSetTest : public ::testing::Test { + public: + MappedRegisterFileSetTest() + : regFileSet(physRegFileStruct), + rat(archRegFileStruct, physRegCounts), + mappedRegFile(regFileSet, rat) {} + + protected: + const std::vector archRegFileStruct = { + {8, 10}, {24, 15}, {256, 31}}; + const std::vector physRegFileStruct = { + {8, 20}, {24, 30}, {256, 62}}; + const std::vector physRegCounts = {20, 30, 62}; + + RegisterFileSet regFileSet; + RegisterAliasTable rat; + + MappedRegisterFileSet mappedRegFile; +}; + +// Ensure that with continually changing physical-architectural register mapping +// changes, the correct register is being updated with set(). 
+TEST_F(MappedRegisterFileSetTest, getSet) { + // Loop through all register types + for (uint8_t i = 0; i < archRegFileStruct.size(); i++) { + // Keep allocating the same register to a) keep past values and b) more + // easily verify correct functionality + const uint16_t maxRegTag = archRegFileStruct[i].quantity - 1; + const uint16_t regSize = archRegFileStruct[i].bytes; + const Register rMax = {i, maxRegTag}; + + std::vector physRegs; + for (int j = 2; j < 12; j++) { + physRegs.push_back(rat.allocate(rMax)); + RegisterValue regVal = RegisterValue(j, regSize); + mappedRegFile.set(rMax, regVal); + EXPECT_EQ(mappedRegFile.get(rMax), regVal); + } + + for (int k = 0; k < 10; k++) { + // RAT constructed where Arch-Phys mapping is 1:1. So, first re-mapped + // value will be to maxArchRegTag + 1 + EXPECT_EQ(physRegs[k].tag, maxRegTag + k + 1); + EXPECT_EQ(physRegs[k].type, i); + EXPECT_EQ(regFileSet.get(physRegs[k]), RegisterValue(k + 2, regSize)); + } + } +} +} // namespace pipeline +} // namespace simeng \ No newline at end of file diff --git a/test/unit/pipeline/RegisterAliasTableTest.cc b/test/unit/pipeline/RegisterAliasTableTest.cc index 99b3daf059..6b6f1d9985 100644 --- a/test/unit/pipeline/RegisterAliasTableTest.cc +++ b/test/unit/pipeline/RegisterAliasTableTest.cc @@ -62,11 +62,15 @@ TEST_F(RegisterAliasTableTest, AllocateIndependent) { auto multiRAT = RegisterAliasTable({{8, architecturalCount}, {8, architecturalCount}}, {physicalCount, physicalCount}); + auto initialFreeRegisters0 = multiRAT.freeRegistersAvailable(0); auto initialFreeRegisters1 = multiRAT.freeRegistersAvailable(1); multiRAT.allocate(reg); - // Check that the same number of physical registers are still available + // Check 1 fewer physical registers are now available for regFile 0 + EXPECT_EQ(multiRAT.freeRegistersAvailable(0), initialFreeRegisters0 - 1); + // Check that the same number of physical registers are still available for + // regFile 1 EXPECT_EQ(multiRAT.freeRegistersAvailable(1), 
initialFreeRegisters1); } diff --git a/test/unit/pipeline/RenameUnitTest.cc b/test/unit/pipeline/RenameUnitTest.cc new file mode 100644 index 0000000000..23a1e3dd71 --- /dev/null +++ b/test/unit/pipeline/RenameUnitTest.cc @@ -0,0 +1,461 @@ +#include "../MockBranchPredictor.hh" +#include "../MockInstruction.hh" +#include "../MockMemoryInterface.hh" +#include "gtest/gtest.h" +#include "simeng/pipeline/RenameUnit.hh" + +namespace simeng { + +namespace pipeline { + +using ::testing::_; +using ::testing::Return; + +class RenameUnitTest : public testing::Test { + public: + RenameUnitTest() + : input(1, nullptr), + output(1, nullptr), + rat(archRegFileStruct, physRegCounts), + lsq( + lsqQueueSize, lsqQueueSize, memory, completionSlots, + [](auto registers, auto values) {}, [](auto insn) {}), + rob( + robSize, rat, lsq, [](auto insn) {}, [](auto branchAddr) {}, + predictor, 16, 4), + renameUnit(input, output, rob, rat, lsq, physRegCounts.size()), + uop(new MockInstruction), + uop2(new MockInstruction), + uop3(new MockInstruction), + uopPtr(uop), + uop2Ptr(uop2), + uop3Ptr(uop3) {} + + protected: + // 3rd register type has same arch & physical counts meaning renaming is not + // permitted. 
+ const std::vector archRegFileStruct = { + {8, 10}, {24, 15}, {256, 31}}; + const std::vector physRegFileStruct = { + {8, 20}, {24, 30}, {256, 31}}; + const std::vector physRegCounts = {20, 30, 31}; + + const Register r0 = {0, 0}; + const Register r1 = {1, 2}; + const Register r2 = {2, 4}; + + const unsigned int robSize = 8; + const unsigned int lsqQueueSize = 10; + + PipelineBuffer> input; + PipelineBuffer> output; + + MockMemoryInterface memory; + MockBranchPredictor predictor; + span>> completionSlots; + + RegisterAliasTable rat; + LoadStoreQueue lsq; + ReorderBuffer rob; + + RenameUnit renameUnit; + + MockInstruction* uop; + MockInstruction* uop2; + MockInstruction* uop3; + + std::shared_ptr uopPtr; + std::shared_ptr uop2Ptr; + std::shared_ptr uop3Ptr; +}; + +// Test the correct functionality when input buffer and unit is empty +TEST_F(RenameUnitTest, emptyTick) { + EXPECT_EQ(input.getHeadSlots()[0], nullptr); + EXPECT_EQ(output.getTailSlots()[0], nullptr); + + renameUnit.tick(); + + // Check output buffers and statistics are as expected + EXPECT_EQ(input.getHeadSlots()[0], nullptr); + EXPECT_EQ(output.getTailSlots()[0], nullptr); + EXPECT_EQ(renameUnit.getAllocationStalls(), 0); + EXPECT_EQ(renameUnit.getROBStalls(), 0); + EXPECT_EQ(renameUnit.getLoadQueueStalls(), 0); + EXPECT_EQ(renameUnit.getStoreQueueStalls(), 0); +} + +// Test the normal functionality of an instruction passing through the unit +TEST_F(RenameUnitTest, tick) { + input.getHeadSlots()[0] = uopPtr; + + std::array destRegs = {r0}; + std::array srcRegs = {r0, r1}; + ON_CALL(*uop, getDestinationRegisters()) + .WillByDefault(Return(span(destRegs))); + ON_CALL(*uop, getSourceRegisters()) + .WillByDefault(Return(span(srcRegs))); + ON_CALL(*uop, isOperandReady(_)).WillByDefault(Return(false)); + ON_CALL(*uop, isLoad()).WillByDefault(Return(false)); + ON_CALL(*uop, isStoreAddress()).WillByDefault(Return(false)); + + // Setup expected calls to MockInstruction + EXPECT_CALL(*uop, isLoad()).Times(1); + 
EXPECT_CALL(*uop, isStoreAddress()).Times(1); + EXPECT_CALL(*uop, getDestinationRegisters()).Times(1); + EXPECT_CALL(*uop, getSourceRegisters()).Times(1); + EXPECT_CALL(*uop, isOperandReady(_)).Times(2); + EXPECT_CALL(*uop, renameSource(_, _)).Times(2); + EXPECT_CALL(*uop, renameDestination(0, _)).Times(1); + renameUnit.tick(); + + // Check output buffers and statistics are as expected + EXPECT_EQ(input.getHeadSlots()[0], nullptr); + EXPECT_EQ(output.getTailSlots()[0].get(), uop); + EXPECT_EQ(renameUnit.getAllocationStalls(), 0); + EXPECT_EQ(renameUnit.getROBStalls(), 0); + EXPECT_EQ(renameUnit.getLoadQueueStalls(), 0); + EXPECT_EQ(renameUnit.getStoreQueueStalls(), 0); + + // Check ROB, LSQ, and RAT mappings have been changed accordingly + EXPECT_EQ(rob.size(), 1); + EXPECT_EQ(rob.getFreeSpace(), robSize - 1); + EXPECT_EQ(lsq.getTotalSpace(), lsqQueueSize * 2); + const Register mappedReg = {0, archRegFileStruct[0].quantity}; + EXPECT_EQ(rat.getMapping(r0), mappedReg); + EXPECT_EQ(rat.getMapping(r1), r1); +} + +// Ensure input buffer is stalled when output buffer is stalled +TEST_F(RenameUnitTest, outputStall) { + output.stall(true); + renameUnit.tick(); + EXPECT_TRUE(input.isStalled()); +} + +// Test that an instruction exception is properly dealt with +TEST_F(RenameUnitTest, uopException) { + input.getHeadSlots()[0] = uopPtr; + uop->setExceptionEncountered(true); + + renameUnit.tick(); + + EXPECT_TRUE(uopPtr->canCommit()); + + EXPECT_EQ(rob.size(), 1); + EXPECT_EQ(rob.getFreeSpace(), robSize - 1); + EXPECT_EQ(lsq.getTotalSpace(), lsqQueueSize * 2); + EXPECT_EQ(input.getHeadSlots()[0], nullptr); + EXPECT_EQ(output.getTailSlots()[0], nullptr); + + EXPECT_EQ(renameUnit.getAllocationStalls(), 0); + EXPECT_EQ(renameUnit.getROBStalls(), 0); + EXPECT_EQ(renameUnit.getLoadQueueStalls(), 0); + EXPECT_EQ(renameUnit.getStoreQueueStalls(), 0); +} + +// Test for when no physical registers are available +TEST_F(RenameUnitTest, noFreeRegs) { + // Take up all type-0 physical 
registers + // All arch regs originally mapped to phys reg, meaning remaining + regs = physCount - archCount + for (int i = 0; i < physRegCounts[0] - archRegFileStruct[0].quantity; i++) { + rat.allocate(r0); + } + EXPECT_EQ(rat.freeRegistersAvailable(0), 0); + + input.getHeadSlots()[0] = uopPtr; + + std::array destRegs = {r0}; + ON_CALL(*uop, getDestinationRegisters()) + .WillByDefault(Return(span(destRegs))); + ON_CALL(*uop, isOperandReady(_)).WillByDefault(Return(false)); + ON_CALL(*uop, isLoad()).WillByDefault(Return(false)); + ON_CALL(*uop, isStoreAddress()).WillByDefault(Return(false)); + + // Setup expected calls to MockInstruction + EXPECT_CALL(*uop, isLoad()).Times(1); + EXPECT_CALL(*uop, isStoreAddress()).Times(1); + EXPECT_CALL(*uop, getDestinationRegisters()).Times(1); + renameUnit.tick(); + + EXPECT_TRUE(input.isStalled()); + + EXPECT_EQ(rob.size(), 0); + EXPECT_EQ(rob.getFreeSpace(), robSize); + EXPECT_EQ(lsq.getTotalSpace(), lsqQueueSize * 2); + EXPECT_EQ(input.getHeadSlots()[0], uopPtr); + EXPECT_EQ(output.getTailSlots()[0], nullptr); + + EXPECT_EQ(renameUnit.getAllocationStalls(), 1); + EXPECT_EQ(renameUnit.getROBStalls(), 0); + EXPECT_EQ(renameUnit.getLoadQueueStalls(), 0); + EXPECT_EQ(renameUnit.getStoreQueueStalls(), 0); +} + +// Tests that when ROB is full, no renaming occurs +TEST_F(RenameUnitTest, fullROB) { + // Pre-fill ROB + for (int i = 0; i < robSize; i++) { + rob.reserve(uopPtr); + } + EXPECT_EQ(rob.getFreeSpace(), 0); + + input.getHeadSlots()[0] = uopPtr; + renameUnit.tick(); + + EXPECT_TRUE(input.isStalled()); + + EXPECT_EQ(rob.size(), robSize); + EXPECT_EQ(rob.getFreeSpace(), 0); + EXPECT_EQ(lsq.getTotalSpace(), lsqQueueSize * 2); + EXPECT_EQ(input.getHeadSlots()[0], uopPtr); + EXPECT_EQ(output.getTailSlots()[0], nullptr); + + EXPECT_EQ(renameUnit.getAllocationStalls(), 0); + EXPECT_EQ(renameUnit.getROBStalls(), 1); + EXPECT_EQ(renameUnit.getLoadQueueStalls(), 0); + EXPECT_EQ(renameUnit.getStoreQueueStalls(), 0); +} + +// Test a LOAD 
instruction is handled correctly +TEST_F(RenameUnitTest, loadUop) { + input.getHeadSlots()[0] = uopPtr; + + std::array destRegs = {r0}; + std::array srcRegs = {r0, r1}; + ON_CALL(*uop, getDestinationRegisters()) + .WillByDefault(Return(span(destRegs))); + ON_CALL(*uop, getSourceRegisters()) + .WillByDefault(Return(span(srcRegs))); + ON_CALL(*uop, isOperandReady(_)).WillByDefault(Return(false)); + ON_CALL(*uop, isLoad()).WillByDefault(Return(true)); + ON_CALL(*uop, isStoreAddress()).WillByDefault(Return(false)); + + // Setup expected calls to MockInstruction + EXPECT_CALL(*uop, isLoad()).Times(1); + EXPECT_CALL(*uop, isStoreAddress()).Times(1); + EXPECT_CALL(*uop, getDestinationRegisters()).Times(1); + EXPECT_CALL(*uop, getSourceRegisters()).Times(1); + EXPECT_CALL(*uop, isOperandReady(_)).Times(2); + EXPECT_CALL(*uop, renameSource(_, _)).Times(2); + EXPECT_CALL(*uop, renameDestination(0, _)).Times(1); + renameUnit.tick(); + + // Check output buffers and statistics are as expected + EXPECT_EQ(input.getHeadSlots()[0], nullptr); + EXPECT_EQ(output.getTailSlots()[0].get(), uop); + EXPECT_EQ(renameUnit.getAllocationStalls(), 0); + EXPECT_EQ(renameUnit.getROBStalls(), 0); + EXPECT_EQ(renameUnit.getLoadQueueStalls(), 0); + EXPECT_EQ(renameUnit.getStoreQueueStalls(), 0); + + // Check ROB, LSQ, and RAT mappings have been changed accordingly + EXPECT_EQ(rob.size(), 1); + EXPECT_EQ(rob.getFreeSpace(), robSize - 1); + EXPECT_EQ(lsq.getLoadQueueSpace(), lsqQueueSize - 1); + EXPECT_EQ(lsq.getStoreQueueSpace(), lsqQueueSize); + EXPECT_EQ(lsq.getTotalSpace(), (lsqQueueSize * 2) - 1); + const Register mappedReg = {0, archRegFileStruct[0].quantity}; + EXPECT_EQ(rat.getMapping(r0), mappedReg); + EXPECT_EQ(rat.getMapping(r1), r1); +} + +// Test a LOAD instruction is handled correctly when Load queue is full +TEST_F(RenameUnitTest, loadUopQueueFull) { + // pre-fill Load Queue + for (int i = 0; i < lsqQueueSize; i++) { + lsq.addLoad(uopPtr); + } + EXPECT_EQ(lsq.getLoadQueueSpace(), 0); 
+ + input.getHeadSlots()[0] = uopPtr; + + std::array destRegs = {r0}; + std::array srcRegs = {r0, r1}; + ON_CALL(*uop, getDestinationRegisters()) + .WillByDefault(Return(span(destRegs))); + ON_CALL(*uop, getSourceRegisters()) + .WillByDefault(Return(span(srcRegs))); + ON_CALL(*uop, isOperandReady(_)).WillByDefault(Return(false)); + ON_CALL(*uop, isLoad()).WillByDefault(Return(true)); + ON_CALL(*uop, isStoreAddress()).WillByDefault(Return(false)); + + // Setup expected calls to MockInstruction + EXPECT_CALL(*uop, isLoad()).Times(1); + EXPECT_CALL(*uop, isStoreAddress()).Times(1); + renameUnit.tick(); + + EXPECT_TRUE(input.isStalled()); + + // Check output buffers and statistics are as expected + EXPECT_EQ(input.getHeadSlots()[0], uopPtr); + EXPECT_EQ(output.getTailSlots()[0], nullptr); + EXPECT_EQ(renameUnit.getAllocationStalls(), 0); + EXPECT_EQ(renameUnit.getROBStalls(), 0); + EXPECT_EQ(renameUnit.getLoadQueueStalls(), 1); + EXPECT_EQ(renameUnit.getStoreQueueStalls(), 0); + + // Check ROB, LSQ, and RAT mappings have been changed accordingly + EXPECT_EQ(rob.size(), 0); + EXPECT_EQ(rob.getFreeSpace(), robSize); + EXPECT_EQ(lsq.getLoadQueueSpace(), 0); + EXPECT_EQ(lsq.getStoreQueueSpace(), lsqQueueSize); + EXPECT_EQ(lsq.getTotalSpace(), lsqQueueSize); +} + +// Test a STORE instruction is handled correctly +TEST_F(RenameUnitTest, storeUop) { + input.getHeadSlots()[0] = uopPtr; + + std::array destRegs = {r0}; + std::array srcRegs = {r0, r1}; + ON_CALL(*uop, getDestinationRegisters()) + .WillByDefault(Return(span(destRegs))); + ON_CALL(*uop, getSourceRegisters()) + .WillByDefault(Return(span(srcRegs))); + ON_CALL(*uop, isOperandReady(_)).WillByDefault(Return(false)); + ON_CALL(*uop, isLoad()).WillByDefault(Return(false)); + ON_CALL(*uop, isStoreAddress()).WillByDefault(Return(true)); + + // Setup expected calls to MockInstruction + EXPECT_CALL(*uop, isLoad()).Times(1); + EXPECT_CALL(*uop, isStoreAddress()).Times(1); + EXPECT_CALL(*uop, 
getDestinationRegisters()).Times(1); + EXPECT_CALL(*uop, getSourceRegisters()).Times(1); + EXPECT_CALL(*uop, isOperandReady(_)).Times(2); + EXPECT_CALL(*uop, renameSource(_, _)).Times(2); + EXPECT_CALL(*uop, renameDestination(0, _)).Times(1); + renameUnit.tick(); + + // Check output buffers and statistics are as expected + EXPECT_EQ(input.getHeadSlots()[0], nullptr); + EXPECT_EQ(output.getTailSlots()[0].get(), uop); + EXPECT_EQ(renameUnit.getAllocationStalls(), 0); + EXPECT_EQ(renameUnit.getROBStalls(), 0); + EXPECT_EQ(renameUnit.getLoadQueueStalls(), 0); + EXPECT_EQ(renameUnit.getStoreQueueStalls(), 0); + + // Check ROB, LSQ, and RAT mappings have been changed accordingly + EXPECT_EQ(rob.size(), 1); + EXPECT_EQ(rob.getFreeSpace(), robSize - 1); + EXPECT_EQ(lsq.getLoadQueueSpace(), lsqQueueSize); + EXPECT_EQ(lsq.getStoreQueueSpace(), lsqQueueSize - 1); + EXPECT_EQ(lsq.getTotalSpace(), (lsqQueueSize * 2) - 1); + const Register mappedReg = {0, archRegFileStruct[0].quantity}; + EXPECT_EQ(rat.getMapping(r0), mappedReg); + EXPECT_EQ(rat.getMapping(r1), r1); +} + +// Test a STORE instruction is handled correctly when Store queue is full +TEST_F(RenameUnitTest, storeUopQueueFull) { + // pre-fill Store Queue + for (int i = 0; i < lsqQueueSize; i++) { + lsq.addStore(uopPtr); + } + EXPECT_EQ(lsq.getStoreQueueSpace(), 0); + + input.getHeadSlots()[0] = uopPtr; + + std::array destRegs = {r0}; + std::array srcRegs = {r0, r1}; + ON_CALL(*uop, getDestinationRegisters()) + .WillByDefault(Return(span(destRegs))); + ON_CALL(*uop, getSourceRegisters()) + .WillByDefault(Return(span(srcRegs))); + ON_CALL(*uop, isOperandReady(_)).WillByDefault(Return(false)); + ON_CALL(*uop, isLoad()).WillByDefault(Return(false)); + ON_CALL(*uop, isStoreAddress()).WillByDefault(Return(true)); + + // Setup expected calls to MockInstruction + EXPECT_CALL(*uop, isLoad()).Times(1); + EXPECT_CALL(*uop, isStoreAddress()).Times(1); + renameUnit.tick(); + + EXPECT_TRUE(input.isStalled()); + + // Check output 
buffers and statistics are as expected + EXPECT_EQ(input.getHeadSlots()[0], uopPtr); + EXPECT_EQ(output.getTailSlots()[0], nullptr); + EXPECT_EQ(renameUnit.getAllocationStalls(), 0); + EXPECT_EQ(renameUnit.getROBStalls(), 0); + EXPECT_EQ(renameUnit.getLoadQueueStalls(), 0); + EXPECT_EQ(renameUnit.getStoreQueueStalls(), 1); + + // Check ROB, LSQ, and RAT mappings have been changed accordingly + EXPECT_EQ(rob.size(), 0); + EXPECT_EQ(rob.getFreeSpace(), robSize); + EXPECT_EQ(lsq.getLoadQueueSpace(), lsqQueueSize); + EXPECT_EQ(lsq.getStoreQueueSpace(), 0); + EXPECT_EQ(lsq.getTotalSpace(), lsqQueueSize); +} + +// Test to ensure Serialized destinations work correctly +TEST_F(RenameUnitTest, serializedDest) { + // A serialized uop can only proceed when the ROB is empty. Pre-add an + // instruction to ensure uop stalls correctly in renameUnit. Pre-fill ROB + rob.reserve(uop2Ptr); + EXPECT_EQ(rob.size(), 1); + + // A serialized uop is caused when the destination register cannot be renamed + // - i.e. 
the number of archRegs is the same as physRegs + input.getHeadSlots()[0] = uopPtr; + std::array destRegs = {r2}; + std::array srcRegs = {r0, r1}; + ON_CALL(*uop, getDestinationRegisters()) + .WillByDefault(Return(span(destRegs))); + ON_CALL(*uop, getSourceRegisters()) + .WillByDefault(Return(span(srcRegs))); + ON_CALL(*uop, isOperandReady(_)).WillByDefault(Return(false)); + ON_CALL(*uop, isLoad()).WillByDefault(Return(false)); + ON_CALL(*uop, isStoreAddress()).WillByDefault(Return(false)); + + // On first tick, input should stall and uop should not proceed through + // renameUnit + EXPECT_CALL(*uop, isLoad()).Times(1); + EXPECT_CALL(*uop, isStoreAddress()).Times(1); + EXPECT_CALL(*uop, getDestinationRegisters()).Times(1); + renameUnit.tick(); + + EXPECT_TRUE(input.isStalled()); + EXPECT_EQ(input.getHeadSlots()[0], uopPtr); + EXPECT_EQ(output.getTailSlots()[0], nullptr); + + // Empty item in ROB + EXPECT_EQ(rob.size(), 1); + uop2Ptr->setCommitReady(); + EXPECT_CALL(*uop2, getDestinationRegisters()).Times(1); + EXPECT_CALL(*uop2, isLoad()).WillOnce(Return(false)); + EXPECT_CALL(*uop2, isStoreAddress()).WillOnce(Return(false)); + EXPECT_CALL(*uop2, isBranch()).WillOnce(Return(false)); + rob.commit(1); + EXPECT_EQ(rob.size(), 0); + + // Try tick again + EXPECT_CALL(*uop, isLoad()).Times(1); + EXPECT_CALL(*uop, isStoreAddress()).Times(1); + EXPECT_CALL(*uop, getDestinationRegisters()).Times(1); + EXPECT_CALL(*uop, getSourceRegisters()).Times(1); + EXPECT_CALL(*uop, isOperandReady(_)).Times(2); + EXPECT_CALL(*uop, renameSource(_, _)).Times(2); + renameUnit.tick(); + + // Check output buffers and statistics are as expected + EXPECT_EQ(input.getHeadSlots()[0], nullptr); + EXPECT_EQ(output.getTailSlots()[0].get(), uop); + EXPECT_EQ(renameUnit.getAllocationStalls(), 0); + EXPECT_EQ(renameUnit.getROBStalls(), 0); + EXPECT_EQ(renameUnit.getLoadQueueStalls(), 0); + EXPECT_EQ(renameUnit.getStoreQueueStalls(), 0); + + // Check ROB, LSQ, and RAT mappings have been changed 
accordingly + EXPECT_EQ(rob.size(), 1); + EXPECT_EQ(rob.getFreeSpace(), robSize - 1); + EXPECT_EQ(lsq.getTotalSpace(), lsqQueueSize * 2); + EXPECT_EQ(rat.getMapping(r0), r0); + EXPECT_EQ(rat.getMapping(r1), r1); + EXPECT_EQ(rat.getMapping(r2), r2); +} + +} // namespace pipeline +} // namespace simeng \ No newline at end of file diff --git a/test/unit/pipeline/ReorderBufferTest.cc b/test/unit/pipeline/ReorderBufferTest.cc index df217968e3..89039439e9 100644 --- a/test/unit/pipeline/ReorderBufferTest.cc +++ b/test/unit/pipeline/ReorderBufferTest.cc @@ -1,11 +1,9 @@ #include "../MockBranchPredictor.hh" #include "../MockInstruction.hh" #include "../MockMemoryInterface.hh" -#include "gmock/gmock.h" #include "gtest/gtest.h" #include "simeng/Instruction.hh" #include "simeng/pipeline/LoadStoreQueue.hh" -#include "simeng/pipeline/RegisterAliasTable.hh" #include "simeng/pipeline/ReorderBuffer.hh" using ::testing::_; @@ -31,12 +29,15 @@ class ReorderBufferTest : public testing::Test { [](auto registers, auto values) {}, [](auto uop) {}), uop(new MockInstruction), uop2(new MockInstruction), + uop3(new MockInstruction), uopPtr(uop), uopPtr2(uop2), + uopPtr3(uop3), reorderBuffer( maxROBSize, rat, lsq, [this](auto insn) { exceptionHandler.raiseException(insn); }, - [](auto branchAddress) {}, predictor, 0, 0) {} + [this](auto branchAddress) { loopBoundaryAddr = branchAddress; }, + predictor, 4, 2) {} protected: const uint8_t maxLSQLoads = 32; @@ -52,13 +53,17 @@ class ReorderBufferTest : public testing::Test { MockInstruction* uop; MockInstruction* uop2; + MockInstruction* uop3; std::shared_ptr uopPtr; - std::shared_ptr uopPtr2; + std::shared_ptr uopPtr2; + std::shared_ptr uopPtr3; MockMemoryInterface dataMemory; ReorderBuffer reorderBuffer; + + uint64_t loopBoundaryAddr = 0; }; // Tests that an instruction can have a slot reserved in the ROB and be @@ -100,6 +105,7 @@ TEST_F(ReorderBufferTest, Commit) { EXPECT_EQ(committed, 1); EXPECT_EQ(reorderBuffer.size(), 0); + 
EXPECT_EQ(reorderBuffer.getInstructionsCommittedCount(), 1); } // Tests that the reorder buffer won't commit an instruction if it's not ready @@ -110,6 +116,7 @@ TEST_F(ReorderBufferTest, CommitNotReady) { EXPECT_EQ(committed, 0); EXPECT_EQ(reorderBuffer.size(), 1); + EXPECT_EQ(reorderBuffer.getInstructionsCommittedCount(), 0); } // Tests that the reorder buffer won't commit a ready instruction if it's not at @@ -124,6 +131,7 @@ TEST_F(ReorderBufferTest, CommitHeadNotReady) { EXPECT_EQ(committed, 0); EXPECT_EQ(reorderBuffer.size(), 2); + EXPECT_EQ(reorderBuffer.getInstructionsCommittedCount(), 0); } // Tests that the reorder buffer can commit multiple ready instructions @@ -138,6 +146,7 @@ TEST_F(ReorderBufferTest, CommitMultiple) { EXPECT_EQ(committed, 2); EXPECT_EQ(reorderBuffer.size(), 0); + EXPECT_EQ(reorderBuffer.getInstructionsCommittedCount(), 2); } // Tests that the reorder buffer correctly informs the LSQ when committing a @@ -153,6 +162,7 @@ TEST_F(ReorderBufferTest, CommitLoad) { // Check that the load was removed from the LSQ EXPECT_EQ(lsq.getLoadQueueSpace(), maxLSQLoads); + EXPECT_EQ(reorderBuffer.getInstructionsCommittedCount(), 1); } // Tests that the reorder buffer correctly triggers a store upon commit @@ -190,6 +200,7 @@ TEST_F(ReorderBufferTest, CommitStore) { // Check that the store was committed and removed from the LSQ EXPECT_EQ(lsq.getStoreQueueSpace(), maxLSQStores); + EXPECT_EQ(reorderBuffer.getInstructionsCommittedCount(), 1); // Tick lsq to complete store lsq.tick(); @@ -221,6 +232,231 @@ TEST_F(ReorderBufferTest, Exception) { auto committed = reorderBuffer.commit(1); EXPECT_EQ(committed, 1); + EXPECT_EQ(reorderBuffer.getInstructionsCommittedCount(), 1); +} + +// Test the reorder buffer correctly sets a macro-op to commitReady when all of +// its associated micro-ops have been +TEST_F(ReorderBufferTest, commitMicroOps) { + // Reserve all microOps + uop->setIsMicroOp(true); + uop->setIsLastMicroOp(false); + uop2->setIsMicroOp(true); + 
uop2->setIsLastMicroOp(false); + uop3->setIsMicroOp(true); + uop3->setIsLastMicroOp(true); + reorderBuffer.reserve(uopPtr); + reorderBuffer.reserve(uopPtr2); + reorderBuffer.reserve(uopPtr3); + EXPECT_EQ(reorderBuffer.size(), 3); + + EXPECT_EQ(uopPtr->getInstructionId(), 0); + EXPECT_EQ(uopPtr2->getInstructionId(), 0); + EXPECT_EQ(uopPtr3->getInstructionId(), 0); + + // No micro-ops are waiting commit. Make sure they're not commit ready after + // call to `commitMicroOps` + reorderBuffer.commitMicroOps(0); + EXPECT_FALSE(uopPtr->canCommit()); + EXPECT_FALSE(uopPtr2->canCommit()); + EXPECT_FALSE(uopPtr3->canCommit()); + + // Set first instruction as waitingCommit - ensure still not set commit ready + uop->setWaitingCommit(); + reorderBuffer.commitMicroOps(0); + EXPECT_FALSE(uopPtr->canCommit()); + EXPECT_FALSE(uopPtr2->canCommit()); + EXPECT_FALSE(uopPtr3->canCommit()); + + // Set last instruction as waitingCommit - ensure still not set commit ready + uop3->setWaitingCommit(); + reorderBuffer.commitMicroOps(0); + EXPECT_FALSE(uopPtr->canCommit()); + EXPECT_FALSE(uopPtr2->canCommit()); + EXPECT_FALSE(uopPtr3->canCommit()); + + // Set middle instruction as waitingCommit - ensure they are now set commit + // ready as all micro-ops are done + uop2->setWaitingCommit(); + reorderBuffer.commitMicroOps(0); + EXPECT_TRUE(uopPtr->canCommit()); + EXPECT_TRUE(uopPtr2->canCommit()); + EXPECT_TRUE(uopPtr3->canCommit()); + + // Now call commit in ROB and make sure micro-ops are committed properly + unsigned int committed = reorderBuffer.commit(3); + EXPECT_EQ(committed, 3); + EXPECT_EQ(reorderBuffer.getInstructionsCommittedCount(), 1); + EXPECT_EQ(reorderBuffer.size(), 0); +} + +// Test that a detected violating load in the lsq leads to a flush +TEST_F(ReorderBufferTest, violatingLoad) { + const uint64_t strAddr = 16; + const uint64_t strSize = 4; + const uint64_t ldAddr = 18; + const uint64_t ldSize = 4; + + // Init Store + const MemoryAccessTarget strTarget = {strAddr, 
strSize}; + span strTargetSpan = {&strTarget, 1}; + ON_CALL(*uop, getGeneratedAddresses()).WillByDefault(Return(strTargetSpan)); + ON_CALL(*uop, isStoreAddress()).WillByDefault(Return(true)); + ON_CALL(*uop, isStoreData()).WillByDefault(Return(true)); + uopPtr->setSequenceId(0); + uopPtr->setInstructionId(0); + lsq.addStore(uopPtr); + reorderBuffer.reserve(uopPtr); + // Init load + const MemoryAccessTarget ldTarget = {ldAddr, ldSize}; + span ldTargetSpan = {&ldTarget, 1}; + ON_CALL(*uop2, getGeneratedAddresses()).WillByDefault(Return(ldTargetSpan)); + ON_CALL(*uop2, isLoad()).WillByDefault(Return(true)); + uopPtr2->setSequenceId(1); + uopPtr2->setInstructionId(1); + uopPtr2->setInstructionAddress(4096); + lsq.addLoad(uopPtr2); + reorderBuffer.reserve(uopPtr2); + + EXPECT_EQ(reorderBuffer.size(), 2); + + // Start load "Out of order" + EXPECT_CALL(*uop2, getGeneratedAddresses()).Times(1); + EXPECT_CALL(*uop, getGeneratedAddresses()).Times(1); + lsq.startLoad(uopPtr2); + + // Set store "ready to commit" so that violation gets detected + uopPtr->setCommitReady(); + // Supply Store's data + RegisterValue strData = RegisterValue(0xABCD, strSize); + span strDataSpan = {&strData, 1}; + ON_CALL(*uop, getData()).WillByDefault(Return(strDataSpan)); + EXPECT_CALL(*uop, getData()).Times(1); + lsq.supplyStoreData(uopPtr); + + EXPECT_CALL(*uop, isStoreAddress()).WillOnce(Return(true)); + EXPECT_CALL(*uop, getGeneratedAddresses()).Times(1); // in LSQ + EXPECT_CALL(dataMemory, requestWrite(strTarget, strData)); // in LSQ + EXPECT_CALL(*uop2, getGeneratedAddresses()).Times(1); // in LSQ + unsigned int committed = reorderBuffer.commit(4); + + EXPECT_EQ(committed, 1); + EXPECT_EQ(reorderBuffer.size(), 1); + EXPECT_TRUE(reorderBuffer.shouldFlush()); + EXPECT_EQ(reorderBuffer.getViolatingLoadsCount(), 1); + EXPECT_EQ(lsq.getViolatingLoad(), uopPtr2); + EXPECT_EQ(reorderBuffer.getFlushAddress(), 4096); + EXPECT_EQ(reorderBuffer.getFlushInsnId(), 0); +} + +// Test that a branch is treated 
as expected, will trigger the loop buffer when +// seen enough times (loop detection threshold set to 2) +TEST_F(ReorderBufferTest, branch) { + // Set up branch instruction + const uint64_t insnAddr = 4096; + const uint64_t branchAddr = 1024; + BranchPrediction pred = {true, branchAddr}; + ON_CALL(*uop, isBranch()).WillByDefault(Return(true)); + uopPtr->setSequenceId(0); + uopPtr->setInstructionId(0); + uopPtr->setInstructionAddress(insnAddr); + uopPtr->setBranchPrediction(pred); + uopPtr->setCommitReady(); + + // First pass through ROB -- seen count reset to 0 as new branch + reorderBuffer.reserve(uopPtr); + EXPECT_CALL(*uop, isBranch()).Times(1); + reorderBuffer.commit(1); + EXPECT_NE(loopBoundaryAddr, insnAddr); + + // Second pass through ROB -- seen count = 1 + reorderBuffer.reserve(uopPtr); + EXPECT_CALL(*uop, isBranch()).Times(1); + reorderBuffer.commit(1); + EXPECT_NE(loopBoundaryAddr, insnAddr); + + // Third pass through ROB -- seen count = 2 + reorderBuffer.reserve(uopPtr); + EXPECT_CALL(*uop, isBranch()).Times(1); + reorderBuffer.commit(1); + EXPECT_NE(loopBoundaryAddr, insnAddr); + + // Fourth pass through ROB -- seen count = 3; exceeds detection threshold, + // loopBoundaryAddr updated + reorderBuffer.reserve(uopPtr); + EXPECT_CALL(*uop, isBranch()).Times(1); + reorderBuffer.commit(1); + EXPECT_EQ(loopBoundaryAddr, insnAddr); + + // Update prediction & reset loopBoundaryAddr. 
Flush ROB to reset loopDetected + pred = {false, branchAddr + 64}; + uopPtr->setBranchPrediction(pred); + loopBoundaryAddr = 0; + reorderBuffer.flush(0); + + // Re-do loop detection + // First pass through ROB -- seen count reset to 0 as new branch + reorderBuffer.reserve(uopPtr); + EXPECT_CALL(*uop, isBranch()).Times(1); + reorderBuffer.commit(1); + EXPECT_NE(loopBoundaryAddr, insnAddr); + + // Second pass through ROB -- seen count = 1 + reorderBuffer.reserve(uopPtr); + EXPECT_CALL(*uop, isBranch()).Times(1); + reorderBuffer.commit(1); + EXPECT_NE(loopBoundaryAddr, insnAddr); + + // Third pass through ROB -- seen count = 2 + reorderBuffer.reserve(uopPtr); + EXPECT_CALL(*uop, isBranch()).Times(1); + reorderBuffer.commit(1); + EXPECT_NE(loopBoundaryAddr, insnAddr); + + // Fourth pass through ROB -- seen count = 3; exceeds detection threshold, + // loopBoundaryAddr updated + reorderBuffer.reserve(uopPtr); + EXPECT_CALL(*uop, isBranch()).Times(1); + reorderBuffer.commit(1); + EXPECT_EQ(loopBoundaryAddr, insnAddr); +} + +// Tests that only those destination registers which have been renamed are +// rewound upon a ROB flush +TEST_F(ReorderBufferTest, registerRewind) { + uop->setInstructionId(0); + uop->setSequenceId(0); + uop2->setInstructionId(1); + uop2->setSequenceId(1); + + // Reserve entries in ROB + reorderBuffer.reserve(uopPtr); + reorderBuffer.reserve(uopPtr2); + + // Rename one of the destination registers + Register archReg = {0, 1, 0}; + Register renamedReg = rat.allocate({0, 1}); + EXPECT_EQ(renamedReg.tag, 32); + + // Set destination registers for the to-be-flushed uop2 with the second register + // not being renamed + std::vector destinations = {renamedReg, {0, 2, 0}}; + const span destinationSpan = { + const_cast(destinations.data()), 2}; + EXPECT_CALL(*uop2, getDestinationRegisters()) + .Times(1) + .WillRepeatedly(Return(destinationSpan)); + + // Check that mappings in RAT are correct + EXPECT_EQ(rat.getMapping(archReg).tag, 32); + 
EXPECT_EQ(rat.getMapping(destinations[1]).tag, 2); + + // Flush ROB + reorderBuffer.flush(0); + + // Check rewind occurred on only the first destination register + EXPECT_EQ(rat.getMapping(archReg).tag, 1); + EXPECT_EQ(rat.getMapping(destinations[1]).tag, 2); } } // namespace pipeline diff --git a/test/unit/riscv/ArchInfoTest.cc b/test/unit/riscv/ArchInfoTest.cc new file mode 100644 index 0000000000..a086394633 --- /dev/null +++ b/test/unit/riscv/ArchInfoTest.cc @@ -0,0 +1,62 @@ +#include "gtest/gtest.h" +#include "simeng/arch/riscv/ArchInfo.hh" +#include "simeng/config/SimInfo.hh" +#include "simeng/version.hh" + +namespace simeng { +namespace arch { +namespace riscv { + +class RiscVArchInfoTest : public ::testing::Test { + public: + RiscVArchInfoTest() { + simeng::config::SimInfo::setConfig(SIMENG_SOURCE_DIR + "/configs/DEMO_RISCV.yaml"); + } + + protected: + const std::vector sysRegisterEnums = { + simeng::arch::riscv::riscv_sysreg::RISCV_SYSREG_FFLAGS, + simeng::arch::riscv::riscv_sysreg::RISCV_SYSREG_FRM, + simeng::arch::riscv::riscv_sysreg::RISCV_SYSREG_FCSR, + simeng::arch::riscv::riscv_sysreg::RISCV_SYSREG_CYCLE, + simeng::arch::riscv::riscv_sysreg::RISCV_SYSREG_TIME, + simeng::arch::riscv::riscv_sysreg::RISCV_SYSREG_INSTRET}; + + const std::vector archRegStruct = { + {8, 32}, {8, 32}, {8, static_cast(sysRegisterEnums.size())}}; + + const std::vector physRegStruct = { + {8, 154}, {8, 90}, {8, static_cast(sysRegisterEnums.size())}}; + + const std::vector physRegQuants = { + 154, 90, static_cast(sysRegisterEnums.size())}; +}; + +// Test for the getSysRegEnums() function +TEST_F(RiscVArchInfoTest, getSysRegEnums) { + ArchInfo info = ArchInfo(config::SimInfo::getConfig()); + EXPECT_EQ(info.getSysRegEnums(), sysRegisterEnums); +} + +// Test for the getArchRegStruct() function +TEST_F(RiscVArchInfoTest, getArchRegStruct) { + ArchInfo info = ArchInfo(config::SimInfo::getConfig()); + EXPECT_EQ(info.getArchRegStruct(), archRegStruct); +} + +// Test for the 
getPhysRegStruct() function +TEST_F(RiscVArchInfoTest, getPhysRegStruct) { + ArchInfo info = ArchInfo(config::SimInfo::getConfig()); + EXPECT_EQ(info.getPhysRegStruct(), physRegStruct); +} + +// Test for the getPhysRegQuantities() function +TEST_F(RiscVArchInfoTest, getPhysRegQuantities) { + ArchInfo info = ArchInfo(config::SimInfo::getConfig()); + EXPECT_EQ(info.getPhysRegQuantities(), physRegQuants); +} + +} // namespace riscv +} // namespace arch +} // namespace simeng \ No newline at end of file diff --git a/test/unit/riscv/ArchitectureTest.cc b/test/unit/riscv/ArchitectureTest.cc new file mode 100644 index 0000000000..a934232fe4 --- /dev/null +++ b/test/unit/riscv/ArchitectureTest.cc @@ -0,0 +1,163 @@ +#include + +#include "../ConfigInit.hh" +#include "gtest/gtest.h" +#include "simeng/CoreInstance.hh" +#include "simeng/RegisterFileSet.hh" +#include "simeng/arch/aarch64/Architecture.hh" +#include "simeng/arch/riscv/Architecture.hh" +#include "simeng/span.hh" +#include "simeng/version.hh" + +namespace simeng { +namespace arch { +namespace riscv { + +// RISC-V Tests +class RiscVArchitectureTest : public testing::Test { + public: + RiscVArchitectureTest() + : kernel(config::SimInfo::getConfig()["CPU-Info"]["Special-File-Dir-Path"] + .as()) { + arch = std::make_unique(kernel); + kernel.createProcess(process); + } + + protected: + // Setting core model to complex OoO model to more verbosely test the + // Architecture class. 
+ ConfigInit configInit = ConfigInit(config::ISA::RV64, R"YAML({ + Core: { + Simulation-Mode: outoforder + }, + Ports: { + '0': {Portname: Port 0, Instruction-Group-Support: [INT_SIMPLE, INT_MUL, FLOAT]}, + '1': {Portname: Port 1, Instruction-Group-Support: [INT, FLOAT]}, + '2': {Portname: Port 2, Instruction-Group-Support: [INT_SIMPLE, INT_MUL, BRANCH]}, + '3': {Portname: Port 4, Instruction-Group-Support: [LOAD]}, + '4': {Portname: Port 5, Instruction-Group-Support: [LOAD]}, + '5': {Portname: Port 3, Instruction-Group-Support: [STORE]} + }, + Reservation-Stations: { + '0': {Size: 60, Dispatch-Rate: 4, Ports: [Port 0, Port 1, Port 2, Port 4, Port 5, Port 3]} + }, + Execution-Units: { + '0': {Pipelined: True}, + '1': {Pipelined: True}, + '2': {Pipelined: True}, + '3': {Pipelined: True}, + '4': {Pipelined: True}, + '5': {Pipelined: True} + }, + Latencies: { + '0': {Instruction-Groups: [INT_SIMPLE_ARTH, INT_SIMPLE_LOGICAL], Execution-Latency: 1, Execution-Throughput: 1}, + '1': {Instruction-Groups: [INT_MUL], Execution-Latency: 5, Execution-Throughput: 1}, + '2': {Instruction-Groups: [INT_DIV_OR_SQRT], Execution-Latency: 39, Execution-Throughput: 39}, + '3': {Instruction-Groups: [FLOAT_SIMPLE_CMP], Execution-Latency: 5, Execution-Throughput: 1}, + '4': {Instruction-Groups: [FLOAT_MUL], Execution-Latency: 6, Execution-Throughput: 1}, + '5': {Instruction-Groups: [FLOAT_SIMPLE_CVT], Execution-Latency: 7, Execution-Throughput: 1}, + '6': {Instruction-Groups: [FLOAT_DIV_OR_SQRT], Execution-Latency: 16, Execution-Throughput: 16} + } + })YAML"); + + // addi sp, ra, 2000 + std::array validInstrBytes = {0x13, 0x81, 0x00, 0x7d}; + std::array invalidInstrBytes = {0x7d, 0x00, 0x81, 0xbb}; + + std::unique_ptr arch; + kernel::Linux kernel; + kernel::LinuxProcess process = kernel::LinuxProcess( + span((char*)validInstrBytes.data(), validInstrBytes.size())); +}; + +TEST_F(RiscVArchitectureTest, predecode) { + // Test that mis-aligned instruction address results in error + MacroOp 
output; + uint8_t result = arch->predecode(validInstrBytes.data(), + validInstrBytes.size(), 0x7, output); + EXPECT_EQ(result, 1); + EXPECT_EQ(output[0]->getInstructionAddress(), 0x7); + EXPECT_EQ(output[0]->exceptionEncountered(), true); + + // Test that an invalid instruction returns instruction with an exception + output = MacroOp(); + result = arch->predecode(invalidInstrBytes.data(), invalidInstrBytes.size(), + 0x8, output); + EXPECT_EQ(result, 4); + EXPECT_EQ(output[0]->getInstructionAddress(), 0x8); + EXPECT_EQ(output[0]->exceptionEncountered(), true); + + // Test that an instruction can be properly decoded + output = MacroOp(); + result = arch->predecode(validInstrBytes.data(), validInstrBytes.size(), 0x4, + output); + EXPECT_EQ(result, 4); + EXPECT_EQ(output[0]->getInstructionAddress(), 0x4); + EXPECT_EQ(output[0]->exceptionEncountered(), false); +} + +TEST_F(RiscVArchitectureTest, getSystemRegisterTag) { + // Test incorrect system register will fail + int32_t output = arch->getSystemRegisterTag(-1); + EXPECT_EQ(output, -1); + + // Test for correct behaviour + output = arch->getSystemRegisterTag(RISCV_SYSREG_FFLAGS); + EXPECT_EQ(output, 0); +} + +TEST_F(RiscVArchitectureTest, handleException) { + // Get Instruction + MacroOp insn; + uint8_t bytes = arch->predecode(invalidInstrBytes.data(), + invalidInstrBytes.size(), 0x4, insn); + EXPECT_EQ(bytes, 4); + EXPECT_EQ(insn[0]->getInstructionAddress(), 0x4); + EXPECT_EQ(insn[0]->exceptionEncountered(), true); + + // Get Core + std::string executablePath = ""; + std::vector executableArgs = {}; + std::unique_ptr coreInstance = + std::make_unique(executablePath, executableArgs); + const Core& core = *coreInstance->getCore(); + MemoryInterface& memInt = *coreInstance->getDataMemory(); + auto exceptionHandler = arch->handleException(insn[0], core, memInt); + + bool tickRes = exceptionHandler->tick(); + auto result = exceptionHandler->getResult(); + EXPECT_TRUE(tickRes); + EXPECT_TRUE(result.fatal); + // Instruction 
address for fatal exception is always 0. + EXPECT_EQ(result.instructionAddress, 0x0); +} + +TEST_F(RiscVArchitectureTest, getInitialState) { + std::vector regs = {{RegisterType::GENERAL, 2}}; + std::vector regVals = {{kernel.getInitialStackPointer(), 8}}; + + arch::ProcessStateChange changes = arch->getInitialState(); + EXPECT_EQ(changes.type, arch::ChangeType::REPLACEMENT); + EXPECT_EQ(changes.modifiedRegisters, regs); + EXPECT_EQ(changes.modifiedRegisterValues, regVals); +} + +TEST_F(RiscVArchitectureTest, getMaxInstructionSize) { + EXPECT_EQ(arch->getMaxInstructionSize(), 4); +} + +TEST_F(RiscVArchitectureTest, updateSystemTimerRegisters) { + RegisterFileSet regFile = config::SimInfo::getArchRegStruct(); + Register cycleSystemReg = { + RegisterType::SYSTEM, + static_cast(arch->getSystemRegisterTag(RISCV_SYSREG_CYCLE))}; + + uint64_t ticks = 30; + EXPECT_EQ(regFile.get(cycleSystemReg), RegisterValue(0, 8)); + arch->updateSystemTimerRegisters(®File, ticks); + EXPECT_EQ(regFile.get(cycleSystemReg), RegisterValue(ticks, 8)); +} + +} // namespace riscv +} // namespace arch +} // namespace simeng diff --git a/test/unit/riscv/ExceptionHandlerTest.cc b/test/unit/riscv/ExceptionHandlerTest.cc new file mode 100644 index 0000000000..dc51f781e8 --- /dev/null +++ b/test/unit/riscv/ExceptionHandlerTest.cc @@ -0,0 +1,631 @@ +#include "../ConfigInit.hh" +#include "../MockCore.hh" +#include "../MockInstruction.hh" +#include "../MockMemoryInterface.hh" +#include "gmock/gmock.h" +#include "simeng/ArchitecturalRegisterFileSet.hh" +#include "simeng/arch/riscv/Architecture.hh" +#include "simeng/arch/riscv/ExceptionHandler.hh" +#include "simeng/arch/riscv/Instruction.hh" + +namespace simeng { +namespace arch { +namespace riscv { + +using ::testing::HasSubstr; +using ::testing::Return; +using ::testing::ReturnRef; + +class RiscVExceptionHandlerTest : public ::testing::Test { + public: + RiscVExceptionHandlerTest() + : 
kernel(config::SimInfo::getConfig()["CPU-Info"]["Special-File-Dir-Path"] + .as()), + arch(kernel), + physRegFileSet(config::SimInfo::getArchRegStruct()), + archRegFileSet(physRegFileSet) {} + + protected: + ConfigInit configInit = ConfigInit(config::ISA::RV64, ""); + + MockCore core; + MockMemoryInterface memory; + kernel::Linux kernel; + Architecture arch; + + RegisterFileSet physRegFileSet; + ArchitecturalRegisterFileSet archRegFileSet; + + // addi sp, ra, 2000 --- Just need a valid instruction to hijack + std::array validInstrBytes = {0x13, 0x81, 0x00, 0x7d}; + + /** Helper constants for RISC-V general-purpose registers. */ + static constexpr Register R0 = {RegisterType::GENERAL, 10}; + static constexpr Register R1 = {RegisterType::GENERAL, 11}; + static constexpr Register R2 = {RegisterType::GENERAL, 12}; + static constexpr Register R3 = {RegisterType::GENERAL, 13}; + static constexpr Register R4 = {RegisterType::GENERAL, 14}; + static constexpr Register R5 = {RegisterType::GENERAL, 15}; + static constexpr Register R7 = {RegisterType::GENERAL, 17}; +}; + +// All system calls are tested in /test/regression/riscv/Syscall.cc + +// Test that a syscall is processed successfully +TEST_F(RiscVExceptionHandlerTest, testSyscall) { + // Create "syscall" instruction + uint64_t insnAddr = 0x4; + MacroOp uops; + arch.predecode(validInstrBytes.data(), validInstrBytes.size(), insnAddr, + uops); + InstructionException exception = InstructionException::SupervisorCall; + std::shared_ptr insn = std::make_shared( + arch, static_cast(uops[0].get())->getMetadata(), exception); + insn->setInstructionAddress(insnAddr); + + // Setup register file for `uname` syscall (chosen as minimal functionality) + archRegFileSet.set(R0, RegisterValue(1234, 8)); + archRegFileSet.set(R7, RegisterValue(160, 8)); + + // Create ExceptionHandler + ExceptionHandler handler(insn, core, memory, kernel); + + // Tick exceptionHandler + ON_CALL(core, getArchitecturalRegisterFileSet()) + 
.WillByDefault(ReturnRef(archRegFileSet)); + EXPECT_CALL(core, getArchitecturalRegisterFileSet()).Times(1); + bool retVal = handler.tick(); + ExceptionResult result = handler.getResult(); + + EXPECT_TRUE(retVal); + EXPECT_FALSE(result.fatal); + EXPECT_EQ(result.instructionAddress, insnAddr + 4); + EXPECT_EQ(result.stateChange.type, ChangeType::REPLACEMENT); + std::vector modRegs = {R0}; + EXPECT_EQ(result.stateChange.modifiedRegisters, modRegs); + std::vector modRegVals = {{0ull, 8}}; + EXPECT_EQ(result.stateChange.modifiedRegisterValues, modRegVals); + std::vector modMemTargets = {{1234, 6}, + {1234 + 65, 13}, + {1234 + (65 * 2), 42}, + {1234 + (65 * 3), 35}, + {1234 + (65 * 4), 8}, + {1234 + (65 * 5), 7}}; + EXPECT_EQ(result.stateChange.memoryAddresses, modMemTargets); + std::vector modMemVals = { + RegisterValue("Linux"), + RegisterValue("fedora-riscv"), + RegisterValue("5.5.0-0.rc5.git0.1.1.riscv64.fc32.riscv64"), + RegisterValue("#1 SMP Mon Jan 6 17:31:22 UTC 2020"), + RegisterValue("riscv64"), + RegisterValue("(none)")}; + EXPECT_EQ(result.stateChange.memoryAddressValues, modMemVals); +} + +// Test that `readStringThen()` operates as expected +TEST_F(RiscVExceptionHandlerTest, readStringThen) { + // Create new mock instruction and ExceptionHandler + std::shared_ptr uopPtr(new MockInstruction); + ExceptionHandler handler(uopPtr, core, memory, kernel); + + // Initialise variables + size_t retVal = 0; + char* buffer; + buffer = (char*)malloc(256); + for (int i = 0; i < 256; i++) { + buffer[i] = 'q'; + } + uint64_t addr = 1024; + int maxLen = kernel::Linux::LINUX_PATH_MAX; + + MemoryAccessTarget target1 = {addr, 1}; + MemoryReadResult res1 = {target1, RegisterValue(0xAB, 1), 1}; + span res1Span = span(&res1, 1); + + MemoryAccessTarget target2 = {addr + 1, 1}; + MemoryReadResult res2 = {target2, RegisterValue(static_cast('\0'), 1), + 1}; + span res2Span = span(&res2, 1); + + // On first call to readStringThen, expect return of false and retVal to still + // be 0, 
and buffer to be filled with `q` + MemoryAccessTarget tar = {addr, 1}; + EXPECT_CALL(memory, requestRead(tar, 0)).Times(1); + bool outcome = + handler.readStringThen(buffer, addr, maxLen, [&retVal](auto length) { + retVal = length; + return true; + }); + EXPECT_FALSE(outcome); + EXPECT_EQ(retVal, 0); + for (int i = 0; i < 256; i++) { + EXPECT_EQ(buffer[i], 'q'); + } + + // ResumeHandling (called on tick()) should now be set to `readStringThen()` + // so call this for our second pass. + ON_CALL(memory, getCompletedReads()) + .WillByDefault(Return(span())); + EXPECT_CALL(memory, getCompletedReads()).Times(1); + outcome = handler.tick(); + // No memory reads completed yet so again expect to return false and no change + // to `retval` or buffer + EXPECT_FALSE(outcome); + EXPECT_EQ(retVal, 0); + for (int i = 0; i < 256; i++) { + EXPECT_EQ(buffer[i], 'q'); + } + + // Call tick() again, but mimic a memory read completing + tar = {addr + 1, 1}; + ON_CALL(memory, getCompletedReads()).WillByDefault(Return(res1Span)); + EXPECT_CALL(memory, getCompletedReads()).Times(1); + EXPECT_CALL(memory, requestRead(tar, 0)).Times(1); + outcome = handler.tick(); + // Completed read but still not complete, so outcome should be false, retVal + // unchanged, but some data in the buffer + EXPECT_FALSE(outcome); + EXPECT_EQ(retVal, 0); + for (int i = 0; i < 256; i++) { + if (i == 0) { + EXPECT_EQ(buffer[i], (char)0xAB); + } else { + EXPECT_EQ(buffer[i], 'q'); + } + } + + // Call tick() for a final time, getting the final read result + ON_CALL(memory, getCompletedReads()).WillByDefault(Return(res2Span)); + EXPECT_CALL(memory, getCompletedReads()).Times(1); + outcome = handler.tick(); + // End of string '\0' found so expect `then()` to have been called, the + // outcome to be true, and the buffer again to have updated + EXPECT_TRUE(outcome); + EXPECT_EQ(retVal, 1); + for (int i = 0; i < 256; i++) { + if (i == 0) { + EXPECT_EQ(buffer[i], (char)0xAB); + } else if (i == 1) { + EXPECT_EQ(buffer[i], 
'\0'); + } else { + EXPECT_EQ(buffer[i], 'q'); + } + } +} + +// Test that in `readStringThen()` if max length is 0, then is called straight +// away +TEST_F(RiscVExceptionHandlerTest, readStringThen_maxLen0) { + // Create new mock instruction and ExceptionHandler + std::shared_ptr uopPtr(new MockInstruction); + ExceptionHandler handler(uopPtr, core, memory, kernel); + size_t retVal = 100; + char* buffer; + buffer = (char*)malloc(256); + for (int i = 0; i < 256; i++) { + buffer[i] = 'q'; + } + uint64_t addr = 1024; + int maxLen = 0; + + bool outcome = + handler.readStringThen(buffer, addr, maxLen, [&retVal](auto length) { + retVal = length; + return true; + }); + EXPECT_TRUE(outcome); + EXPECT_EQ(retVal, -1); + for (int i = 0; i < 256; i++) { + EXPECT_EQ(buffer[i], 'q'); + } +} + +// Test that in `readStringThen()` if max length has been met, then() is called +// and no more string is fetched +TEST_F(RiscVExceptionHandlerTest, readStringThen_maxLenReached) { + // Create new mock instruction and ExceptionHandler + std::shared_ptr uopPtr(new MockInstruction); + ExceptionHandler handler(uopPtr, core, memory, kernel); + + // Initialise variables + size_t retVal = 100; + char* buffer; + buffer = (char*)malloc(256); + for (int i = 0; i < 256; i++) { + buffer[i] = 'q'; + } + uint64_t addr = 1024; + int maxLen = 1; + + MemoryAccessTarget target1 = {addr, 1}; + MemoryReadResult res1 = {target1, RegisterValue(0xAB, 1), 1}; + span res1Span = span(&res1, 1); + + // On first call to readStringThen, expect return of false and retVal to still + // be 0, and buffer to be filled with `q` + MemoryAccessTarget tar = {addr, 1}; + EXPECT_CALL(memory, requestRead(tar, 0)).Times(1); + bool outcome = + handler.readStringThen(buffer, addr, maxLen, [&retVal](auto length) { + retVal = length; + return true; + }); + EXPECT_FALSE(outcome); + EXPECT_EQ(retVal, 100); + for (int i = 0; i < 256; i++) { + EXPECT_EQ(buffer[i], 'q'); + } + + // ResumeHandling (called on tick()) should now be set to 
`readStringThen()` + // so call this for our second pass. + ON_CALL(memory, getCompletedReads()) + .WillByDefault(Return(span())); + EXPECT_CALL(memory, getCompletedReads()).Times(1); + outcome = handler.tick(); + // No memory reads completed yet so again expect to return false and no change + // to `retval` or buffer + EXPECT_FALSE(outcome); + EXPECT_EQ(retVal, 100); + for (int i = 0; i < 256; i++) { + EXPECT_EQ(buffer[i], 'q'); + } + + // Call tick() again, but mimic a memory read completing + ON_CALL(memory, getCompletedReads()).WillByDefault(Return(res1Span)); + EXPECT_CALL(memory, getCompletedReads()).Times(1); + outcome = handler.tick(); + // Completed read and maxLength reached. Expect then() to have been called, + // the outcome to be true, and the buffer to have updated. RetVal should be + // maxLength + EXPECT_TRUE(outcome); + EXPECT_EQ(retVal, 1); + for (int i = 0; i < 256; i++) { + if (i == 0) { + EXPECT_EQ(buffer[i], (char)0xAB); + } else { + EXPECT_EQ(buffer[i], 'q'); + } + } +} + +// Test that `readBufferThen()` operates as expected +TEST_F(RiscVExceptionHandlerTest, readBufferThen) { + // Create new mock instruction and ExceptionHandler + std::shared_ptr uopPtr(new MockInstruction); + uopPtr->setSequenceId(5); + ExceptionHandler handler(uopPtr, core, memory, kernel); + + // Initialise needed values for function + uint64_t retVal = 0; + uint64_t ptr = 0; + uint64_t length = 192; + + // Initialise data to "read" from MockMemory + std::vector dataVec(length, 'q'); + std::vector dataVec2(length, 'q'); + // Initialise the two required targets (128-bytes per read request in + // readBufferThen()) + MemoryAccessTarget tar1 = {ptr, 128}; + MemoryAccessTarget tar2 = {ptr + 128, static_cast(length - 128)}; + // Initialise "responses" from the MockMemory + MemoryReadResult res1 = {tar1, RegisterValue(dataVec.data() + ptr, 128), + uopPtr->getSequenceId()}; + MemoryReadResult res2 = { + tar2, RegisterValue(dataVec.data() + ptr + 128, length - 128), + 
uopPtr->getSequenceId()}; + + // Confirm that internal dataBuffer is empty + EXPECT_EQ(handler.dataBuffer.size(), 0); + + // Initial call to readBufferThen - expect resumeHandling to be updated to + // readBufferThen and a memory read request to have occurred + EXPECT_CALL(memory, requestRead(tar1, uopPtr->getSequenceId())).Times(1); + EXPECT_CALL(memory, getCompletedReads()).Times(1); + bool outcome = handler.readBufferThen(ptr, length, [&retVal]() { + retVal = 10; + return true; + }); + EXPECT_FALSE(outcome); + EXPECT_EQ(retVal, 0); + EXPECT_EQ(handler.dataBuffer.size(), 0); + + // Can now call tick() - on call, emulate no reads completed + ON_CALL(memory, getCompletedReads()) + .WillByDefault(Return(span())); + EXPECT_CALL(memory, getCompletedReads()).Times(1); + outcome = handler.tick(); + EXPECT_FALSE(outcome); + EXPECT_EQ(retVal, 0); + EXPECT_EQ(handler.dataBuffer.size(), 0); + + // Call tick() again, simulating completed read + new read requested as still + // data to fetch + ON_CALL(memory, getCompletedReads()) + .WillByDefault(Return(span(&res1, 1))); + // Make sure clearCompletedReads() alters functionality of getCompletedReads() + ON_CALL(memory, clearCompletedReads()) + .WillByDefault(::testing::InvokeWithoutArgs([&]() { + ON_CALL(memory, getCompletedReads()) + .WillByDefault(Return(span())); + })); + EXPECT_CALL(memory, getCompletedReads()).Times(2); + EXPECT_CALL(memory, clearCompletedReads()).Times(1); + EXPECT_CALL(memory, requestRead(tar2, uopPtr->getSequenceId())).Times(1); + outcome = handler.tick(); + EXPECT_FALSE(outcome); + EXPECT_EQ(retVal, 0); + EXPECT_EQ(handler.dataBuffer.size(), 128); + for (int i = 0; i < handler.dataBuffer.size(); i++) { + EXPECT_EQ(handler.dataBuffer[i], 'q'); + } + + // One final call to tick() to get last bits of data from memory and call + // then() + ON_CALL(memory, getCompletedReads()) + .WillByDefault(Return(span(&res2, 1))); + EXPECT_CALL(memory, getCompletedReads()).Times(1); + EXPECT_CALL(memory, 
clearCompletedReads()).Times(1); + outcome = handler.tick(); + EXPECT_TRUE(outcome); + EXPECT_EQ(retVal, 10); + EXPECT_EQ(handler.dataBuffer.size(), length); + for (int i = 0; i < length; i++) { + EXPECT_EQ(handler.dataBuffer[i], static_cast('q')); + } +} + +// Test that `readBufferThen()` calls then if length is 0 +TEST_F(RiscVExceptionHandlerTest, readBufferThen_length0) { + // Create new mock instruction and ExceptionHandler + std::shared_ptr uopPtr(new MockInstruction); + ExceptionHandler handler(uopPtr, core, memory, kernel); + + const size_t expectedVal = 10; + uint64_t retVal = 0; + uint64_t ptr = 0; + uint64_t length = 0; + + bool outcome = handler.readBufferThen(ptr, length, [&retVal]() { + retVal = 10; + return true; + }); + EXPECT_TRUE(outcome); + EXPECT_EQ(retVal, expectedVal); +} + +// Test that all RISC-V exception types print as expected +TEST_F(RiscVExceptionHandlerTest, printException) { + ON_CALL(core, getArchitecturalRegisterFileSet()) + .WillByDefault(ReturnRef(archRegFileSet)); + uint64_t insnAddr = 0x4; + MacroOp uops; + + // Create instruction for EncodingUnallocated + arch.predecode(validInstrBytes.data(), validInstrBytes.size(), insnAddr, + uops); + InstructionException exception = InstructionException::EncodingUnallocated; + std::shared_ptr insn = std::make_shared( + arch, static_cast(uops[0].get())->getMetadata(), exception); + // Create ExceptionHandler + ExceptionHandler handler_0(insn, core, memory, kernel); + // Capture std::cout and tick exceptionHandler + std::stringstream buffer; + std::streambuf* sbuf = std::cout.rdbuf(); // Save cout's buffer + std::cout.rdbuf(buffer.rdbuf()); // Redirect cout to buffer + handler_0.printException(*static_cast(insn.get())); + std::cout.rdbuf(sbuf); // Restore cout + EXPECT_THAT(buffer.str(), HasSubstr("[SimEng:ExceptionHandler] Encountered " + "encoding unallocated exception")); + buffer.str(std::string()); + uops.clear(); + + // Create instruction for ExecutionNotYetImplemented + 
arch.predecode(validInstrBytes.data(), validInstrBytes.size(), insnAddr, + uops); + exception = InstructionException::ExecutionNotYetImplemented; + insn = std::make_shared( + arch, static_cast(uops[0].get())->getMetadata(), exception); + // Create ExceptionHandler + ExceptionHandler handler_1(insn, core, memory, kernel); + // Capture std::cout and tick exceptionHandler + sbuf = std::cout.rdbuf(); // Save cout's buffer + std::cout.rdbuf(buffer.rdbuf()); // Redirect cout to buffer + handler_1.printException(*static_cast(insn.get())); + std::cout.rdbuf(sbuf); // Restore cout + EXPECT_THAT(buffer.str(), + HasSubstr("[SimEng:ExceptionHandler] Encountered execution " + "not-yet-implemented exception")); + buffer.str(std::string()); + uops.clear(); + + // Create instruction for AliasNotYetImplemented + arch.predecode(validInstrBytes.data(), validInstrBytes.size(), insnAddr, + uops); + exception = InstructionException::AliasNotYetImplemented; + insn = std::make_shared( + arch, static_cast(uops[0].get())->getMetadata(), exception); + // Create ExceptionHandler + ExceptionHandler handler_2(insn, core, memory, kernel); + // Capture std::cout and tick exceptionHandler + sbuf = std::cout.rdbuf(); // Save cout's buffer + std::cout.rdbuf(buffer.rdbuf()); // Redirect cout to buffer + handler_2.printException(*static_cast(insn.get())); + std::cout.rdbuf(sbuf); // Restore cout + EXPECT_THAT(buffer.str(), HasSubstr("[SimEng:ExceptionHandler] Encountered " + "alias not-yet-implemented exception")); + buffer.str(std::string()); + uops.clear(); + + // Create instruction for MisalignedPC + arch.predecode(validInstrBytes.data(), validInstrBytes.size(), insnAddr, + uops); + exception = InstructionException::MisalignedPC; + insn = std::make_shared( + arch, static_cast(uops[0].get())->getMetadata(), exception); + // Create ExceptionHandler + ExceptionHandler handler_3(insn, core, memory, kernel); + // Capture std::cout and tick exceptionHandler + sbuf = std::cout.rdbuf(); // Save cout's 
buffer + std::cout.rdbuf(buffer.rdbuf()); // Redirect cout to buffer + handler_3.printException(*static_cast(insn.get())); + std::cout.rdbuf(sbuf); // Restore cout + EXPECT_THAT(buffer.str(), + HasSubstr("[SimEng:ExceptionHandler] Encountered misaligned " + "program counter exception")); + buffer.str(std::string()); + uops.clear(); + + // Create instruction for DataAbort + arch.predecode(validInstrBytes.data(), validInstrBytes.size(), insnAddr, + uops); + exception = InstructionException::DataAbort; + insn = std::make_shared( + arch, static_cast(uops[0].get())->getMetadata(), exception); + // Create ExceptionHandler + ExceptionHandler handler_4(insn, core, memory, kernel); + // Capture std::cout and tick exceptionHandler + sbuf = std::cout.rdbuf(); // Save cout's buffer + std::cout.rdbuf(buffer.rdbuf()); // Redirect cout to buffer + handler_4.printException(*static_cast(insn.get())); + std::cout.rdbuf(sbuf); // Restore cout + EXPECT_THAT( + buffer.str(), + HasSubstr("[SimEng:ExceptionHandler] Encountered data abort exception")); + buffer.str(std::string()); + uops.clear(); + + // Create instruction for SupervisorCall + arch.predecode(validInstrBytes.data(), validInstrBytes.size(), insnAddr, + uops); + exception = InstructionException::SupervisorCall; + insn = std::make_shared( + arch, static_cast(uops[0].get())->getMetadata(), exception); + // Create ExceptionHandler + ExceptionHandler handler_5(insn, core, memory, kernel); + // Capture std::cout and tick exceptionHandler + sbuf = std::cout.rdbuf(); // Save cout's buffer + std::cout.rdbuf(buffer.rdbuf()); // Redirect cout to buffer + handler_5.printException(*static_cast(insn.get())); + std::cout.rdbuf(sbuf); // Restore cout + EXPECT_THAT( + buffer.str(), + HasSubstr( + "[SimEng:ExceptionHandler] Encountered supervisor call exception")); + buffer.str(std::string()); + uops.clear(); + + // Create instruction for HypervisorCall + arch.predecode(validInstrBytes.data(), validInstrBytes.size(), insnAddr, + uops); + 
exception = InstructionException::HypervisorCall; + insn = std::make_shared( + arch, static_cast(uops[0].get())->getMetadata(), exception); + // Create ExceptionHandler + ExceptionHandler handler_6(insn, core, memory, kernel); + // Capture std::cout and tick exceptionHandler + sbuf = std::cout.rdbuf(); // Save cout's buffer + std::cout.rdbuf(buffer.rdbuf()); // Redirect cout to buffer + handler_6.printException(*static_cast(insn.get())); + std::cout.rdbuf(sbuf); // Restore cout + EXPECT_THAT( + buffer.str(), + HasSubstr( + "[SimEng:ExceptionHandler] Encountered hypervisor call exception")); + buffer.str(std::string()); + uops.clear(); + + // Create instruction for SecureMonitorCall + arch.predecode(validInstrBytes.data(), validInstrBytes.size(), insnAddr, + uops); + exception = InstructionException::SecureMonitorCall; + insn = std::make_shared( + arch, static_cast(uops[0].get())->getMetadata(), exception); + // Create ExceptionHandler + ExceptionHandler handler_7(insn, core, memory, kernel); + // Capture std::cout and tick exceptionHandler + sbuf = std::cout.rdbuf(); // Save cout's buffer + std::cout.rdbuf(buffer.rdbuf()); // Redirect cout to buffer + handler_7.printException(*static_cast(insn.get())); + std::cout.rdbuf(sbuf); // Restore cout + EXPECT_THAT(buffer.str(), HasSubstr("[SimEng:ExceptionHandler] Encountered " + "secure monitor call exception")); + buffer.str(std::string()); + uops.clear(); + + // Create instruction for NoAvailablePort + arch.predecode(validInstrBytes.data(), validInstrBytes.size(), insnAddr, + uops); + exception = InstructionException::NoAvailablePort; + insn = std::make_shared( + arch, static_cast(uops[0].get())->getMetadata(), exception); + // Create ExceptionHandler + ExceptionHandler handler_8(insn, core, memory, kernel); + // Capture std::cout and tick exceptionHandler + sbuf = std::cout.rdbuf(); // Save cout's buffer + std::cout.rdbuf(buffer.rdbuf()); // Redirect cout to buffer + handler_8.printException(*static_cast(insn.get())); 
+ std::cout.rdbuf(sbuf); // Restore cout + EXPECT_THAT(buffer.str(), HasSubstr("[SimEng:ExceptionHandler] Encountered " + "unsupported execution port exception")); + buffer.str(std::string()); + uops.clear(); + + // Create instruction for IllegalInstruction + arch.predecode(validInstrBytes.data(), validInstrBytes.size(), insnAddr, + uops); + exception = InstructionException::IllegalInstruction; + insn = std::make_shared( + arch, static_cast(uops[0].get())->getMetadata(), exception); + // Create ExceptionHandler + ExceptionHandler handler_9(insn, core, memory, kernel); + // Capture std::cout and tick exceptionHandler + sbuf = std::cout.rdbuf(); // Save cout's buffer + std::cout.rdbuf(buffer.rdbuf()); // Redirect cout to buffer + handler_9.printException(*static_cast(insn.get())); + std::cout.rdbuf(sbuf); // Restore cout + EXPECT_THAT(buffer.str(), HasSubstr("[SimEng:ExceptionHandler] Encountered " + "illegal instruction exception")); + buffer.str(std::string()); + uops.clear(); + + // Create instruction for PipelineFlush + arch.predecode(validInstrBytes.data(), validInstrBytes.size(), insnAddr, + uops); + exception = InstructionException::PipelineFlush; + insn = std::make_shared( + arch, static_cast(uops[0].get())->getMetadata(), exception); + // Create ExceptionHandler + ExceptionHandler handler_10(insn, core, memory, kernel); + // Capture std::cout and tick exceptionHandler + sbuf = std::cout.rdbuf(); // Save cout's buffer + std::cout.rdbuf(buffer.rdbuf()); // Redirect cout to buffer + handler_10.printException(*static_cast(insn.get())); + std::cout.rdbuf(sbuf); // Restore cout + EXPECT_THAT(buffer.str(), HasSubstr("[SimEng:ExceptionHandler] Encountered " + "unknown atomic operation exception")); + buffer.str(std::string()); + uops.clear(); + + // Create instruction for default case + arch.predecode(validInstrBytes.data(), validInstrBytes.size(), insnAddr, + uops); + exception = InstructionException::None; + insn = std::make_shared( + arch, 
static_cast(uops[0].get())->getMetadata(), exception); + // Create ExceptionHandler + ExceptionHandler handler_11(insn, core, memory, kernel); + // Capture std::cout and tick exceptionHandler + sbuf = std::cout.rdbuf(); // Save cout's buffer + std::cout.rdbuf(buffer.rdbuf()); // Redirect cout to buffer + handler_11.printException(*static_cast(insn.get())); + std::cout.rdbuf(sbuf); // Restore cout + EXPECT_THAT(buffer.str(), + HasSubstr("[SimEng:ExceptionHandler] Encountered unknown (id: " + "0) exception")); + buffer.str(std::string()); + uops.clear(); +} + +} // namespace riscv +} // namespace arch +} // namespace simeng \ No newline at end of file diff --git a/test/unit/riscv/InstructionTest.cc b/test/unit/riscv/InstructionTest.cc new file mode 100644 index 0000000000..37580c4f80 --- /dev/null +++ b/test/unit/riscv/InstructionTest.cc @@ -0,0 +1,620 @@ +#include "../ConfigInit.hh" +#include "../MockArchitecture.hh" +#include "arch/riscv/InstructionMetadata.hh" +#include "gmock/gmock.h" +#include "simeng/arch/riscv/Instruction.hh" +#include "simeng/version.hh" + +namespace simeng { +namespace arch { +namespace riscv { + +// RiscV Instruction Tests +class RiscVInstructionTest : public testing::Test { + public: + RiscVInstructionTest() + : os(config::SimInfo::getConfig()["CPU-Info"]["Special-File-Dir-Path"] + .as()), + arch(os) { + // Create InstructionMetadata objects + cs_open(CS_ARCH_RISCV, CS_MODE_RISCV64, &capstoneHandle); + cs_option(capstoneHandle, CS_OPT_DETAIL, CS_OPT_ON); + + // Create instructions which cover the 3 main types: Arithmetic, Memory, + // Branch. This allows for full testing of the Instruction class. 
+ + // div + cs_insn rawInsn_div; + cs_detail rawDetail_div; + rawInsn_div.detail = &rawDetail_div; + size_t size_div = 4; + uint64_t address_div = 0; + const uint8_t* encoding_div = + reinterpret_cast(divInstrBytes.data()); + cs_disasm_iter(capstoneHandle, &encoding_div, &size_div, &address_div, + &rawInsn_div); + divMetadata = std::make_unique(rawInsn_div); + + // lbu + cs_insn rawInsn_lbu; + cs_detail rawDetail_ldp; + rawInsn_lbu.detail = &rawDetail_ldp; + size_t size_lbu = 4; + uint64_t address_lbu = 0; + const uint8_t* encoding_lbu = + reinterpret_cast(lbuInstrBytes.data()); + cs_disasm_iter(capstoneHandle, &encoding_lbu, &size_lbu, &address_lbu, + &rawInsn_lbu); + lbuMetadata = std::make_unique(rawInsn_lbu); + + // bgeu + cs_insn rawInsn_bgeu; + cs_detail rawDetail_bgeu; + rawInsn_bgeu.detail = &rawDetail_bgeu; + size_t size_bgeu = 4; + uint64_t address_bgeu = 0; + const uint8_t* encoding_bgeu = + reinterpret_cast(bgeuInstrBytes.data()); + cs_disasm_iter(capstoneHandle, &encoding_bgeu, &size_bgeu, &address_bgeu, + &rawInsn_bgeu); + bgeuMetadata = std::make_unique(rawInsn_bgeu); + + const uint8_t* badEncoding = + reinterpret_cast(invalidInstrBytes.data()); + invalidMetadata = std::make_unique(badEncoding); + } + + ~RiscVInstructionTest() { cs_close(&capstoneHandle); } + + protected: + ConfigInit configInit = ConfigInit(config::ISA::RV64, ""); + + // div a3, a3, a0 + std::array divInstrBytes = {0xB3, 0xC6, 0xA6, 0x02}; + // lbu a5, 0(s3) + std::array lbuInstrBytes = {0x83, 0xC7, 0x09, 0x00}; + // bgeu a5, a4, -86 + std::array bgeuInstrBytes = {0xE3, 0xF5, 0xE7, 0xFA}; + std::array invalidInstrBytes = {0x20, 0x00, 0x02, 0x8c}; + + // A Capstone decoding library handle, for decoding instructions. 
+ csh capstoneHandle; + + kernel::Linux os; + Architecture arch; + + std::unique_ptr divMetadata; + std::unique_ptr lbuMetadata; + std::unique_ptr bgeuMetadata; + std::unique_ptr invalidMetadata; + InstructionException exception; +}; + +// Test that a valid instruction is created correctly +TEST_F(RiscVInstructionTest, validInsn) { + // Insn is `div a3, a3, a0` + Instruction insn = Instruction(arch, *divMetadata.get()); + // Define instruction's registers + std::vector destRegs = {{RegisterType::GENERAL, 13}}; + std::vector srcRegs = {{RegisterType::GENERAL, 13}, + {RegisterType::GENERAL, 10}}; + const std::vector ports = {1, 2, 3}; + insn.setExecutionInfo({3, 4, ports}); + insn.setInstructionAddress(0x48); + insn.setInstructionId(11); + insn.setSequenceId(12); + + // Ensure that all instruction values are as expected after creation + BranchPrediction pred = {false, 0}; + bool matchingPred = (insn.getBranchPrediction() == pred) ? true : false; + EXPECT_EQ(&insn.getArchitecture(), &arch); + EXPECT_EQ(insn.getBranchAddress(), 0); + EXPECT_TRUE(matchingPred); + EXPECT_EQ(insn.getBranchType(), BranchType::Unknown); + EXPECT_EQ(insn.getData().size(), 0); + EXPECT_EQ(insn.getDestinationRegisters().size(), destRegs.size()); + for (int i = 0; i < destRegs.size(); i++) { + EXPECT_EQ(insn.getDestinationRegisters()[i], destRegs[i]); + } + EXPECT_EQ(insn.getException(), InstructionException::None); + EXPECT_EQ(insn.getGeneratedAddresses().size(), 0); + EXPECT_EQ(insn.getGroup(), InstructionGroups::INT_DIV_OR_SQRT); + EXPECT_EQ(insn.getInstructionAddress(), 0x48); + EXPECT_EQ(insn.getInstructionId(), 11); + EXPECT_EQ(insn.getKnownOffset(), 0); + EXPECT_EQ(insn.getLatency(), 3); + EXPECT_EQ(insn.getLSQLatency(), 1); + EXPECT_EQ(&insn.getMetadata(), divMetadata.get()); + EXPECT_EQ(insn.getMicroOpIndex(), 0); + EXPECT_EQ(insn.getResults().size(), 1); + EXPECT_EQ(insn.getSequenceId(), 12); + EXPECT_EQ(insn.getSourceOperands().size(), 2); + 
EXPECT_EQ(insn.getSourceRegisters().size(), srcRegs.size()); + for (int i = 0; i < srcRegs.size(); i++) { + EXPECT_EQ(insn.getSourceRegisters()[i], srcRegs[i]); + EXPECT_FALSE(insn.isOperandReady(i)); + } + EXPECT_EQ(insn.getStallCycles(), 4); + EXPECT_EQ(insn.getSupportedPorts(), ports); + + EXPECT_FALSE(insn.canExecute()); + EXPECT_FALSE(insn.isStoreAddress()); + EXPECT_FALSE(insn.isStoreData()); + EXPECT_FALSE(insn.isLoad()); + EXPECT_FALSE(insn.isBranch()); + EXPECT_FALSE(insn.isAtomic()); + EXPECT_FALSE(insn.isFloat()); + EXPECT_FALSE(insn.exceptionEncountered()); + EXPECT_FALSE(insn.hasExecuted()); + EXPECT_FALSE(insn.canCommit()); + EXPECT_TRUE(insn.hasAllData()); + EXPECT_FALSE(insn.wasBranchTaken()); + EXPECT_FALSE(insn.isFlushed()); + EXPECT_FALSE(insn.isMicroOp()); + EXPECT_TRUE(insn.isLastMicroOp()); + EXPECT_FALSE(insn.isWaitingCommit()); +} + +// Test that an invalid instruction can be created - invalid due to byte stream +TEST_F(RiscVInstructionTest, invalidInsn_1) { + Instruction insn = Instruction(arch, *invalidMetadata.get()); + // Define instruction's registers + std::vector destRegs = {}; + std::vector srcRegs = {}; + const std::vector ports = {}; + insn.setExecutionInfo({1, 1, ports}); + insn.setInstructionAddress(0x44); + insn.setInstructionId(13); + insn.setSequenceId(14); + + // Ensure that all instruction values are as expected after creation + BranchPrediction pred = {false, 0}; + bool matchingPred = (insn.getBranchPrediction() == pred) ? 
true : false; + EXPECT_EQ(&insn.getArchitecture(), &arch); + EXPECT_EQ(insn.getBranchAddress(), 0); + EXPECT_TRUE(matchingPred); + EXPECT_EQ(insn.getBranchType(), BranchType::Unknown); + EXPECT_EQ(insn.getData().size(), 0); + EXPECT_EQ(insn.getDestinationRegisters().size(), destRegs.size()); + for (int i = 0; i < destRegs.size(); i++) { + EXPECT_EQ(insn.getDestinationRegisters()[i], destRegs[i]); + } + EXPECT_EQ(insn.getException(), InstructionException::EncodingUnallocated); + EXPECT_EQ(insn.getGeneratedAddresses().size(), 0); + // Default Group + EXPECT_EQ(insn.getGroup(), InstructionGroups::INT_SIMPLE_ARTH); + EXPECT_EQ(insn.getInstructionAddress(), 0x44); + EXPECT_EQ(insn.getInstructionId(), 13); + EXPECT_EQ(insn.getKnownOffset(), 0); + EXPECT_EQ(insn.getLatency(), 1); + EXPECT_EQ(insn.getLSQLatency(), 1); + EXPECT_EQ(&insn.getMetadata(), invalidMetadata.get()); + EXPECT_EQ(insn.getMicroOpIndex(), 0); + EXPECT_EQ(insn.getResults().size(), 0); + EXPECT_EQ(insn.getSequenceId(), 14); + EXPECT_EQ(insn.getSourceOperands().size(), 0); + EXPECT_EQ(insn.getSourceRegisters().size(), srcRegs.size()); + for (int i = 0; i < srcRegs.size(); i++) { + EXPECT_EQ(insn.getSourceRegisters()[i], srcRegs[i]); + EXPECT_FALSE(insn.isOperandReady(i)); + } + EXPECT_EQ(insn.getStallCycles(), 1); + EXPECT_EQ(insn.getSupportedPorts(), ports); + + EXPECT_TRUE(insn.canExecute()); + EXPECT_FALSE(insn.isStoreAddress()); + EXPECT_FALSE(insn.isStoreData()); + EXPECT_FALSE(insn.isLoad()); + EXPECT_FALSE(insn.isBranch()); + EXPECT_FALSE(insn.isAtomic()); + EXPECT_FALSE(insn.isFloat()); + EXPECT_TRUE(insn.exceptionEncountered()); + EXPECT_FALSE(insn.hasExecuted()); + EXPECT_FALSE(insn.canCommit()); + EXPECT_TRUE(insn.hasAllData()); + EXPECT_FALSE(insn.wasBranchTaken()); + EXPECT_FALSE(insn.isFlushed()); + EXPECT_FALSE(insn.isMicroOp()); + EXPECT_TRUE(insn.isLastMicroOp()); + EXPECT_FALSE(insn.isWaitingCommit()); +} + +// Test that an invalid instruction can be created - invalid due to exception 
+// provided +TEST_F(RiscVInstructionTest, invalidInsn_2) { + Instruction insn = Instruction(arch, *invalidMetadata.get(), + InstructionException::HypervisorCall); + // Define instruction's registers + std::vector destRegs = {}; + std::vector srcRegs = {}; + const std::vector ports = {}; + insn.setExecutionInfo({1, 1, ports}); + insn.setInstructionAddress(0x43); + insn.setInstructionId(15); + insn.setSequenceId(16); + + // Ensure that all instruction values are as expected after creation + BranchPrediction pred = {false, 0}; + bool matchingPred = (insn.getBranchPrediction() == pred) ? true : false; + EXPECT_EQ(&insn.getArchitecture(), &arch); + EXPECT_EQ(insn.getBranchAddress(), 0); + EXPECT_TRUE(matchingPred); + EXPECT_EQ(insn.getBranchType(), BranchType::Unknown); + EXPECT_EQ(insn.getData().size(), 0); + EXPECT_EQ(insn.getDestinationRegisters().size(), destRegs.size()); + for (int i = 0; i < destRegs.size(); i++) { + EXPECT_EQ(insn.getDestinationRegisters()[i], destRegs[i]); + } + EXPECT_EQ(insn.getException(), InstructionException::HypervisorCall); + EXPECT_EQ(insn.getGeneratedAddresses().size(), 0); + // Default Group + EXPECT_EQ(insn.getGroup(), InstructionGroups::INT_SIMPLE_ARTH); + EXPECT_EQ(insn.getInstructionAddress(), 0x43); + EXPECT_EQ(insn.getInstructionId(), 15); + EXPECT_EQ(insn.getKnownOffset(), 0); + EXPECT_EQ(insn.getLatency(), 1); + EXPECT_EQ(insn.getLSQLatency(), 1); + EXPECT_EQ(&insn.getMetadata(), invalidMetadata.get()); + EXPECT_EQ(insn.getMicroOpIndex(), 0); + EXPECT_EQ(insn.getResults().size(), 0); + EXPECT_EQ(insn.getSequenceId(), 16); + EXPECT_EQ(insn.getSourceOperands().size(), 0); + EXPECT_EQ(insn.getSourceRegisters().size(), srcRegs.size()); + for (int i = 0; i < srcRegs.size(); i++) { + EXPECT_EQ(insn.getSourceRegisters()[i], srcRegs[i]); + EXPECT_FALSE(insn.isOperandReady(i)); + } + EXPECT_EQ(insn.getStallCycles(), 1); + EXPECT_EQ(insn.getSupportedPorts(), ports); + + EXPECT_TRUE(insn.canExecute()); + 
EXPECT_FALSE(insn.isStoreAddress()); + EXPECT_FALSE(insn.isStoreData()); + EXPECT_FALSE(insn.isLoad()); + EXPECT_FALSE(insn.isBranch()); + EXPECT_FALSE(insn.isAtomic()); + EXPECT_FALSE(insn.isFloat()); + EXPECT_TRUE(insn.exceptionEncountered()); + EXPECT_FALSE(insn.hasExecuted()); + EXPECT_FALSE(insn.canCommit()); + EXPECT_TRUE(insn.hasAllData()); + EXPECT_FALSE(insn.wasBranchTaken()); + EXPECT_FALSE(insn.isFlushed()); + EXPECT_FALSE(insn.isMicroOp()); + EXPECT_TRUE(insn.isLastMicroOp()); + EXPECT_FALSE(insn.isWaitingCommit()); +} + +// Test to ensure that source and operand registers can be renamed correctly +TEST_F(RiscVInstructionTest, renameRegs) { + // Insn is `div a3, a3, a0` + Instruction insn = Instruction(arch, *divMetadata.get()); + // Define instruction's registers + std::vector destRegs = {{RegisterType::GENERAL, 13}}; + std::vector srcRegs = {{RegisterType::GENERAL, 13}, + {RegisterType::GENERAL, 10}}; + // Ensure registers decoded correctly + EXPECT_EQ(insn.getSourceRegisters().size(), srcRegs.size()); + for (int i = 0; i < srcRegs.size(); i++) { + EXPECT_EQ(insn.getSourceRegisters()[i], srcRegs[i]); + } + EXPECT_EQ(insn.getDestinationRegisters().size(), destRegs.size()); + for (int i = 0; i < destRegs.size(); i++) { + EXPECT_EQ(insn.getDestinationRegisters()[i], destRegs[i]); + } + + // Define renamed registers + std::vector destRegs_new = {{RegisterType::GENERAL, 24}}; + std::vector srcRegs_new = {{RegisterType::GENERAL, 13}, + {RegisterType::GENERAL, 97}}; + insn.renameDestination(0, destRegs_new[0]); + insn.renameSource(1, srcRegs_new[1]); + // Ensure renaming functionality works as expected + EXPECT_EQ(insn.getSourceRegisters().size(), srcRegs_new.size()); + for (int i = 0; i < srcRegs_new.size(); i++) { + EXPECT_EQ(insn.getSourceRegisters()[i], srcRegs_new[i]); + } + EXPECT_EQ(insn.getDestinationRegisters().size(), destRegs_new.size()); + for (int i = 0; i < destRegs_new.size(); i++) { + EXPECT_EQ(insn.getDestinationRegisters()[i], 
destRegs_new[i]); + } +} + +// Test that operand values can be properly supplied and change the state of +// `canExecute` +TEST_F(RiscVInstructionTest, supplyOperand) { + // Insn is `div a3, a3, a0` + Instruction insn = Instruction(arch, *divMetadata.get()); + // Define instruction's registers + std::vector destRegs = {{RegisterType::GENERAL, 13}}; + std::vector srcRegs = {{RegisterType::GENERAL, 13}, + {RegisterType::GENERAL, 10}}; + // Check initial state is as expected + EXPECT_FALSE(insn.canExecute()); + EXPECT_FALSE(insn.isOperandReady(0)); + EXPECT_FALSE(insn.isOperandReady(1)); + + // Define mock register values for source registers + RegisterValue val = {0xABBACAFE, 8}; + // Supply values for all source registers + insn.supplyOperand(0, val); + insn.supplyOperand(1, val); + // Ensure Instruction state has updated as expected + EXPECT_TRUE(insn.canExecute()); + EXPECT_TRUE(insn.isOperandReady(0)); + EXPECT_TRUE(insn.isOperandReady(1)); + auto sourceVals = insn.getSourceOperands(); + EXPECT_EQ(sourceVals.size(), 2); + EXPECT_EQ(sourceVals[0], val); + EXPECT_EQ(sourceVals[1], val); + + // Ensure instruction execute updates instruction state as expected, and + // produces the expected result. 
+ EXPECT_FALSE(insn.hasExecuted()); + insn.execute(); + EXPECT_TRUE(insn.hasExecuted()); + auto results = insn.getResults(); + RegisterValue refRes = {0x00000001, 8}; + EXPECT_EQ(results.size(), 1); + EXPECT_EQ(results[0], refRes); +} + +// Test that data can be supplied successfully +TEST_F(RiscVInstructionTest, supplyData) { + // Insn is `lbu a5, 0(s3)` + Instruction insn = Instruction(arch, *lbuMetadata.get()); + // Define instruction's registers + std::vector destRegs = {{RegisterType::GENERAL, 15}}; + std::vector srcRegs = {{RegisterType::GENERAL, 19}}; + + // Check instruction created correctly + EXPECT_FALSE(insn.exceptionEncountered()); + EXPECT_EQ(&insn.getMetadata(), lbuMetadata.get()); + EXPECT_EQ(insn.getGroup(), InstructionGroups::LOAD_INT); + + // Check source and destination registers extracted correctly + EXPECT_EQ(insn.getSourceRegisters().size(), srcRegs.size()); + for (int i = 0; i < srcRegs.size(); i++) { + EXPECT_EQ(insn.getSourceRegisters()[i], srcRegs[i]); + } + EXPECT_EQ(insn.getDestinationRegisters().size(), destRegs.size()); + for (int i = 0; i < destRegs.size(); i++) { + EXPECT_EQ(insn.getDestinationRegisters()[i], destRegs[i]); + } + + // Supply needed operands + EXPECT_FALSE(insn.isOperandReady(0)); + RegisterValue addr = {0x480, 8}; + insn.supplyOperand(0, addr); + EXPECT_TRUE(insn.isOperandReady(0)); + + // Generate memory addresses + EXPECT_EQ(insn.getGeneratedAddresses().size(), 0); + insn.generateAddresses(); + auto generatedAddresses = insn.getGeneratedAddresses(); + EXPECT_EQ(generatedAddresses.size(), 1); + EXPECT_EQ(generatedAddresses[0].address, 0x480); + EXPECT_EQ(generatedAddresses[0].size, 1); + + // Supply required data + EXPECT_FALSE(insn.hasAllData()); + std::vector data = {{123, 1}}; + EXPECT_EQ(generatedAddresses.size(), data.size()); + insn.supplyData(generatedAddresses[0].address, data[0]); + // Ensure data was supplied correctly + auto retrievedData = insn.getData(); + for (int i = 0; i < retrievedData.size(); i++) 
{ + EXPECT_EQ(retrievedData[i], data[i]); + } + EXPECT_TRUE(insn.hasAllData()); +} + +// Test DataAbort Exception is triggered correctly when supplying data +TEST_F(RiscVInstructionTest, supplyData_dataAbort) { + // Insn is `lbu a5, 0(s3)` + Instruction insn = Instruction(arch, *lbuMetadata.get()); + // Define instruction's registers + std::vector destRegs = {{RegisterType::GENERAL, 15}}; + std::vector srcRegs = {{RegisterType::GENERAL, 19}}; + + // Check instruction created correctly + EXPECT_EQ(&insn.getMetadata(), lbuMetadata.get()); + EXPECT_EQ(insn.getGroup(), InstructionGroups::LOAD_INT); + + // Supply needed operands + EXPECT_FALSE(insn.isOperandReady(0)); + RegisterValue addr = {0x480, 8}; + insn.supplyOperand(0, addr); + EXPECT_TRUE(insn.isOperandReady(0)); + + // Generate memory addresses + EXPECT_EQ(insn.getGeneratedAddresses().size(), 0); + insn.generateAddresses(); + auto generatedAddresses = insn.getGeneratedAddresses(); + EXPECT_EQ(generatedAddresses.size(), 1); + EXPECT_EQ(generatedAddresses[0].address, 0x480); + EXPECT_EQ(generatedAddresses[0].size, 1); + + // Trigger data abort + EXPECT_FALSE(insn.exceptionEncountered()); + insn.supplyData(generatedAddresses[0].address, RegisterValue()); + EXPECT_TRUE(insn.exceptionEncountered()); + EXPECT_EQ(insn.getException(), InstructionException::DataAbort); +} + +// Test to check logic around early branch misprediction logic +TEST_F(RiscVInstructionTest, earlyBranchMisprediction) { + // Insn is `div a3, a3, a0` + Instruction insn = Instruction(arch, *divMetadata.get()); + insn.setInstructionAddress(64); + + // Check initial state of an instruction's branch related options + BranchPrediction pred = {false, 0}; + bool matchingPred = (insn.getBranchPrediction() == pred); + EXPECT_TRUE(matchingPred); + EXPECT_FALSE(insn.wasBranchTaken()); + EXPECT_EQ(insn.getBranchAddress(), 0); + EXPECT_EQ(insn.getBranchType(), BranchType::Unknown); + EXPECT_FALSE(insn.isBranch()); + std::tuple tup = {false, 
insn.getInstructionAddress() + 4}; + EXPECT_EQ(insn.checkEarlyBranchMisprediction(), tup); + + // Set prediction and ensure expected state changes / outcomes are seen + pred = {true, 0x4848}; + insn.setBranchPrediction(pred); + matchingPred = (insn.getBranchPrediction() == pred); + EXPECT_TRUE(matchingPred); + EXPECT_FALSE(insn.wasBranchTaken()); + EXPECT_EQ(insn.getBranchAddress(), 0); + EXPECT_EQ(insn.getBranchType(), BranchType::Unknown); + // Check logic of `checkEarlyBranchMisprediction` which is different for + // non-branch instructions + EXPECT_FALSE(insn.isBranch()); + tup = {true, insn.getInstructionAddress() + 4}; + EXPECT_EQ(insn.checkEarlyBranchMisprediction(), tup); +} + +// Test that a correct prediction (branch taken) is handled correctly +TEST_F(RiscVInstructionTest, correctPred_taken) { + // insn is `bgeu a5, a4, -86` + Instruction insn = Instruction(arch, *bgeuMetadata.get()); + insn.setInstructionAddress(400); + + // Check initial state of an instruction's branch related options + BranchPrediction pred = {false, 0}; + bool matchingPred = (insn.getBranchPrediction() == pred); + EXPECT_TRUE(matchingPred); + EXPECT_FALSE(insn.wasBranchTaken()); + EXPECT_EQ(insn.getBranchAddress(), 0); + EXPECT_EQ(insn.getBranchType(), BranchType::Conditional); + EXPECT_TRUE(insn.isBranch()); + std::tuple tup = {false, 0}; + EXPECT_EQ(insn.checkEarlyBranchMisprediction(), tup); + + // Test a correct prediction where branch is taken is handled correctly + pred = {true, 400 - 86}; + insn.setBranchPrediction(pred); + matchingPred = (insn.getBranchPrediction() == pred); + insn.supplyOperand(0, RegisterValue(3, 8)); + insn.supplyOperand(1, RegisterValue(0, 8)); + insn.execute(); + EXPECT_TRUE(matchingPred); + EXPECT_TRUE(insn.wasBranchTaken()); + EXPECT_FALSE(insn.wasBranchMispredicted()); + EXPECT_EQ(insn.getBranchAddress(), pred.target); +} + +// Test that a correct prediction (branch not taken) is handled correctly +TEST_F(RiscVInstructionTest, correctPred_notTaken) { 
+ // insn is `bgeu a5, a4, -86` + Instruction insn = Instruction(arch, *bgeuMetadata.get()); + insn.setInstructionAddress(400); + + // Check initial state of an instruction's branch related options + BranchPrediction pred = {false, 0}; + bool matchingPred = (insn.getBranchPrediction() == pred); + EXPECT_TRUE(matchingPred); + EXPECT_FALSE(insn.wasBranchTaken()); + EXPECT_EQ(insn.getBranchAddress(), 0); + EXPECT_EQ(insn.getBranchType(), BranchType::Conditional); + EXPECT_TRUE(insn.isBranch()); + std::tuple tup = {false, 0}; + EXPECT_EQ(insn.checkEarlyBranchMisprediction(), tup); + + // Test a correct prediction where a branch isn't taken is handled correctly + // imm operand 0x28 has 4 added implicitly by dissassembler + pred = {false, 400 + 4}; + insn.setBranchPrediction(pred); + matchingPred = (insn.getBranchPrediction() == pred); + insn.supplyOperand(0, RegisterValue(0, 8)); + insn.supplyOperand(1, RegisterValue(3, 8)); + insn.execute(); + EXPECT_TRUE(matchingPred); + EXPECT_FALSE(insn.wasBranchTaken()); + EXPECT_FALSE(insn.wasBranchMispredicted()); + EXPECT_EQ(insn.getBranchAddress(), pred.target); +} + +// Test that an incorrect prediction (wrong target) is handled correctly +TEST_F(RiscVInstructionTest, incorrectPred_target) { + // insn is `bgeu a5, a4, -86` + Instruction insn = Instruction(arch, *bgeuMetadata.get()); + insn.setInstructionAddress(400); + + // Check initial state of an instruction's branch related options + BranchPrediction pred = {false, 0}; + bool matchingPred = (insn.getBranchPrediction() == pred); + EXPECT_TRUE(matchingPred); + EXPECT_FALSE(insn.wasBranchTaken()); + EXPECT_EQ(insn.getBranchAddress(), 0); + EXPECT_EQ(insn.getBranchType(), BranchType::Conditional); + EXPECT_TRUE(insn.isBranch()); + std::tuple tup = {false, 0}; + EXPECT_EQ(insn.checkEarlyBranchMisprediction(), tup); + + // Test an incorrect prediction is handled correctly - target is wrong + // imm operand 0x28 has 4 added implicitly by dissassembler + pred = {true, 80 + (0x28 
+ 0x4)}; + insn.setBranchPrediction(pred); + matchingPred = (insn.getBranchPrediction() == pred); + insn.supplyOperand(0, RegisterValue(3, 8)); + insn.supplyOperand(1, RegisterValue(0, 8)); + insn.execute(); + EXPECT_TRUE(matchingPred); + EXPECT_TRUE(insn.wasBranchTaken()); + EXPECT_TRUE(insn.wasBranchMispredicted()); + EXPECT_EQ(insn.getBranchAddress(), 400 - 86); +} + +// Test that an incorrect prediction (wrong taken) is handled correctly +TEST_F(RiscVInstructionTest, incorrectPred_taken) { + // insn is `bgeu a5, a4, -86` + Instruction insn = Instruction(arch, *bgeuMetadata.get()); + insn.setInstructionAddress(400); + + // Check initial state of an instruction's branch related options + BranchPrediction pred = {false, 0}; + bool matchingPred = (insn.getBranchPrediction() == pred); + EXPECT_TRUE(matchingPred); + EXPECT_FALSE(insn.wasBranchTaken()); + EXPECT_EQ(insn.getBranchAddress(), 0); + EXPECT_EQ(insn.getBranchType(), BranchType::Conditional); + EXPECT_TRUE(insn.isBranch()); + std::tuple tup = {false, 0}; + EXPECT_EQ(insn.checkEarlyBranchMisprediction(), tup); + + // Test an incorrect prediction is handled correctly - taken is wrong + // imm operand 0x28 has 4 added implicitly by dissassembler + pred = {true, 400 - 86}; + insn.setBranchPrediction(pred); + matchingPred = (insn.getBranchPrediction() == pred); + insn.supplyOperand(0, RegisterValue(0, 8)); + insn.supplyOperand(1, RegisterValue(3, 8)); + insn.execute(); + EXPECT_TRUE(matchingPred); + EXPECT_FALSE(insn.wasBranchTaken()); + EXPECT_TRUE(insn.wasBranchMispredicted()); + EXPECT_EQ(insn.getBranchAddress(), 400 + 4); +} + +// Test commit and flush setters such as `setFlushed`, `setCommitReady`, etc. 
+TEST_F(RiscVInstructionTest, setters) { + // Insn is `div a3, a3, a0` + Instruction insn = Instruction(arch, *divMetadata.get()); + + EXPECT_FALSE(insn.canCommit()); + insn.setCommitReady(); + EXPECT_TRUE(insn.canCommit()); + + EXPECT_FALSE(insn.isFlushed()); + insn.setFlushed(); + EXPECT_TRUE(insn.isFlushed()); + + EXPECT_FALSE(insn.isWaitingCommit()); + insn.setWaitingCommit(); + EXPECT_TRUE(insn.isWaitingCommit()); +} + +} // namespace riscv +} // namespace arch +} // namespace simeng \ No newline at end of file