From 9fec65aef8043d92fecb792fad6e21e780fb437c Mon Sep 17 00:00:00 2001 From: Afonso Bordado Date: Wed, 26 Apr 2023 19:57:34 +0100 Subject: [PATCH] Add a third register class For machines with completely separate vector registers it is useful to have a third register class. --- src/fuzzing/func.rs | 8 ++++---- src/ion/data_structures.rs | 4 ++-- src/ion/mod.rs | 4 ++-- src/ion/moves.rs | 24 +++++++++++++++--------- src/lib.rs | 36 +++++++++++++++++++++--------------- 5 files changed, 44 insertions(+), 32 deletions(-) diff --git a/src/fuzzing/func.rs b/src/fuzzing/func.rs index 6996a4ca..3ed8eb1f 100644 --- a/src/fuzzing/func.rs +++ b/src/fuzzing/func.rs @@ -146,7 +146,7 @@ impl Function for Func { fn spillslot_size(&self, regclass: RegClass) -> usize { match regclass { RegClass::Int => 1, - RegClass::Float => 2, + RegClass::Float | RegClass::Vector => 2, } } } @@ -659,9 +659,9 @@ pub fn machine_env() -> MachineEnv { fn regs(r: core::ops::Range<usize>) -> Vec<PReg> { r.map(|i| PReg::new(i, RegClass::Int)).collect() } - let preferred_regs_by_class: [Vec<PReg>; 2] = [regs(0..24), vec![]]; - let non_preferred_regs_by_class: [Vec<PReg>; 2] = [regs(24..32), vec![]]; - let scratch_by_class: [Option<PReg>; 2] = [None, None]; + let preferred_regs_by_class: [Vec<PReg>; 3] = [regs(0..24), vec![], vec![]]; + let non_preferred_regs_by_class: [Vec<PReg>; 3] = [regs(24..32), vec![], vec![]]; + let scratch_by_class: [Option<PReg>; 3] = [None, None, None]; let fixed_stack_slots = regs(32..63); // Register 63 is reserved for use as a fixed non-allocatable register.
MachineEnv { diff --git a/src/ion/data_structures.rs b/src/ion/data_structures.rs index ecc903eb..0ca1371e 100644 --- a/src/ion/data_structures.rs +++ b/src/ion/data_structures.rs @@ -404,8 +404,8 @@ pub struct Env<'a, F: Function> { pub spillslots: Vec, pub slots_by_size: Vec, - pub extra_spillslots_by_class: [SmallVec<[Allocation; 2]>; 2], - pub preferred_victim_by_class: [PReg; 2], + pub extra_spillslots_by_class: [SmallVec<[Allocation; 2]>; 3], + pub preferred_victim_by_class: [PReg; 3], // When multiple fixed-register constraints are present on a // single VReg at a single program point (this can happen for, diff --git a/src/ion/mod.rs b/src/ion/mod.rs index 550af90f..5622c3bf 100644 --- a/src/ion/mod.rs +++ b/src/ion/mod.rs @@ -70,8 +70,8 @@ impl<'a, F: Function> Env<'a, F> { slots_by_size: vec![], allocated_bundle_count: 0, - extra_spillslots_by_class: [smallvec![], smallvec![]], - preferred_victim_by_class: [PReg::invalid(), PReg::invalid()], + extra_spillslots_by_class: [smallvec![], smallvec![], smallvec![]], + preferred_victim_by_class: [PReg::invalid(), PReg::invalid(), PReg::invalid()], multi_fixed_reg_fixups: vec![], inserted_moves: vec![], diff --git a/src/ion/moves.rs b/src/ion/moves.rs index 32ae768a..5e0abd8d 100644 --- a/src/ion/moves.rs +++ b/src/ion/moves.rs @@ -812,14 +812,15 @@ impl<'a, F: Function> Env<'a, F> { redundant_move_process_side_effects(self, &mut redundant_moves, last_pos, pos_prio.pos); last_pos = pos_prio.pos; - // Gather all the moves with Int class and Float class - // separately. These cannot interact, so it is safe to - // have two separate ParallelMove instances. They need to - // be separate because moves between the two classes are - // impossible. (We could enhance ParallelMoves to - // understand register classes, but this seems simpler.) + // Gather all the moves in each RegClass separately. + // These cannot interact, so it is safe to have separate + // ParallelMove instances. 
They need to be separate because + // moves between the classes are impossible. (We could + // enhance ParallelMoves to understand register classes, but + // this seems simpler.) let mut int_moves: SmallVec<[InsertedMove; 8]> = smallvec![]; let mut float_moves: SmallVec<[InsertedMove; 8]> = smallvec![]; + let mut vec_moves: SmallVec<[InsertedMove; 8]> = smallvec![]; for m in moves { if m.from_alloc == m.to_alloc { @@ -832,12 +833,17 @@ impl<'a, F: Function> Env<'a, F> { RegClass::Float => { float_moves.push(m.clone()); } + RegClass::Vector => { + vec_moves.push(m.clone()); + } } } - for &(regclass, moves) in - &[(RegClass::Int, &int_moves), (RegClass::Float, &float_moves)] - { + for &(regclass, moves) in &[ + (RegClass::Int, &int_moves), + (RegClass::Float, &float_moves), + (RegClass::Vector, &vec_moves), + ] { // All moves in `moves` semantically happen in // parallel. Let's resolve these to a sequence of moves // that can be done one at a time. diff --git a/src/lib.rs b/src/lib.rs index 8aa9d4c9..070608ca 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -68,17 +68,18 @@ use serde::{Deserialize, Serialize}; /// class; i.e., they are disjoint. /// /// For tight bit-packing throughout our data structures, we support -/// only two classes, "int" and "float". This will usually be enough -/// on modern machines, as they have one class of general-purpose +/// only three classes, "int", "float" and "vector". Usually two will +/// be enough on modern machines, as they have one class of general-purpose /// integer registers of machine width (e.g. 64 bits), and another /// class of float/vector registers used both for FP and for vector -/// operations. If needed, we could adjust bitpacking to allow for -/// more classes in the future. +/// operations. Additionally for machines with totally separate vector +/// registers a third class is provided. 
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)] #[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))] pub enum RegClass { Int = 0, Float = 1, + Vector = 2, } /// A physical register. Contains a physical register number and a class. @@ -104,7 +105,7 @@ pub struct PReg { impl PReg { pub const MAX_BITS: usize = 6; pub const MAX: usize = (1 << Self::MAX_BITS) - 1; - pub const NUM_INDEX: usize = 1 << (Self::MAX_BITS + 1); // including RegClass bit + pub const NUM_INDEX: usize = 1 << (Self::MAX_BITS + 2); // including RegClass bits /// Create a new PReg. The `hw_enc` range is 6 bits. #[inline(always)] @@ -124,10 +125,11 @@ impl PReg { /// The register class. #[inline(always)] pub const fn class(self) -> RegClass { - if self.bits & (1 << Self::MAX_BITS) == 0 { - RegClass::Int - } else { - RegClass::Float + match (self.bits >> Self::MAX_BITS) & 0b11 { + 0 => RegClass::Int, + 1 => RegClass::Float, + 2 => RegClass::Vector, + _ => unreachable!(), } } @@ -172,6 +174,7 @@ impl core::fmt::Display for PReg { let class = match self.class() { RegClass::Int => "i", RegClass::Float => "f", + RegClass::Vector => "v", }; write!(f, "p{}{}", self.hw_enc(), class) } @@ -299,21 +302,22 @@ impl VReg { pub const fn new(virt_reg: usize, class: RegClass) -> Self { debug_assert!(virt_reg <= VReg::MAX); VReg { - bits: ((virt_reg as u32) << 1) | (class as u8 as u32), + bits: ((virt_reg as u32) << 2) | (class as u8 as u32), } } #[inline(always)] pub const fn vreg(self) -> usize { - let vreg = (self.bits >> 1) as usize; + let vreg = (self.bits >> 2) as usize; vreg } #[inline(always)] pub const fn class(self) -> RegClass { - match self.bits & 1 { + match self.bits & 0b11 { 0 => RegClass::Int, 1 => RegClass::Float, + 2 => RegClass::Vector, _ => unreachable!(), } } @@ -734,6 +738,7 @@ impl Operand { match class_field { 0 => RegClass::Int, 1 => RegClass::Float, + 2 => RegClass::Vector, _ => unreachable!(), } } @@ -832,6 +837,7 @@ impl core::fmt::Display for Operand {
match self.class() { RegClass::Int => "i", RegClass::Float => "f", + RegClass::Vector => "v", }, self.constraint() ) @@ -1337,7 +1343,7 @@ pub struct MachineEnv { /// /// If an explicit scratch register is provided in `scratch_by_class` then /// it must not appear in this list. - pub preferred_regs_by_class: [Vec<PReg>; 2], + pub preferred_regs_by_class: [Vec<PReg>; 3], /// Non-preferred physical registers for each class. These are the /// registers that will be allocated if a preferred register is @@ -1346,7 +1352,7 @@ pub struct MachineEnv { /// /// If an explicit scratch register is provided in `scratch_by_class` then /// it must not appear in this list. - pub non_preferred_regs_by_class: [Vec<PReg>; 2], + pub non_preferred_regs_by_class: [Vec<PReg>; 3], /// Optional dedicated scratch register per class. This is needed to perform /// moves between registers when cyclic move patterns occur. The @@ -1363,7 +1369,7 @@ pub struct MachineEnv { /// If a scratch register is not provided then the register allocator will /// automatically allocate one as needed, spilling a value to the stack if /// necessary. - pub scratch_by_class: [Option<PReg>; 2], + pub scratch_by_class: [Option<PReg>; 3], /// Some `PReg`s can be designated as locations on the stack rather than /// actual registers. These can be used to tell the register allocator about