diff --git a/src/const_choice.rs b/src/const_choice.rs index 6e8e39af8..b34a7b657 100644 --- a/src/const_choice.rs +++ b/src/const_choice.rs @@ -16,9 +16,9 @@ impl ConstChoice { pub const TRUE: Self = Self(Word::MAX); #[inline] + #[allow(trivial_numeric_casts)] pub(crate) const fn as_u32_mask(&self) -> u32 { - #[allow(trivial_numeric_casts)] - (self.0.wrapping_neg() as u32).wrapping_neg() + self.0 as u32 } /// Returns the truthy value if `value == Word::MAX`, and the falsy value if `value == 0`. @@ -79,14 +79,13 @@ impl ConstChoice { /// Returns the truthy value if `x > y`, and the falsy value otherwise. #[inline] pub(crate) const fn from_word_gt(x: Word, y: Word) -> Self { - // See "Hacker's Delight" 2nd ed, section 2-12 (Comparison predicates) - let bit = (((!y) & x) | (((!y) | x) & (y.wrapping_sub(x)))) >> (Word::BITS - 1); - Self::from_word_lsb(bit) + Self::from_word_lt(y, x) } /// Returns the truthy value if `x < y`, and the falsy value otherwise. #[inline] pub(crate) const fn from_u32_lt(x: u32, y: u32) -> Self { + // See "Hacker's Delight" 2nd ed, section 2-12 (Comparison predicates) let bit = (((!x) & y) | (((!x) | y) & (x.wrapping_sub(y)))) >> (u32::BITS - 1); Self::from_u32_lsb(bit) } @@ -94,6 +93,7 @@ impl ConstChoice { /// Returns the truthy value if `x <= y` and the falsy value otherwise. #[inline] pub(crate) const fn from_word_le(x: Word, y: Word) -> Self { + // See "Hacker's Delight" 2nd ed, section 2-12 (Comparison predicates) let bit = (((!x) | y) & ((x ^ y) | !(y.wrapping_sub(x)))) >> (Word::BITS - 1); Self::from_word_lsb(bit) } @@ -101,6 +101,7 @@ impl ConstChoice { /// Returns the truthy value if `x <= y` and the falsy value otherwise. 
#[inline] pub(crate) const fn from_u32_le(x: u32, y: u32) -> Self { + // See "Hacker's Delight" 2nd ed, section 2-12 (Comparison predicates) let bit = (((!x) | y) & ((x ^ y) | !(y.wrapping_sub(x)))) >> (u32::BITS - 1); Self::from_u32_lsb(bit) } diff --git a/src/lib.rs b/src/lib.rs index eae9d9ad6..735db7aea 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -170,6 +170,7 @@ mod checked; mod const_choice; mod limb; mod non_zero; +mod primitives; mod traits; mod uint; mod wrapping; diff --git a/src/limb/add.rs b/src/limb/add.rs index 0b11ad96e..7669a97ab 100644 --- a/src/limb/add.rs +++ b/src/limb/add.rs @@ -1,18 +1,25 @@ //! Limb addition -use crate::{Checked, CheckedAdd, Limb, WideWord, Word, Wrapping, WrappingAdd, Zero}; +use crate::{ + primitives::{adc, overflowing_add}, + Checked, CheckedAdd, Limb, Wrapping, WrappingAdd, Zero, +}; use core::ops::{Add, AddAssign}; use subtle::CtOption; impl Limb { + /// Computes `self + rhs`, returning the result along with the carry. + #[inline(always)] + pub const fn overflowing_add(self, rhs: Limb) -> (Limb, Limb) { + let (res, carry) = overflowing_add(self.0, rhs.0); + (Limb(res), Limb(carry)) + } + /// Computes `self + rhs + carry`, returning the result along with the new carry. #[inline(always)] pub const fn adc(self, rhs: Limb, carry: Limb) -> (Limb, Limb) { - let a = self.0 as WideWord; - let b = rhs.0 as WideWord; - let carry = carry.0 as WideWord; - let ret = a + b + carry; - (Limb(ret as Word), Limb((ret >> Self::BITS) as Word)) + let (res, carry) = adc(self.0, rhs.0, carry.0); + (Limb(res), Limb(carry)) } /// Perform saturating addition. 
@@ -69,7 +76,7 @@ impl AddAssign<&Checked> for Checked { impl CheckedAdd for Limb { #[inline] fn checked_add(&self, rhs: &Self) -> CtOption { - let (result, carry) = self.adc(*rhs, Limb::ZERO); + let (result, carry) = self.overflowing_add(*rhs); CtOption::new(result, carry.is_zero()) } } @@ -87,14 +94,14 @@ mod tests { #[test] fn adc_no_carry() { - let (res, carry) = Limb::ZERO.adc(Limb::ONE, Limb::ZERO); + let (res, carry) = Limb::ZERO.overflowing_add(Limb::ONE); assert_eq!(res, Limb::ONE); assert_eq!(carry, Limb::ZERO); } #[test] fn adc_with_carry() { - let (res, carry) = Limb::MAX.adc(Limb::ONE, Limb::ZERO); + let (res, carry) = Limb::MAX.overflowing_add(Limb::ONE); assert_eq!(res, Limb::ZERO); assert_eq!(carry, Limb::ONE); } diff --git a/src/limb/mul.rs b/src/limb/mul.rs index 481bbcf29..d3db5ded8 100644 --- a/src/limb/mul.rs +++ b/src/limb/mul.rs @@ -1,6 +1,9 @@ //! Limb multiplication -use crate::{Checked, CheckedMul, Limb, WideWord, Word, Wrapping, Zero}; +use crate::{ + primitives::{mac, mul_wide}, + Checked, CheckedMul, Limb, Wrapping, Zero, +}; use core::ops::{Mul, MulAssign}; use num_traits::WrappingMul; use subtle::CtOption; @@ -9,12 +12,8 @@ impl Limb { /// Computes `self + (b * c) + carry`, returning the result along with the new carry. #[inline(always)] pub const fn mac(self, b: Limb, c: Limb, carry: Limb) -> (Limb, Limb) { - let a = self.0 as WideWord; - let b = b.0 as WideWord; - let c = c.0 as WideWord; - let carry = carry.0 as WideWord; - let ret = a + (b * c) + carry; - (Limb(ret as Word), Limb((ret >> Self::BITS) as Word)) + let (res, carry) = mac(self.0, b.0, c.0, carry.0); + (Limb(res), Limb(carry)) } /// Perform saturating multiplication. @@ -30,17 +29,17 @@ impl Limb { } /// Compute "wide" multiplication, with a product twice the size of the input. 
- pub(crate) const fn mul_wide(&self, rhs: Self) -> WideWord { - (self.0 as WideWord) * (rhs.0 as WideWord) + pub(crate) const fn mul_wide(&self, rhs: Self) -> (Self, Self) { + let (lo, hi) = mul_wide(self.0, rhs.0); + (Limb(lo), Limb(hi)) } } impl CheckedMul for Limb { #[inline] fn checked_mul(&self, rhs: &Self) -> CtOption { - let result = self.mul_wide(*rhs); - let overflow = Limb((result >> Self::BITS) as Word); - CtOption::new(Limb(result as Word), overflow.is_zero()) + let (lo, hi) = self.mul_wide(*rhs); + CtOption::new(lo, hi.is_zero()) } } @@ -118,28 +117,7 @@ impl WrappingMul for Limb { #[cfg(test)] mod tests { - use super::{CheckedMul, Limb, WideWord}; - - #[test] - fn mul_wide_zero_and_one() { - assert_eq!(Limb::ZERO.mul_wide(Limb::ZERO), 0); - assert_eq!(Limb::ZERO.mul_wide(Limb::ONE), 0); - assert_eq!(Limb::ONE.mul_wide(Limb::ZERO), 0); - assert_eq!(Limb::ONE.mul_wide(Limb::ONE), 1); - } - - #[test] - fn mul_wide() { - let primes: &[u32] = &[3, 5, 17, 257, 65537]; - - for &a_int in primes { - for &b_int in primes { - let actual = Limb::from_u32(a_int).mul_wide(Limb::from_u32(b_int)); - let expected = a_int as WideWord * b_int as WideWord; - assert_eq!(actual, expected); - } - } - } + use super::{CheckedMul, Limb}; #[test] #[cfg(target_pointer_width = "32")] diff --git a/src/limb/sub.rs b/src/limb/sub.rs index 556f21d51..463446975 100644 --- a/src/limb/sub.rs +++ b/src/limb/sub.rs @@ -1,6 +1,6 @@ //! Limb subtraction -use crate::{Checked, CheckedSub, Limb, WideWord, Word, Wrapping, WrappingSub, Zero}; +use crate::{primitives::sbb, Checked, CheckedSub, Limb, Wrapping, WrappingSub, Zero}; use core::ops::{Sub, SubAssign}; use subtle::CtOption; @@ -8,11 +8,8 @@ impl Limb { /// Computes `self - (rhs + borrow)`, returning the result along with the new borrow. 
#[inline(always)] pub const fn sbb(self, rhs: Limb, borrow: Limb) -> (Limb, Limb) { - let a = self.0 as WideWord; - let b = rhs.0 as WideWord; - let borrow = (borrow.0 >> (Self::BITS - 1)) as WideWord; - let ret = a.wrapping_sub(b + borrow); - (Limb(ret as Word), Limb((ret >> Self::BITS) as Word)) + let (res, borrow) = sbb(self.0, rhs.0, borrow.0); + (Limb(res), Limb(borrow)) } /// Perform saturating subtraction. diff --git a/src/modular/boxed_residue/mul.rs b/src/modular/boxed_residue/mul.rs index ccb83c7e7..0c4783829 100644 --- a/src/modular/boxed_residue/mul.rs +++ b/src/modular/boxed_residue/mul.rs @@ -8,7 +8,7 @@ use super::{BoxedResidue, BoxedResidueParams}; use crate::{ modular::reduction::montgomery_reduction_boxed_mut, traits::Square, uint::mul::mul_limbs, - BoxedUint, Limb, WideWord, Word, Zero, + BoxedUint, Limb, Word, Zero, }; use core::{ borrow::Borrow, @@ -285,8 +285,8 @@ fn almost_montgomery_mul(z: &mut [Limb], x: &[Limb], y: &[Limb], m: &[Limb], k: fn add_mul_vvw(z: &mut [Limb], x: &[Limb], y: Limb) -> Limb { let mut c = Limb::ZERO; for (zi, xi) in z.iter_mut().zip(x.iter()) { - let (z1, z0) = mul_add_www(*xi, y, *zi); - let (c_, zi_) = add_ww(Limb(z0.0), c, Limb::ZERO); + let (z0, z1) = zi.mac(*xi, y, Limb::ZERO); + let (zi_, c_) = z0.overflowing_add(c); *zi = zi_; c = c_.wrapping_add(z1); } @@ -294,35 +294,14 @@ fn add_mul_vvw(z: &mut [Limb], x: &[Limb], y: Limb) -> Limb { c } -/// The resulting carry c is either 0 or 1. 
#[inline(always)] -fn sub_vv(z: &mut [Limb], x: &[Limb], y: &[Limb]) -> Limb { - let mut c = Limb::ZERO; +fn sub_vv(z: &mut [Limb], x: &[Limb], y: &[Limb]) { + let mut borrow = Limb::ZERO; for (i, (&xi, &yi)) in x.iter().zip(y.iter()).enumerate().take(z.len()) { - let zi = xi.wrapping_sub(yi).wrapping_sub(c); + let (zi, new_borrow) = xi.sbb(yi, borrow); z[i] = zi; - // See "Hacker's Delight" 2nd ed, section 2-13 (Overflow detection) - c = ((yi & !xi) | ((yi | !xi) & zi)) >> (Word::BITS - 1) + borrow = new_borrow; } - - c -} - -/// z1<<_W + z0 = x+y+c, with c == 0 or 1 -#[inline(always)] -fn add_ww(x: Limb, y: Limb, c: Limb) -> (Limb, Limb) { - let yc = y.wrapping_add(c); - let z0 = x.wrapping_add(yc); - // TODO(tarcieri): eliminate data-dependent branches - let z1 = Limb((z0.0 < x.0 || yc.0 < y.0) as Word); - (z1, z0) -} - -/// z1 << _W + z0 = x * y + c -#[inline] -fn mul_add_www(x: Limb, y: Limb, c: Limb) -> (Limb, Limb) { - let z = x.0 as WideWord * y.0 as WideWord + c.0 as WideWord; - (Limb((z >> Word::BITS) as Word), Limb(z as Word)) } #[cfg(test)] diff --git a/src/modular/reduction.rs b/src/modular/reduction.rs index a42e44d89..56ed0d534 100644 --- a/src/modular/reduction.rs +++ b/src/modular/reduction.rs @@ -1,20 +1,10 @@ //! Modular reduction implementation. -use crate::{Limb, Uint, WideWord, Word}; +use crate::{Limb, Uint}; #[cfg(feature = "alloc")] use {crate::BoxedUint, subtle::Choice}; -/// Returns `(hi, lo)` such that `hi * R + lo = x * y + z + w`. -#[inline(always)] -const fn muladdcarry(x: Word, y: Word, z: Word, w: Word) -> (Word, Word) { - let res = (x as WideWord) - .wrapping_mul(y as WideWord) - .wrapping_add(z as WideWord) - .wrapping_add(w as WideWord); - ((res >> Word::BITS) as Word, res as Word) -} - /// Implement the Montgomery reduction algorithm. 
/// /// This is implemented as a macro to abstract over `const fn` and boxed use cases, since the latter @@ -22,29 +12,29 @@ const fn muladdcarry(x: Word, y: Word, z: Word, w: Word) -> (Word, Word) { // TODO(tarcieri): change this into a `const fn` when `const_mut_refs` is stable macro_rules! impl_montgomery_reduction { ($upper:expr, $lower:expr, $modulus:expr, $mod_neg_inv:expr, $limbs:expr) => {{ - let mut meta_carry = Limb(0); + let mut meta_carry = Limb::ZERO; let mut new_sum; let mut i = 0; while i < $limbs { - let u = $lower[i].0.wrapping_mul($mod_neg_inv.0); + let u = $lower[i].wrapping_mul($mod_neg_inv); - let (mut carry, _) = muladdcarry(u, $modulus[0].0, $lower[i].0, 0); + let (_, mut carry) = $lower[i].mac(u, $modulus[0], Limb::ZERO); let mut new_limb; let mut j = 1; while j < ($limbs - i) { - (carry, new_limb) = muladdcarry(u, $modulus[j].0, $lower[i + j].0, carry); - $lower[i + j] = Limb(new_limb); + (new_limb, carry) = $lower[i + j].mac(u, $modulus[j], carry); + $lower[i + j] = new_limb; j += 1; } while j < $limbs { - (carry, new_limb) = muladdcarry(u, $modulus[j].0, $upper[i + j - $limbs].0, carry); - $upper[i + j - $limbs] = Limb(new_limb); + (new_limb, carry) = $upper[i + j - $limbs].mac(u, $modulus[j], carry); + $upper[i + j - $limbs] = new_limb; j += 1; } - (new_sum, meta_carry) = $upper[i].adc(Limb(carry), meta_carry); + (new_sum, meta_carry) = $upper[i].adc(carry, meta_carry); $upper[i] = new_sum; i += 1; diff --git a/src/primitives.rs b/src/primitives.rs new file mode 100644 index 000000000..5d79a5f6c --- /dev/null +++ b/src/primitives.rs @@ -0,0 +1,73 @@ +use crate::{WideWord, Word}; + +/// Multiplies `x` and `y`, returning the most significant +/// and the least significant words as `(hi, lo)`. 
+#[inline(always)] +pub(crate) const fn mulhilo(x: Word, y: Word) -> (Word, Word) { + let res = (x as WideWord) * (y as WideWord); + ((res >> Word::BITS) as Word, res as Word) +} + +/// Adds wide numbers represented by pairs of (most significant word, least significant word) +/// and returns the result in the same format `(hi, lo)`. +#[inline(always)] +pub(crate) const fn addhilo(x_hi: Word, x_lo: Word, y_hi: Word, y_lo: Word) -> (Word, Word) { + let res = (((x_hi as WideWord) << Word::BITS) | (x_lo as WideWord)) + + (((y_hi as WideWord) << Word::BITS) | (y_lo as WideWord)); + ((res >> Word::BITS) as Word, res as Word) +} + +/// Computes `lhs + rhs + carry`, returning the result along with the new carry (0, 1, or 2). +#[inline(always)] +pub const fn adc(lhs: Word, rhs: Word, carry: Word) -> (Word, Word) { + // We could use `Word::overflowing_add()` here analogous to `overflowing_add()`, + // but this version seems to produce slightly better assembly. + let a = lhs as WideWord; + let b = rhs as WideWord; + let carry = carry as WideWord; + let ret = a + b + carry; + (ret as Word, (ret >> Word::BITS) as Word) +} + +/// Computes `lhs + rhs`, returning the result along with the carry (0 or 1). +#[inline(always)] +pub const fn overflowing_add(lhs: Word, rhs: Word) -> (Word, Word) { + let (res, carry) = lhs.overflowing_add(rhs); + (res, carry as Word) +} + +/// Computes `lhs - (rhs + borrow)`, returning the result along with the new borrow. +#[inline(always)] +pub const fn sbb(lhs: Word, rhs: Word, borrow: Word) -> (Word, Word) { + let a = lhs as WideWord; + let b = rhs as WideWord; + let borrow = (borrow >> (Word::BITS - 1)) as WideWord; + let ret = a.wrapping_sub(b + borrow); + (ret as Word, (ret >> Word::BITS) as Word) +} + +/// Computes `lhs * rhs`, returning the low and the high words of the result. 
+#[inline(always)] +pub const fn mul_wide(lhs: Word, rhs: Word) -> (Word, Word) { + let a = lhs as WideWord; + let b = rhs as WideWord; + let ret = a * b; + (ret as Word, (ret >> Word::BITS) as Word) +} + +/// Computes `a + (b * c) + carry`, returning the result along with the new carry. +#[inline(always)] +pub(crate) const fn mac(a: Word, b: Word, c: Word, carry: Word) -> (Word, Word) { + let a = a as WideWord; + let b = b as WideWord; + let c = c as WideWord; + let carry = carry as WideWord; + let ret = a + (b * c) + carry; + (ret as Word, (ret >> Word::BITS) as Word) +} + +/// Computes `(a * b) % d`. +#[inline(always)] +pub(crate) const fn mul_rem(a: Word, b: Word, d: Word) -> Word { + ((a as WideWord * b as WideWord) % (d as WideWord)) as Word +} diff --git a/src/uint/boxed/mul_mod.rs b/src/uint/boxed/mul_mod.rs index 5a38b2dab..adfa9af23 100644 --- a/src/uint/boxed/mul_mod.rs +++ b/src/uint/boxed/mul_mod.rs @@ -2,6 +2,7 @@ use crate::{ modular::{BoxedResidue, BoxedResidueParams}, + primitives::mul_rem, BoxedUint, Limb, MulMod, WideWord, Word, }; @@ -40,9 +41,8 @@ impl BoxedUint { // We implicitly assume `LIMBS > 0`, because `Uint<0>` doesn't compile. // Still the case `LIMBS == 1` needs special handling. if self.nlimbs() == 1 { - let prod = self.limbs[0].0 as WideWord * rhs.limbs[0].0 as WideWord; - let reduced = prod % Word::MIN.wrapping_sub(c.0) as WideWord; - return Self::from(reduced as Word); + let reduced = mul_rem(self.limbs[0].0, rhs.limbs[0].0, Word::MIN.wrapping_sub(c.0)); + return Self::from(reduced); } let product = self.mul(rhs); diff --git a/src/uint/div_limb.rs b/src/uint/div_limb.rs index 8e74b5335..f8d47f8bc 100644 --- a/src/uint/div_limb.rs +++ b/src/uint/div_limb.rs @@ -3,7 +3,10 @@ //! (DOI: 10.1109/TC.2010.143, ). 
use subtle::{Choice, ConditionallySelectable}; -use crate::{ConstChoice, Limb, NonZero, Uint, WideWord, Word}; +use crate::{ + primitives::{addhilo, mulhilo}, + ConstChoice, Limb, NonZero, Uint, Word, +}; /// Calculates the reciprocal of the given 32-bit divisor with the highmost bit set. #[cfg(target_pointer_width = "32")] @@ -111,23 +114,6 @@ const fn short_div(dividend: u32, dividend_bits: u32, divisor: u32, divisor_bits quotient } -/// Multiplies `x` and `y`, returning the most significant -/// and the least significant words as `(hi, lo)`. -#[inline(always)] -const fn mulhilo(x: Word, y: Word) -> (Word, Word) { - let res = (x as WideWord) * (y as WideWord); - ((res >> Word::BITS) as Word, res as Word) -} - -/// Adds wide numbers represented by pairs of (most significant word, least significant word) -/// and returns the result in the same format `(hi, lo)`. -#[inline(always)] -const fn addhilo(x_hi: Word, x_lo: Word, y_hi: Word, y_lo: Word) -> (Word, Word) { - let res = (((x_hi as WideWord) << Word::BITS) | (x_lo as WideWord)) - + (((y_hi as WideWord) << Word::BITS) | (y_lo as WideWord)); - ((res >> Word::BITS) as Word, res as Word) -} - /// Calculate the quotient and the remainder of the division of a wide word /// (supplied as high and low words) by `d`, with a precalculated reciprocal `v`. 
#[inline(always)] diff --git a/src/uint/mul.rs b/src/uint/mul.rs index 2ec0156ee..fb3175e6e 100644 --- a/src/uint/mul.rs +++ b/src/uint/mul.rs @@ -3,8 +3,7 @@ // TODO(tarcieri): use Karatsuba for better performance use crate::{ - Checked, CheckedMul, Concat, ConcatMixed, Limb, Uint, WideWord, WideningMul, Word, Wrapping, - WrappingMul, Zero, + Checked, CheckedMul, Concat, ConcatMixed, Limb, Uint, WideningMul, Wrapping, WrappingMul, Zero, }; use core::ops::{Mul, MulAssign}; use subtle::CtOption; @@ -158,13 +157,13 @@ impl Uint { } if (i * 2 + 1) < LIMBS { - let n = lo.limbs[i * 2 + 1].0 as WideWord + carry.0 as WideWord; - lo.limbs[i * 2 + 1] = Limb(n as Word); - carry = Limb((n >> Word::BITS) as Word); + let (n, c) = lo.limbs[i * 2 + 1].overflowing_add(carry); + lo.limbs[i * 2 + 1] = n; + carry = c; } else { - let n = hi.limbs[i * 2 + 1 - LIMBS].0 as WideWord + carry.0 as WideWord; - hi.limbs[i * 2 + 1 - LIMBS] = Limb(n as Word); - carry = Limb((n >> Word::BITS) as Word); + let (n, c) = hi.limbs[i * 2 + 1 - LIMBS].overflowing_add(carry); + hi.limbs[i * 2 + 1 - LIMBS] = n; + carry = c; } i += 1; diff --git a/src/uint/mul_mod.rs b/src/uint/mul_mod.rs index d13325f17..a77213a26 100644 --- a/src/uint/mul_mod.rs +++ b/src/uint/mul_mod.rs @@ -2,6 +2,7 @@ use crate::{ modular::{DynResidue, DynResidueParams}, + primitives::mul_rem, Limb, MulMod, Uint, WideWord, Word, }; @@ -38,9 +39,8 @@ impl Uint { // We implicitly assume `LIMBS > 0`, because `Uint<0>` doesn't compile. // Still the case `LIMBS == 1` needs special handling. if LIMBS == 1 { - let prod = self.limbs[0].0 as WideWord * rhs.limbs[0].0 as WideWord; - let reduced = prod % Word::MIN.wrapping_sub(c.0) as WideWord; - return Self::from_word(reduced as Word); + let reduced = mul_rem(self.limbs[0].0, rhs.limbs[0].0, Word::MIN.wrapping_sub(c.0)); + return Self::from_word(reduced); } let (lo, hi) = self.split_mul(rhs);