From 2c851ea26d0f7e5f52565527fa54e69d9ade65d1 Mon Sep 17 00:00:00 2001 From: nicoo Date: Wed, 29 Jul 2020 19:39:50 +0200 Subject: [PATCH 01/19] factor: Introduce a type alias for exponents This way, we can easily replace u8 with a larger type when moving to support larger integers. --- src/uu/factor/src/factor.rs | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/uu/factor/src/factor.rs b/src/uu/factor/src/factor.rs index 4e5322084cb..6a7f688acc5 100644 --- a/src/uu/factor/src/factor.rs +++ b/src/uu/factor/src/factor.rs @@ -61,6 +61,9 @@ impl PartialEq for Decomposition { true } +#[derive(Clone, Debug, Eq, PartialEq)] +pub struct Factors { + f: BTreeMap, } impl Eq for Decomposition {} From 35e5ae80390def65570a5d070b3699ce28006284 Mon Sep 17 00:00:00 2001 From: nicoo Date: Wed, 29 Jul 2020 19:47:44 +0200 Subject: [PATCH 02/19] factor::Factors: Split off a Decomposition type The new type can be used to represent in-progress factorisations, which contain non-prime factors. --- src/uu/factor/src/factor.rs | 35 ++++++++++++++++++++++++++--------- 1 file changed, 26 insertions(+), 9 deletions(-) diff --git a/src/uu/factor/src/factor.rs b/src/uu/factor/src/factor.rs index 6a7f688acc5..d263151729c 100644 --- a/src/uu/factor/src/factor.rs +++ b/src/uu/factor/src/factor.rs @@ -62,22 +62,42 @@ impl PartialEq for Decomposition { true } #[derive(Clone, Debug, Eq, PartialEq)] -pub struct Factors { - f: BTreeMap, +struct Decomposition(BTreeMap); + +impl Decomposition { + fn one() -> Decomposition { + Decomposition(BTreeMap::new()) + } + + fn add(&mut self, factor: u64, exp: Exponent) { + debug_assert!(exp > 0); + let n = *self.0.get(&factor).unwrap_or(&0); + self.0.insert(factor, exp + n); + } + + #[cfg(test)] + fn product(&self) -> u64 { + self.0 + .iter() + .fold(1, |acc, (p, exp)| acc * p.pow(*exp as u32)) + } } impl Eq for Decomposition {} #[derive(Clone, Debug, Eq, PartialEq)] pub struct Factors(RefCell); +#[derive(Clone, Debug, Eq, PartialEq)] +pub struct Factors(Decomposition); + impl Factors { pub fn one() -> Factors { - Factors(RefCell::new(Decomposition::one())) + Factors(Decomposition::one()) } pub fn add(&mut self, prime: u64, exp: Exponent) { debug_assert!(miller_rabin::is_prime(prime)); - self.0.borrow_mut().add(prime, exp) + self.0.add(prime, exp) } pub fn push(&mut self, prime: u64) { @@ -86,16 +106,13 @@ impl Factors { #[cfg(test)] fn product(&self) -> u64 { - self.0.borrow().product() + self.0.product() } } impl fmt::Display for Factors { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - let v = &mut (self.0).borrow_mut().0; - v.sort_unstable(); - - for (p, exp) in v.iter() { + for (p, exp) in (self.0).0.iter() { for _ in 0..*exp { write!(f, " {}", p)? } From 9e4c14db638eb7b164eb3b661f0f13c8cff161a2 Mon Sep 17 00:00:00 2001 From: nicoo Date: Wed, 29 Jul 2020 20:38:07 +0200 Subject: [PATCH 03/19] factor::Decomposition: Use a flat vector representation ~18% faster than BTreeMap, and ~5% faster than 'master' --- src/uu/factor/src/factor.rs | 29 ++++------------------------- 1 file changed, 4 insertions(+), 25 deletions(-) diff --git a/src/uu/factor/src/factor.rs b/src/uu/factor/src/factor.rs index d263151729c..07cad84342d 100644 --- a/src/uu/factor/src/factor.rs +++ b/src/uu/factor/src/factor.rs @@ -7,7 +7,6 @@ extern crate rand; -use std::cell::RefCell; use std::fmt; use crate::numeric::{Arithmetic, Montgomery}; @@ -61,32 +60,9 @@ impl PartialEq for Decomposition { true } -#[derive(Clone, Debug, Eq, PartialEq)] -struct Decomposition(BTreeMap); - -impl Decomposition { - fn one() -> Decomposition { - Decomposition(BTreeMap::new()) - } - - fn add(&mut self, factor: u64, exp: Exponent) { - debug_assert!(exp > 0); - let n = *self.0.get(&factor).unwrap_or(&0); - self.0.insert(factor, exp + n); - } - - #[cfg(test)] - fn product(&self) -> u64 { - self.0 - .iter() - .fold(1, |acc, (p, exp)| acc * p.pow(*exp as u32)) - } } impl Eq for Decomposition {} -#[derive(Clone, Debug, Eq, PartialEq)] -pub struct Factors(RefCell); - #[derive(Clone, Debug, Eq, PartialEq)] pub struct Factors(Decomposition); @@ -112,7 +88,10 @@ impl Factors { impl fmt::Display for Factors { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - for (p, exp) in (self.0).0.iter() { + let mut v = (self.0).0.clone(); + v.sort_unstable(); + + for (p, exp) in v.iter() { for _ in 0..*exp { write!(f, " {}", p)? } From b8a7943685f6fc9825ff7c67c5a3b8f9b8d047fd Mon Sep 17 00:00:00 2001 From: nicoo Date: Wed, 29 Jul 2020 20:45:45 +0200 Subject: [PATCH 04/19] factor::Factors: Use a RefCell rather than copy data when printing MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ~2.9% faster than the previous commit, ~11% faster than “master” overall. --- src/uu/factor/src/factor.rs | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/src/uu/factor/src/factor.rs b/src/uu/factor/src/factor.rs index 07cad84342d..4e5322084cb 100644 --- a/src/uu/factor/src/factor.rs +++ b/src/uu/factor/src/factor.rs @@ -7,6 +7,7 @@ extern crate rand; +use std::cell::RefCell; use std::fmt; use crate::numeric::{Arithmetic, Montgomery}; @@ -64,16 +65,16 @@ impl PartialEq for Decomposition { impl Eq for Decomposition {} #[derive(Clone, Debug, Eq, PartialEq)] -pub struct Factors(Decomposition); +pub struct Factors(RefCell); impl Factors { pub fn one() -> Factors { - Factors(Decomposition::one()) + Factors(RefCell::new(Decomposition::one())) } pub fn add(&mut self, prime: u64, exp: Exponent) { debug_assert!(miller_rabin::is_prime(prime)); - self.0.add(prime, exp) + self.0.borrow_mut().add(prime, exp) } pub fn push(&mut self, prime: u64) { @@ -82,13 +83,13 @@ impl Factors { #[cfg(test)] fn product(&self) -> u64 { - self.0.product() + self.0.borrow().product() } } impl fmt::Display for Factors { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - let mut v = (self.0).0.clone(); + let v = &mut (self.0).borrow_mut().0; v.sort_unstable(); for (p, exp) in v.iter() { From 34a472a7ce765cfd7158adacf920f37e8203f248 Mon Sep 17 00:00:00 2001 From: nicoo Date: Wed, 29 Jul 2020 20:27:54 +0200 Subject: [PATCH 05/19] factor: Derecursify and refactor ~7% slowdown, paves the way for upcoming improvements --- src/uu/factor/src/factor.rs | 87 ++++++++++++++++++++++--------------- src/uu/factor/src/table.rs | 5 ++- 2 files changed, 54 insertions(+), 38 deletions(-) diff --git a/src/uu/factor/src/factor.rs b/src/uu/factor/src/factor.rs index 4e5322084cb..dfe6e9f1ad3 100644 --- a/src/uu/factor/src/factor.rs +++ b/src/uu/factor/src/factor.rs @@ -33,7 +33,14 @@ impl Decomposition { } } - #[cfg(test)] + fn is_one(&self) -> bool { + self.0.is_empty() + } + + fn pop(&mut self) -> Option<(u64, Exponent)> { + self.0.pop() + } + fn product(&self) -> u64 { self.0 .iter() @@ -77,11 +84,11 @@ impl Factors { self.0.borrow_mut().add(prime, exp) } + #[cfg(test)] pub fn push(&mut self, prime: u64) { self.add(prime, 1) } - #[cfg(test)] fn product(&self) -> u64 { self.0.borrow().product() } @@ -102,62 +109,70 @@ impl fmt::Display for Factors { } } -fn _factor(num: u64, f: Factors) -> Factors { +fn _find_factor(num: u64) -> Option { use miller_rabin::Result::*; - // Shadow the name, so the recursion automatically goes from “Big” arithmetic to small. - let _factor = |n, f| { - if n < (1 << 32) { - _factor::>(n, f) - } else { - _factor::(n, f) - } - }; - - if num == 1 { - return f; - } - let n = A::new(num); - let divisor = match miller_rabin::test::(n) { - Prime => { - let mut r = f; - r.push(num); - return r; - } - - Composite(d) => d, - Pseudoprime => rho::find_divisor::(n), - }; + match miller_rabin::test::(n) { + Prime => None, + Composite(d) => Some(d), + Pseudoprime => Some(rho::find_divisor::(n)), + } +} - let f = _factor(divisor, f); - _factor(num / divisor, f) +fn find_factor(num: u64) -> Option { + if num < (1 << 32) { + _find_factor::>(num) + } else { + _find_factor::>(num) + } } -pub fn factor(mut n: u64) -> Factors { +pub fn factor(num: u64) -> Factors { let mut factors = Factors::one(); - if n < 2 { + if num < 2 { return factors; } - let z = n.trailing_zeros(); + let mut n = num; + let z = num.trailing_zeros(); if z > 0 { factors.add(2, z as Exponent); n >>= z; } + debug_assert_eq!(num, n * factors.product()); if n == 1 { return factors; } - let (factors, n) = table::factor(n, factors); + table::factor(&mut n, &mut factors); + debug_assert_eq!(num, n * factors.product()); - if n < (1 << 32) { - _factor::>(n, factors) - } else { - _factor::>(n, factors) + if n == 1 { + return factors; + } + + let mut dec = Decomposition::one(); + dec.add(n, 1); + + while !dec.is_one() { + // Check correctness invariant + debug_assert_eq!(num, factors.product() * dec.product()); + + let (f, e) = dec.pop().unwrap(); + + if let Some(d) = find_factor(f) { + dec.add(f / d, e); + dec.add(d, e); + } else { + // f is prime + factors.add(f, e); + } } + + factors } #[cfg(test)] diff --git a/src/uu/factor/src/table.rs b/src/uu/factor/src/table.rs index d6ef796fc03..b62e801cbc7 100644 --- a/src/uu/factor/src/table.rs +++ b/src/uu/factor/src/table.rs @@ -14,7 +14,8 @@ use crate::Factors; include!(concat!(env!("OUT_DIR"), "/prime_table.rs")); -pub(crate) fn factor(mut num: u64, mut factors: Factors) -> (Factors, u64) { +pub(crate) fn factor(n: &mut u64, factors: &mut Factors) { + let mut num = *n; for &(prime, inv, ceil) in P_INVS_U64 { if num == 1 { break; @@ -42,5 +43,5 @@ pub(crate) fn factor(mut num: u64, mut factors: Factors) -> (Factors, u64) { } } - (factors, num) + *n = num; } From cb41a27d94db02469383001b0a2d919e0b5e718d Mon Sep 17 00:00:00 2001 From: nicoo Date: Fri, 31 Jul 2020 14:59:52 +0200 Subject: [PATCH 06/19] factor: Ensure we only need to find every single factor once [WiP] ~17% faster, many optimisation opportunities still missed >:) --- src/uu/factor/src/factor.rs | 40 ++++++++++++++++++++++++++++++++++--- 1 file changed, 37 insertions(+), 3 deletions(-) diff --git a/src/uu/factor/src/factor.rs b/src/uu/factor/src/factor.rs index dfe6e9f1ad3..3f0f73c9bd5 100644 --- a/src/uu/factor/src/factor.rs +++ b/src/uu/factor/src/factor.rs @@ -10,7 +10,7 @@ extern crate rand; use std::cell::RefCell; use std::fmt; -use crate::numeric::{Arithmetic, Montgomery}; +use crate::numeric::{gcd, Arithmetic, Montgomery}; use crate::{miller_rabin, rho, table}; type Exponent = u8; @@ -164,8 +164,42 @@ pub fn factor(num: u64) -> Factors { let (f, e) = dec.pop().unwrap(); if let Some(d) = find_factor(f) { - dec.add(f / d, e); - dec.add(d, e); + let mut gcd_queue = Decomposition::one(); + gcd_queue.add(d, e); + gcd_queue.add(f / d, e); + + let mut non_trivial_gcd = true; + while non_trivial_gcd { + debug_assert_eq!(f, gcd_queue.product()); + + let mut tmp = Decomposition::one(); + non_trivial_gcd = false; + for i in 0..gcd_queue.0.len() - 1 { + let (a, e_a) = gcd_queue.0[i]; + let (b, e_b) = gcd_queue.0[i + 1]; + + let g = gcd(a, b); + if g != 1 { + non_trivial_gcd = true; + tmp.add(a / g, e_a); + tmp.add(g, e_a + e_b); + if i + 1 == gcd_queue.0.len() { + tmp.add(b / g, e_b) + } else { + gcd_queue.0[i + 1] = (b / g, e_b); + } + } else { + tmp.add(a, e_a); + if i + 1 == gcd_queue.0.len() - 1 { + tmp.add(b, e_b) + } + } + } + gcd_queue = tmp; + } + + debug_assert_eq!(f, gcd_queue.product()); + dec.0.extend(gcd_queue.0); } else { // f is prime factors.add(f, e); From c3e1159a8aaf78188b17004bc90d79806ecbabd7 Mon Sep 17 00:00:00 2001 From: nicoo Date: Fri, 31 Jul 2020 15:03:27 +0200 Subject: [PATCH 07/19] factor::Decomposition: Optimise as a factor is never added twice MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The invariant is checked by a debug_assert!, and follows from the previous commit, as `dec` and `factors` only ever contains coprime numbers: - true at the start: factor = ∅ and dec = { n¹ } ; - on every loop iteration, we pull out an element `f` from `dec` and either: - discover it is prime, and add it to `factors` ; - split it into a number of coprime factors, that get reinserted into `dec`; the invariant is maintained, as all divisors of `f` are coprime with all numbers in `dec` and `factors` (as `f` itself is coprime with them. As we only add elements to `Decomposition` objects that are coprime with the existing ones, they are distinct. --- src/uu/factor/src/factor.rs | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/src/uu/factor/src/factor.rs b/src/uu/factor/src/factor.rs index 3f0f73c9bd5..7dc4b90d97b 100644 --- a/src/uu/factor/src/factor.rs +++ b/src/uu/factor/src/factor.rs @@ -25,12 +25,10 @@ impl Decomposition { fn add(&mut self, factor: u64, exp: Exponent) { debug_assert!(exp > 0); + // Assert the factor doesn't already exist in the Decomposition object + debug_assert_eq!(self.0.iter_mut().find(|(f, _)| *f == factor), None); - if let Some((_, e)) = self.0.iter_mut().find(|(f, _)| *f == factor) { - *e += exp; - } else { - self.0.push((factor, exp)) - } + self.0.push((factor, exp)) } fn is_one(&self) -> bool { From cccb725b04157a21b90b7dbd2f1db34da8c86f52 Mon Sep 17 00:00:00 2001 From: nicoo Date: Sun, 9 Aug 2020 17:23:57 +0200 Subject: [PATCH 08/19] factor: Slightly refactor main loop, fix bug --- src/uu/factor/src/factor.rs | 32 +++++++++++++++++++------------- 1 file changed, 19 insertions(+), 13 deletions(-) diff --git a/src/uu/factor/src/factor.rs b/src/uu/factor/src/factor.rs index 7dc4b90d97b..f6da886a14e 100644 --- a/src/uu/factor/src/factor.rs +++ b/src/uu/factor/src/factor.rs @@ -173,24 +173,30 @@ pub fn factor(num: u64) -> Factors { let mut tmp = Decomposition::one(); non_trivial_gcd = false; for i in 0..gcd_queue.0.len() - 1 { - let (a, e_a) = gcd_queue.0[i]; - let (b, e_b) = gcd_queue.0[i + 1]; + let (mut a, e_a) = gcd_queue.0[i]; + let (mut b, e_b) = gcd_queue.0[i + 1]; + + if a == 1 { + continue; + } let g = gcd(a, b); if g != 1 { non_trivial_gcd = true; - tmp.add(a / g, e_a); - tmp.add(g, e_a + e_b); - if i + 1 == gcd_queue.0.len() { - tmp.add(b / g, e_b) - } else { - gcd_queue.0[i + 1] = (b / g, e_b); - } - } else { + a /= g; + b /= g; + } + if a != 1 { tmp.add(a, e_a); - if i + 1 == gcd_queue.0.len() - 1 { - tmp.add(b, e_b) - } + } + if g != 1 { + tmp.add(g, e_a + e_b); + } + + if i + 1 != gcd_queue.0.len() - 1 { + gcd_queue.0[i + 1].0 = b; + } else if b != 1 { + tmp.add(b, e_b); } } gcd_queue = tmp; From 6fff4525d5d0db6aee569b36b4b2025b1a484c43 Mon Sep 17 00:00:00 2001 From: nicoo Date: Wed, 29 Jul 2020 20:51:05 +0200 Subject: [PATCH 09/19] factor::Decomposition: Inline a small number (4) of factors MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This avoids allocating on the heap when factoring most numbers, without using much space on the stack. This is ~3.5% faster than the previous commit, and ~8.3% faster than “master”. --- Cargo.lock | 7 +++++++ src/uu/factor/Cargo.toml | 1 + src/uu/factor/src/factor.rs | 10 ++++++++-- 3 files changed, 16 insertions(+), 2 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index f0b3f99d99c..007bc9d5e3d 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1240,6 +1240,11 @@ dependencies = [ "generic-array 0.8.3 (registry+https://github.com/rust-lang/crates.io-index)", ] +[[package]] +name = "smallvec" +version = "1.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" + [[package]] name = "strsim" version = "0.8.0" @@ -1636,6 +1641,7 @@ dependencies = [ "quickcheck 0.9.2 (registry+https://github.com/rust-lang/crates.io-index)", "rand 0.7.3 (registry+https://github.com/rust-lang/crates.io-index)", "rand_chacha 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)", + "smallvec 1.4.1 (registry+https://github.com/rust-lang/crates.io-index)", "uucore 0.0.4 (git+https://github.com/uutils/uucore.git?branch=canary)", "uucore_procs 0.0.4 (git+https://github.com/uutils/uucore.git?branch=canary)", ] @@ -2679,6 +2685,7 @@ dependencies = [ "checksum sha1 0.6.0 (registry+https://github.com/rust-lang/crates.io-index)" = "2579985fda508104f7587689507983eadd6a6e84dd35d6d115361f530916fa0d" "checksum sha2 0.6.0 (registry+https://github.com/rust-lang/crates.io-index)" = "7d963c78ce367df26d7ea8b8cc655c651b42e8a1e584e869c1e17dae3ccb116a" "checksum sha3 0.6.0 (registry+https://github.com/rust-lang/crates.io-index)" = "26405905b6a56a94c60109cfda62610507ac14a65be531f5767dec5c5a8dd6a0" +"checksum smallvec 1.4.1 (registry+https://github.com/rust-lang/crates.io-index)" = "3757cb9d89161a2f24e1cf78efa0c1fcff485d18e3f55e0aa3480824ddaa0f3f" "checksum strsim 0.8.0 (registry+https://github.com/rust-lang/crates.io-index)" = "8ea5119cdb4c55b55d432abb513a0429384878c15dde60cc77b1c99de1a95a6a" "checksum syn 0.11.11 (registry+https://github.com/rust-lang/crates.io-index)" = "d3b891b9015c88c576343b9b3e41c2c11a51c219ef067b264bd9c8aa9b441dad" "checksum syn 1.0.31 (registry+https://github.com/rust-lang/crates.io-index)" = "b5304cfdf27365b7585c25d4af91b35016ed21ef88f17ced89c7093b43dba8b6" diff --git a/src/uu/factor/Cargo.toml b/src/uu/factor/Cargo.toml index c4dbc96ccc6..a37ee524952 100644 --- a/src/uu/factor/Cargo.toml +++ b/src/uu/factor/Cargo.toml @@ -18,6 +18,7 @@ num-traits = "0.2" # used in src/numerics.rs, which is included by build.rs [dependencies] num-traits = "0.2" rand = { version="0.7", features=["small_rng"] } +smallvec = { version="0.6.13, < 1.0" } uucore = { version="0.0.4", package="uucore", git="https://github.com/uutils/uucore.git", branch="canary" } uucore_procs = { version="0.0.4", package="uucore_procs", git="https://github.com/uutils/uucore.git", branch="canary" } diff --git a/src/uu/factor/src/factor.rs b/src/uu/factor/src/factor.rs index f6da886a14e..b6a38ab7924 100644 --- a/src/uu/factor/src/factor.rs +++ b/src/uu/factor/src/factor.rs @@ -7,6 +7,7 @@ extern crate rand; +use smallvec::SmallVec; use std::cell::RefCell; use std::fmt; @@ -16,11 +17,16 @@ use crate::{miller_rabin, rho, table}; type Exponent = u8; #[derive(Clone, Debug)] -struct Decomposition(Vec<(u64, Exponent)>); +struct Decomposition(SmallVec<[(u64, Exponent); NUM_FACTORS_INLINE]>); + +// The number of factors to inline directly into a `Decomposition` object. +// As a consequence of the Erdős–Kac theorem, the average number of prime +// factors of integers < 10²⁵ ≃ 2⁸³ is 4, so we can use that value. +const NUM_FACTORS_INLINE: usize = 4; impl Decomposition { fn one() -> Decomposition { - Decomposition(Vec::new()) + Decomposition(SmallVec::new()) } fn add(&mut self, factor: u64, exp: Exponent) { From d45db1240a9df7a935a18682b8f66c9297a691e5 Mon Sep 17 00:00:00 2001 From: Roy Ivy III Date: Wed, 7 Oct 2020 21:21:14 -0500 Subject: [PATCH 10/19] factor/refactor ~ fix `cargo clippy` complaints (allow many_single_char_names) --- src/uu/factor/src/factor.rs | 34 +++++++++++++++++----------------- 1 file changed, 17 insertions(+), 17 deletions(-) diff --git a/src/uu/factor/src/factor.rs b/src/uu/factor/src/factor.rs index b6a38ab7924..ef34af03026 100644 --- a/src/uu/factor/src/factor.rs +++ b/src/uu/factor/src/factor.rs @@ -140,10 +140,10 @@ pub fn factor(num: u64) -> Factors { } let mut n = num; - let z = num.trailing_zeros(); - if z > 0 { - factors.add(2, z as Exponent); - n >>= z; + let n_zeros = num.trailing_zeros(); + if n_zeros > 0 { + factors.add(2, n_zeros as Exponent); + n >>= n_zeros; } debug_assert_eq!(num, n * factors.product()); @@ -165,22 +165,22 @@ pub fn factor(num: u64) -> Factors { // Check correctness invariant debug_assert_eq!(num, factors.product() * dec.product()); - let (f, e) = dec.pop().unwrap(); + let (factor, exp) = dec.pop().unwrap(); - if let Some(d) = find_factor(f) { + if let Some(divisor) = find_factor(factor) { let mut gcd_queue = Decomposition::one(); - gcd_queue.add(d, e); - gcd_queue.add(f / d, e); + gcd_queue.add(divisor, exp); + gcd_queue.add(factor / divisor, exp); let mut non_trivial_gcd = true; while non_trivial_gcd { - debug_assert_eq!(f, gcd_queue.product()); + debug_assert_eq!(factor, gcd_queue.product()); let mut tmp = Decomposition::one(); non_trivial_gcd = false; for i in 0..gcd_queue.0.len() - 1 { - let (mut a, e_a) = gcd_queue.0[i]; - let (mut b, e_b) = gcd_queue.0[i + 1]; + let (mut a, exp_a) = gcd_queue.0[i]; + let (mut b, exp_b) = gcd_queue.0[i + 1]; if a == 1 { continue; @@ -193,26 +193,26 @@ pub fn factor(num: u64) -> Factors { b /= g; } if a != 1 { - tmp.add(a, e_a); + tmp.add(a, exp_a); } if g != 1 { - tmp.add(g, e_a + e_b); + tmp.add(g, exp_a + exp_b); } if i + 1 != gcd_queue.0.len() - 1 { gcd_queue.0[i + 1].0 = b; } else if b != 1 { - tmp.add(b, e_b); + tmp.add(b, exp_b); } } gcd_queue = tmp; } - debug_assert_eq!(f, gcd_queue.product()); + debug_assert_eq!(factor, gcd_queue.product()); dec.0.extend(gcd_queue.0); } else { - // f is prime - factors.add(f, e); + // factor is prime + factors.add(factor, exp); } } From 9ad69fb4f1d02f2bed88cb047bd69ebcdf73e98a Mon Sep 17 00:00:00 2001 From: Roy Ivy III Date: Wed, 7 Oct 2020 22:01:09 -0500 Subject: [PATCH 11/19] tests/factor ~ update RNG usage and variable reports to ease debugging --- Cargo.toml | 2 +- tests/by-util/test_factor.rs | 22 +++++++++++++++++----- 2 files changed, 18 insertions(+), 6 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index d6f97ad21a6..b68d61e1c8f 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -330,7 +330,7 @@ pin_winapi-util = { version="0.1.2, < 0.1.3", package="winapi-util" } ## winapi- [dev-dependencies] filetime = "0.2" libc = "0.2" -rand = "0.6" +rand = "0.7" regex = "1.0" tempfile = "3.1" time = "0.1" diff --git a/tests/by-util/test_factor.rs b/tests/by-util/test_factor.rs index 358f38fd3ac..d0eaebac87d 100644 --- a/tests/by-util/test_factor.rs +++ b/tests/by-util/test_factor.rs @@ -6,6 +6,7 @@ // that was distributed with this source code. use crate::common::util::*; +use std::time::SystemTime; #[path = "../../src/uu/factor/sieve.rs"] mod sieve; @@ -13,7 +14,7 @@ use self::sieve::Sieve; extern crate rand; use self::rand::distributions::{Distribution, Uniform}; -use self::rand::{rngs::SmallRng, FromEntropy, Rng}; +use self::rand::{rngs::SmallRng, Rng, SeedableRng}; const NUM_PRIMES: usize = 10000; const LOG_PRIMES: f64 = 14.0; // ceil(log2(NUM_PRIMES)) @@ -24,8 +25,13 @@ const NUM_TESTS: usize = 100; fn test_random() { let primes = Sieve::primes().take(NUM_PRIMES).collect::>(); - let mut rng = SmallRng::from_entropy(); - println!("(seed) rng={:?}", rng); + let rng_seed = SystemTime::now() + .duration_since(SystemTime::UNIX_EPOCH) + .unwrap() + .as_secs(); + println!("rng_seed={:?}", rng_seed); + + let mut rng = SmallRng::seed_from_u64(rng_seed); let mut rand_gt = move |min: u64| { let mut product = 1u64; let mut factors = Vec::new(); @@ -73,8 +79,12 @@ fn test_random() { #[test] fn test_random_big() { - let mut rng = SmallRng::from_entropy(); - println!("(seed) rng={:?}", rng); + let rng_seed = SystemTime::now() + .duration_since(SystemTime::UNIX_EPOCH) + .unwrap() + .as_secs(); + println!("rng_seed={:?}", rng_seed); + let mut rng = SmallRng::seed_from_u64(rng_seed); let bitrange_1 = Uniform::new(14usize, 51); let mut rand_64 = move || { // first, choose a random number of bits for the first factor @@ -161,6 +171,8 @@ fn test_big_primes() { } fn run(instring: &[u8], outstring: &[u8]) { + println!("STDIN='{}'", String::from_utf8_lossy(instring)); + println!("STDOUT(expected)='{}'", String::from_utf8_lossy(outstring)); // now run factor new_ucmd!() .pipe_in(instring) From ac893ddf473751f55137a5d97b9d440f85a6a345 Mon Sep 17 00:00:00 2001 From: Roy Ivy III Date: Wed, 7 Oct 2020 22:59:44 -0500 Subject: [PATCH 12/19] tests ~ (sub-crate/factor) add tests for known prior factorization failures --- src/uu/factor/src/factor.rs | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/src/uu/factor/src/factor.rs b/src/uu/factor/src/factor.rs index ef34af03026..4f625441cdf 100644 --- a/src/uu/factor/src/factor.rs +++ b/src/uu/factor/src/factor.rs @@ -224,6 +224,19 @@ mod tests { use super::{factor, Factors}; use quickcheck::quickcheck; + #[test] + fn factor_correctly_recombines_prior_test_failures() { + let prior_failures = [ + // * integers with duplicate factors (ie, N.pow(M)) + 4566769_u64, // == 2137.pow(2) + 2044854919485649_u64, + 18446739546814299361_u64, + 18446738440860217487_u64, + 18446736729316206481_u64, + ]; + assert!(prior_failures.iter().all(|i| factor(*i).product() == *i)); + } + #[test] fn factor_recombines_small() { assert!((1..10_000) @@ -231,6 +244,15 @@ mod tests { .all(|i| factor(i).product() == i)); } + #[test] + fn factor_recombines_small_squares() { + // factor(18446736729316206481) == 4294966441 ** 2 ; causes debug_assert fault for repeated decomposition factor in add() + // ToDO: explain/combine with factor_18446736729316206481 and factor_18446739546814299361 tests + assert!((1..10_000) + .map(|i| (2 * i + 1) * (2 * i + 1)) + .all(|i| factor(i).product() == i)); + } + #[test] fn factor_recombines_overflowing() { assert!((0..250) From 86a50385719230cab70f3a46148bcbf3c0715be9 Mon Sep 17 00:00:00 2001 From: Roy Ivy III Date: Thu, 8 Oct 2020 00:07:03 -0500 Subject: [PATCH 13/19] fix/factor ~ fix fault when factoring number composed of a squared factor --- src/uu/factor/src/factor.rs | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/src/uu/factor/src/factor.rs b/src/uu/factor/src/factor.rs index 4f625441cdf..ffb099f2adb 100644 --- a/src/uu/factor/src/factor.rs +++ b/src/uu/factor/src/factor.rs @@ -169,10 +169,16 @@ pub fn factor(num: u64) -> Factors { if let Some(divisor) = find_factor(factor) { let mut gcd_queue = Decomposition::one(); - gcd_queue.add(divisor, exp); - gcd_queue.add(factor / divisor, exp); - let mut non_trivial_gcd = true; + let quotient = factor / divisor; + if quotient == divisor { + gcd_queue.add(divisor, exp + 1); + } else { + gcd_queue.add(divisor, exp); + gcd_queue.add(quotient, exp); + } + + let mut non_trivial_gcd = quotient != divisor; while non_trivial_gcd { debug_assert_eq!(factor, gcd_queue.product()); From d3bad8b50a2f0e977f1f224d2b96fad828f4d9b9 Mon Sep 17 00:00:00 2001 From: Roy Ivy III Date: Thu, 8 Oct 2020 18:58:09 -0500 Subject: [PATCH 14/19] perf/factor ~ improve factor() quotient and loop comparison (~6% time improvement) --- src/uu/factor/src/factor.rs | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/uu/factor/src/factor.rs b/src/uu/factor/src/factor.rs index ffb099f2adb..3cf9783a267 100644 --- a/src/uu/factor/src/factor.rs +++ b/src/uu/factor/src/factor.rs @@ -171,19 +171,19 @@ pub fn factor(num: u64) -> Factors { let mut gcd_queue = Decomposition::one(); let quotient = factor / divisor; - if quotient == divisor { + let mut trivial_gcd = quotient == divisor; + if trivial_gcd { gcd_queue.add(divisor, exp + 1); } else { gcd_queue.add(divisor, exp); gcd_queue.add(quotient, exp); } - let mut non_trivial_gcd = quotient != divisor; - while non_trivial_gcd { + while !trivial_gcd { debug_assert_eq!(factor, gcd_queue.product()); let mut tmp = Decomposition::one(); - non_trivial_gcd = false; + trivial_gcd = true; for i in 0..gcd_queue.0.len() - 1 { let (mut a, exp_a) = gcd_queue.0[i]; let (mut b, exp_b) = gcd_queue.0[i + 1]; @@ -194,7 +194,7 @@ pub fn factor(num: u64) -> Factors { let g = gcd(a, b); if g != 1 { - non_trivial_gcd = true; + trivial_gcd = false; a /= g; b /= g; } From ac41d41bdf0ba956af3e5271c9d9dc86fa253a63 Mon Sep 17 00:00:00 2001 From: Roy Ivy III Date: Thu, 8 Oct 2020 21:13:26 -0500 Subject: [PATCH 15/19] perf/factor ~ tune number of stack inlined decomposition values (~1% time improvement) --- src/uu/factor/src/factor.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/uu/factor/src/factor.rs b/src/uu/factor/src/factor.rs index 3cf9783a267..524ee938127 100644 --- a/src/uu/factor/src/factor.rs +++ b/src/uu/factor/src/factor.rs @@ -20,9 +20,9 @@ type Exponent = u8; struct Decomposition(SmallVec<[(u64, Exponent); NUM_FACTORS_INLINE]>); // The number of factors to inline directly into a `Decomposition` object. -// As a consequence of the Erdős–Kac theorem, the average number of prime -// factors of integers < 10²⁵ ≃ 2⁸³ is 4, so we can use that value. -const NUM_FACTORS_INLINE: usize = 4; +// As a consequence of the Erdős–Kac theorem, the average number of prime factors +// of integers < 10²⁵ ≃ 2⁸³ is 4, so we can use a slightly higher value. +const NUM_FACTORS_INLINE: usize = 5; impl Decomposition { fn one() -> Decomposition { From 002d76056e698fb5e70e90601cff999a0222aac4 Mon Sep 17 00:00:00 2001 From: Roy Ivy III Date: Thu, 8 Oct 2020 22:45:02 -0500 Subject: [PATCH 16/19] tests/factor ~ test first 100000 integers for expected results --- Cargo.toml | 1 + tests/by-util/test_factor.rs | 21 +++++++++++++++++++++ 2 files changed, 22 insertions(+) diff --git a/Cargo.toml b/Cargo.toml index b68d61e1c8f..38881725041 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -332,6 +332,7 @@ filetime = "0.2" libc = "0.2" rand = "0.7" regex = "1.0" +sha1 = { version="0.6", features=["std"] } tempfile = "3.1" time = "0.1" unindent = "0.1" diff --git a/tests/by-util/test_factor.rs b/tests/by-util/test_factor.rs index d0eaebac87d..33c184ba005 100644 --- a/tests/by-util/test_factor.rs +++ b/tests/by-util/test_factor.rs @@ -21,6 +21,27 @@ const LOG_PRIMES: f64 = 14.0; // ceil(log2(NUM_PRIMES)) const NUM_TESTS: usize = 100; +#[test] +fn test_first_100000_integers() { + extern crate sha1; + + let n_integers = 100_000; + let mut instring = String::new(); + for i in 0..=n_integers { + instring.push_str(&(format!("{} ", i))[..]); + } + + println!("STDIN='{}'", instring); + let result = new_ucmd!().pipe_in(instring.as_bytes()).run(); + let stdout = result.stdout; + + assert!(result.success); + + // `seq 0 100000 | factor | sha1sum` => "4ed2d8403934fa1c76fe4b84c5d4b8850299c359" + let hash_check = sha1::Sha1::from(stdout.as_bytes()).hexdigest(); + assert_eq!(hash_check, "4ed2d8403934fa1c76fe4b84c5d4b8850299c359"); +} + #[test] fn test_random() { let primes = Sieve::primes().take(NUM_PRIMES).collect::>(); From 4284a5c0c9bebeda6386146f07304f18416f6e57 Mon Sep 17 00:00:00 2001 From: Roy Ivy III Date: Thu, 8 Oct 2020 22:59:11 -0500 Subject: [PATCH 17/19] polish/factor ~ correct spelling --- src/uu/factor/src/factor.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/uu/factor/src/factor.rs b/src/uu/factor/src/factor.rs index 524ee938127..a7dc4fa2e24 100644 --- a/src/uu/factor/src/factor.rs +++ b/src/uu/factor/src/factor.rs @@ -269,7 +269,7 @@ mod tests { #[test] fn factor_recombines_strong_pseudoprime() { // This is a strong pseudoprime (wrt. miller_rabin::BASIS) - // and triggered a bug in rho::factor's codepath handling + // and triggered a bug in rho::factor's code path handling // miller_rabbin::Result::Composite let pseudoprime = 17179869183; for _ in 0..20 { @@ -300,7 +300,7 @@ impl quickcheck::Arbitrary for Factors { let mut n = u64::MAX; // Adam Kalai's algorithm for generating uniformly-distributed - // integers and their factorisation. + // integers and their factorization. // // See Generating Random Factored Numbers, Easily, J. Cryptology (2003) 'attempt: loop { From b0439765da9cd414bbcce1e80ca7eda6c3774ce2 Mon Sep 17 00:00:00 2001 From: Roy Ivy III Date: Thu, 8 Oct 2020 23:00:33 -0500 Subject: [PATCH 18/19] tests ~ (sub-crate factor) refactor divisor() test for improved readability --- src/uu/factor/src/numeric/gcd.rs | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/uu/factor/src/numeric/gcd.rs b/src/uu/factor/src/numeric/gcd.rs index 01e4a23bd49..96a0366a217 100644 --- a/src/uu/factor/src/numeric/gcd.rs +++ b/src/uu/factor/src/numeric/gcd.rs @@ -79,7 +79,11 @@ mod tests { fn divisor(a: u64, b: u64) -> bool { // Test that gcd(a, b) divides a and b let g = gcd(a, b); - (g != 0 && a % g == 0 && b % g == 0) || (g == 0 && a == 0 && b == 0) + if g != 0 { + a % g == 0 && b % g == 0 + } else { + a == 0 && b == 0 // for g == 0 + } } fn commutative(a: u64, b: u64) -> bool { From 5f7d6c372e8629769afc29de231f584ee33b6e36 Mon Sep 17 00:00:00 2001 From: Roy Ivy III Date: Sat, 10 Oct 2020 12:53:09 -0500 Subject: [PATCH 19/19] tests/factor ~ refactor for readability + improve DRY --- Cargo.toml | 1 + tests/by-util/test_factor.rs | 24 ++++++++++++++---------- 2 files changed, 15 insertions(+), 10 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 38881725041..8f61ef4ec08 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -328,6 +328,7 @@ pin_same-file = { version="1.0.4, < 1.0.6", package="same-file" } ## same-file v pin_winapi-util = { version="0.1.2, < 0.1.3", package="winapi-util" } ## winapi-util v0.1.3 has compiler errors for MinRustV v1.32.0, expects 1.34 [dev-dependencies] +conv = "0.3" filetime = "0.2" libc = "0.2" rand = "0.7" diff --git a/tests/by-util/test_factor.rs b/tests/by-util/test_factor.rs index 33c184ba005..5bde17cdbe6 100644 --- a/tests/by-util/test_factor.rs +++ b/tests/by-util/test_factor.rs @@ -10,15 +10,15 @@ use std::time::SystemTime; #[path = "../../src/uu/factor/sieve.rs"] mod sieve; -use self::sieve::Sieve; +extern crate conv; extern crate rand; + use self::rand::distributions::{Distribution, Uniform}; use self::rand::{rngs::SmallRng, Rng, SeedableRng}; +use self::sieve::Sieve; const NUM_PRIMES: usize = 10000; -const LOG_PRIMES: f64 = 14.0; // ceil(log2(NUM_PRIMES)) - const NUM_TESTS: usize = 100; #[test] @@ -44,6 +44,9 @@ fn test_first_100000_integers() { #[test] fn test_random() { + use conv::prelude::*; + + let log_num_primes = f64::value_from(NUM_PRIMES).unwrap().log2().ceil(); let primes = Sieve::primes().take(NUM_PRIMES).collect::>(); let rng_seed = SystemTime::now() @@ -51,16 +54,16 @@ fn test_random() { .unwrap() .as_secs(); println!("rng_seed={:?}", rng_seed); - let mut rng = SmallRng::seed_from_u64(rng_seed); + let mut rand_gt = move |min: u64| { - let mut product = 1u64; + let mut product = 1_u64; let mut factors = Vec::new(); while product < min { // log distribution---higher probability for lower numbers let factor; loop { - let next = rng.gen_range(0f64, LOG_PRIMES).exp2().floor() as usize; + let next = rng.gen_range(0_f64, log_num_primes).exp2().floor() as usize; if next < NUM_PRIMES { factor = primes[next]; break; @@ -106,7 +109,8 @@ fn test_random_big() { .as_secs(); println!("rng_seed={:?}", rng_seed); let mut rng = SmallRng::seed_from_u64(rng_seed); - let bitrange_1 = Uniform::new(14usize, 51); + + let bitrange_1 = Uniform::new(14_usize, 51); let mut rand_64 = move || { // first, choose a random number of bits for the first factor let f_bit_1 = bitrange_1.sample(&mut rng); @@ -116,11 +120,11 @@ fn test_random_big() { // we will have a number of additional factors equal to nfacts + 1 // where nfacts is in [0, floor(rem/14) ) NOTE half-open interval // Each prime factor is at least 14 bits, hence floor(rem/14) - let nfacts = Uniform::new(0usize, rem / 14).sample(&mut rng); + let nfacts = Uniform::new(0_usize, rem / 14).sample(&mut rng); // we have to distribute extrabits among the (nfacts + 1) values let extrabits = rem - (nfacts + 1) * 14; // (remember, a Range is a half-open interval) - let extrarange = Uniform::new(0usize, extrabits + 1); + let extrarange = Uniform::new(0_usize, extrabits + 1); // to generate an even split of this range, generate n-1 random elements // in the range, add the desired total value to the end, sort this list, @@ -147,7 +151,7 @@ fn test_random_big() { let f_bits = f_bits; let mut nbits = 0; - let mut product = 1u64; + let mut product = 1_u64; let mut factors = Vec::new(); for bit in f_bits { assert!(bit < 37);