uutils · sylvestre · May 22, 2021 · Apr 29, 2021 · Apr 29, 2021 · Apr 29, 2021
diff --git a/Cargo.lock b/Cargo.lock
diff --git a/Cargo.toml b/Cargo.toml
@@ -324,6 +324,9 @@ wc       = { optional=true, version="0.0.6", package="uu_wc", path="src/uu/wc" }
 who      = { optional=true, version="0.0.6", package="uu_who", path="src/uu/who" }
 whoami   = { optional=true, version="0.0.6", package="uu_whoami", path="src/uu/whoami" }
 yes      = { optional=true, version="0.0.6", package="uu_yes", path="src/uu/yes" }
+
+factor_benches = { optional = true, version = "0.0.0", package = "uu_factor_benches", path = "tests/benches/factor" }
+
 #
 # * pinned transitive dependencies
 # Not needed for now. Keep as examples:

diff --git a/src/uu/factor/BENCHMARKING.md b/src/uu/factor/BENCHMARKING.md
@@ -0,0 +1,116 @@
+# Benchmarking `factor`
+
+The benchmarks for `factor` are located under `tests/benches/factor`
+and can be invoked with `cargo bench` in that directory.
+
+They are located outside the `uu_factor` crate, as they do not comply
+with the project's minimum supported Rust version, *i.e.* may require
+a newer version of `rustc`.
+
+
+## Microbenchmarking deterministic functions
+
+We currently use [`criterion`] to benchmark deterministic functions,
+such as `gcd` and `table::factor`.
+
+However, µbenchmarks are by nature unstable: not only are they specific to
+the hardware, operating system version, etc., but they are noisy and affected
+by other tasks on the system (browser, compile jobs, etc.), which can cause
+`criterion` to report spurious performance improvements and regressions.
+
+This can be mitigated by getting as close to [idealised conditions][lemire]
+as possible:
+- minimize the amount of computation and I/O running concurrently with the
+  benchmark, *i.e.* close your browser and IM clients, don't compile at the
+  same time, etc. ;
+- ensure the CPU's [frequency stays constant] during the benchmark ;
+- [isolate a **physical** core], set it to `nohz_full`, and pin the benchmark
+  to it, so it won't be preempted in the middle of a measurement ;
+- disable ASLR by running `setarch -R cargo bench`, so we can compare results
+  across multiple executions.  
+
+
+[`criterion`]: https://bheisler.github.io/criterion.rs/book/index.html
+[lemire]: https://lemire.me/blog/2018/01/16/microbenchmarking-calls-for-idealized-conditions/
+[isolate a **physical** core]: https://pyperf.readthedocs.io/en/latest/system.html#isolate-cpus-on-linux
+[frequency stays constant]: XXXTODO
+
+
+### Guidance for designing µbenchmarks
+
+*Note:* this guidance is specific to `factor` and takes its application domain
+into account; do not expect it to generalise to other projects.  It is based
+on Daniel Lemire's [*Microbenchmarking calls for idealized conditions*][lemire],
+which I recommend reading if you want to add benchmarks to `factor`.
+
+1. Select a small, self-contained, deterministic component  
+   `gcd` and `table::factor` are good examples of such:
+   - no I/O or access to external data structures ;
+   - no call into other components ;
+   - behaviour is deterministic: no RNG, no concurrency, ... ;
+   - the test's body is *fast* (~100ns for `gcd`, ~10µs for `table::factor`),
+     so each sample takes a very short time, minimizing variability and
+     maximizing the number of samples we can take in a given time.
+
+2. Benchmarks are immutable (once merged in `uutils`)  
+   Modifying a benchmark means previously-collected values cannot meaningfully
+   be compared, silently giving nonsensical results.  If you must modify an
+   existing benchmark, rename it.
+
+3. Test common cases  
+   We are interested in overall performance, rather than specific edge-cases;
+   use **reproducibly-randomised inputs**, sampling from either all possible
+   input values or some subset of interest.
+
+4. Use [`criterion`], `criterion::black_box`, ...  
+   `criterion` isn't perfect, but it is also much better than ad-hoc
+   solutions in each benchmark.
+
+
+## Wishlist
+
+### Configurable statistical estimators
+
+`criterion` always uses the arithmetic average as estimator; in µbenchmarks,
+where the code under test is fully deterministic and the measurements are
+subject to additive, positive noise, [the minimum is more appropriate][lemire].
+
+
+### CI & reproducible performance testing
+
+Measuring performance on real hardware is important, as it relates directly
+to what users of `factor` experience; however, such measurements are subject
+to the constraints of the real world, and aren't perfectly reproducible.
+Moreover, the mitigations for it (described above) aren't achievable in
+virtualized, multi-tenant environments such as CI.
+
+Instead, we could run the µbenchmarks in a simulated CPU with [`cachegrind`],
+measure execution “time” in that model (in CI), and use it to detect and report
+performance improvements and regressions.
+
+[`iai`] is an implementation of this idea for Rust.
+
+[`cachegrind`]: https://www.valgrind.org/docs/manual/cg-manual.html
+[`iai`]: https://bheisler.github.io/criterion.rs/book/iai/iai.html
+
+
+### Comparing randomised implementations across multiple inputs
+
+`factor` is a challenging target for system benchmarks as it combines two
+characteristics:
+
+1. integer factoring algorithms are randomised, with large variance in
+   execution time ;
+
+2. various inputs also have large differences in factoring time, which
+   correspond to no natural, linear ordering of the inputs.
+
+
+If (1) were untrue (*i.e.* if execution time weren't random), we could faithfully
+compare 2 implementations (2 successive versions, or `uutils` and GNU) using
+a scatter plot, where each axis corresponds to the perf. of one implementation.
+
+Similarly, without (2) we could plot numbers on the X axis and their factoring
+time on the Y axis, using multiple lines for various quantiles.  The large
+differences in factoring times for successive numbers mean that such a plot
+would be unreadable.
diff --git a/src/uu/factor/Cargo.toml b/src/uu/factor/Cargo.toml
@@ -17,20 +17,15 @@ num-traits = "0.2.13" # used in src/numerics.rs, which is included by build.rs
 [dependencies]
 coz = { version = "0.1.3", optional = true }
 num-traits = "0.2.13" # Needs at least version 0.2.13 for "OverflowingAdd"
-rand = { version="0.7", features=["small_rng"] }
-smallvec = { version="0.6.14, < 1.0" }
-uucore = { version=">=0.0.8", package="uucore", path="../../uucore" }
-uucore_procs = { version=">=0.0.5", package="uucore_procs", path="../../uucore_procs" }
+rand = { version = "0.7", features = ["small_rng"] }
+smallvec = { version = "0.6.14, < 1.0" }
+uucore = { version = ">=0.0.8", package = "uucore", path = "../../uucore" }
+uucore_procs = { version = ">=0.0.5", package = "uucore_procs", path = "../../uucore_procs" }
 
 [dev-dependencies]
-criterion = "0.3"
 paste = "0.1.18"
 quickcheck = "0.9.2"
-rand_chacha = "0.2.2"
 
-[[bench]]
-name = "gcd"
-harness = false
 
 [[bin]]
 name = "factor"

diff --git a/src/uu/factor/src/cli.rs b/src/uu/factor/src/cli.rs
@@ -13,13 +13,13 @@ use std::error::Error;
 use std::io::{self, stdin, stdout, BufRead, Write};
 
 mod factor;
-pub(crate) use factor::*;
+pub use factor::*;
 use uucore::InvalidEncodingHandling;
 
 mod miller_rabin;
 pub mod numeric;
 mod rho;
-mod table;
+pub mod table;
 
 static SYNTAX: &str = "[OPTION] [NUMBER]...";
 static SUMMARY: &str = "Print the prime factors of the given number(s).

diff --git a/src/uu/factor/src/factor.rs b/src/uu/factor/src/factor.rs
@@ -161,7 +161,7 @@ pub fn factor(mut n: u64) -> Factors {
         return factors;
     }
 
-    let (factors, n) = table::factor(n, factors);
+    table::factor(&mut n, &mut factors);
 
     #[allow(clippy::let_and_return)]
     let r = if n < (1 << 32) {
@@ -239,9 +239,13 @@ mod tests {
 }
 
 #[cfg(test)]
-impl quickcheck::Arbitrary for Factors {
-    fn arbitrary<G: quickcheck::Gen>(gen: &mut G) -> Self {
-        use rand::Rng;
+use rand::{
+    distributions::{Distribution, Standard},
+    Rng,
+};
+#[cfg(test)]
+impl Distribution<Factors> for Standard {
+    fn sample<R: Rng + ?Sized>(&self, rng: &mut R) -> Factors {
         let mut f = Factors::one();
         let mut g = 1u64;
         let mut n = u64::MAX;
@@ -252,7 +256,7 @@ impl quickcheck::Arbitrary for Factors {
         // See Generating Random Factored Numbers, Easily, J. Cryptology (2003)
         'attempt: loop {
             while n > 1 {
-                n = gen.gen_range(1, n);
+                n = rng.gen_range(1, n);
                 if miller_rabin::is_prime(n) {
                     if let Some(h) = g.checked_mul(n) {
                         f.push(n);
@@ -269,6 +273,13 @@ impl quickcheck::Arbitrary for Factors {
     }
 }
 
+#[cfg(test)]
+impl quickcheck::Arbitrary for Factors {
+    fn arbitrary<G: quickcheck::Gen>(g: &mut G) -> Self {
+        g.gen()
+    }
+}
+
 #[cfg(test)]
 impl std::ops::BitXor<Exponent> for Factors {
     type Output = Self;