From a01a00a0d37ad008ec5e00ed8d757b168aba7bb7 Mon Sep 17 00:00:00 2001 From: James Lucas Date: Sat, 17 Sep 2016 10:22:17 -0700 Subject: [PATCH 1/5] Splitting transformer trait and adding shuffler. (Needs rulinalg 0.3). --- src/data/transforms/minmax.rs | 4 +++- src/data/transforms/mod.rs | 4 ++++ src/data/transforms/shuffle.rs | 28 ++++++++++++++++++++++++++++ src/data/transforms/standardize.rs | 4 +++- 4 files changed, 38 insertions(+), 2 deletions(-) create mode 100644 src/data/transforms/shuffle.rs diff --git a/src/data/transforms/minmax.rs b/src/data/transforms/minmax.rs index 9d479ae9..8169b18d 100644 --- a/src/data/transforms/minmax.rs +++ b/src/data/transforms/minmax.rs @@ -26,7 +26,7 @@ use learning::error::{Error, ErrorKind}; use linalg::Matrix; -use super::Transformer; +use super::{Invertible, Transformer}; use rulinalg::utils; @@ -145,7 +145,9 @@ impl Transformer> for MinMaxScaler { Ok(inputs) } +} +impl Invertible> for MinMaxScaler { fn inv_transform(&self, mut inputs: Matrix) -> Result, Error> { if let (&Some(ref scales), &Some(ref consts)) = (&self.scale_factors, &self.const_factors) { diff --git a/src/data/transforms/mod.rs b/src/data/transforms/mod.rs index 872b3863..75dded41 100644 --- a/src/data/transforms/mod.rs +++ b/src/data/transforms/mod.rs @@ -11,6 +11,7 @@ pub mod minmax; pub mod standardize; +pub mod shuffle; use learning::error; @@ -21,7 +22,10 @@ pub use self::standardize::Standardizer; pub trait Transformer { /// Transforms the inputs and stores the transformation in the Transformer fn transform(&mut self, inputs: T) -> Result; +} +/// Trait for invertible data transformers +pub trait Invertible : Transformer { /// Maps the inputs using the inverse of the fitted transform. fn inv_transform(&self, inputs: T) -> Result; } \ No newline at end of file diff --git a/src/data/transforms/shuffle.rs b/src/data/transforms/shuffle.rs new file mode 100644 index 00000000..ed457acc --- /dev/null +++ b/src/data/transforms/shuffle.rs @@ -0,0 +1,28 @@ +//! The Shuffler + +/// The Shuffler +pub struct Shuffler; + +use learning::error::{Error, ErrorKind}; +use learning::LearningResult; +use linalg::{Matrix, Vector, Axes}; +use super::Transformer; + +use rand::{Rng, thread_rng}; +use rulinalg::utils; + +impl Transformer> for Shuffler { + /// Transforms the inputs and stores the transformation in the Transformer + fn transform(&mut self, inputs: Matrix) -> LearningResult> { + let n = inputs.rows(); + let mut rng = thread_rng(); + + for i in 0..n { + // Swap i with a random point after it + let j = rng.gen_range(0, n - i); + inputs.swap_rows(i, i + j); + } + + Ok(inputs) + } +} \ No newline at end of file diff --git a/src/data/transforms/standardize.rs b/src/data/transforms/standardize.rs index 9b1dbfee..28ca6b9c 100644 --- a/src/data/transforms/standardize.rs +++ b/src/data/transforms/standardize.rs @@ -26,7 +26,7 @@ use learning::error::{Error, ErrorKind}; use linalg::{Matrix, Vector, Axes}; -use super::Transformer; +use super::{Invertible, Transformer}; use rulinalg::utils; @@ -112,7 +112,9 @@ impl Transformer> for Standardizer { Ok(inputs) } } +} +impl Invertible> for Standardizer { fn inv_transform(&self, mut inputs: Matrix) -> Result, Error> { if let (&Some(ref means), &Some(ref variances)) = (&self.means, &self.variances) { From c5c06497cd1ed2a5cdb34a753b055c130708456d Mon Sep 17 00:00:00 2001 From: James Lucas Date: Wed, 21 Sep 2016 18:54:38 -0700 Subject: [PATCH 2/5] Adding tests and docs --- src/data/transforms/shuffle.rs | 109 ++++++++++++++++++++++++++++++--- 1 file changed, 100 insertions(+), 9 deletions(-) diff --git a/src/data/transforms/shuffle.rs b/src/data/transforms/shuffle.rs index 7a11e835..1e23533e 100644 --- a/src/data/transforms/shuffle.rs +++ b/src/data/transforms/shuffle.rs @@ -1,27 +1,118 @@ //! The Shuffler - -/// The Shuffler -#[derive(Debug)] -pub struct Shuffler; +//! +//! This module contains the unit `Shuffler` struct. This struct implements the +//! `Transformer` trait and is used to shuffle the rows of an input matrix. +//! You can also control the random number generator. +//! +//! # Examples +//! +//! ``` +//! use rusty_machine::linalg::Matrix; +//! use rusty_machine::data::transforms::Transformer; +//! use rusty_machine::data::transforms::shuffle::Shuffler; +//! +//! // Create an input matrix that we want to shuffle +//! let mat = Matrix::new(3, 2, vec![1.0, 2.0, 3.0, 4.0, 5.0, 6.0]); +//! +//! // Create a new shuffler +//! let mut shuffler = Shuffler::default(); +//! let shuffled_mat = shuffler.transform(mat).unwrap(); +//! +//! println!("{}", shuffled_mat); +//! ``` use learning::LearningResult; use linalg::{Matrix, BaseMatrix, BaseMatrixMut}; use super::Transformer; -use rand::{Rng, thread_rng}; +use rand::{Rng, thread_rng, ThreadRng}; -impl Transformer> for Shuffler { - /// Transforms the inputs and stores the transformation in the Transformer +/// The `Shuffler` +/// +/// Provides an implementation of `Transformer` which shuffles +/// the input rows in place. +#[derive(Debug)] +pub struct Shuffler { + rng: R, +} + +impl Shuffler { + /// Construct a new `Shuffler` with given random number generator. + /// + /// # Examples + /// + /// ``` + /// // This doesn't work! + /// extern crate rand; + /// + /// use rusty_machine::data::transforms::Transformer; + /// use rusty_machine::data::transforms::shuffle::Shuffler; + /// use rand::{IsaacRng, SeedableRng}; + /// + /// // We can create a seeded rng + /// let rng = IsaacRng::from_seed(&[1, 2, 3]); + /// + /// let shuffler = Shuffler::new(rng); + /// ``` + pub fn new(rng: R) -> Self { + Shuffler { rng: rng } + } +} + +/// Create a new shuffler using the `rand::thread_rng` function +/// to provide a randomly seeded random number generator. +impl Default for Shuffler { + fn default() -> Self { + Shuffler { rng: thread_rng() } + } +} + +/// The `Shuffler` will transform the input `Matrix` by shuffling +/// its rows in place. +/// +/// Under the hood this uses a Fisher-Yates shuffle. +impl Transformer> for Shuffler { fn transform(&mut self, mut inputs: Matrix) -> LearningResult> { let n = inputs.rows(); - let mut rng = thread_rng(); for i in 0..n { // Swap i with a random point after it - let j = rng.gen_range(0, n - i); + let j = self.rng.gen_range(0, n - i); inputs.swap_rows(i, i + j); } Ok(inputs) } +} + +#[cfg(test)] +mod tests { + use linalg::Matrix; + use super::super::Transformer; + use super::Shuffler; + + use rand::{IsaacRng, SeedableRng}; + + #[test] + fn seeded_shuffle() { + let rng = IsaacRng::from_seed(&[1, 2, 3]); + let mut shuffler = Shuffler::new(rng); + + let mat = Matrix::new(4, 2, vec![1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0]); + let shuffled = shuffler.transform(mat).unwrap(); + + assert_eq!(shuffled.into_vec(), + vec![7.0, 8.0, 5.0, 6.0, 3.0, 4.0, 1.0, 2.0]); + } + + #[test] + fn shuffle_single_row() { + let mut shuffler = Shuffler::default(); + + let mat = Matrix::new(1, 8, vec![1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0]); + let shuffled = shuffler.transform(mat).unwrap(); + + assert_eq!(shuffled.into_vec(), + vec![1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0]); + } } \ No newline at end of file From 8c4324bb28b24e11f4bb7238b4f30e7ab7644501 Mon Sep 17 00:00:00 2001 From: James Lucas Date: Thu, 22 Sep 2016 14:35:24 +0100 Subject: [PATCH 3/5] Fixing doc test for shuffler constructor --- src/data/transforms/shuffle.rs | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/data/transforms/shuffle.rs b/src/data/transforms/shuffle.rs index 1e23533e..9f38385f 100644 --- a/src/data/transforms/shuffle.rs +++ b/src/data/transforms/shuffle.rs @@ -43,16 +43,19 @@ impl Shuffler { /// /// ``` /// // This doesn't work! - /// extern crate rand; + /// # extern crate rand; + /// # extern crate rusty_machine; /// /// use rusty_machine::data::transforms::Transformer; /// use rusty_machine::data::transforms::shuffle::Shuffler; /// use rand::{IsaacRng, SeedableRng}; /// + /// # fn main() { /// // We can create a seeded rng /// let rng = IsaacRng::from_seed(&[1, 2, 3]); /// /// let shuffler = Shuffler::new(rng); + /// # } /// ``` pub fn new(rng: R) -> Self { Shuffler { rng: rng } From 449ef7a8730623a2173778b9ff2d52cf8e1c47fe Mon Sep 17 00:00:00 2001 From: James Lucas Date: Fri, 23 Sep 2016 05:08:47 +0100 Subject: [PATCH 4/5] Updating docs to explain invertible trait --- src/data/transforms/mod.rs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/data/transforms/mod.rs b/src/data/transforms/mod.rs index 75dded41..039aba7e 100644 --- a/src/data/transforms/mod.rs +++ b/src/data/transforms/mod.rs @@ -1,6 +1,6 @@ //! The Transforms module //! -//! This module contains the `Transformer` trait and reexports +//! This module contains the `Transformer` and `Invertible` traits and reexports //! the transformers from child modules. //! //! The `Transformer` trait provides a shared interface for all of the @@ -16,6 +16,7 @@ pub mod shuffle; use learning::error; pub use self::minmax::MinMaxScaler; +pub use self::shuffle::Shuffler; pub use self::standardize::Standardizer; /// Trait for data transformers From cca097de4424692ffd37af5653aef795c6f9fcac Mon Sep 17 00:00:00 2001 From: James Lucas Date: Fri, 23 Sep 2016 05:13:29 +0100 Subject: [PATCH 5/5] Doc and example improvements for shuffler --- src/data/transforms/shuffle.rs | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/src/data/transforms/shuffle.rs b/src/data/transforms/shuffle.rs index 9f38385f..b20fc0bf 100644 --- a/src/data/transforms/shuffle.rs +++ b/src/data/transforms/shuffle.rs @@ -1,8 +1,8 @@ //! The Shuffler //! -//! This module contains the unit `Shuffler` struct. This struct implements the +//! This module contains the `Shuffler` transformer. `Shuffler` implements the //! `Transformer` trait and is used to shuffle the rows of an input matrix. -//! You can also control the random number generator. +//! You can control the random number generator used by the `Shuffler`. //! //! # Examples //! @@ -42,17 +42,16 @@ impl Shuffler { /// # Examples /// /// ``` - /// // This doesn't work! /// # extern crate rand; /// # extern crate rusty_machine; /// /// use rusty_machine::data::transforms::Transformer; /// use rusty_machine::data::transforms::shuffle::Shuffler; - /// use rand::{IsaacRng, SeedableRng}; + /// use rand::{StdRng, SeedableRng}; /// /// # fn main() { /// // We can create a seeded rng - /// let rng = IsaacRng::from_seed(&[1, 2, 3]); + /// let rng = StdRng::from_seed(&[1, 2, 3]); /// /// let shuffler = Shuffler::new(rng); /// # } @@ -94,18 +93,18 @@ mod tests { use super::super::Transformer; use super::Shuffler; - use rand::{IsaacRng, SeedableRng}; + use rand::{StdRng, SeedableRng}; #[test] fn seeded_shuffle() { - let rng = IsaacRng::from_seed(&[1, 2, 3]); + let rng = StdRng::from_seed(&[1, 2, 3]); let mut shuffler = Shuffler::new(rng); let mat = Matrix::new(4, 2, vec![1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0]); let shuffled = shuffler.transform(mat).unwrap(); assert_eq!(shuffled.into_vec(), - vec![7.0, 8.0, 5.0, 6.0, 3.0, 4.0, 1.0, 2.0]); + vec![3.0, 4.0, 1.0, 2.0, 7.0, 8.0, 5.0, 6.0]); } #[test]