AtheMathmo · sinhrks · Jan 2, 2017 · Jan 2, 2017 · Jan 21, 2017 · Jan 29, 2017
diff --git a/Cargo.toml b/Cargo.toml
@@ -7,6 +7,7 @@ description = "A machine learning library."
 repository = "https://github.com/AtheMathmo/rusty-machine"
 documentation = "https://AtheMathmo.github.io/rusty-machine/"
 keywords = ["machine","learning","stats","data","machine-learning"]
+categories = ["science"]
 readme = "README.md"
 license = "MIT"
 
@@ -15,6 +16,6 @@ stats = []
 datasets = []
 
 [dependencies]
-num = { version = "0.1.35", default-features = false }
-rand = "0.3.14"
-rulinalg = "0.3.7"
+num = { version = "0.1.36", default-features = false }
+rand = "0.3.15"
+rulinalg = "0.4.2"
diff --git a/benches/examples/cross_validation.rs b/benches/examples/cross_validation.rs
@@ -28,7 +28,7 @@ struct DummyModel {
 impl SupModel<Matrix<f64>, Matrix<f64>> for DummyModel {
     fn predict(&self, inputs: &Matrix<f64>) -> LearningResult<Matrix<f64>> {
         let predictions: Vec<f64> = inputs
-            .iter_rows()
+            .row_iter()
             .map(|row| { self.sum + sum(row.iter()) })
             .collect();
         Ok(Matrix::new(inputs.rows(), 1, predictions))

diff --git a/benches/examples/k_means.rs b/benches/examples/k_means.rs
@@ -20,10 +20,10 @@ fn generate_data(centroids: &Matrix<f64>, points_per_centroid: usize, noise: f64
 
     for _ in 0..points_per_centroid {
         // Generate points from each centroid
-        for centroid in centroids.iter_rows() {
+        for centroid in centroids.row_iter() {
             // Generate a point randomly around the centroid
             let mut point = Vec::with_capacity(centroids.cols());
-            for feature in centroid {
+            for feature in centroid.iter() {
                 point.push(feature + normal_rv.ind_sample(&mut rng));
             }
 

diff --git a/examples/k-means_generating_cluster.rs b/examples/k-means_generating_cluster.rs
@@ -24,10 +24,10 @@ fn generate_data(centroids: &Matrix<f64>,
 
     for _ in 0..points_per_centroid {
         // Generate points from each centroid
-        for centroid in centroids.iter_rows() {
+        for centroid in centroids.row_iter() {
             // Generate a point randomly around the centroid
             let mut point = Vec::with_capacity(centroids.cols());
-            for feature in centroid {
+            for feature in centroid.iter() {
                 point.push(feature + normal_rv.ind_sample(&mut rng));
             }
 

diff --git a/examples/naive_bayes_dogs.rs b/examples/naive_bayes_dogs.rs
@@ -135,16 +135,16 @@ fn main() {
     // Score how well we did.
     let mut hits = 0;
     let unprinted_total = test_set_size.saturating_sub(10) as usize;
-    for (dog, prediction) in test_dogs.iter().zip(predictions.iter_rows()).take(unprinted_total) {
-        evaluate_prediction(&mut hits, dog, prediction);
+    for (dog, prediction) in test_dogs.iter().zip(predictions.row_iter()).take(unprinted_total) {
+        evaluate_prediction(&mut hits, dog, prediction.raw_slice());
     }
 
     if unprinted_total > 0 {
         println!("...");
     }
 
-    for (dog, prediction) in test_dogs.iter().zip(predictions.iter_rows()).skip(unprinted_total) {
-        let (actual_color, accurate) = evaluate_prediction(&mut hits, dog, prediction);
+    for (dog, prediction) in test_dogs.iter().zip(predictions.row_iter()).skip(unprinted_total) {
+        let (actual_color, accurate) = evaluate_prediction(&mut hits, dog, prediction.raw_slice());
         println!("Predicted: {:?}; Actual: {:?}; Accurate? {:?}",
                  dog.color, actual_color, accurate);
     }

diff --git a/src/analysis/score.rs b/src/analysis/score.rs
@@ -31,9 +31,10 @@ use learning::toolkit::cost_fn::{CostFunc, MeanSqError};
 /// # Panics
 ///
 /// - outputs and targets have different length
-pub fn accuracy<I>(outputs: I, targets: I) -> f64
-    where I: ExactSizeIterator,
-          I::Item: PartialEq
+pub fn accuracy<I1, I2, T>(outputs: I1, targets: I2) -> f64
+    where T: PartialEq,
+          I1: ExactSizeIterator + Iterator<Item=T>,
+          I2: ExactSizeIterator + Iterator<Item=T>
 {
     assert!(outputs.len() == targets.len(), "outputs and targets must have the same length");
     let len = outputs.len() as f64;
@@ -46,7 +47,8 @@ pub fn accuracy<I>(outputs: I, targets: I) -> f64
 
 /// Returns the fraction of outputs rows which match their target.
 pub fn row_accuracy(outputs: &Matrix<f64>, targets: &Matrix<f64>) -> f64 {
-    accuracy(outputs.iter_rows(), targets.iter_rows())
+    accuracy(outputs.row_iter().map(|r| r.raw_slice()),
+             targets.row_iter().map(|r| r.raw_slice()))
 }
 
 /// Returns the precision score for 2 class classification.

diff --git a/src/data/transforms/minmax.rs b/src/data/transforms/minmax.rs
@@ -86,11 +86,11 @@ impl<T: Float> Transformer<Matrix<T>> for MinMaxScaler<T> {
     fn fit(&mut self, inputs: &Matrix<T>) -> Result<(), Error> {
         let features = inputs.cols();
 
-        // ToDo: can use min, max
+        // TODO: can use min, max
         // https://github.com/AtheMathmo/rulinalg/pull/115
         let mut input_min_max = vec![(T::max_value(), T::min_value()); features];
 
-        for row in inputs.iter_rows() {
+        for row in inputs.row_iter() {
             for (idx, (feature, min_max)) in row.into_iter().zip(input_min_max.iter_mut()).enumerate() {
                 if !feature.is_finite() {
                     return Err(Error::new(ErrorKind::InvalidData,
@@ -145,12 +145,12 @@ impl<T: Float> Transformer<Matrix<T>> for MinMaxScaler<T> {
                 Err(Error::new(ErrorKind::InvalidData,
                                "Input data has different number of columns from fitted data."))
             } else {
-                for row in inputs.iter_rows_mut() {
-                    utils::in_place_vec_bin_op(row, scales.data(), |x, &y| {
+                for mut row in inputs.row_iter_mut() {
+                    utils::in_place_vec_bin_op(row.raw_slice_mut(), scales.data(), |x, &y| {
                         *x = *x * y;
                     });
 
-                    utils::in_place_vec_bin_op(row, consts.data(), |x, &y| {
+                    utils::in_place_vec_bin_op(row.raw_slice_mut(), consts.data(), |x, &y| {
                         *x = *x + y;
                     });
                 }
@@ -174,7 +174,7 @@ impl<T: Float> Invertible<Matrix<T>> for MinMaxScaler<T> {
                                       "Inputs have different feature count than transformer."));
             }
 
-            for row in inputs.iter_rows_mut() {
+            for mut row in inputs.row_iter_mut() {
                 for i in 0..features {
                     row[i] = (row[i] - consts[i]) / scales[i];
                 }

diff --git a/src/data/transforms/mod.rs b/src/data/transforms/mod.rs
@@ -10,12 +10,14 @@
 //! commonly used in machine learning.
 
 pub mod minmax;
+pub mod normalize;
 pub mod standardize;
 pub mod shuffle;
 
 use learning::error;
 
 pub use self::minmax::MinMaxScaler;
+pub use self::normalize::Normalizer;
 pub use self::shuffle::Shuffler;
 pub use self::standardize::Standardizer;
 
@@ -31,4 +33,4 @@ pub trait Transformer<T> {
 pub trait Invertible<T> : Transformer<T> {
     /// Maps the inputs using the inverse of the fitted transform.
     fn inv_transform(&self, inputs: T) -> Result<T, error::Error>;
-}
+}
diff --git a/src/data/transforms/normalize.rs b/src/data/transforms/normalize.rs
@@ -0,0 +1,182 @@
+//! The Normalizing Transformer
+//!
+//! This module contains the `Normalizer` transformer.
+//!
+//! The `Normalizer` transformer is used to transform input data
+//! so that the norm of each row is equal to 1. By default the
+//! `Normalizer` uses the `Euclidean` norm.
+//!
+//! If a row of the input data contains only zeros, `Normalizer` leaves that row unchanged.
+//!
+//! Because transformation is performed per row independently,
+//! inverse transformation is not supported.
+//!
+//! # Examples
+//!
+//! ```
+//! use rusty_machine::data::transforms::{Transformer, Normalizer};
+//! use rusty_machine::linalg::Matrix;
+//!
+//! // Constructs a new `Normalizer`
+//! let mut transformer = Normalizer::default();
+//!
+//! let inputs = Matrix::new(2, 2, vec![-1.0, 2.0, 1.5, 3.0]);
+//!
+//! // Transform the inputs
+//! let transformed = transformer.transform(inputs).unwrap();
+//! ```
+
+use learning::error::{Error, ErrorKind};
+use linalg::{Matrix, MatrixSlice, BaseMatrix, BaseMatrixMut};
+use rulinalg::norm::{MatrixNorm, Euclidean};
+
+use super::Transformer;
+
+use libnum::Float;
+
+use std::marker::PhantomData;
+
+/// The Normalizer
+///
+/// The Normalizer provides an implementation of `Transformer` which divides
+/// each row of the input by its norm; rows whose norm is zero are left unchanged.
+///
+/// The default `Normalizer` will use the `Euclidean` norm.
+///
+/// See the module description for more information.
+#[derive(Debug)]
+pub struct Normalizer<T: Float, M>
+    where for<'a> M: MatrixNorm<T, MatrixSlice<'a, T>>
+{
+    norm: M, // norm applied to each row in `transform`
+    _marker: PhantomData<T> // records the element type `T`; no `T` value is stored
+}
+
+/// Creates a `Normalizer` that uses the `Euclidean` norm.
+impl<T: Float> Default for Normalizer<T, Euclidean> {
+    fn default() -> Self {
+        Normalizer {
+            norm: Euclidean,
+            _marker: PhantomData,
+        }
+    }
+}
+
+impl<T: Float, M> Normalizer<T, M>
+    where for<'a> M: MatrixNorm<T, MatrixSlice<'a, T>>
+{
+    /// Constructs a new `Normalizer` with the given norm.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use rusty_machine::data::transforms::Normalizer;
+    /// use rusty_machine::linalg::norm::Euclidean;
+    ///
+    /// // Constructs a new `Normalizer`
+    /// let _ = Normalizer::<f64, Euclidean>::new(Euclidean);
+    /// ```
+    pub fn new(norm: M) -> Self {
+        Normalizer {
+            norm: norm,
+            _marker: PhantomData
+        }
+    }
+}
+
+impl<T: Float, M> Transformer<Matrix<T>> for Normalizer<T, M>
+    where for<'a> M: MatrixNorm<T, MatrixSlice<'a, T>>
+{
+
+    fn fit(&mut self, _: &Matrix<T>) -> Result<(), Error> {
+        // No-op: the `Normalizer` has no parameters to fit.
+        Ok(())
+    }
+
+    fn transform(&mut self, mut inputs: Matrix<T>) -> Result<Matrix<T>, Error> {
+        let dists: Vec<T> = inputs.row_iter().map(|m| self.norm.norm(&*m)).collect();
+        for (mut row, &d) in inputs.row_iter_mut().zip(dists.iter()) {
+            // A non-finite row norm (NaN or infinite) aborts the whole transform with an error.
+            if !d.is_finite() {
+                return Err(Error::new(ErrorKind::InvalidData,
+                                      "Some data point is non-finite."));
+            } else if d != T::zero() {
+                // Scale the row by its norm; rows whose norm is zero skip this branch and stay unchanged.
+                *row /= d;
+            }
+        }
+        Ok(inputs)
+    }
+}
+
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use super::super::Transformer;
+    use linalg::Matrix;
+
+    use std::f64;
+
+    #[test]
+    fn nan_data_test() {
+        let inputs = Matrix::new(2, 2, vec![f64::NAN; 4]);
+        let mut normalizer = Normalizer::default();
+        let res = normalizer.transform(inputs);
+        assert!(res.is_err()); // all-NaN input is expected to be rejected
+    }
+
+    #[test]
+    fn inf_data_test() {
+        let inputs = Matrix::new(2, 2, vec![f64::INFINITY; 4]);
+        let mut normalizer = Normalizer::default();
+        let res = normalizer.transform(inputs);
+        assert!(res.is_err()); // all-infinite input is expected to be rejected
+    }
+
+    #[test]
+    fn single_row_test() {
+        let inputs = matrix![1.0, 2.0];
+        let mut normalizer = Normalizer::default();
+        let transformed = normalizer.transform(inputs).unwrap();
+        // Expected values are 1/sqrt(5) and 2/sqrt(5): the row [1, 2] divided by its Euclidean norm.
+        let exp = matrix![0.4472135954999579, 0.8944271909999159];
+        assert_matrix_eq!(transformed, exp);
+    }
+
+    #[test]
+    fn basic_normalizer_test() {
+        let inputs = matrix![-1.0f32, 2.0;
+                             0.0, 3.0];
+
+        let mut normalizer = Normalizer::default();
+        let transformed = normalizer.transform(inputs).unwrap();
+        // First row: [-1, 2] / sqrt(5); second row: [0, 3] / 3.
+        let exp = matrix![-0.4472135954999579, 0.8944271909999159;
+                          0., 1.];
+        assert_matrix_eq!(transformed, exp);
+
+        let inputs = matrix![1., 2.;
+                             10., 20.;
+                             100., 200.];
+        // Rows that are scalar multiples of each other are expected to normalize to the same row.
+        let transformed = normalizer.transform(inputs).unwrap();
+
+        let exp = matrix![0.4472135954999579, 0.8944271909999159;
+                          0.4472135954999579, 0.8944271909999159;
+                          0.4472135954999579, 0.8944271909999159];
+        assert_matrix_eq!(transformed, exp);
+
+        let inputs = matrix![1., 2., 10.;
+                             0., 10., 20.;
+                             100., 10., 200.;
+                             0., 0., 0.];
+        let transformed = normalizer.transform(inputs).unwrap();
+        // The all-zero last row is expected to come back unchanged.
+        let exp = matrix![0.09759000729485333, 0.19518001458970666, 0.9759000729485332;
+                          0., 0.4472135954999579, 0.8944271909999159;
+                          0.4467670516087703, 0.04467670516087703, 0.8935341032175406;
+                          0., 0., 0.];
+        assert_matrix_eq!(transformed, exp);
+    }
+}
diff --git a/src/data/transforms/standardize.rs b/src/data/transforms/standardize.rs
@@ -118,10 +118,10 @@ impl<T: Float + FromPrimitive> Transformer<Matrix<T>> for Standardizer<T> {
                 Err(Error::new(ErrorKind::InvalidData,
                                "Input data has different number of columns from fitted data."))
             } else {
-                for row in inputs.iter_rows_mut() {
+                for mut row in inputs.row_iter_mut() {
                     // Subtract the mean
-                    utils::in_place_vec_bin_op(row, means.data(), |x, &y| *x = *x - y);
-                    utils::in_place_vec_bin_op(row, variances.data(), |x, &y| {
+                    utils::in_place_vec_bin_op(row.raw_slice_mut(), means.data(), |x, &y| *x = *x - y);
+                    utils::in_place_vec_bin_op(row.raw_slice_mut(), variances.data(), |x, &y| {
                         *x = (*x * self.scaled_stdev / y.sqrt()) + self.scaled_mean
                     });
                 }
@@ -143,13 +143,13 @@ impl<T: Float + FromPrimitive> Invertible<Matrix<T>> for Standardizer<T> {
                                       "Inputs have different feature count than transformer."));
             }
 
-            for row in inputs.iter_rows_mut() {
-                utils::in_place_vec_bin_op(row, &variances.data(), |x, &y| {
+            for mut row in inputs.row_iter_mut() {
+                utils::in_place_vec_bin_op(row.raw_slice_mut(), &variances.data(), |x, &y| {
                     *x = (*x - self.scaled_mean) * y.sqrt() / self.scaled_stdev
                 });
 
                 // Add the mean
-                utils::in_place_vec_bin_op(row, &means.data(), |x, &y| *x = *x + y);
+                utils::in_place_vec_bin_op(row.raw_slice_mut(), &means.data(), |x, &y| *x = *x + y);
             }
 
             Ok(inputs)