From 60bee0d82362988be9c2f5efbe0c5da8fa6eee95 Mon Sep 17 00:00:00 2001 From: sinhrks Date: Tue, 27 Dec 2016 22:25:33 +0900 Subject: [PATCH] Bump to rulinalg 0.4 --- Cargo.toml | 2 +- examples/k-means_generating_cluster.rs | 4 +-- examples/naive_bayes_dogs.rs | 12 +++---- src/analysis/score.rs | 49 ++++++++++++++++++++++++-- src/data/transforms/minmax.rs | 12 +++---- src/data/transforms/standardize.rs | 12 +++---- src/learning/dbscan.rs | 20 ++++++----- src/learning/gmm.rs | 14 ++++---- src/learning/gp.rs | 10 +++--- src/learning/k_means.rs | 4 +-- src/learning/naive_bayes.rs | 8 ++--- src/learning/nnet.rs | 2 +- src/learning/svm.rs | 10 +++--- src/learning/toolkit/kernel.rs | 6 ++-- src/learning/toolkit/regularization.rs | 18 ++++++---- src/lib.rs | 2 +- 16 files changed, 117 insertions(+), 68 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 34979915..d38693b0 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -17,4 +17,4 @@ datasets = [] [dependencies] num = { version = "0.1.35", default-features = false } rand = "0.3.14" -rulinalg = "0.3.7" +rulinalg = "0.4.0" diff --git a/examples/k-means_generating_cluster.rs b/examples/k-means_generating_cluster.rs index 75265447..078851df 100644 --- a/examples/k-means_generating_cluster.rs +++ b/examples/k-means_generating_cluster.rs @@ -24,10 +24,10 @@ fn generate_data(centroids: &Matrix, for _ in 0..points_per_centroid { // Generate points from each centroid - for centroid in centroids.iter_rows() { + for centroid in centroids.row_iter() { // Generate a point randomly around the centroid let mut point = Vec::with_capacity(centroids.cols()); - for feature in centroid { + for feature in centroid.iter() { point.push(feature + normal_rv.ind_sample(&mut rng)); } diff --git a/examples/naive_bayes_dogs.rs b/examples/naive_bayes_dogs.rs index 2e57de54..38f82e34 100644 --- a/examples/naive_bayes_dogs.rs +++ b/examples/naive_bayes_dogs.rs @@ -135,16 +135,16 @@ fn main() { // Score how well we did. 
let mut hits = 0; let unprinted_total = test_set_size.saturating_sub(10) as usize; - for (dog, prediction) in test_dogs.iter().zip(predictions.iter_rows()).take(unprinted_total) { - evaluate_prediction(&mut hits, dog, prediction); + for (dog, prediction) in test_dogs.iter().zip(predictions.row_iter()).take(unprinted_total) { + evaluate_prediction(&mut hits, dog, prediction.raw_slice()); } - + if unprinted_total > 0 { println!("..."); } - - for (dog, prediction) in test_dogs.iter().zip(predictions.iter_rows()).skip(unprinted_total) { - let (actual_color, accurate) = evaluate_prediction(&mut hits, dog, prediction); + + for (dog, prediction) in test_dogs.iter().zip(predictions.row_iter()).skip(unprinted_total) { + let (actual_color, accurate) = evaluate_prediction(&mut hits, dog, prediction.raw_slice()); println!("Predicted: {:?}; Actual: {:?}; Accurate? {:?}", dog.color, actual_color, accurate); } diff --git a/src/analysis/score.rs b/src/analysis/score.rs index b8329b97..4a079525 100644 --- a/src/analysis/score.rs +++ b/src/analysis/score.rs @@ -44,9 +44,24 @@ pub fn accuracy(outputs: I, targets: I) -> f64 correct as f64 / len } + /// Returns the fraction of outputs rows which match their target. -pub fn row_accuracy(outputs: &Matrix, targets: &Matrix) -> f64 { - accuracy(outputs.iter_rows(), targets.iter_rows()) +pub fn row_accuracy(outputs: &Matrix, targets: &Matrix) -> f64 { + + assert!(outputs.rows() == targets.rows()); + let len = outputs.rows() as f64; + + let correct = outputs.row_iter() + .zip(targets.row_iter()) + .filter(|&(ref x, ref y)| x.raw_slice() + .iter() + .zip(y.raw_slice()) + .all(|(v1, v2)| v1 == v2)) + .count(); + correct as f64 / len + + // Row doesn't impl PartialEq + // accuracy(outputs.row_iter(), targets.row_iter()) } /// Returns the precision score for 2 class classification. 
@@ -212,7 +227,7 @@ pub fn neg_mean_squared_error(outputs: &Matrix, targets: &Matrix) -> f
 #[cfg(test)]
 mod tests {
     use linalg::Matrix;
-    use super::{accuracy, precision, recall, f1, neg_mean_squared_error};
+    use super::{accuracy, row_accuracy, precision, recall, f1, neg_mean_squared_error};
 
     #[test]
     fn test_accuracy() {
@@ -331,6 +346,34 @@ mod tests {
         f1(outputs.iter(), targets.iter());
     }
 
+    #[test]
+    fn test_row_accuracy() {
+        let outputs = matrix![1, 0;
+                              0, 1;
+                              1, 0];
+        let targets = matrix![1, 0;
+                              0, 1;
+                              1, 0];
+        assert_eq!(row_accuracy(&outputs, &targets), 1.0);
+
+        let outputs = matrix![1, 0;
+                              0, 1;
+                              1, 0];
+        let targets = matrix![0, 1;
+                              0, 1;
+                              1, 0];
+        assert_eq!(row_accuracy(&outputs, &targets), 2. / 3.);
+
+        let outputs = matrix![1., 0.;
+                              0., 1.;
+                              1., 0.];
+        let targets = matrix![0., 1.;
+                              0., 1.;
+                              1., 0.];
+        assert_eq!(row_accuracy(&outputs, &targets), 2. / 3.);
+    }
+
+
 #[test]
 fn test_neg_mean_squared_error_1d() {
     let outputs = Matrix::new(3, 1, vec![1f64, 2f64, 3f64]);
diff --git a/src/data/transforms/minmax.rs b/src/data/transforms/minmax.rs
index 580d0ea3..d48442b4 100644
--- a/src/data/transforms/minmax.rs
+++ b/src/data/transforms/minmax.rs
@@ -90,7 +90,7 @@ impl Transformer> for MinMaxScaler {
         // https://github.com/AtheMathmo/rulinalg/pull/115
         let mut input_min_max = vec![(T::max_value(), T::min_value()); features];
 
-        for row in inputs.iter_rows() {
+        for row in inputs.row_iter() {
             for (idx, (feature, min_max)) in row.into_iter().zip(input_min_max.iter_mut()).enumerate() {
                 if !feature.is_finite() {
                     return Err(Error::new(ErrorKind::InvalidData,
@@ -145,15 +145,15 @@ impl Transformer> for MinMaxScaler {
             Err(Error::new(ErrorKind::InvalidData,
                            "Input data has different number of columns from fitted data."))
         }
         else {
-            for row in inputs.iter_rows_mut() {
-                utils::in_place_vec_bin_op(row, scales.data(), |x, &y| {
+            for mut row in inputs.row_iter_mut() {
+                utils::in_place_vec_bin_op(&mut row.raw_slice_mut(), &scales, |x, &y| {
                     *x = *x * y;
                 });
-                utils::in_place_vec_bin_op(row, consts.data(), |x, &y| {
+                utils::in_place_vec_bin_op(&mut row.raw_slice_mut(), &consts, |x, &y| {
                     *x = *x + y;
                 });
             }
             Ok(inputs)
         }
     } else {
@@ -174,7 +174,7 @@ impl Invertible> for MinMaxScaler {
                                   "Inputs have different feature count than transformer."));
         }
 
-        for row in inputs.iter_rows_mut() {
+        for mut row in inputs.row_iter_mut() {
             for i in 0..features {
                 row[i] = (row[i] - consts[i]) / scales[i];
             }
diff --git a/src/data/transforms/standardize.rs b/src/data/transforms/standardize.rs
index b13fa807..48f5671d 100644
--- a/src/data/transforms/standardize.rs
+++ b/src/data/transforms/standardize.rs
@@ -118,10 +118,10 @@ impl Transformer> for Standardizer {
             Err(Error::new(ErrorKind::InvalidData,
                            "Input data has different number of columns from fitted data."))
         } else {
-            for row in inputs.iter_rows_mut() {
+            for mut row in inputs.row_iter_mut() {
                 // Subtract the mean
-                utils::in_place_vec_bin_op(row, means.data(), |x, &y| *x = *x - y);
-                utils::in_place_vec_bin_op(row, variances.data(), |x, &y| {
+                utils::in_place_vec_bin_op(&mut row.raw_slice_mut(), &means.data(), |x, &y| *x = *x - y);
+                utils::in_place_vec_bin_op(&mut row.raw_slice_mut(), &variances.data(), |x, &y| {
                     *x = (*x * self.scaled_stdev / y.sqrt()) + self.scaled_mean
                 });
             }
@@ -143,13 +143,13 @@ impl Invertible> for Standardizer {
                                   "Inputs have different feature count than transformer."));
         }
 
-        for row in inputs.iter_rows_mut() {
-            utils::in_place_vec_bin_op(row, &variances.data(), |x, &y| {
+        for mut row in inputs.row_iter_mut() {
+            utils::in_place_vec_bin_op(&mut row.raw_slice_mut(), &variances.data(), |x, &y| {
                 *x = (*x - self.scaled_mean) * y.sqrt() / self.scaled_stdev
             });
 
             // Add the mean
-            utils::in_place_vec_bin_op(row, &means.data(), |x,
&y| *x = *x + y); + utils::in_place_vec_bin_op(&mut row.raw_slice_mut(), &means.data(), |x, &y| *x = *x + y); } Ok(inputs) diff --git a/src/learning/dbscan.rs b/src/learning/dbscan.rs index c45b8577..31383dbc 100644 --- a/src/learning/dbscan.rs +++ b/src/learning/dbscan.rs @@ -80,13 +80,13 @@ impl UnSupModel, Vector>> for DBSCAN { self.init_params(inputs.rows()); let mut cluster = 0; - for (idx, point) in inputs.iter_rows().enumerate() { + for (idx, point) in inputs.row_iter().enumerate() { let visited = self._visited[idx]; if !visited { self._visited[idx] = true; - let neighbours = self.region_query(point, inputs); + let neighbours = self.region_query(point.raw_slice(), inputs); if neighbours.len() >= self.min_points { self.expand_cluster(inputs, idx, neighbours, cluster); @@ -108,12 +108,14 @@ impl UnSupModel, Vector>> for DBSCAN { &self.clusters) { let mut classes = Vec::with_capacity(inputs.rows()); - for input_point in inputs.iter_rows() { + for input_point in inputs.row_iter() { let mut distances = Vec::with_capacity(cluster_data.rows()); - for cluster_point in cluster_data.iter_rows() { + for cluster_point in cluster_data.row_iter() { let point_distance = - utils::vec_bin_op(input_point, cluster_point, |x, y| x - y); + utils::vec_bin_op(input_point.raw_slice(), + cluster_point.raw_slice(), + |x, y| x - y); distances.push(utils::dot(&point_distance, &point_distance).sqrt()); } @@ -182,8 +184,8 @@ impl DBSCAN { let visited = self._visited[*data_point_idx]; if !visited { self._visited[*data_point_idx] = true; - let data_point_row = unsafe { inputs.get_row_unchecked(*data_point_idx) }; - let sub_neighbours = self.region_query(data_point_row, inputs); + let data_point_row = unsafe { inputs.row_unchecked(*data_point_idx) }; + let sub_neighbours = self.region_query(data_point_row.raw_slice(), inputs); if sub_neighbours.len() >= self.min_points { self.expand_cluster(inputs, *data_point_idx, sub_neighbours, cluster); @@ -198,8 +200,8 @@ impl DBSCAN { "point must be 
of same dimension as inputs"); let mut in_neighbourhood = Vec::new(); - for (idx, data_point) in inputs.iter_rows().enumerate() { - let point_distance = utils::vec_bin_op(data_point, point, |x, y| x - y); + for (idx, data_point) in inputs.row_iter().enumerate() { + let point_distance = utils::vec_bin_op(data_point.raw_slice(), point, |x, y| x - y); let dist = utils::dot(&point_distance, &point_distance).sqrt(); if dist < self.eps { diff --git a/src/learning/gmm.rs b/src/learning/gmm.rs index 4c88e9ce..85e3e884 100644 --- a/src/learning/gmm.rs +++ b/src/learning/gmm.rs @@ -165,7 +165,7 @@ impl GaussianMixtureModel { if mixture_weights.size() != k { Err(Error::new(ErrorKind::InvalidParameters, "Mixture weights must have length k.")) } else if mixture_weights.data().iter().any(|&x| x < 0f64) { - Err(Error::new(ErrorKind::InvalidParameters, "Mixture weights must have only non-negative entries.")) + Err(Error::new(ErrorKind::InvalidParameters, "Mixture weights must have only non-negative entries.")) } else { let sum = mixture_weights.sum(); let normalized_weights = mixture_weights / sum; @@ -233,9 +233,9 @@ impl GaussianMixtureModel { CovOption::Full | CovOption::Regularized(_) => { let means = inputs.mean(Axes::Row); let mut cov_mat = Matrix::zeros(inputs.cols(), inputs.cols()); - for (j, row) in cov_mat.iter_rows_mut().enumerate() { + for (j, mut row) in cov_mat.row_iter_mut().enumerate() { for (k, elem) in row.iter_mut().enumerate() { - *elem = inputs.iter_rows().map(|r| { + *elem = inputs.row_iter().map(|r| { (r[j] - means[j]) * (r[k] - means[k]) }).sum::(); } @@ -259,10 +259,10 @@ impl GaussianMixtureModel { let mut cov_invs = Vec::with_capacity(self.comp_count); if let Some(ref covars) = self.model_covars { - for cov in covars { + for cov in covars.iter() { // TODO: combine these. We compute det to get the inverse. 
- let covar_det = cov.det(); - let covar_inv = try!(cov.inverse().map_err(Error::from)); + let covar_det = cov.clone().det(); + let covar_inv = try!(cov.clone().inverse().map_err(Error::from)); cov_sqrt_dets.push(covar_det.sqrt()); cov_invs.push(covar_inv); @@ -309,7 +309,7 @@ impl GaussianMixtureModel { let mut new_means = membership_weights.transpose() * inputs; - for (mean, w) in new_means.iter_rows_mut().zip(sum_weights.data().iter()) { + for (mut mean, w) in new_means.row_iter_mut().zip(sum_weights.data().iter()) { for m in mean.iter_mut() { *m /= *w; } diff --git a/src/learning/gp.rs b/src/learning/gp.rs index 9dc28187..32a28acd 100644 --- a/src/learning/gp.rs +++ b/src/learning/gp.rs @@ -131,9 +131,9 @@ impl GaussianProcess { let dim2 = m2.rows(); let mut ker_data = Vec::with_capacity(dim1 * dim2); - ker_data.extend(m1.iter_rows().flat_map(|row1| { - m2.iter_rows() - .map(move |row2| self.ker.kernel(row1, row2)) + ker_data.extend(m1.row_iter().flat_map(|row1| { + m2.row_iter() + .map(move |row2| self.ker.kernel(row1.raw_slice(), row2.raw_slice())) })); Ok(Matrix::new(dim1, dim2, ker_data)) @@ -195,8 +195,8 @@ impl GaussianProcess { let test_mat = try!(self.ker_mat(inputs, t_data)); let mut var_data = Vec::with_capacity(inputs.rows() * inputs.cols()); - for row in test_mat.iter_rows() { - let test_point = Vector::new(row.to_vec()); + for row in test_mat.row_iter() { + let test_point = Vector::new(row.raw_slice()); var_data.append(&mut t_mat.solve_l_triangular(test_point).unwrap().into_vec()); } diff --git a/src/learning/k_means.rs b/src/learning/k_means.rs index 54711d5d..3998fdab 100644 --- a/src/learning/k_means.rs +++ b/src/learning/k_means.rs @@ -330,7 +330,7 @@ impl Initializer for KPlusPlus { let first_cen = rng.gen_range(0usize, inputs.rows()); unsafe { - init_centroids.extend_from_slice(inputs.get_row_unchecked(first_cen)); + init_centroids.extend_from_slice(inputs.row_unchecked(first_cen).raw_slice()); } for i in 1..k { @@ -350,7 +350,7 @@ impl 
Initializer for KPlusPlus { } let next_cen = sample_discretely(dist); - init_centroids.extend_from_slice(inputs.get_row_unchecked(next_cen)); + init_centroids.extend_from_slice(inputs.row_unchecked(next_cen).raw_slice()); } } diff --git a/src/learning/naive_bayes.rs b/src/learning/naive_bayes.rs index 4fa2ebde..e2510087 100644 --- a/src/learning/naive_bayes.rs +++ b/src/learning/naive_bayes.rs @@ -152,9 +152,9 @@ impl NaiveBayes { self.class_counts = vec![0; class_count]; let mut class_data = vec![Vec::new(); class_count]; - for (idx, row) in targets.iter_rows().enumerate() { + for (idx, row) in targets.row_iter().enumerate() { // Find the class of this input - let class = try!(NaiveBayes::::find_class(row)); + let class = try!(NaiveBayes::::find_class(row.raw_slice())); // Note the class of the input class_data[class].push(idx); @@ -199,9 +199,9 @@ impl NaiveBayes { fn get_classes(log_probs: Matrix) -> Vec { let mut data_classes = Vec::with_capacity(log_probs.rows()); - data_classes.extend(log_probs.iter_rows().map(|row| { + data_classes.extend(log_probs.row_iter().map(|row| { // Argmax each class log-probability per input - let (class, _) = utils::argmax(row); + let (class, _) = utils::argmax(row.raw_slice()); class })); diff --git a/src/learning/nnet.rs b/src/learning/nnet.rs index de8e9b4a..3d926465 100644 --- a/src/learning/nnet.rs +++ b/src/learning/nnet.rs @@ -247,7 +247,7 @@ impl<'a, T: Criterion> BaseNeuralNet<'a, T> { /// Gets the weights for a layer excluding the bias weights. fn get_non_bias_weights(&self, weights: &[f64], idx: usize) -> MatrixSlice { let layer_weights = self.get_layer_weights(weights, idx); - layer_weights.reslice([1, 0], layer_weights.rows() - 1, layer_weights.cols()) + layer_weights.sub_slice([1, 0], layer_weights.rows() - 1, layer_weights.cols()) } /// Compute the gradient using the back propagation algorithm. 
diff --git a/src/learning/svm.rs b/src/learning/svm.rs index 3287db0e..6d5622a3 100644 --- a/src/learning/svm.rs +++ b/src/learning/svm.rs @@ -111,9 +111,9 @@ impl SVM { let dim2 = m2.rows(); let mut ker_data = Vec::with_capacity(dim1 * dim2); - ker_data.extend(m1.iter_rows().flat_map(|row1| { - m2.iter_rows() - .map(move |row2| self.ker.kernel(row1, row2)) + ker_data.extend(m1.row_iter().flat_map(|row1| { + m2.row_iter() + .map(move |row2| self.ker.kernel(row1.raw_slice(), row2.raw_slice())) })); Ok(Matrix::new(dim1, dim2, ker_data)) @@ -154,8 +154,8 @@ impl SupModel, Vector> for SVM { for t in 0..self.optim_iters { let i = rng.gen_range(0, n); let row_i = full_inputs.select_rows(&[i]); - let sum = full_inputs.iter_rows() - .fold(0f64, |sum, row| sum + self.ker.kernel(row_i.data(), row)) * + let sum = full_inputs.row_iter() + .fold(0f64, |sum, row| sum + self.ker.kernel(row_i.data(), row.raw_slice())) * targets[i] / (self.lambda * (t as f64)); if sum < 1f64 { diff --git a/src/learning/toolkit/kernel.rs b/src/learning/toolkit/kernel.rs index 7df3231b..2b8f5c9f 100644 --- a/src/learning/toolkit/kernel.rs +++ b/src/learning/toolkit/kernel.rs @@ -5,7 +5,7 @@ use std::ops::{Add, Mul}; use linalg::Vector; -use linalg::Metric; +use linalg::norm::Euclidean; use rulinalg::utils; /// The Kernel trait @@ -350,7 +350,7 @@ impl Kernel for Exponential { let diff = Vector::new(x1.to_vec()) - Vector::new(x2.to_vec()); - let x = -diff.norm() / (2f64 * self.ls * self.ls); + let x = -diff.norm(Euclidean) / (2f64 * self.ls * self.ls); (self.ampl * x.exp()) } } @@ -454,7 +454,7 @@ impl Kernel for Multiquadric { let diff = Vector::new(x1.to_vec()) - Vector::new(x2.to_vec()); - diff.norm().hypot(self.c) + diff.norm(Euclidean).hypot(self.c) } } diff --git a/src/learning/toolkit/regularization.rs b/src/learning/toolkit/regularization.rs index 3c9d58be..a302991d 100644 --- a/src/learning/toolkit/regularization.rs +++ b/src/learning/toolkit/regularization.rs @@ -14,8 +14,8 @@ //! 
let reg = Regularization::L1(0.5); //! ``` -use linalg::Metric; use linalg::{Matrix, MatrixSlice, BaseMatrix}; +use linalg::norm::Euclidean; use libnum::{FromPrimitive, Float}; /// Model Regularization @@ -78,7 +78,7 @@ impl Regularization { } fn l2_reg_cost(mat: &MatrixSlice, x: T) -> T { - mat.norm() * x / ((T::one() + T::one()) * FromPrimitive::from_usize(mat.rows()).unwrap()) + mat.norm(Euclidean) * x / ((T::one() + T::one()) * FromPrimitive::from_usize(mat.rows()).unwrap()) } fn l2_reg_grad(mat: &MatrixSlice, x: T) -> Matrix { @@ -90,7 +90,7 @@ impl Regularization { mod tests { use super::Regularization; use linalg::{Matrix, BaseMatrix}; - use linalg::Metric; + use linalg::norm::Euclidean; #[test] fn test_no_reg() { @@ -130,7 +130,9 @@ mod tests { #[test] fn test_l2_reg() { - let input_mat = Matrix::new(3, 4, (0..12).map(|x| x as f64 - 3f64).collect::>()); + let input_mat = matrix![-3., -2., -1., 0.; + 1., 2., 3., 4.; + 5., 6., 7., 8.]; let mat_slice = input_mat.as_slice(); let no_reg: Regularization = Regularization::L2(0.5); @@ -138,7 +140,7 @@ mod tests { let a = no_reg.reg_cost(mat_slice); let b = no_reg.reg_grad(mat_slice); - assert!((a - (input_mat.norm() / 12f64)) < 1e-18); + assert!((a - (input_mat.norm(Euclidean) / 12f64)) < 1e-18); let true_grad = &input_mat / 6f64; for eps in (b - true_grad).into_vec() { @@ -148,7 +150,9 @@ mod tests { #[test] fn test_elastic_net_reg() { - let input_mat = Matrix::new(3, 4, (0..12).map(|x| x as f64 - 3f64).collect::>()); + let input_mat = matrix![-3., -2., -1., 0.; + 1., 2., 3., 4.; + 5., 6., 7., 8.]; let mat_slice = input_mat.as_slice(); let no_reg: Regularization = Regularization::ElasticNet(0.5, 0.25); @@ -156,7 +160,7 @@ mod tests { let a = no_reg.reg_cost(mat_slice); let b = no_reg.reg_grad(mat_slice); - assert!(a - ((input_mat.norm() / 24f64) + (42f64 / 12f64)) < 1e-18); + assert!(a - ((input_mat.norm(Euclidean) / 24f64) + (42f64 / 12f64)) < 1e-18); let l1_true_grad = Matrix::new(3, 4, vec![-1., -1., -1., 1., 
1., 1., 1., 1., 1., 1., 1., 1.] diff --git a/src/lib.rs b/src/lib.rs index a822f58a..2c800f09 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -118,7 +118,7 @@ pub mod prelude; pub mod linalg { pub use rulinalg::matrix::{Axes, Matrix, MatrixSlice, MatrixSliceMut, BaseMatrix, BaseMatrixMut}; pub use rulinalg::vector::Vector; - pub use rulinalg::Metric; + pub use rulinalg::norm; } /// Module for data handling