From 7fcb56712460f161c106ee3c5b246220d4f7e765 Mon Sep 17 00:00:00 2001 From: James Lucas Date: Sun, 19 Feb 2017 09:56:59 -0500 Subject: [PATCH 1/4] Updating to rulinalg v0.4.2 --- Cargo.toml | 7 ++++--- examples/k-means_generating_cluster.rs | 4 ++-- examples/naive_bayes_dogs.rs | 8 ++++---- src/analysis/score.rs | 10 ++++++---- src/data/transforms/minmax.rs | 12 ++++++------ src/data/transforms/standardize.rs | 12 ++++++------ src/learning/dbscan.rs | 18 +++++++++--------- src/learning/gmm.rs | 18 +++++++++--------- src/learning/gp.rs | 10 +++++----- src/learning/k_means.rs | 4 ++-- src/learning/naive_bayes.rs | 8 ++++---- src/learning/nnet.rs | 2 +- src/learning/svm.rs | 10 +++++----- src/learning/toolkit/kernel.rs | 8 +++----- src/learning/toolkit/regularization.rs | 14 ++++++-------- src/lib.rs | 2 +- tests/linalg/mat.rs | 0 17 files changed, 73 insertions(+), 74 deletions(-) delete mode 100644 tests/linalg/mat.rs diff --git a/Cargo.toml b/Cargo.toml index 34979915..1cf74dab 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -7,6 +7,7 @@ description = "A machine learning library." repository = "https://github.com/AtheMathmo/rusty-machine" documentation = "https://AtheMathmo.github.io/rusty-machine/" keywords = ["machine","learning","stats","data","machine-learning"] +categories = ["science"] readme = "README.md" license = "MIT" @@ -15,6 +16,6 @@ stats = [] datasets = [] [dependencies] -num = { version = "0.1.35", default-features = false } -rand = "0.3.14" -rulinalg = "0.3.7" +num = { version = "0.1.36", default-features = false } +rand = "0.3.15" +rulinalg = "0.4.2" diff --git a/examples/k-means_generating_cluster.rs b/examples/k-means_generating_cluster.rs index 75265447..078851df 100644 --- a/examples/k-means_generating_cluster.rs +++ b/examples/k-means_generating_cluster.rs @@ -24,10 +24,10 @@ fn generate_data(centroids: &Matrix, for _ in 0..points_per_centroid { // Generate points from each centroid - for centroid in centroids.iter_rows() { + for centroid in centroids.row_iter() { // Generate a point randomly around the centroid let mut point = Vec::with_capacity(centroids.cols()); - for feature in centroid { + for feature in centroid.iter() { point.push(feature + normal_rv.ind_sample(&mut rng)); } diff --git a/examples/naive_bayes_dogs.rs b/examples/naive_bayes_dogs.rs index 2e57de54..8a4c45cc 100644 --- a/examples/naive_bayes_dogs.rs +++ b/examples/naive_bayes_dogs.rs @@ -135,16 +135,16 @@ fn main() { // Score how well we did. let mut hits = 0; let unprinted_total = test_set_size.saturating_sub(10) as usize; - for (dog, prediction) in test_dogs.iter().zip(predictions.iter_rows()).take(unprinted_total) { - evaluate_prediction(&mut hits, dog, prediction); + for (dog, prediction) in test_dogs.iter().zip(predictions.row_iter()).take(unprinted_total) { + evaluate_prediction(&mut hits, dog, prediction.raw_slice()); } if unprinted_total > 0 { println!("..."); } - for (dog, prediction) in test_dogs.iter().zip(predictions.iter_rows()).skip(unprinted_total) { - let (actual_color, accurate) = evaluate_prediction(&mut hits, dog, prediction); + for (dog, prediction) in test_dogs.iter().zip(predictions.row_iter()).skip(unprinted_total) { + let (actual_color, accurate) = evaluate_prediction(&mut hits, dog, prediction.raw_slice()); println!("Predicted: {:?}; Actual: {:?}; Accurate? {:?}", dog.color, actual_color, accurate); } diff --git a/src/analysis/score.rs b/src/analysis/score.rs index b8329b97..48c4010e 100644 --- a/src/analysis/score.rs +++ b/src/analysis/score.rs @@ -31,9 +31,10 @@ use learning::toolkit::cost_fn::{CostFunc, MeanSqError}; /// # Panics /// /// - outputs and targets have different length -pub fn accuracy(outputs: I, targets: I) -> f64 - where I: ExactSizeIterator, - I::Item: PartialEq +pub fn accuracy(outputs: I1, targets: I2) -> f64 + where T: PartialEq, + I1: ExactSizeIterator + Iterator, + I2: ExactSizeIterator + Iterator { assert!(outputs.len() == targets.len(), "outputs and targets must have the same length"); let len = outputs.len() as f64; @@ -46,7 +47,8 @@ pub fn accuracy(outputs: I, targets: I) -> f64 /// Returns the fraction of outputs rows which match their target. pub fn row_accuracy(outputs: &Matrix, targets: &Matrix) -> f64 { - accuracy(outputs.iter_rows(), targets.iter_rows()) + accuracy(outputs.row_iter().map(|r| r.raw_slice()), + targets.row_iter().map(|r| r.raw_slice())) } /// Returns the precision score for 2 class classification. diff --git a/src/data/transforms/minmax.rs b/src/data/transforms/minmax.rs index 580d0ea3..ad97660b 100644 --- a/src/data/transforms/minmax.rs +++ b/src/data/transforms/minmax.rs @@ -86,11 +86,11 @@ impl Transformer> for MinMaxScaler { fn fit(&mut self, inputs: &Matrix) -> Result<(), Error> { let features = inputs.cols(); - // ToDo: can use min, max + // TODO: can use min, max // https://github.com/AtheMathmo/rulinalg/pull/115 let mut input_min_max = vec![(T::max_value(), T::min_value()); features]; - for row in inputs.iter_rows() { + for row in inputs.row_iter() { for (idx, (feature, min_max)) in row.into_iter().zip(input_min_max.iter_mut()).enumerate() { if !feature.is_finite() { return Err(Error::new(ErrorKind::InvalidData, @@ -145,12 +145,12 @@ impl Transformer> for MinMaxScaler { Err(Error::new(ErrorKind::InvalidData, "Input data has different number of columns from fitted data.")) } else { - for row in inputs.iter_rows_mut() { - utils::in_place_vec_bin_op(row, scales.data(), |x, &y| { + for mut row in inputs.row_iter_mut() { + utils::in_place_vec_bin_op(row.raw_slice_mut(), scales.data(), |x, &y| { *x = *x * y; }); - utils::in_place_vec_bin_op(row, consts.data(), |x, &y| { + utils::in_place_vec_bin_op(row.raw_slice_mut(), consts.data(), |x, &y| { *x = *x + y; }); } @@ -174,7 +174,7 @@ impl Invertible> for MinMaxScaler { "Inputs have different feature count than transformer.")); } - for row in inputs.iter_rows_mut() { + for mut row in inputs.row_iter_mut() { for i in 0..features { row[i] = (row[i] - consts[i]) / scales[i]; } diff --git a/src/data/transforms/standardize.rs b/src/data/transforms/standardize.rs index b13fa807..8d13464a 100644 --- a/src/data/transforms/standardize.rs +++ b/src/data/transforms/standardize.rs @@ -118,10 +118,10 @@ impl Transformer> for Standardizer { Err(Error::new(ErrorKind::InvalidData, "Input data has different number of columns from fitted data.")) } else { - for row in inputs.iter_rows_mut() { + for mut row in inputs.row_iter_mut() { // Subtract the mean - utils::in_place_vec_bin_op(row, means.data(), |x, &y| *x = *x - y); - utils::in_place_vec_bin_op(row, variances.data(), |x, &y| { + utils::in_place_vec_bin_op(row.raw_slice_mut(), means.data(), |x, &y| *x = *x - y); + utils::in_place_vec_bin_op(row.raw_slice_mut(), variances.data(), |x, &y| { *x = (*x * self.scaled_stdev / y.sqrt()) + self.scaled_mean }); } @@ -143,13 +143,13 @@ impl Invertible> for Standardizer { "Inputs have different feature count than transformer.")); } - for row in inputs.iter_rows_mut() { - utils::in_place_vec_bin_op(row, &variances.data(), |x, &y| { + for mut row in inputs.row_iter_mut() { + utils::in_place_vec_bin_op(row.raw_slice_mut(), &variances.data(), |x, &y| { *x = (*x - self.scaled_mean) * y.sqrt() / self.scaled_stdev }); // Add the mean - utils::in_place_vec_bin_op(row, &means.data(), |x, &y| *x = *x + y); + utils::in_place_vec_bin_op(row.raw_slice_mut(), &means.data(), |x, &y| *x = *x + y); } Ok(inputs) diff --git a/src/learning/dbscan.rs b/src/learning/dbscan.rs index c45b8577..dc2a855f 100644 --- a/src/learning/dbscan.rs +++ b/src/learning/dbscan.rs @@ -80,13 +80,13 @@ impl UnSupModel, Vector>> for DBSCAN { self.init_params(inputs.rows()); let mut cluster = 0; - for (idx, point) in inputs.iter_rows().enumerate() { + for (idx, point) in inputs.row_iter().enumerate() { let visited = self._visited[idx]; if !visited { self._visited[idx] = true; - let neighbours = self.region_query(point, inputs); + let neighbours = self.region_query(point.raw_slice(), inputs); if neighbours.len() >= self.min_points { self.expand_cluster(inputs, idx, neighbours, cluster); @@ -108,12 +108,12 @@ impl UnSupModel, Vector>> for DBSCAN { &self.clusters) { let mut classes = Vec::with_capacity(inputs.rows()); - for input_point in inputs.iter_rows() { + for input_point in inputs.row_iter() { let mut distances = Vec::with_capacity(cluster_data.rows()); - for cluster_point in cluster_data.iter_rows() { + for cluster_point in cluster_data.row_iter() { let point_distance = - utils::vec_bin_op(input_point, cluster_point, |x, y| x - y); + utils::vec_bin_op(input_point.raw_slice(), cluster_point.raw_slice(), |x, y| x - y); distances.push(utils::dot(&point_distance, &point_distance).sqrt()); } @@ -182,8 +182,8 @@ impl DBSCAN { let visited = self._visited[*data_point_idx]; if !visited { self._visited[*data_point_idx] = true; - let data_point_row = unsafe { inputs.get_row_unchecked(*data_point_idx) }; - let sub_neighbours = self.region_query(data_point_row, inputs); + let data_point_row = unsafe { inputs.row_unchecked(*data_point_idx) }; + let sub_neighbours = self.region_query(data_point_row.raw_slice(), inputs); if sub_neighbours.len() >= self.min_points { self.expand_cluster(inputs, *data_point_idx, sub_neighbours, cluster); @@ -198,8 +198,8 @@ impl DBSCAN { "point must be of same dimension as inputs"); let mut in_neighbourhood = Vec::new(); - for (idx, data_point) in inputs.iter_rows().enumerate() { - let point_distance = utils::vec_bin_op(data_point, point, |x, y| x - y); + for (idx, data_point) in inputs.row_iter().enumerate() { + let point_distance = utils::vec_bin_op(data_point.raw_slice(), point, |x, y| x - y); let dist = utils::dot(&point_distance, &point_distance).sqrt(); if dist < self.eps { diff --git a/src/learning/gmm.rs b/src/learning/gmm.rs index 4c88e9ce..59fc4baf 100644 --- a/src/learning/gmm.rs +++ b/src/learning/gmm.rs @@ -32,6 +32,7 @@ //! ``` use linalg::{Matrix, MatrixSlice, Vector, BaseMatrix, BaseMatrixMut, Axes}; use rulinalg::utils; +use rulinalg::matrix::decomposition::{PartialPivLu}; use learning::{LearningResult, UnSupModel}; use learning::toolkit::rand_utils; @@ -233,9 +234,9 @@ impl GaussianMixtureModel { CovOption::Full | CovOption::Regularized(_) => { let means = inputs.mean(Axes::Row); let mut cov_mat = Matrix::zeros(inputs.cols(), inputs.cols()); - for (j, row) in cov_mat.iter_rows_mut().enumerate() { + for (j, mut row) in cov_mat.row_iter_mut().enumerate() { for (k, elem) in row.iter_mut().enumerate() { - *elem = inputs.iter_rows().map(|r| { + *elem = inputs.row_iter().map(|r| { (r[j] - means[j]) * (r[k] - means[k]) }).sum::(); } @@ -260,9 +261,10 @@ impl GaussianMixtureModel { if let Some(ref covars) = self.model_covars { for cov in covars { - // TODO: combine these. We compute det to get the inverse. - let covar_det = cov.det(); - let covar_inv = try!(cov.inverse().map_err(Error::from)); + let lup = PartialPivLu::decompose(cov.clone()).expect("Covariance was not invertible"); + let covar_det = lup.det(); + // TODO: We can probably remove this inverse for a more stable solve elsewhere. + let covar_inv = try!(lup.inverse().map_err(Error::from)); cov_sqrt_dets.push(covar_det.sqrt()); cov_invs.push(covar_inv); @@ -309,10 +311,8 @@ impl GaussianMixtureModel { let mut new_means = membership_weights.transpose() * inputs; - for (mean, w) in new_means.iter_rows_mut().zip(sum_weights.data().iter()) { - for m in mean.iter_mut() { - *m /= *w; - } + for (mut mean, w) in new_means.row_iter_mut().zip(sum_weights.data().iter()) { + *mean /= *w; } let mut new_covs = Vec::with_capacity(self.comp_count); diff --git a/src/learning/gp.rs b/src/learning/gp.rs index 9dc28187..828e039d 100644 --- a/src/learning/gp.rs +++ b/src/learning/gp.rs @@ -131,9 +131,9 @@ impl GaussianProcess { let dim2 = m2.rows(); let mut ker_data = Vec::with_capacity(dim1 * dim2); - ker_data.extend(m1.iter_rows().flat_map(|row1| { - m2.iter_rows() - .map(move |row2| self.ker.kernel(row1, row2)) + ker_data.extend(m1.row_iter().flat_map(|row1| { + m2.row_iter() + .map(move |row2| self.ker.kernel(row1.raw_slice(), row2.raw_slice())) })); Ok(Matrix::new(dim1, dim2, ker_data)) @@ -195,8 +195,8 @@ impl GaussianProcess { let test_mat = try!(self.ker_mat(inputs, t_data)); let mut var_data = Vec::with_capacity(inputs.rows() * inputs.cols()); - for row in test_mat.iter_rows() { - let test_point = Vector::new(row.to_vec()); + for row in test_mat.row_iter() { + let test_point = Vector::new(row.raw_slice()); var_data.append(&mut t_mat.solve_l_triangular(test_point).unwrap().into_vec()); } diff --git a/src/learning/k_means.rs b/src/learning/k_means.rs index 54711d5d..3998fdab 100644 --- a/src/learning/k_means.rs +++ b/src/learning/k_means.rs @@ -330,7 +330,7 @@ impl Initializer for KPlusPlus { let first_cen = rng.gen_range(0usize, inputs.rows()); unsafe { - init_centroids.extend_from_slice(inputs.get_row_unchecked(first_cen)); + init_centroids.extend_from_slice(inputs.row_unchecked(first_cen).raw_slice()); } for i in 1..k { @@ -350,7 +350,7 @@ impl Initializer for KPlusPlus { } let next_cen = sample_discretely(dist); - init_centroids.extend_from_slice(inputs.get_row_unchecked(next_cen)); + init_centroids.extend_from_slice(inputs.row_unchecked(next_cen).raw_slice()); } } diff --git a/src/learning/naive_bayes.rs b/src/learning/naive_bayes.rs index 4fa2ebde..e2510087 100644 --- a/src/learning/naive_bayes.rs +++ b/src/learning/naive_bayes.rs @@ -152,9 +152,9 @@ impl NaiveBayes { self.class_counts = vec![0; class_count]; let mut class_data = vec![Vec::new(); class_count]; - for (idx, row) in targets.iter_rows().enumerate() { + for (idx, row) in targets.row_iter().enumerate() { // Find the class of this input - let class = try!(NaiveBayes::::find_class(row)); + let class = try!(NaiveBayes::::find_class(row.raw_slice())); // Note the class of the input class_data[class].push(idx); @@ -199,9 +199,9 @@ impl NaiveBayes { fn get_classes(log_probs: Matrix) -> Vec { let mut data_classes = Vec::with_capacity(log_probs.rows()); - data_classes.extend(log_probs.iter_rows().map(|row| { + data_classes.extend(log_probs.row_iter().map(|row| { // Argmax each class log-probability per input - let (class, _) = utils::argmax(row); + let (class, _) = utils::argmax(row.raw_slice()); class })); diff --git a/src/learning/nnet.rs b/src/learning/nnet.rs index de8e9b4a..3d926465 100644 --- a/src/learning/nnet.rs +++ b/src/learning/nnet.rs @@ -247,7 +247,7 @@ impl<'a, T: Criterion> BaseNeuralNet<'a, T> { /// Gets the weights for a layer excluding the bias weights. fn get_non_bias_weights(&self, weights: &[f64], idx: usize) -> MatrixSlice { let layer_weights = self.get_layer_weights(weights, idx); - layer_weights.reslice([1, 0], layer_weights.rows() - 1, layer_weights.cols()) + layer_weights.sub_slice([1, 0], layer_weights.rows() - 1, layer_weights.cols()) } /// Compute the gradient using the back propagation algorithm. diff --git a/src/learning/svm.rs b/src/learning/svm.rs index 3287db0e..6d5622a3 100644 --- a/src/learning/svm.rs +++ b/src/learning/svm.rs @@ -111,9 +111,9 @@ impl SVM { let dim2 = m2.rows(); let mut ker_data = Vec::with_capacity(dim1 * dim2); - ker_data.extend(m1.iter_rows().flat_map(|row1| { - m2.iter_rows() - .map(move |row2| self.ker.kernel(row1, row2)) + ker_data.extend(m1.row_iter().flat_map(|row1| { + m2.row_iter() + .map(move |row2| self.ker.kernel(row1.raw_slice(), row2.raw_slice())) })); Ok(Matrix::new(dim1, dim2, ker_data)) @@ -154,8 +154,8 @@ impl SupModel, Vector> for SVM { for t in 0..self.optim_iters { let i = rng.gen_range(0, n); let row_i = full_inputs.select_rows(&[i]); - let sum = full_inputs.iter_rows() - .fold(0f64, |sum, row| sum + self.ker.kernel(row_i.data(), row)) * + let sum = full_inputs.row_iter() + .fold(0f64, |sum, row| sum + self.ker.kernel(row_i.data(), row.raw_slice())) * targets[i] / (self.lambda * (t as f64)); if sum < 1f64 { diff --git a/src/learning/toolkit/kernel.rs b/src/learning/toolkit/kernel.rs index 7df3231b..c13e543d 100644 --- a/src/learning/toolkit/kernel.rs +++ b/src/learning/toolkit/kernel.rs @@ -5,7 +5,7 @@ use std::ops::{Add, Mul}; use linalg::Vector; -use linalg::Metric; +use linalg::norm::{Euclidean, VectorNorm, VectorMetric}; use rulinalg::utils; /// The Kernel trait @@ -350,7 +350,7 @@ impl Kernel for Exponential { let diff = Vector::new(x1.to_vec()) - Vector::new(x2.to_vec()); - let x = -diff.norm() / (2f64 * self.ls * self.ls); + let x = -Euclidean.norm(&diff) / (2f64 * self.ls * self.ls); (self.ampl * x.exp()) } } @@ -452,9 +452,7 @@ impl Kernel for Multiquadric { fn kernel(&self, x1: &[f64], x2: &[f64]) -> f64 { assert_eq!(x1.len(), x2.len()); - let diff = Vector::new(x1.to_vec()) - Vector::new(x2.to_vec()); - - diff.norm().hypot(self.c) + Euclidean.metric(&(x1.into()), &(x2.into())).hypot(self.c) } } diff --git a/src/learning/toolkit/regularization.rs b/src/learning/toolkit/regularization.rs index 3c9d58be..395d36bd 100644 --- a/src/learning/toolkit/regularization.rs +++ b/src/learning/toolkit/regularization.rs @@ -14,7 +14,7 @@ //! let reg = Regularization::L1(0.5); //! ``` -use linalg::Metric; +use linalg::norm::{Euclidean, Lp, MatrixNorm}; use linalg::{Matrix, MatrixSlice, BaseMatrix}; use libnum::{FromPrimitive, Float}; @@ -57,9 +57,7 @@ impl Regularization { } fn l1_reg_cost(mat: &MatrixSlice, x: T) -> T { - // TODO: This won't be regularized. Need to unroll... - let l1_norm = mat.iter() - .fold(T::zero(), |acc, y| acc + y.abs()); + let l1_norm = Lp::Integer(1).norm(mat); l1_norm * x / ((T::one() + T::one()) * FromPrimitive::from_usize(mat.rows()).unwrap()) } @@ -78,7 +76,7 @@ impl Regularization { } fn l2_reg_cost(mat: &MatrixSlice, x: T) -> T { - mat.norm() * x / ((T::one() + T::one()) * FromPrimitive::from_usize(mat.rows()).unwrap()) + Euclidean.norm(mat) * x / ((T::one() + T::one()) * FromPrimitive::from_usize(mat.rows()).unwrap()) } fn l2_reg_grad(mat: &MatrixSlice, x: T) -> Matrix { @@ -90,7 +88,7 @@ impl Regularization { mod tests { use super::Regularization; use linalg::{Matrix, BaseMatrix}; - use linalg::Metric; + use linalg::norm::{Euclidean, MatrixNorm}; #[test] fn test_no_reg() { @@ -138,7 +136,7 @@ mod tests { let a = no_reg.reg_cost(mat_slice); let b = no_reg.reg_grad(mat_slice); - assert!((a - (input_mat.norm() / 12f64)) < 1e-18); + assert!((a - (Euclidean.norm(&input_mat) / 12f64)) < 1e-18); let true_grad = &input_mat / 6f64; for eps in (b - true_grad).into_vec() { @@ -156,7 +154,7 @@ mod tests { let a = no_reg.reg_cost(mat_slice); let b = no_reg.reg_grad(mat_slice); - assert!(a - ((input_mat.norm() / 24f64) + (42f64 / 12f64)) < 1e-18); + assert!(a - ((Euclidean.norm(&input_mat) / 24f64) + (42f64 / 12f64)) < 1e-18); let l1_true_grad = Matrix::new(3, 4, vec![-1., -1., -1., 1., 1., 1., 1., 1., 1., 1., 1., 1.] diff --git a/src/lib.rs b/src/lib.rs index a822f58a..2c800f09 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -118,7 +118,7 @@ pub mod prelude; pub mod linalg { pub use rulinalg::matrix::{Axes, Matrix, MatrixSlice, MatrixSliceMut, BaseMatrix, BaseMatrixMut}; pub use rulinalg::vector::Vector; - pub use rulinalg::Metric; + pub use rulinalg::norm; } /// Module for data handling diff --git a/tests/linalg/mat.rs b/tests/linalg/mat.rs deleted file mode 100644 index e69de29b..00000000 From 15c2153960a5b1bdbb1692b7f49ab659d6bf487e Mon Sep 17 00:00:00 2001 From: James Lucas Date: Sun, 19 Feb 2017 10:19:26 -0500 Subject: [PATCH 2/4] Slight tidy up for dbscan --- src/learning/dbscan.rs | 22 ++++++++++++++-------- 1 file changed, 14 insertions(+), 8 deletions(-) diff --git a/src/learning/dbscan.rs b/src/learning/dbscan.rs index dc2a855f..e7bbdee7 100644 --- a/src/learning/dbscan.rs +++ b/src/learning/dbscan.rs @@ -41,6 +41,7 @@ use learning::error::{Error, ErrorKind}; use linalg::{Matrix, Vector, BaseMatrix}; use rulinalg::utils; +use rulinalg::matrix::Row; /// DBSCAN Model /// @@ -86,7 +87,7 @@ impl UnSupModel, Vector>> for DBSCAN { if !visited { self._visited[idx] = true; - let neighbours = self.region_query(point.raw_slice(), inputs); + let neighbours = self.region_query(point, inputs); if neighbours.len() >= self.min_points { self.expand_cluster(inputs, idx, neighbours, cluster); @@ -183,7 +184,7 @@ impl DBSCAN { if !visited { self._visited[*data_point_idx] = true; let data_point_row = unsafe { inputs.row_unchecked(*data_point_idx) }; - let sub_neighbours = self.region_query(data_point_row.raw_slice(), inputs); + let sub_neighbours = self.region_query(data_point_row, inputs); if sub_neighbours.len() >= self.min_points { self.expand_cluster(inputs, *data_point_idx, sub_neighbours, cluster); @@ -193,13 +194,14 @@ impl DBSCAN { } - fn region_query(&self, point: &[f64], inputs: &Matrix) -> Vec { - debug_assert!(point.len() == inputs.cols(), + fn region_query(&self, point: Row, inputs: &Matrix) -> Vec { + debug_assert!(point.cols() == inputs.cols(), "point must be of same dimension as inputs"); let mut in_neighbourhood = Vec::new(); for (idx, data_point) in inputs.row_iter().enumerate() { - let point_distance = utils::vec_bin_op(data_point.raw_slice(), point, |x, y| x - y); + //TODO: Use `MatrixMetric` when rulinalg#154 is fixed. + let point_distance = utils::vec_bin_op(data_point.raw_slice(), point.raw_slice(), |x, y| x - y); let dist = utils::dot(&point_distance, &point_distance).sqrt(); if dist < self.eps { @@ -227,7 +229,7 @@ impl DBSCAN { #[cfg(test)] mod tests { use super::DBSCAN; - use linalg::Matrix; + use linalg::{Matrix, BaseMatrix}; #[test] fn test_region_query() { @@ -235,7 +237,9 @@ mod tests { let inputs = Matrix::new(3, 2, vec![1.0, 1.0, 1.1, 1.9, 3.0, 3.0]); - let neighbours = model.region_query(&[1.0, 1.0], &inputs); + let m = matrix![1.0, 1.0]; + let row = m.row(0); + let neighbours = model.region_query(row, &inputs); assert!(neighbours.len() == 2); } @@ -246,7 +250,9 @@ mod tests { let inputs = Matrix::new(3, 2, vec![1.0, 1.0, 1.1, 1.9, 1.1, 1.1]); - let neighbours = model.region_query(&[1.0, 1.0], &inputs); + let m = matrix![1.0, 1.0]; + let row = m.row(0); + let neighbours = model.region_query(row, &inputs); assert!(neighbours.len() == 1); } From 5e1659e3ddd62b2aa5ebab0d7cb371ddb8901dca Mon Sep 17 00:00:00 2001 From: James Lucas Date: Sun, 19 Feb 2017 10:28:31 -0500 Subject: [PATCH 3/4] Improving GMM cov decomposition error msg --- src/learning/gmm.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/learning/gmm.rs b/src/learning/gmm.rs index 59fc4baf..0bde0e8c 100644 --- a/src/learning/gmm.rs +++ b/src/learning/gmm.rs @@ -261,7 +261,7 @@ impl GaussianMixtureModel { if let Some(ref covars) = self.model_covars { for cov in covars { - let lup = PartialPivLu::decompose(cov.clone()).expect("Covariance was not invertible"); + let lup = PartialPivLu::decompose(cov.clone()).expect("Covariance could not be lup decomposed"); let covar_det = lup.det(); // TODO: We can probably remove this inverse for a more stable solve elsewhere. let covar_inv = try!(lup.inverse().map_err(Error::from)); From 3c2b6066c58edc2423f10c38b9cdc3d102dba851 Mon Sep 17 00:00:00 2001 From: James Lucas Date: Sun, 19 Feb 2017 10:32:14 -0500 Subject: [PATCH 4/4] Updating benchmarks with rulinalg0.4 --- benches/examples/cross_validation.rs | 2 +- benches/examples/k_means.rs | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/benches/examples/cross_validation.rs b/benches/examples/cross_validation.rs index d2adfa8a..e0450fb3 100644 --- a/benches/examples/cross_validation.rs +++ b/benches/examples/cross_validation.rs @@ -28,7 +28,7 @@ struct DummyModel { impl SupModel, Matrix> for DummyModel { fn predict(&self, inputs: &Matrix) -> LearningResult> { let predictions: Vec = inputs - .iter_rows() + .row_iter() .map(|row| { self.sum + sum(row.iter()) }) .collect(); Ok(Matrix::new(inputs.rows(), 1, predictions)) diff --git a/benches/examples/k_means.rs b/benches/examples/k_means.rs index 8691ec05..3791962f 100644 --- a/benches/examples/k_means.rs +++ b/benches/examples/k_means.rs @@ -20,10 +20,10 @@ fn generate_data(centroids: &Matrix, points_per_centroid: usize, noise: f64 for _ in 0..points_per_centroid { // Generate points from each centroid - for centroid in centroids.iter_rows() { + for centroid in centroids.row_iter() { // Generate a point randomly around the centroid let mut point = Vec::with_capacity(centroids.cols()); - for feature in centroid { + for feature in centroid.iter() { point.push(feature + normal_rv.ind_sample(&mut rng)); }