Skip to content
This repository was archived by the owner on Jul 16, 2021. It is now read-only.
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 4 additions & 3 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ description = "A machine learning library."
repository = "https://github.com/AtheMathmo/rusty-machine"
documentation = "https://AtheMathmo.github.io/rusty-machine/"
keywords = ["machine","learning","stats","data","machine-learning"]
categories = ["science"]
readme = "README.md"
license = "MIT"

Expand All @@ -15,6 +16,6 @@ stats = []
datasets = []

[dependencies]
num = { version = "0.1.35", default-features = false }
rand = "0.3.14"
rulinalg = "0.3.7"
num = { version = "0.1.36", default-features = false }
rand = "0.3.15"
rulinalg = "0.4.2"
2 changes: 1 addition & 1 deletion benches/examples/cross_validation.rs
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ struct DummyModel {
impl SupModel<Matrix<f64>, Matrix<f64>> for DummyModel {
fn predict(&self, inputs: &Matrix<f64>) -> LearningResult<Matrix<f64>> {
let predictions: Vec<f64> = inputs
.iter_rows()
.row_iter()
.map(|row| { self.sum + sum(row.iter()) })
.collect();
Ok(Matrix::new(inputs.rows(), 1, predictions))
Expand Down
4 changes: 2 additions & 2 deletions benches/examples/k_means.rs
Original file line number Diff line number Diff line change
Expand Up @@ -20,10 +20,10 @@ fn generate_data(centroids: &Matrix<f64>, points_per_centroid: usize, noise: f64

for _ in 0..points_per_centroid {
// Generate points from each centroid
for centroid in centroids.iter_rows() {
for centroid in centroids.row_iter() {
// Generate a point randomly around the centroid
let mut point = Vec::with_capacity(centroids.cols());
for feature in centroid {
for feature in centroid.iter() {
point.push(feature + normal_rv.ind_sample(&mut rng));
}

Expand Down
4 changes: 2 additions & 2 deletions examples/k-means_generating_cluster.rs
Original file line number Diff line number Diff line change
Expand Up @@ -24,10 +24,10 @@ fn generate_data(centroids: &Matrix<f64>,

for _ in 0..points_per_centroid {
// Generate points from each centroid
for centroid in centroids.iter_rows() {
for centroid in centroids.row_iter() {
// Generate a point randomly around the centroid
let mut point = Vec::with_capacity(centroids.cols());
for feature in centroid {
for feature in centroid.iter() {
point.push(feature + normal_rv.ind_sample(&mut rng));
}

Expand Down
8 changes: 4 additions & 4 deletions examples/naive_bayes_dogs.rs
Original file line number Diff line number Diff line change
Expand Up @@ -135,16 +135,16 @@ fn main() {
// Score how well we did.
let mut hits = 0;
let unprinted_total = test_set_size.saturating_sub(10) as usize;
for (dog, prediction) in test_dogs.iter().zip(predictions.iter_rows()).take(unprinted_total) {
evaluate_prediction(&mut hits, dog, prediction);
for (dog, prediction) in test_dogs.iter().zip(predictions.row_iter()).take(unprinted_total) {
evaluate_prediction(&mut hits, dog, prediction.raw_slice());
}

if unprinted_total > 0 {
println!("...");
}

for (dog, prediction) in test_dogs.iter().zip(predictions.iter_rows()).skip(unprinted_total) {
let (actual_color, accurate) = evaluate_prediction(&mut hits, dog, prediction);
for (dog, prediction) in test_dogs.iter().zip(predictions.row_iter()).skip(unprinted_total) {
let (actual_color, accurate) = evaluate_prediction(&mut hits, dog, prediction.raw_slice());
println!("Predicted: {:?}; Actual: {:?}; Accurate? {:?}",
dog.color, actual_color, accurate);
}
Expand Down
10 changes: 6 additions & 4 deletions src/analysis/score.rs
Original file line number Diff line number Diff line change
Expand Up @@ -31,9 +31,10 @@ use learning::toolkit::cost_fn::{CostFunc, MeanSqError};
/// # Panics
///
/// - `outputs` and `targets` have different lengths
pub fn accuracy<I>(outputs: I, targets: I) -> f64
where I: ExactSizeIterator,
I::Item: PartialEq
pub fn accuracy<I1, I2, T>(outputs: I1, targets: I2) -> f64
where T: PartialEq,
I1: ExactSizeIterator + Iterator<Item=T>,
I2: ExactSizeIterator + Iterator<Item=T>
{
assert!(outputs.len() == targets.len(), "outputs and targets must have the same length");
let len = outputs.len() as f64;
Expand All @@ -46,7 +47,8 @@ pub fn accuracy<I>(outputs: I, targets: I) -> f64

/// Returns the fraction of output rows that match their corresponding target rows.
pub fn row_accuracy(outputs: &Matrix<f64>, targets: &Matrix<f64>) -> f64 {
accuracy(outputs.iter_rows(), targets.iter_rows())
accuracy(outputs.row_iter().map(|r| r.raw_slice()),
targets.row_iter().map(|r| r.raw_slice()))
}

/// Returns the precision score for 2 class classification.
Expand Down
12 changes: 6 additions & 6 deletions src/data/transforms/minmax.rs
Original file line number Diff line number Diff line change
Expand Up @@ -86,11 +86,11 @@ impl<T: Float> Transformer<Matrix<T>> for MinMaxScaler<T> {
fn fit(&mut self, inputs: &Matrix<T>) -> Result<(), Error> {
let features = inputs.cols();

// ToDo: can use min, max
// TODO: can use min, max
// https://github.com/AtheMathmo/rulinalg/pull/115
let mut input_min_max = vec![(T::max_value(), T::min_value()); features];

for row in inputs.iter_rows() {
for row in inputs.row_iter() {
for (idx, (feature, min_max)) in row.into_iter().zip(input_min_max.iter_mut()).enumerate() {
if !feature.is_finite() {
return Err(Error::new(ErrorKind::InvalidData,
Expand Down Expand Up @@ -145,12 +145,12 @@ impl<T: Float> Transformer<Matrix<T>> for MinMaxScaler<T> {
Err(Error::new(ErrorKind::InvalidData,
"Input data has different number of columns from fitted data."))
} else {
for row in inputs.iter_rows_mut() {
utils::in_place_vec_bin_op(row, scales.data(), |x, &y| {
for mut row in inputs.row_iter_mut() {
utils::in_place_vec_bin_op(row.raw_slice_mut(), scales.data(), |x, &y| {
*x = *x * y;
});

utils::in_place_vec_bin_op(row, consts.data(), |x, &y| {
utils::in_place_vec_bin_op(row.raw_slice_mut(), consts.data(), |x, &y| {
*x = *x + y;
});
}
Expand All @@ -174,7 +174,7 @@ impl<T: Float> Invertible<Matrix<T>> for MinMaxScaler<T> {
"Inputs have different feature count than transformer."));
}

for row in inputs.iter_rows_mut() {
for mut row in inputs.row_iter_mut() {
for i in 0..features {
row[i] = (row[i] - consts[i]) / scales[i];
}
Expand Down
12 changes: 6 additions & 6 deletions src/data/transforms/standardize.rs
Original file line number Diff line number Diff line change
Expand Up @@ -118,10 +118,10 @@ impl<T: Float + FromPrimitive> Transformer<Matrix<T>> for Standardizer<T> {
Err(Error::new(ErrorKind::InvalidData,
"Input data has different number of columns from fitted data."))
} else {
for row in inputs.iter_rows_mut() {
for mut row in inputs.row_iter_mut() {
// Subtract the mean
utils::in_place_vec_bin_op(row, means.data(), |x, &y| *x = *x - y);
utils::in_place_vec_bin_op(row, variances.data(), |x, &y| {
utils::in_place_vec_bin_op(row.raw_slice_mut(), means.data(), |x, &y| *x = *x - y);
utils::in_place_vec_bin_op(row.raw_slice_mut(), variances.data(), |x, &y| {
*x = (*x * self.scaled_stdev / y.sqrt()) + self.scaled_mean
});
}
Expand All @@ -143,13 +143,13 @@ impl<T: Float + FromPrimitive> Invertible<Matrix<T>> for Standardizer<T> {
"Inputs have different feature count than transformer."));
}

for row in inputs.iter_rows_mut() {
utils::in_place_vec_bin_op(row, &variances.data(), |x, &y| {
for mut row in inputs.row_iter_mut() {
utils::in_place_vec_bin_op(row.raw_slice_mut(), &variances.data(), |x, &y| {
*x = (*x - self.scaled_mean) * y.sqrt() / self.scaled_stdev
});

// Add the mean
utils::in_place_vec_bin_op(row, &means.data(), |x, &y| *x = *x + y);
utils::in_place_vec_bin_op(row.raw_slice_mut(), &means.data(), |x, &y| *x = *x + y);
}

Ok(inputs)
Expand Down
30 changes: 18 additions & 12 deletions src/learning/dbscan.rs
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@ use learning::error::{Error, ErrorKind};

use linalg::{Matrix, Vector, BaseMatrix};
use rulinalg::utils;
use rulinalg::matrix::Row;

/// DBSCAN Model
///
Expand Down Expand Up @@ -80,7 +81,7 @@ impl UnSupModel<Matrix<f64>, Vector<Option<usize>>> for DBSCAN {
self.init_params(inputs.rows());
let mut cluster = 0;

for (idx, point) in inputs.iter_rows().enumerate() {
for (idx, point) in inputs.row_iter().enumerate() {
let visited = self._visited[idx];

if !visited {
Expand Down Expand Up @@ -108,12 +109,12 @@ impl UnSupModel<Matrix<f64>, Vector<Option<usize>>> for DBSCAN {
&self.clusters) {
let mut classes = Vec::with_capacity(inputs.rows());

for input_point in inputs.iter_rows() {
for input_point in inputs.row_iter() {
let mut distances = Vec::with_capacity(cluster_data.rows());

for cluster_point in cluster_data.iter_rows() {
for cluster_point in cluster_data.row_iter() {
let point_distance =
utils::vec_bin_op(input_point, cluster_point, |x, y| x - y);
utils::vec_bin_op(input_point.raw_slice(), cluster_point.raw_slice(), |x, y| x - y);
distances.push(utils::dot(&point_distance, &point_distance).sqrt());
}

Expand Down Expand Up @@ -182,7 +183,7 @@ impl DBSCAN {
let visited = self._visited[*data_point_idx];
if !visited {
self._visited[*data_point_idx] = true;
let data_point_row = unsafe { inputs.get_row_unchecked(*data_point_idx) };
let data_point_row = unsafe { inputs.row_unchecked(*data_point_idx) };
let sub_neighbours = self.region_query(data_point_row, inputs);

if sub_neighbours.len() >= self.min_points {
Expand All @@ -193,13 +194,14 @@ impl DBSCAN {
}


fn region_query(&self, point: &[f64], inputs: &Matrix<f64>) -> Vec<usize> {
debug_assert!(point.len() == inputs.cols(),
fn region_query(&self, point: Row<f64>, inputs: &Matrix<f64>) -> Vec<usize> {
debug_assert!(point.cols() == inputs.cols(),
"point must be of same dimension as inputs");

let mut in_neighbourhood = Vec::new();
for (idx, data_point) in inputs.iter_rows().enumerate() {
let point_distance = utils::vec_bin_op(data_point, point, |x, y| x - y);
for (idx, data_point) in inputs.row_iter().enumerate() {
// TODO: Use `MatrixMetric` once rulinalg#154 is fixed.
let point_distance = utils::vec_bin_op(data_point.raw_slice(), point.raw_slice(), |x, y| x - y);
let dist = utils::dot(&point_distance, &point_distance).sqrt();

if dist < self.eps {
Expand Down Expand Up @@ -227,15 +229,17 @@ impl DBSCAN {
#[cfg(test)]
mod tests {
use super::DBSCAN;
use linalg::Matrix;
use linalg::{Matrix, BaseMatrix};

#[test]
fn test_region_query() {
let model = DBSCAN::new(1.0, 3);

let inputs = Matrix::new(3, 2, vec![1.0, 1.0, 1.1, 1.9, 3.0, 3.0]);

let neighbours = model.region_query(&[1.0, 1.0], &inputs);
let m = matrix![1.0, 1.0];
let row = m.row(0);
let neighbours = model.region_query(row, &inputs);

assert!(neighbours.len() == 2);
}
Expand All @@ -246,7 +250,9 @@ mod tests {

let inputs = Matrix::new(3, 2, vec![1.0, 1.0, 1.1, 1.9, 1.1, 1.1]);

let neighbours = model.region_query(&[1.0, 1.0], &inputs);
let m = matrix![1.0, 1.0];
let row = m.row(0);
let neighbours = model.region_query(row, &inputs);

assert!(neighbours.len() == 1);
}
Expand Down
18 changes: 9 additions & 9 deletions src/learning/gmm.rs
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@
//! ```
use linalg::{Matrix, MatrixSlice, Vector, BaseMatrix, BaseMatrixMut, Axes};
use rulinalg::utils;
use rulinalg::matrix::decomposition::{PartialPivLu};

use learning::{LearningResult, UnSupModel};
use learning::toolkit::rand_utils;
Expand Down Expand Up @@ -233,9 +234,9 @@ impl GaussianMixtureModel {
CovOption::Full | CovOption::Regularized(_) => {
let means = inputs.mean(Axes::Row);
let mut cov_mat = Matrix::zeros(inputs.cols(), inputs.cols());
for (j, row) in cov_mat.iter_rows_mut().enumerate() {
for (j, mut row) in cov_mat.row_iter_mut().enumerate() {
for (k, elem) in row.iter_mut().enumerate() {
*elem = inputs.iter_rows().map(|r| {
*elem = inputs.row_iter().map(|r| {
(r[j] - means[j]) * (r[k] - means[k])
}).sum::<f64>();
}
Expand All @@ -260,9 +261,10 @@ impl GaussianMixtureModel {

if let Some(ref covars) = self.model_covars {
for cov in covars {
// TODO: combine these. We compute det to get the inverse.
let covar_det = cov.det();
let covar_inv = try!(cov.inverse().map_err(Error::from));
let lup = PartialPivLu::decompose(cov.clone()).expect("Covariance could not be lup decomposed");
let covar_det = lup.det();
// TODO: We can probably avoid computing this explicit inverse and use a more stable solve where it is consumed.
let covar_inv = try!(lup.inverse().map_err(Error::from));

cov_sqrt_dets.push(covar_det.sqrt());
cov_invs.push(covar_inv);
Expand Down Expand Up @@ -309,10 +311,8 @@ impl GaussianMixtureModel {

let mut new_means = membership_weights.transpose() * inputs;

for (mean, w) in new_means.iter_rows_mut().zip(sum_weights.data().iter()) {
for m in mean.iter_mut() {
*m /= *w;
}
for (mut mean, w) in new_means.row_iter_mut().zip(sum_weights.data().iter()) {
*mean /= *w;
}

let mut new_covs = Vec::with_capacity(self.comp_count);
Expand Down
10 changes: 5 additions & 5 deletions src/learning/gp.rs
Original file line number Diff line number Diff line change
Expand Up @@ -131,9 +131,9 @@ impl<T: Kernel, U: MeanFunc> GaussianProcess<T, U> {
let dim2 = m2.rows();

let mut ker_data = Vec::with_capacity(dim1 * dim2);
ker_data.extend(m1.iter_rows().flat_map(|row1| {
m2.iter_rows()
.map(move |row2| self.ker.kernel(row1, row2))
ker_data.extend(m1.row_iter().flat_map(|row1| {
m2.row_iter()
.map(move |row2| self.ker.kernel(row1.raw_slice(), row2.raw_slice()))
}));

Ok(Matrix::new(dim1, dim2, ker_data))
Expand Down Expand Up @@ -195,8 +195,8 @@ impl<T: Kernel, U: MeanFunc> GaussianProcess<T, U> {

let test_mat = try!(self.ker_mat(inputs, t_data));
let mut var_data = Vec::with_capacity(inputs.rows() * inputs.cols());
for row in test_mat.iter_rows() {
let test_point = Vector::new(row.to_vec());
for row in test_mat.row_iter() {
let test_point = Vector::new(row.raw_slice());
var_data.append(&mut t_mat.solve_l_triangular(test_point).unwrap().into_vec());
}

Expand Down
4 changes: 2 additions & 2 deletions src/learning/k_means.rs
Original file line number Diff line number Diff line change
Expand Up @@ -330,7 +330,7 @@ impl Initializer for KPlusPlus {
let first_cen = rng.gen_range(0usize, inputs.rows());

unsafe {
init_centroids.extend_from_slice(inputs.get_row_unchecked(first_cen));
init_centroids.extend_from_slice(inputs.row_unchecked(first_cen).raw_slice());
}

for i in 1..k {
Expand All @@ -350,7 +350,7 @@ impl Initializer for KPlusPlus {
}

let next_cen = sample_discretely(dist);
init_centroids.extend_from_slice(inputs.get_row_unchecked(next_cen));
init_centroids.extend_from_slice(inputs.row_unchecked(next_cen).raw_slice());
}
}

Expand Down
8 changes: 4 additions & 4 deletions src/learning/naive_bayes.rs
Original file line number Diff line number Diff line change
Expand Up @@ -152,9 +152,9 @@ impl<T: Distribution> NaiveBayes<T> {
self.class_counts = vec![0; class_count];
let mut class_data = vec![Vec::new(); class_count];

for (idx, row) in targets.iter_rows().enumerate() {
for (idx, row) in targets.row_iter().enumerate() {
// Find the class of this input
let class = try!(NaiveBayes::<T>::find_class(row));
let class = try!(NaiveBayes::<T>::find_class(row.raw_slice()));

// Note the class of the input
class_data[class].push(idx);
Expand Down Expand Up @@ -199,9 +199,9 @@ impl<T: Distribution> NaiveBayes<T> {
fn get_classes(log_probs: Matrix<f64>) -> Vec<usize> {
let mut data_classes = Vec::with_capacity(log_probs.rows());

data_classes.extend(log_probs.iter_rows().map(|row| {
data_classes.extend(log_probs.row_iter().map(|row| {
// Argmax each class log-probability per input
let (class, _) = utils::argmax(row);
let (class, _) = utils::argmax(row.raw_slice());
class
}));

Expand Down
2 changes: 1 addition & 1 deletion src/learning/nnet.rs
Original file line number Diff line number Diff line change
Expand Up @@ -247,7 +247,7 @@ impl<'a, T: Criterion> BaseNeuralNet<'a, T> {
/// Gets the weights for a layer excluding the bias weights.
fn get_non_bias_weights(&self, weights: &[f64], idx: usize) -> MatrixSlice<f64> {
let layer_weights = self.get_layer_weights(weights, idx);
layer_weights.reslice([1, 0], layer_weights.rows() - 1, layer_weights.cols())
layer_weights.sub_slice([1, 0], layer_weights.rows() - 1, layer_weights.cols())
}

/// Compute the gradient using the back propagation algorithm.
Expand Down
Loading