From eb984feeb9202b02bcd2f7b825fa2fc2ff313a73 Mon Sep 17 00:00:00 2001 From: NivenT Date: Sat, 3 Sep 2016 01:35:05 -0400 Subject: [PATCH 01/28] Implemented RMSProp --- src/learning/optim/grad_desc.rs | 130 +++++++++++++++++++++++++++++- tests/learning/optim/grad_desc.rs | 17 +++- 2 files changed, 145 insertions(+), 2 deletions(-) diff --git a/src/learning/optim/grad_desc.rs b/src/learning/optim/grad_desc.rs index 73df8fa6..26c96631 100644 --- a/src/learning/optim/grad_desc.rs +++ b/src/learning/optim/grad_desc.rs @@ -303,10 +303,121 @@ impl, Targets = Matrix>> OptimAlgorithm } } +/// RMSProp +/// +/// The RMSProp algorithm (Hinton et al. 2012). +#[derive(Debug, Clone, Copy)] +pub struct RMSProp { + /// The base step size of gradient descent steps + learning_rate: f64, + /// Factor of averaged square gradients to keep + decay_rate: f64, + /// Small value used to avoid divide by zero + epsilon: f64, + /// The number of passes through the data + iters: usize, +} + +/// The default RMSProp configuration +/// +/// The defaults are: +/// +/// - learning_rate = 0.01 +/// - decay_rate = 0.9 +/// - epsilon = 1.0e-5 +/// - iters = 50 +impl Default for RMSProp { + fn default() -> RMSProp { + RMSProp { + learning_rate: 0.01, + decay_rate: 0.9, + epsilon: 1.0e-5, + iters: 50 + } + } +} + +impl RMSProp { + /// Construct an RMSProp algorithm. + /// + /// Requires learning rate, decay rate, epsilon, and iteration count. 
+ /// + /// #Examples + /// + /// ``` + /// use rusty_machine::learning::optim::grad_desc::RMSProp; + /// + /// let rms = RMSProp::new(0.99, 0.01, 1e-5, 20); + /// ``` + pub fn new(learning_rate: f64, decay_rate: f64, epsilon: f64, iters: usize) -> RMSProp { + assert!(0f64 < learning_rate, "The learning rate must be positive"); + assert!(0f64 < decay_rate && decay_rate < 1f64, "The decay rate must be between 0 and 1"); + assert!(0f64 < epsilon, "Epsilon must be positive"); + + RMSProp { + decay_rate: decay_rate, + learning_rate: learning_rate, + epsilon: epsilon, + iters: iters + } + } +} + +impl OptimAlgorithm for RMSProp + where M: Optimizable, Targets = Matrix> { + fn optimize(&self, + model: &M, + start: &[f64], + inputs: &M::Inputs, + targets: &M::Targets) + -> Vec { + // Initial parameters + let mut params = Vector::new(start.to_vec()); + // Running average of squared gradients + let mut rmsprop_cache = Vector::zeros(start.len()); + + // Set up indices for permutation + let mut permutation = (0..inputs.rows()).collect::>(); + // The cost at the start of each iteration + let mut start_iter_cost = 0f64; + + for _ in 0..self.iters { + // The cost at end of each pass + let mut end_cost = 0f64; + // Permute the vertices + rand_utils::in_place_fisher_yates(&mut permutation); + for i in &permutation { + let (cost, grad) = model.compute_grad(params.data(), + &inputs.select_rows(&[*i]), + &targets.select_rows(&[*i])); + + let grad = Vector::new(grad); + let grad_squared = grad.clone().apply(&|x| x*x); + // Update cached average of squared gradients + rmsprop_cache = &rmsprop_cache*self.decay_rate + &grad_squared*(1.0 - self.decay_rate); + // RMSProp update rule + params = &params - (&grad*self.learning_rate).elediv( + &rmsprop_cache.clone().apply(&|x| (x + self.epsilon).sqrt())); + + end_cost += cost; + } + end_cost /= inputs.rows() as f64; + + // Early stopping + if (start_iter_cost - end_cost).abs() < LEARNING_EPS { + break; + } else { + start_iter_cost = end_cost; + } 
+ } + params.into_vec() + } +} + #[cfg(test)] mod tests { - use super::{GradientDesc, StochasticGD, AdaGrad}; + use super::{GradientDesc, StochasticGD, AdaGrad, RMSProp}; #[test] #[should_panic] @@ -338,4 +449,21 @@ mod tests { let _ = AdaGrad::new(0.5, -1f64, 0); } + #[test] + #[should_panic] + fn rmsprop_neg_decay_rate() { + let _ = RMSProp::new(-0.5, 0.005, 1.0e-5, 0); + } + + #[test] + #[should_panic] + fn rmsprop_neg_epsilon() { + let _ = RMSProp::new(0.5, 0.005, -1.0e-5, 0); + } + + #[test] + #[should_panic] + fn rmsprop_neg_learning_rate() { + let _ = RMSProp::new(0.5, -0.005, 1.0e-5, 0); + } } diff --git a/tests/learning/optim/grad_desc.rs b/tests/learning/optim/grad_desc.rs index 97342fae..f9f74303 100644 --- a/tests/learning/optim/grad_desc.rs +++ b/tests/learning/optim/grad_desc.rs @@ -1,6 +1,6 @@ use rm::learning::optim::Optimizable; use rm::learning::optim::fmincg::ConjugateGD; -use rm::learning::optim::grad_desc::{GradientDesc, StochasticGD, AdaGrad}; +use rm::learning::optim::grad_desc::{GradientDesc, StochasticGD, AdaGrad, RMSProp}; use rm::learning::optim::OptimAlgorithm; use rm::linalg::Matrix; @@ -82,4 +82,19 @@ fn convex_adagrad_training() { assert!(params[0] - 20f64 < 1e-10); assert!(x_sq.compute_grad(&params, &Matrix::zeros(1, 1), &Matrix::zeros(1, 1)).0 < 1e-10); +} + +#[test] +fn convex_rmsprop_training() { + let x_sq = XSqModel { c: 20f64 }; + + let rms = RMSProp::new(0.05, 0.9, 1e-5, 50); + let test_data = vec![100f64]; + let params = rms.optimize(&x_sq, + &test_data[..], + &Matrix::zeros(100, 1), + &Matrix::zeros(100, 1)); + + assert!(params[0] - 20f64 < 1e-10); + assert!(x_sq.compute_grad(&params, &Matrix::zeros(1, 1), &Matrix::zeros(1, 1)).0 < 1e-10); } \ No newline at end of file From 5106ed6eb4c2f30602925c657f639b65a2150789 Mon Sep 17 00:00:00 2001 From: NivenT Date: Sat, 3 Sep 2016 01:46:02 -0400 Subject: [PATCH 02/28] Made a few RMSProp comments more readable --- src/learning/optim/grad_desc.rs | 12 ++++++------ 1 file changed, 6 
insertions(+), 6 deletions(-) diff --git a/src/learning/optim/grad_desc.rs b/src/learning/optim/grad_desc.rs index 26c96631..4ec34222 100644 --- a/src/learning/optim/grad_desc.rs +++ b/src/learning/optim/grad_desc.rs @@ -310,7 +310,7 @@ impl, Targets = Matrix>> OptimAlgorithm pub struct RMSProp { /// The base step size of gradient descent steps learning_rate: f64, - /// Factor of averaged square gradients to keep + /// Rate at which running total of average square gradients decays decay_rate: f64, /// Small value used to avoid divide by zero epsilon: f64, @@ -378,8 +378,8 @@ impl OptimAlgorithm for RMSProp // Set up indices for permutation let mut permutation = (0..inputs.rows()).collect::>(); - // The cost at the start of each iteration - let mut start_iter_cost = 0f64; + // The cost from the previous iteration + let mut prev_cost = 0f64; for _ in 0..self.iters { // The cost at end of each pass @@ -397,17 +397,17 @@ impl OptimAlgorithm for RMSProp rmsprop_cache = &rmsprop_cache*self.decay_rate + &grad_squared*(1.0 - self.decay_rate); // RMSProp update rule params = &params - (&grad*self.learning_rate).elediv( - &rmsprop_cache.clone().apply(&|x| (x + self.epsilon).sqrt())); + &rmsprop_cache.clone().apply(&|x| (x + self.epsilon).sqrt())); end_cost += cost; } end_cost /= inputs.rows() as f64; // Early stopping - if (start_iter_cost - end_cost).abs() < LEARNING_EPS { + if (prev_cost - end_cost).abs() < LEARNING_EPS { break; } else { - start_iter_cost = end_cost; + prev_cost = end_cost; } } params.into_vec() From 24f9d2b09a7a1a1c10762f6b26417f3bb2a118a0 Mon Sep 17 00:00:00 2001 From: NivenT Date: Sat, 3 Sep 2016 14:01:34 -0400 Subject: [PATCH 03/28] Removed clone --- src/learning/optim/grad_desc.rs | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/src/learning/optim/grad_desc.rs b/src/learning/optim/grad_desc.rs index 4ec34222..a8c50530 100644 --- a/src/learning/optim/grad_desc.rs +++ b/src/learning/optim/grad_desc.rs @@ -391,14 +391,16 @@ impl 
OptimAlgorithm for RMSProp &inputs.select_rows(&[*i]), &targets.select_rows(&[*i])); - let grad = Vector::new(grad); + let mut grad = Vector::new(grad); let grad_squared = grad.clone().apply(&|x| x*x); // Update cached average of squared gradients rmsprop_cache = &rmsprop_cache*self.decay_rate + &grad_squared*(1.0 - self.decay_rate); // RMSProp update rule - params = &params - (&grad*self.learning_rate).elediv( - &rmsprop_cache.clone().apply(&|x| (x + self.epsilon).sqrt())); - + utils::in_place_vec_bin_op(grad.mut_data(), rmsprop_cache.data(), |x, &y| { + *x = *x * self.learning_rate / (y + self.epsilon).sqrt(); + }); + params = &params - &grad; + end_cost += cost; } end_cost /= inputs.rows() as f64; From 1e15d5a124d306863f6e86257ebfa58f87487b9e Mon Sep 17 00:00:00 2001 From: NivenT Date: Thu, 8 Sep 2016 12:41:31 -0400 Subject: [PATCH 04/28] Created NetLayer trait --- src/learning/nnet.rs | 2 + src/learning/toolkit/net_layer.rs | 105 ++++++++++++++++++++++++++++++ src/lib.rs | 1 + 3 files changed, 108 insertions(+) create mode 100644 src/learning/toolkit/net_layer.rs diff --git a/src/learning/nnet.rs b/src/learning/nnet.rs index 06a4a731..a55484c1 100644 --- a/src/learning/nnet.rs +++ b/src/learning/nnet.rs @@ -50,6 +50,8 @@ use learning::toolkit::activ_fn::ActivationFunc; use learning::toolkit::cost_fn; use learning::toolkit::cost_fn::CostFunc; use learning::toolkit::regularization::Regularization; +use learning::toolkit::net_layer; +use learning::toolkit::net_layer::NetLayer; use learning::optim::{Optimizable, OptimAlgorithm}; use learning::optim::grad_desc::StochasticGD; diff --git a/src/learning/toolkit/net_layer.rs b/src/learning/toolkit/net_layer.rs new file mode 100644 index 00000000..f6a5184d --- /dev/null +++ b/src/learning/toolkit/net_layer.rs @@ -0,0 +1,105 @@ +//!Neural Network Layers + +use linalg::{Matrix, MatrixSlice}; +use linalg::BaseSlice; + +use learning::toolkit::activ_fn::ActivationFunc; + +use rand::thread_rng; +use rand::distributions::Sample; +use 
rand::distributions::normal::Normal; + +/// Trait for neural net layers +pub trait NetLayer { + /// The result of propogating data forward through this layer + fn forward(&self, input: &Matrix, params: MatrixSlice) -> Matrix; + + /// The gradient of the output of this layer with respect to its input + fn backward(&self, out_grad: &Matrix, params: MatrixSlice) -> Matrix; + + /// The default value of the parameters of this layer before training + fn default_params(&self) -> Vec; + + /// The number of parameters used by this layer + fn num_params(&self) -> usize; + + /// The shape of the parameters used by this layer + fn param_shape(&self) -> (usize, usize); +} + +/// Linear network layer +/// +/// The parameters are a matrix of weights of size O x I +/// where O is the dimensionality of the output and I the dimensionality of the input +#[derive(Debug, Clone, Copy)] +pub struct Linear { + /// The number of dimensions of the input + input_size: usize, + /// The number of dimensions of the output + output_size: usize, +} + +impl Linear { + /// Construct a Linear layer + pub fn new(input_size: usize, output_size: usize) -> Linear { + Linear{input_size: input_size, output_size: output_size} + } +} + +impl NetLayer for Linear { + /// Computes a matrix product + /// + /// input should have dimensions N x I + /// where N is the number of samples and I is the dimensionality of the input + fn forward(&self, input: &Matrix, params: MatrixSlice) -> Matrix { + assert_eq!(input.cols(), params.rows()); + input * &params + } + + fn backward(&self, out_grad: &Matrix, params: MatrixSlice) -> Matrix { + assert_eq!(out_grad.cols(), params.cols()); + out_grad * params.into_matrix().transpose() + } + + /// Initializes weights using Xavier initialization + /// + /// weights draw from gaussian distribution with 0 mean and variance 2/(input_size+output_size) + fn default_params(&self) -> Vec { + let mut distro = Normal::new(0.0, (2.0/(self.input_size+self.output_size) as f64).sqrt()); + let mut 
rng = thread_rng(); + + (0..self.input_size*self.output_size).map(|_| distro.sample(&mut rng)) + .collect() + } + + fn num_params(&self) -> usize { + self.output_size * self.input_size + } + + fn param_shape(&self) -> (usize, usize) { + (self.input_size, self.output_size) + } +} + +impl NetLayer for T { + /// Applys the activation function to each element of the input + fn forward(&self, input: &Matrix, _: MatrixSlice) -> Matrix { + input.clone().apply(&T::func) + } + + fn backward(&self, out_grad: &Matrix, _: MatrixSlice) -> Matrix { + out_grad.clone().apply(&T::func_grad) + } + + fn default_params(&self) -> Vec { + vec![] + } + + fn num_params(&self) -> usize { + 0 + } + + fn param_shape(&self) -> (usize, usize) { + (0, 0) + } +} \ No newline at end of file diff --git a/src/lib.rs b/src/lib.rs index 5323c2e2..96cbfcf8 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -201,6 +201,7 @@ pub mod learning { pub mod cost_fn; pub mod rand_utils; pub mod regularization; + pub mod net_layer; } } From d1399998fed26807f7d0d94e6d28fa24dac47353 Mon Sep 17 00:00:00 2001 From: NivenT Date: Thu, 8 Sep 2016 12:47:13 -0400 Subject: [PATCH 05/28] Added hyperbolic tangent activation function --- src/learning/toolkit/activ_fn.rs | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/src/learning/toolkit/activ_fn.rs b/src/learning/toolkit/activ_fn.rs index 0ffe4569..8f1ef03c 100644 --- a/src/learning/toolkit/activ_fn.rs +++ b/src/learning/toolkit/activ_fn.rs @@ -79,3 +79,22 @@ impl ActivationFunc for Exp { x.ln() } } + +/// Hyperbolic tangent activation function +#[derive(Clone, Copy, Debug)] +pub struct Tanh; + +impl ActivationFunc for Tanh { + fn func(x: f64) -> f64 { + x.tanh() + } + + fn func_grad(x: f64) -> f64 { + let y = x.tanh(); + 1.0 - y*y + } + + fn func_inv(x: f64) -> f64 { + 0.5*((1.0+x)/(1.0-x)).ln() + } +} \ No newline at end of file From ff998317e8d599b5059b70a292ee3d75c2701a97 Mon Sep 17 00:00:00 2001 From: NivenT Date: Thu, 8 Sep 2016 20:46:02 -0400 
Subject: [PATCH 06/28] Update neural net implementation --- examples/nnet-and_gate.rs | 3 +- src/learning/nnet.rs | 284 +++++++++++++++++------------- src/learning/toolkit/activ_fn.rs | 4 +- src/learning/toolkit/net_layer.rs | 66 ++++++- 4 files changed, 221 insertions(+), 136 deletions(-) diff --git a/examples/nnet-and_gate.rs b/examples/nnet-and_gate.rs index bf319bf1..6d52145a 100644 --- a/examples/nnet-and_gate.rs +++ b/examples/nnet-and_gate.rs @@ -6,6 +6,7 @@ use std::vec::Vec; use rusty_machine::learning::nnet::{NeuralNet, BCECriterion}; use rusty_machine::learning::toolkit::regularization::Regularization; +use rusty_machine::learning::toolkit::activ_fn::Sigmoid; use rusty_machine::learning::optim::grad_desc::StochasticGD; use rusty_machine::linalg::Matrix; @@ -41,7 +42,7 @@ fn main() { let layers = &[2, 1]; let criterion = BCECriterion::new(Regularization::L2(0.)); - let mut model = NeuralNet::new(layers, criterion, StochasticGD::default()); + let mut model = NeuralNet::mlp(layers, criterion, StochasticGD::default(), Sigmoid); println!("Training..."); model.train(&inputs, &targets); diff --git a/src/learning/nnet.rs b/src/learning/nnet.rs index a55484c1..4029c1de 100644 --- a/src/learning/nnet.rs +++ b/src/learning/nnet.rs @@ -7,6 +7,7 @@ //! ``` //! use rusty_machine::learning::nnet::{NeuralNet, BCECriterion}; //! use rusty_machine::learning::toolkit::regularization::Regularization; +//! use rusty_machine::learning::toolkit::activ_fn::Sigmoid; //! use rusty_machine::learning::optim::grad_desc::StochasticGD; //! use rusty_machine::linalg::Matrix; //! use rusty_machine::learning::SupModel; @@ -23,7 +24,7 @@ //! let criterion = BCECriterion::new(Regularization::L2(0.1)); //! //! // We will just use the default stochastic gradient descent. -//! let mut model = NeuralNet::new(layers, criterion, StochasticGD::default()); +//! let mut model = NeuralNet::mlp(layers, criterion, StochasticGD::default(), Sigmoid); //! //! // Train the model! //! 
model.train(&inputs, &targets); @@ -58,25 +59,27 @@ use learning::optim::grad_desc::StochasticGD; use rand::thread_rng; use rand::distributions::{Sample, range}; +use std::fmt::Debug; + /// Neural Network Model /// /// The Neural Network struct specifies a Criterion and /// a gradient descent algorithm. #[derive(Debug)] -pub struct NeuralNet<'a, T, A> +pub struct NeuralNet where T: Criterion, - A: OptimAlgorithm> + A: OptimAlgorithm> { - base: BaseNeuralNet<'a, T>, + base: BaseNeuralNet, alg: A, } /// Supervised learning for the Neural Network. /// /// The model is trained using back propagation. -impl<'a, T, A> SupModel, Matrix> for NeuralNet<'a, T, A> +impl SupModel, Matrix> for NeuralNet where T: Criterion, - A: OptimAlgorithm> + A: OptimAlgorithm> { /// Predict neural network output using forward propagation. fn predict(&self, inputs: &Matrix) -> Matrix { @@ -90,7 +93,7 @@ impl<'a, T, A> SupModel, Matrix> for NeuralNet<'a, T, A> } } -impl<'a> NeuralNet<'a, BCECriterion, StochasticGD> { +impl NeuralNet { /// Creates a neural network with the specified layer sizes. /// /// The layer sizes slice should include the input, hidden layers, and output layer sizes. @@ -109,17 +112,36 @@ impl<'a> NeuralNet<'a, BCECriterion, StochasticGD> { /// ``` pub fn default(layer_sizes: &[usize]) -> NeuralNet { NeuralNet { - base: BaseNeuralNet::default(layer_sizes), + base: BaseNeuralNet::default(layer_sizes, activ_fn::Sigmoid), alg: StochasticGD::default(), } } } -impl<'a, T, A> NeuralNet<'a, T, A> +impl NeuralNet where T: Criterion, - A: OptimAlgorithm> + A: OptimAlgorithm> { - /// Create a new neural network with the specified layer sizes. 
+ /// Create a new neural network with no layers + /// + /// # Examples + /// + /// ``` + /// use rusty_machine::learning::nnet::BCECriterion; + /// use rusty_machine::learning::nnet::NeuralNet; + /// use rusty_machine::learning::optim::grad_desc::StochasticGD; + /// + /// // Create a an empty neural net + /// let mut net = NeuralNet::new(BCECriterion::default(), StochasticGD::default()); + /// ``` + pub fn new(criterion: T, alg: A) -> NeuralNet { + NeuralNet { + base: BaseNeuralNet::new(criterion), + alg: alg, + } + } + + /// Create a multilayer perceptron with the specified layer sizes. /// /// The layer sizes slice should include the input, hidden layers, and output layer sizes. /// The type of activation function must be specified. @@ -131,19 +153,44 @@ impl<'a, T, A> NeuralNet<'a, T, A> /// ``` /// use rusty_machine::learning::nnet::BCECriterion; /// use rusty_machine::learning::nnet::NeuralNet; + /// use rusty_machine::learning::toolkit::activ_fn::Sigmoid; /// use rusty_machine::learning::optim::grad_desc::StochasticGD; /// /// // Create a neural net with 4 layers, 3 neurons in each. 
/// let layers = &[3; 4]; - /// let mut net = NeuralNet::new(layers, BCECriterion::default(), StochasticGD::default()); + /// let mut net = NeuralNet::mlp(layers, BCECriterion::default(), StochasticGD::default(), Sigmoid); /// ``` - pub fn new(layer_sizes: &'a [usize], criterion: T, alg: A) -> NeuralNet<'a, T, A> { + pub fn mlp(layer_sizes: &[usize], criterion: T, alg: A, activ_fn: U) -> NeuralNet + where U: ActivationFunc + 'static { NeuralNet { - base: BaseNeuralNet::new(layer_sizes, criterion), + base: BaseNeuralNet::mlp(layer_sizes, criterion, activ_fn), alg: alg, } } + /// Adds the specified layer to the end of the network + /// + /// # Examples + /// + /// ``` + /// use rusty_machine::linalg::BaseSlice; + /// use rusty_machine::learning::nnet::BCECriterion; + /// use rusty_machine::learning::nnet::NeuralNet; + /// use rusty_machine::learning::optim::grad_desc::StochasticGD; + /// use rusty_machine::learning::toolkit::net_layer::Linear; + /// + /// // Create a new neural net + /// let mut net = NeuralNet::new(BCECriterion::default(), StochasticGD::default()); + /// + /// // Give net an input layer of size 3, hidden layer of size 4, and output layer of size 5 + /// net.add_layer(Box::new(Linear::new(3, 4))); + /// net.add_layer(Box::new(Linear::new(4, 5))); + /// ``` + pub fn add_layer<'a>(&'a mut self, layer: Box) -> &'a mut NeuralNet { + self.base.add_layer(layer); + self + } + /// Gets matrix of weights between specified layer and forward layer. /// /// # Examples @@ -156,7 +203,7 @@ impl<'a, T, A> NeuralNet<'a, T, A> /// let layers = &[3; 4]; /// let mut net = NeuralNet::default(layers); /// - /// let w = &net.get_net_weights(2); + /// let w = &net.get_net_weights(1); /// /// // We add a bias term to the weight matrix /// assert_eq!(w.rows(), 4); @@ -171,29 +218,49 @@ impl<'a, T, A> NeuralNet<'a, T, A> /// /// This struct cannot be instantianated and is used internally only. 
#[derive(Debug)] -pub struct BaseNeuralNet<'a, T: Criterion> { - layer_sizes: &'a [usize], +pub struct BaseNeuralNet { + layers: Vec>, weights: Vec, criterion: T, } -impl<'a> BaseNeuralNet<'a, BCECriterion> { +impl BaseNeuralNet { /// Creates a base neural network with the specified layer sizes. - fn default(layer_sizes: &[usize]) -> BaseNeuralNet { - BaseNeuralNet::new(layer_sizes, BCECriterion::default()) + fn default(layer_sizes: &[usize], activ_fn: U) -> BaseNeuralNet + where U: ActivationFunc + Debug + 'static { + BaseNeuralNet::mlp(layer_sizes, BCECriterion::default(), activ_fn) } } -impl<'a, T: Criterion> BaseNeuralNet<'a, T> { - /// Create a new base neural network with the specified layer sizes. - fn new(layer_sizes: &[usize], criterion: T) -> BaseNeuralNet { +impl BaseNeuralNet { + /// Create a base neural network with no layers + fn new(criterion: T) -> BaseNeuralNet { BaseNeuralNet { - layer_sizes: layer_sizes, - weights: BaseNeuralNet::::create_weights(layer_sizes), - criterion: criterion, + layers: Vec::new(), + weights: Vec::new(), + criterion: criterion + } + } + + /// Create a multilayer perceptron with the specified layer sizes. + fn mlp<'a, U>(layer_sizes: &[usize], criterion: T, activ_fn: U) -> BaseNeuralNet + where U: ActivationFunc + 'static { + let mut mlp = BaseNeuralNet::new(criterion); + for shape in layer_sizes.windows(2) { + mlp.add_layer(Box::new(net_layer::Bias)); + mlp.add_layer(Box::new(net_layer::Linear::new(shape[0]+1, shape[1]))); + mlp.add_layer(Box::new(activ_fn.clone())); } + mlp + } + + /// Adds the specified layer to the end of the network + fn add_layer<'a>(&'a mut self, layer: Box) -> &'a mut BaseNeuralNet { + self.weights.append(&mut layer.default_params()); + self.layers.push(layer); + self } /// Creates initial weights for all neurons in the network. @@ -212,31 +279,30 @@ impl<'a, T: Criterion> BaseNeuralNet<'a, T> { }).collect() } - /// Gets matrix of weights between specified layer and forward layer for the weights. 
+ /// Gets matrix of weights for the specified layer for the weights. fn get_layer_weights(&self, weights: &[f64], idx: usize) -> MatrixSlice { - debug_assert!(idx < self.layer_sizes.len() - 1); + debug_assert!(idx < self.layers.len()); // Check that the weights are the right size. let mut full_size = 0usize; - for l in 0..self.layer_sizes.len() - 1 { - full_size += (self.layer_sizes[l] + 1) * self.layer_sizes[l + 1]; + for l in &self.layers { + full_size += l.num_params(); } debug_assert_eq!(full_size, weights.len()); let mut start = 0usize; - - for l in 0..idx { - start += (self.layer_sizes[l] + 1) * self.layer_sizes[l + 1] + for l in &self.layers[..idx] { + start += l.num_params(); } + let shape = self.layers[idx].param_shape(); unsafe { MatrixSlice::from_raw_parts(weights.as_ptr().offset(start as isize), - self.layer_sizes[idx] + 1, - self.layer_sizes[idx + 1], - self.layer_sizes[idx + 1]) + shape.0, + shape.1, + shape.1) } - } /// Gets matrix of weights between specified layer and forward layer @@ -257,115 +323,83 @@ impl<'a, T: Criterion> BaseNeuralNet<'a, T> { inputs: &Matrix, targets: &Matrix) -> (f64, Vec) { - assert_eq!(inputs.cols(), self.layer_sizes[0]); - - let mut forward_weights = Vec::with_capacity(self.layer_sizes.len() - 1); - let mut activations = Vec::with_capacity(self.layer_sizes.len()); - - let net_data = Matrix::ones(inputs.rows(), 1).hcat(inputs); - - activations.push(net_data.clone()); + let mut gradients = Vec::with_capacity(weights.len()); + unsafe { + gradients.set_len(weights.len()); + } + //activations[0] is input and activations[i+1] is output of layer[i] + let mut activations = Vec::with_capacity(self.layers.len()+1); // Forward propagation - { - let mut z = net_data * self.get_layer_weights(weights, 0); - forward_weights.push(z.clone()); - - for l in 1..self.layer_sizes.len() - 1 { - let mut a = self.criterion.activate(z.clone()); - let ones = Matrix::ones(a.rows(), 1); - - a = ones.hcat(&a); - - z = &a * 
self.get_layer_weights(weights, l); - activations.push(a); - forward_weights.push(z.clone()); - } - - activations.push(self.criterion.activate(z)); + + let mut index = 0; + activations.push(inputs.clone()); + for layer in &self.layers { + let shape = layer.param_shape(); + + let slice = unsafe { + MatrixSlice::from_raw_parts(weights.as_ptr().offset(index as isize), + shape.0, + shape.1, + shape.1) + }; + + let output = layer.forward(activations.last().unwrap(), slice); + activations.push(output); + index += layer.num_params(); } + let output = &activations.last().unwrap(); - let mut deltas = Vec::with_capacity(self.layer_sizes.len() - 1); // Backward propagation - { - let z = forward_weights[self.layer_sizes.len() - 2].clone(); - let g = self.criterion.grad_activ(z); - - // Take GRAD_cost to compute this delta. - let mut delta = self.criterion - .cost_grad(&activations[self.layer_sizes.len() - 1], targets) - .elemul(&g); - - deltas.push(delta.clone()); - - for l in (1..self.layer_sizes.len() - 1).rev() { - let mut z = forward_weights[l - 1].clone(); - let ones = Matrix::ones(z.rows(), 1); - z = ones.hcat(&z); - - let g = self.criterion.grad_activ(z); - delta = (delta * Matrix::from(self.get_layer_weights(weights, l)).transpose()) - .elemul(&g); - - let non_one_rows = &(1..delta.cols()).collect::>()[..]; - delta = delta.select_cols(non_one_rows); - deltas.push(delta.clone()); - } - } - let mut gradients = Vec::with_capacity(weights.len()); + //The gradient with respect to the current layer's output + let mut out_grad = self.criterion.cost_grad(output, targets); + // at this point index == weights.len() + for (i, layer) in self.layers.iter().enumerate().rev() { + index -= layer.num_params(); + let shape = layer.param_shape(); - for (l, activ_item) in activations.iter().take(self.layer_sizes.len() - 1).enumerate() { - // Compute the gradient - let mut g = deltas[self.layer_sizes.len() - 2 - l].transpose() * activ_item; + let slice = unsafe { + 
MatrixSlice::from_raw_parts(weights.as_ptr().offset(index as isize), + shape.0, + shape.1, + shape.1) + }; - // Add the regularized gradient - if self.criterion.is_regularized() { - let layer = l; - let non_bias_weights = self.get_non_bias_weights(weights, layer); - let zeros = Matrix::zeros(1, non_bias_weights.cols()); - g += zeros.vcat(&self.criterion.reg_cost_grad(non_bias_weights)); - } + let grad_params = layer.back_params(&out_grad, &activations[i], slice); + out_grad = layer.back_input(&out_grad, &activations[i], slice); - gradients.append(&mut (g / inputs.rows() as f64).into_vec()); - } - - // Compute the cost - let mut cost = self.criterion.cost(&activations[activations.len() - 1], targets); - - // Add the regularized cost - if self.criterion.is_regularized() { - for i in 0..self.layer_sizes.len() - 1 { - cost += self.criterion.reg_cost(self.get_non_bias_weights(weights, i)); - } + gradients[index..index+layer.num_params()].copy_from_slice(&grad_params.data()); } + let cost = self.criterion.cost(output, targets); (cost, gradients) } /// Forward propagation of the model weights to get the outputs. 
fn forward_prop(&self, inputs: &Matrix) -> Matrix { - assert_eq!(inputs.cols(), self.layer_sizes[0]); - - let net_data = Matrix::ones(inputs.rows(), 1).hcat(inputs); - - let mut z = net_data * self.get_net_weights(0); - let mut a = self.criterion.activate(z.clone()); - - for l in 1..self.layer_sizes.len() - 1 { - let ones = Matrix::ones(a.rows(), 1); - a = ones.hcat(&a); - z = a * self.get_net_weights(l); - a = self.criterion.activate(z.clone()); + let mut index = 0; + let mut outputs = inputs.clone(); + for layer in &self.layers { + let shape = layer.param_shape(); + + let slice = unsafe { + MatrixSlice::from_raw_parts(self.weights.as_ptr().offset(index as isize), + shape.0, + shape.1, + shape.1) + }; + + outputs = layer.forward(&outputs, slice); + index += layer.num_params(); } - - a + outputs } } /// Compute the gradient of the Neural Network using the /// back propagation algorithm. -impl<'a, T: Criterion> Optimizable for BaseNeuralNet<'a, T> { +impl Optimizable for BaseNeuralNet { type Inputs = Matrix; type Targets = Matrix; @@ -384,7 +418,7 @@ impl<'a, T: Criterion> Optimizable for BaseNeuralNet<'a, T> { /// Specifies an activation function and a cost function. pub trait Criterion { /// The activation function for the criterion. - type ActFunc: ActivationFunc; + type ActFunc: ActivationFunc + Debug; /// The cost function for the criterion. type Cost: CostFunc>; diff --git a/src/learning/toolkit/activ_fn.rs b/src/learning/toolkit/activ_fn.rs index 8f1ef03c..e8e76fc1 100644 --- a/src/learning/toolkit/activ_fn.rs +++ b/src/learning/toolkit/activ_fn.rs @@ -8,8 +8,10 @@ //! You can also create your own custom activation Functions for use in your models. //! Just create a unit struct implementing the `ActivationFunc` trait. +use std::fmt::Debug; + /// Trait for activation functions in models. -pub trait ActivationFunc { +pub trait ActivationFunc: Clone + Debug { /// The activation function. 
fn func(x: f64) -> f64; diff --git a/src/learning/toolkit/net_layer.rs b/src/learning/toolkit/net_layer.rs index f6a5184d..8a2aaa74 100644 --- a/src/learning/toolkit/net_layer.rs +++ b/src/learning/toolkit/net_layer.rs @@ -9,13 +9,18 @@ use rand::thread_rng; use rand::distributions::Sample; use rand::distributions::normal::Normal; +use std::fmt::Debug; + /// Trait for neural net layers -pub trait NetLayer { +pub trait NetLayer : Debug { /// The result of propogating data forward through this layer fn forward(&self, input: &Matrix, params: MatrixSlice) -> Matrix; /// The gradient of the output of this layer with respect to its input - fn backward(&self, out_grad: &Matrix, params: MatrixSlice) -> Matrix; + fn back_input(&self, out_grad: &Matrix, input: &Matrix, params: MatrixSlice) -> Matrix; + + /// The gradient of the output of this layer with respect to its parameters + fn back_params(&self, out_grad: &Matrix, input: &Matrix, params: MatrixSlice) -> Matrix; /// The default value of the parameters of this layer before training fn default_params(&self) -> Vec; @@ -56,9 +61,14 @@ impl NetLayer for Linear { input * &params } - fn backward(&self, out_grad: &Matrix, params: MatrixSlice) -> Matrix { + fn back_input(&self, out_grad: &Matrix, _: &Matrix, params: MatrixSlice) -> Matrix { assert_eq!(out_grad.cols(), params.cols()); - out_grad * params.into_matrix().transpose() + out_grad * &params.into_matrix().transpose() + } + + fn back_params(&self, out_grad: &Matrix, input: &Matrix, _: MatrixSlice) -> Matrix { + assert_eq!(input.rows(), out_grad.rows()); + input.transpose() * out_grad } /// Initializes weights using Xavier initialization @@ -73,7 +83,7 @@ impl NetLayer for Linear { } fn num_params(&self) -> usize { - self.output_size * self.input_size + self.input_size * self.output_size } fn param_shape(&self) -> (usize, usize) { @@ -81,18 +91,56 @@ impl NetLayer for Linear { } } -impl NetLayer for T { +/// Bias layer +/// +/// Adds a constant 1. 
to the end of each input +/// Allows a linear to act like it has a bias term, for example +#[derive(Debug, Clone, Copy)] +pub struct Bias; + +impl NetLayer for Bias { + fn forward(&self, input: &Matrix, _: MatrixSlice) -> Matrix { + input.hcat(&Matrix::::ones(input.rows(), 1)) + } + + fn back_input(&self, out_grad: &Matrix, _: &Matrix, _: MatrixSlice) -> Matrix { + let columns: Vec<_> = (0..out_grad.cols()-1).collect(); + out_grad.select_cols(&columns) + } + + fn back_params(&self, _: &Matrix, _: &Matrix, _: MatrixSlice) -> Matrix { + Matrix::new(0, 0, Vec::new()) + } + + fn default_params(&self) -> Vec { + Vec::new() + } + + fn num_params(&self) -> usize { + 0 + } + + fn param_shape(&self) -> (usize, usize) { + (0, 0) + } +} + +impl NetLayer for T { /// Applys the activation function to each element of the input fn forward(&self, input: &Matrix, _: MatrixSlice) -> Matrix { input.clone().apply(&T::func) } - fn backward(&self, out_grad: &Matrix, _: MatrixSlice) -> Matrix { - out_grad.clone().apply(&T::func_grad) + fn back_input(&self, out_grad: &Matrix, input: &Matrix, _: MatrixSlice) -> Matrix { + out_grad.elemul(&input.clone().apply(&T::func_grad)) + } + + fn back_params(&self, _: &Matrix, _: &Matrix, _: MatrixSlice) -> Matrix { + Matrix::new(0, 0, Vec::new()) } fn default_params(&self) -> Vec { - vec![] + Vec::new() } fn num_params(&self) -> usize { From 8cbe7424271798e766c959deabec89155b5906a9 Mon Sep 17 00:00:00 2001 From: NivenT Date: Fri, 9 Sep 2016 16:13:32 -0400 Subject: [PATCH 07/28] Combined Bias and Linear layers --- src/learning/nnet.rs | 28 +++++++---- src/learning/toolkit/net_layer.rs | 84 +++++++++++++++---------------- 2 files changed, 60 insertions(+), 52 deletions(-) diff --git a/src/learning/nnet.rs b/src/learning/nnet.rs index 4029c1de..0f2ea7d6 100644 --- a/src/learning/nnet.rs +++ b/src/learning/nnet.rs @@ -183,8 +183,8 @@ impl NeuralNet /// let mut net = NeuralNet::new(BCECriterion::default(), StochasticGD::default()); /// /// // Give net an 
input layer of size 3, hidden layer of size 4, and output layer of size 5 - /// net.add_layer(Box::new(Linear::new(3, 4))); - /// net.add_layer(Box::new(Linear::new(4, 5))); + /// net.add_layer(Box::new(Linear::default(3, 4))); + /// net.add_layer(Box::new(Linear::default(4, 5))); /// ``` pub fn add_layer<'a>(&'a mut self, layer: Box) -> &'a mut NeuralNet { self.base.add_layer(layer); @@ -203,7 +203,7 @@ impl NeuralNet /// let layers = &[3; 4]; /// let mut net = NeuralNet::default(layers); /// - /// let w = &net.get_net_weights(1); + /// let w = &net.get_net_weights(2); /// /// // We add a bias term to the weight matrix /// assert_eq!(w.rows(), 4); @@ -249,8 +249,7 @@ impl BaseNeuralNet { where U: ActivationFunc + 'static { let mut mlp = BaseNeuralNet::new(criterion); for shape in layer_sizes.windows(2) { - mlp.add_layer(Box::new(net_layer::Bias)); - mlp.add_layer(Box::new(net_layer::Linear::new(shape[0]+1, shape[1]))); + mlp.add_layer(Box::new(net_layer::Linear::default(shape[0], shape[1]))); mlp.add_layer(Box::new(activ_fn.clone())); } mlp @@ -294,7 +293,7 @@ impl BaseNeuralNet { let mut start = 0usize; for l in &self.layers[..idx] { start += l.num_params(); - } + } let shape = self.layers[idx].param_shape(); unsafe { @@ -348,7 +347,7 @@ impl BaseNeuralNet { activations.push(output); index += layer.num_params(); } - let output = &activations.last().unwrap(); + let output = activations.last().unwrap(); // Backward propagation @@ -379,8 +378,19 @@ impl BaseNeuralNet { /// Forward propagation of the model weights to get the outputs. 
fn forward_prop(&self, inputs: &Matrix) -> Matrix { let mut index = 0; - let mut outputs = inputs.clone(); - for layer in &self.layers { + if self.layers.len() == 0 { + return inputs.clone(); + } + + let mut outputs = unsafe { + let shape = self.layers[0].param_shape(); + let slice = MatrixSlice::from_raw_parts(self.weights.as_ptr(), + shape.0, + shape.1, + shape.1); + self.layers[0].forward(inputs, slice) + }; + for layer in self.layers.iter().skip(1) { let shape = layer.param_shape(); let slice = unsafe { diff --git a/src/learning/toolkit/net_layer.rs b/src/learning/toolkit/net_layer.rs index 8a2aaa74..b554c112 100644 --- a/src/learning/toolkit/net_layer.rs +++ b/src/learning/toolkit/net_layer.rs @@ -34,7 +34,9 @@ pub trait NetLayer : Debug { /// Linear network layer /// -/// The parameters are a matrix of weights of size O x I +/// Represents a fully connected layer with optional bias term +/// +/// The parameters are a matrix of weights of size I x O /// where O is the dimensionality of the output and I the dimensionality of the input #[derive(Debug, Clone, Copy)] pub struct Linear { @@ -42,12 +44,27 @@ pub struct Linear { input_size: usize, /// The number of dimensions of the output output_size: usize, + /// Whether or not to include a bias term + has_bias: bool, } impl Linear { - /// Construct a Linear layer - pub fn new(input_size: usize, output_size: usize) -> Linear { - Linear{input_size: input_size, output_size: output_size} + /// Construct a new Linear layer + pub fn new(input_size: usize, output_size: usize, has_bias: bool) -> Linear { + Linear { + input_size: input_size + has_bias as usize, + output_size: output_size, + has_bias: has_bias + } + } + + /// Construct a Linear layer with a bias term + pub fn default(input_size: usize, output_size: usize) -> Linear { + Linear { + input_size: input_size + 1, + output_size: output_size, + has_bias: true + } } } @@ -57,23 +74,38 @@ impl NetLayer for Linear { /// input should have dimensions N x I /// where N 
is the number of samples and I is the dimensionality of the input fn forward(&self, input: &Matrix, params: MatrixSlice) -> Matrix { - assert_eq!(input.cols(), params.rows()); - input * ¶ms + if self.has_bias { + assert_eq!(input.cols()+1, params.rows()); + input.hcat(&Matrix::::ones(input.rows(), 1)) * ¶ms + } else { + assert_eq!(input.cols(), params.rows()); + input * ¶ms + } } fn back_input(&self, out_grad: &Matrix, _: &Matrix, params: MatrixSlice) -> Matrix { assert_eq!(out_grad.cols(), params.cols()); - out_grad * ¶ms.into_matrix().transpose() + let gradient = out_grad * ¶ms.into_matrix().transpose(); + if self.has_bias { + let columns: Vec<_> = (0..gradient.cols()-1).collect(); + gradient.select_cols(&columns) + } else { + gradient + } } fn back_params(&self, out_grad: &Matrix, input: &Matrix, _: MatrixSlice) -> Matrix { assert_eq!(input.rows(), out_grad.rows()); - input.transpose() * out_grad + if self.has_bias { + input.transpose().vcat(&Matrix::::ones(1, input.rows())) * out_grad + } else { + input.transpose() * out_grad + } } /// Initializes weights using Xavier initialization /// - /// weights draw from gaussian distribution with 0 mean and variance 2/(input_size+output_size) + /// weights drawn from gaussian distribution with 0 mean and variance 2/(input_size+output_size) fn default_params(&self) -> Vec { let mut distro = Normal::new(0.0, (2.0/(self.input_size+self.output_size) as f64).sqrt()); let mut rng = thread_rng(); @@ -91,40 +123,6 @@ impl NetLayer for Linear { } } -/// Bias layer -/// -/// Adds a constant 1. 
to the end of each input -/// Allows a linear to act like it has a bias term, for example -#[derive(Debug, Clone, Copy)] -pub struct Bias; - -impl NetLayer for Bias { - fn forward(&self, input: &Matrix, _: MatrixSlice) -> Matrix { - input.hcat(&Matrix::::ones(input.rows(), 1)) - } - - fn back_input(&self, out_grad: &Matrix, _: &Matrix, _: MatrixSlice) -> Matrix { - let columns: Vec<_> = (0..out_grad.cols()-1).collect(); - out_grad.select_cols(&columns) - } - - fn back_params(&self, _: &Matrix, _: &Matrix, _: MatrixSlice) -> Matrix { - Matrix::new(0, 0, Vec::new()) - } - - fn default_params(&self) -> Vec { - Vec::new() - } - - fn num_params(&self) -> usize { - 0 - } - - fn param_shape(&self) -> (usize, usize) { - (0, 0) - } -} - impl NetLayer for T { /// Applys the activation function to each element of the input fn forward(&self, input: &Matrix, _: MatrixSlice) -> Matrix { From e19c427aa8217e54f5ac1d93a617c2835a0d8c86 Mon Sep 17 00:00:00 2001 From: NivenT Date: Fri, 9 Sep 2016 16:31:52 -0400 Subject: [PATCH 08/28] Replace Linear::{new, default} with Linear::with{out}_bias --- src/learning/nnet.rs | 6 +++--- src/learning/toolkit/net_layer.rs | 12 ++++++------ 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/src/learning/nnet.rs b/src/learning/nnet.rs index 0f2ea7d6..a26d034f 100644 --- a/src/learning/nnet.rs +++ b/src/learning/nnet.rs @@ -183,8 +183,8 @@ impl NeuralNet /// let mut net = NeuralNet::new(BCECriterion::default(), StochasticGD::default()); /// /// // Give net an input layer of size 3, hidden layer of size 4, and output layer of size 5 - /// net.add_layer(Box::new(Linear::default(3, 4))); - /// net.add_layer(Box::new(Linear::default(4, 5))); + /// net.add_layer(Box::new(Linear::with_bias(3, 4))); + /// net.add_layer(Box::new(Linear::with_bias(4, 5))); /// ``` pub fn add_layer<'a>(&'a mut self, layer: Box) -> &'a mut NeuralNet { self.base.add_layer(layer); @@ -249,7 +249,7 @@ impl BaseNeuralNet { where U: ActivationFunc + 'static { let mut mlp 
= BaseNeuralNet::new(criterion); for shape in layer_sizes.windows(2) { - mlp.add_layer(Box::new(net_layer::Linear::default(shape[0], shape[1]))); + mlp.add_layer(Box::new(net_layer::Linear::with_bias(shape[0], shape[1]))); mlp.add_layer(Box::new(activ_fn.clone())); } mlp diff --git a/src/learning/toolkit/net_layer.rs b/src/learning/toolkit/net_layer.rs index b554c112..37957491 100644 --- a/src/learning/toolkit/net_layer.rs +++ b/src/learning/toolkit/net_layer.rs @@ -50,20 +50,20 @@ pub struct Linear { impl Linear { /// Construct a new Linear layer - pub fn new(input_size: usize, output_size: usize, has_bias: bool) -> Linear { + pub fn with_bias(input_size: usize, output_size: usize) -> Linear { Linear { - input_size: input_size + has_bias as usize, + input_size: input_size + 1, output_size: output_size, - has_bias: has_bias + has_bias: true } } /// Construct a Linear layer with a bias term - pub fn default(input_size: usize, output_size: usize) -> Linear { + pub fn without_bias(input_size: usize, output_size: usize) -> Linear { Linear { - input_size: input_size + 1, + input_size: input_size, output_size: output_size, - has_bias: true + has_bias: false } } } From 40dcdcc954fde33feaf0f826895b9d173b116fb0 Mon Sep 17 00:00:00 2001 From: NivenT Date: Wed, 21 Sep 2016 16:07:39 -0700 Subject: [PATCH 09/28] Expanded nnet module --- examples/nnet-and_gate.rs | 2 ++ src/learning/{nnet.rs => nnet/mod.rs} | 20 ++++++++++++-------- src/learning/{toolkit => nnet}/net_layer.rs | 2 +- src/lib.rs | 1 - 4 files changed, 15 insertions(+), 10 deletions(-) rename src/learning/{nnet.rs => nnet/mod.rs} (97%) rename src/learning/{toolkit => nnet}/net_layer.rs (98%) diff --git a/examples/nnet-and_gate.rs b/examples/nnet-and_gate.rs index 6d52145a..d7dd90f0 100644 --- a/examples/nnet-and_gate.rs +++ b/examples/nnet-and_gate.rs @@ -42,6 +42,8 @@ fn main() { let layers = &[2, 1]; let criterion = BCECriterion::new(Regularization::L2(0.)); + // Create a multilayer perceptron with an input layer 
of size 2 and output layer of size 1 + // Uses a Sigmoid activation function and uses Stochastic gradient descent for training let mut model = NeuralNet::mlp(layers, criterion, StochasticGD::default(), Sigmoid); println!("Training..."); diff --git a/src/learning/nnet.rs b/src/learning/nnet/mod.rs similarity index 97% rename from src/learning/nnet.rs rename to src/learning/nnet/mod.rs index a26d034f..a79efdfb 100644 --- a/src/learning/nnet.rs +++ b/src/learning/nnet/mod.rs @@ -23,7 +23,7 @@ //! // Choose the BCE criterion with L2 regularization (`lambda=0.1`). //! let criterion = BCECriterion::new(Regularization::L2(0.1)); //! -//! // We will just use the default stochastic gradient descent. +//! // We will create a multilayer perceptron and just use the default stochastic gradient descent. //! let mut model = NeuralNet::mlp(layers, criterion, StochasticGD::default(), Sigmoid); //! //! // Train the model! @@ -42,6 +42,10 @@ //! You can define your own criterion by implementing the `Criterion` //! trait with a concrete `ActivationFunc` and `CostFunc`. +//! 
TODO: Add documentation + +pub mod net_layer; + use linalg::{Matrix, MatrixSlice}; use linalg::BaseSlice; @@ -51,8 +55,6 @@ use learning::toolkit::activ_fn::ActivationFunc; use learning::toolkit::cost_fn; use learning::toolkit::cost_fn::CostFunc; use learning::toolkit::regularization::Regularization; -use learning::toolkit::net_layer; -use learning::toolkit::net_layer::NetLayer; use learning::optim::{Optimizable, OptimAlgorithm}; use learning::optim::grad_desc::StochasticGD; @@ -61,6 +63,8 @@ use rand::distributions::{Sample, range}; use std::fmt::Debug; +use self::net_layer::NetLayer; + /// Neural Network Model /// /// The Neural Network struct specifies a Criterion and @@ -176,15 +180,15 @@ impl NeuralNet /// use rusty_machine::linalg::BaseSlice; /// use rusty_machine::learning::nnet::BCECriterion; /// use rusty_machine::learning::nnet::NeuralNet; + /// use rusty_machine::learning::nnet::net_layer::Linear; /// use rusty_machine::learning::optim::grad_desc::StochasticGD; - /// use rusty_machine::learning::toolkit::net_layer::Linear; /// /// // Create a new neural net /// let mut net = NeuralNet::new(BCECriterion::default(), StochasticGD::default()); /// /// // Give net an input layer of size 3, hidden layer of size 4, and output layer of size 5 - /// net.add_layer(Box::new(Linear::with_bias(3, 4))); - /// net.add_layer(Box::new(Linear::with_bias(4, 5))); + /// net.add_layer(Box::new(Linear::new(3, 4))) + /// .add_layer(Box::new(Linear::new(4, 5))); /// ``` pub fn add_layer<'a>(&'a mut self, layer: Box) -> &'a mut NeuralNet { self.base.add_layer(layer); @@ -242,14 +246,14 @@ impl BaseNeuralNet { weights: Vec::new(), criterion: criterion } - } + } /// Create a multilayer perceptron with the specified layer sizes. 
fn mlp<'a, U>(layer_sizes: &[usize], criterion: T, activ_fn: U) -> BaseNeuralNet where U: ActivationFunc + 'static { let mut mlp = BaseNeuralNet::new(criterion); for shape in layer_sizes.windows(2) { - mlp.add_layer(Box::new(net_layer::Linear::with_bias(shape[0], shape[1]))); + mlp.add_layer(Box::new(net_layer::Linear::new(shape[0], shape[1]))); mlp.add_layer(Box::new(activ_fn.clone())); } mlp diff --git a/src/learning/toolkit/net_layer.rs b/src/learning/nnet/net_layer.rs similarity index 98% rename from src/learning/toolkit/net_layer.rs rename to src/learning/nnet/net_layer.rs index 37957491..a52db7c1 100644 --- a/src/learning/toolkit/net_layer.rs +++ b/src/learning/nnet/net_layer.rs @@ -50,7 +50,7 @@ pub struct Linear { impl Linear { /// Construct a new Linear layer - pub fn with_bias(input_size: usize, output_size: usize) -> Linear { + pub fn new(input_size: usize, output_size: usize) -> Linear { Linear { input_size: input_size + 1, output_size: output_size, diff --git a/src/lib.rs b/src/lib.rs index 96cbfcf8..5323c2e2 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -201,7 +201,6 @@ pub mod learning { pub mod cost_fn; pub mod rand_utils; pub mod regularization; - pub mod net_layer; } } From 3588dc484a40767927655384e8983278594d388e Mon Sep 17 00:00:00 2001 From: NivenT Date: Wed, 21 Sep 2016 17:42:07 -0700 Subject: [PATCH 10/28] Removed apply --- src/learning/nnet/mod.rs | 31 ++++++++++++++++++++++++------- src/learning/nnet/net_layer.rs | 7 +++++-- 2 files changed, 29 insertions(+), 9 deletions(-) diff --git a/src/learning/nnet/mod.rs b/src/learning/nnet/mod.rs index 97e9a2dd..4e1a0fea 100644 --- a/src/learning/nnet/mod.rs +++ b/src/learning/nnet/mod.rs @@ -61,6 +61,7 @@ use rand::thread_rng; use rand::distributions::{Sample, range}; use std::fmt::Debug; +use std::iter::IntoIterator; use self::net_layer::NetLayer; @@ -187,14 +188,21 @@ impl NeuralNet /// let mut net = NeuralNet::new(BCECriterion::default(), StochasticGD::default()); /// /// // Give net an input 
layer of size 3, hidden layer of size 4, and output layer of size 5 - /// net.add_layer(Box::new(Linear::new(3, 4))) - /// .add_layer(Box::new(Linear::new(4, 5))); + /// net.add(Box::new(Linear::new(3, 4))) + /// .add(Box::new(Linear::new(4, 5))); /// ``` - pub fn add_layer<'a>(&'a mut self, layer: Box) -> &'a mut NeuralNet { - self.base.add_layer(layer); + pub fn add<'a>(&'a mut self, layer: Box) -> &'a mut NeuralNet { + self.base.add(layer); self } + /// Adds multiple layers to the end of the network + fn add_layers<'a, U>(&'a mut self, layers: U) -> &'a mut NeuralNet + where U: IntoIterator> { + self.base.add_layers(layers); + self + } + /// Gets matrix of weights between specified layer and forward layer. /// /// # Examples @@ -253,19 +261,28 @@ impl BaseNeuralNet { where U: ActivationFunc + 'static { let mut mlp = BaseNeuralNet::new(criterion); for shape in layer_sizes.windows(2) { - mlp.add_layer(Box::new(net_layer::Linear::new(shape[0], shape[1]))); - mlp.add_layer(Box::new(activ_fn.clone())); + mlp.add(Box::new(net_layer::Linear::new(shape[0], shape[1]))); + mlp.add(Box::new(activ_fn.clone())); } mlp } /// Adds the specified layer to the end of the network - fn add_layer<'a>(&'a mut self, layer: Box) -> &'a mut BaseNeuralNet { + fn add<'a>(&'a mut self, layer: Box) -> &'a mut BaseNeuralNet { self.weights.append(&mut layer.default_params()); self.layers.push(layer); self } + /// Adds multiple layers to the end of the network + fn add_layers<'a, U>(&'a mut self, layers: U) -> &'a mut BaseNeuralNet + where U: IntoIterator> { + for layer in layers { + self.add(layer); + } + self + } + /// Creates initial weights for all neurons in the network. 
fn create_weights(layer_sizes: &[usize]) -> Vec { let mut between = range::Range::new(0f64, 1f64); diff --git a/src/learning/nnet/net_layer.rs b/src/learning/nnet/net_layer.rs index b5d71cc9..07b5a97c 100644 --- a/src/learning/nnet/net_layer.rs +++ b/src/learning/nnet/net_layer.rs @@ -125,11 +125,14 @@ impl NetLayer for Linear { impl NetLayer for T { /// Applys the activation function to each element of the input fn forward(&self, input: &Matrix, _: MatrixSlice) -> Matrix { - input.clone().apply(&T::func) + Matrix::new(input.rows(), input.cols(), + input.iter().map(|&x| T::func(x)).collect::>()) } fn back_input(&self, out_grad: &Matrix, input: &Matrix, _: MatrixSlice) -> Matrix { - out_grad.elemul(&input.clone().apply(&T::func_grad)) + let in_grad = Matrix::new(input.rows(), input.cols(), + input.iter().map(|&x| T::func_grad(x)).collect::>()); + out_grad.elemul(&in_grad) } fn back_params(&self, _: &Matrix, _: &Matrix, _: MatrixSlice) -> Matrix { From ac7d31a7a90179933014f71cbf4d13be1a24e49f Mon Sep 17 00:00:00 2001 From: NivenT Date: Wed, 21 Sep 2016 18:05:00 -0700 Subject: [PATCH 11/28] Removed clone from compute_grad --- src/learning/nnet/mod.rs | 22 ++++++++++++++-------- 1 file changed, 14 insertions(+), 8 deletions(-) diff --git a/src/learning/nnet/mod.rs b/src/learning/nnet/mod.rs index 4e1a0fea..ad003245 100644 --- a/src/learning/nnet/mod.rs +++ b/src/learning/nnet/mod.rs @@ -348,13 +348,12 @@ impl BaseNeuralNet { gradients.set_len(weights.len()); } //activations[0] is input and activations[i+1] is output of layer[i] - let mut activations = Vec::with_capacity(self.layers.len()+1); + let mut activations = Vec::with_capacity(self.layers.len()); // Forward propagation let mut index = 0; - activations.push(inputs.clone()); - for layer in &self.layers { + for (i, layer) in self.layers.iter().enumerate() { let shape = layer.param_shape(); let slice = unsafe { @@ -364,7 +363,12 @@ impl BaseNeuralNet { shape.1) }; - let output = 
layer.forward(activations.last().unwrap(), slice); + let output = if i == 0 { + layer.forward(inputs, slice) + } else { + layer.forward(activations.last().unwrap(), slice) + }; + activations.push(output); index += layer.num_params(); } @@ -373,7 +377,7 @@ impl BaseNeuralNet { // Backward propagation //The gradient with respect to the current layer's output - let mut out_grad = self.criterion.cost_grad(output, targets); + let mut out_grad = self.criterion.cost_grad(&output, targets); // at this point index == weights.len() for (i, layer) in self.layers.iter().enumerate().rev() { index -= layer.num_params(); @@ -386,13 +390,15 @@ impl BaseNeuralNet { shape.1) }; - let grad_params = layer.back_params(&out_grad, &activations[i], slice); - out_grad = layer.back_input(&out_grad, &activations[i], slice); + + let activation = if i == 0 {inputs} else {&activations[i-1]}; + let grad_params = layer.back_params(&out_grad, activation, slice); + out_grad = layer.back_input(&out_grad, activation, slice); gradients[index..index+layer.num_params()].copy_from_slice(&grad_params.data()); } - let cost = self.criterion.cost(output, targets); + let cost = self.criterion.cost(&output, targets); (cost, gradients) } From db35b1a81719b2e22b8676cc26fad03c2c298282 Mon Sep 17 00:00:00 2001 From: NivenT Date: Wed, 21 Sep 2016 18:41:45 -0700 Subject: [PATCH 12/28] Added add_layers example --- src/learning/nnet/mod.rs | 24 +++++++++++++++++++++++- 1 file changed, 23 insertions(+), 1 deletion(-) diff --git a/src/learning/nnet/mod.rs b/src/learning/nnet/mod.rs index ad003245..c4247403 100644 --- a/src/learning/nnet/mod.rs +++ b/src/learning/nnet/mod.rs @@ -188,6 +188,7 @@ impl NeuralNet /// let mut net = NeuralNet::new(BCECriterion::default(), StochasticGD::default()); /// /// // Give net an input layer of size 3, hidden layer of size 4, and output layer of size 5 + /// // This net will not apply any activation function to the Linear layer outputs /// net.add(Box::new(Linear::new(3, 4))) /// 
.add(Box::new(Linear::new(4, 5))); /// ``` @@ -197,7 +198,28 @@ impl NeuralNet } /// Adds multiple layers to the end of the network - fn add_layers<'a, U>(&'a mut self, layers: U) -> &'a mut NeuralNet + /// + /// # Examples + /// + /// ``` + /// use rusty_machine::linalg::BaseMatrix; + /// use rusty_machine::learning::nnet::BCECriterion; + /// use rusty_machine::learning::nnet::NeuralNet; + /// use rusty_machine::learning::nnet::net_layer::{NetLayer, Linear}; + /// use rusty_machine::learning::toolkit::activ_fn::Sigmoid; + /// use rusty_machine::learning::optim::grad_desc::StochasticGD; + /// + /// use std::clone::Clone; + /// + /// // Create a new neural net + /// let mut net = NeuralNet::new(BCECriterion::default(), StochasticGD::default()); + /// + /// let linear_sig: Vec> = vec![Box::new(Linear::new(5, 5)), Box::new(Sigmoid)]; + /// + /// // Give net a layer of size 5, followed by a Sigmoid activation function + /// net.add_layers(linear_sig); + /// ``` + pub fn add_layers<'a, U>(&'a mut self, layers: U) -> &'a mut NeuralNet where U: IntoIterator> { self.base.add_layers(layers); self From d96e99edbb859fe74e770c948b1a1e189f6b2256 Mon Sep 17 00:00:00 2001 From: NivenT Date: Sat, 24 Sep 2016 12:39:46 -0700 Subject: [PATCH 13/28] improved network performance --- benches/examples/k_means.rs | 4 +-- benches/examples/nnet.rs | 11 ++++--- benches/examples/svm.rs | 6 ++-- src/learning/nnet/mod.rs | 57 +++++++--------------------------- src/learning/nnet/net_layer.rs | 36 ++++++++++----------- 5 files changed, 41 insertions(+), 73 deletions(-) diff --git a/benches/examples/k_means.rs b/benches/examples/k_means.rs index 05dcda93..8a55698c 100644 --- a/benches/examples/k_means.rs +++ b/benches/examples/k_means.rs @@ -49,7 +49,7 @@ fn k_means_train(b: &mut Bencher) { b.iter(|| { let mut model = black_box(KMeansClassifier::new(2)); - model.train(&samples); + let _ = model.train(&samples); }); } @@ -64,7 +64,7 @@ fn k_means_predict(b: &mut Bencher) { let samples = 
generate_data(¢roids, SAMPLES_PER_CENTROID, 0.4); let mut model = KMeansClassifier::new(2); - model.train(&samples); + let _ = model.train(&samples); b.iter(|| { let _ = black_box(model.centroids().as_ref().unwrap()); let _ = black_box(model.predict(&samples)); diff --git a/benches/examples/nnet.rs b/benches/examples/nnet.rs index cc567339..faef9892 100644 --- a/benches/examples/nnet.rs +++ b/benches/examples/nnet.rs @@ -5,6 +5,7 @@ use std::vec::Vec; use rusty_machine::learning::nnet::{NeuralNet, BCECriterion}; use rusty_machine::learning::toolkit::regularization::Regularization; +use rusty_machine::learning::toolkit::activ_fn::Sigmoid; use rusty_machine::learning::optim::grad_desc::StochasticGD; use rusty_machine::linalg::Matrix; @@ -51,8 +52,8 @@ fn nnet_and_gate_train(b: &mut Bencher) { let criterion = BCECriterion::new(Regularization::L2(0.)); b.iter(|| { - let mut model = black_box(NeuralNet::new(layers, criterion, StochasticGD::default())); - model.train(&inputs, &targets); + let mut model = black_box(NeuralNet::mlp(layers, criterion, StochasticGD::default(), Sigmoid)); + let _ = model.train(&inputs, &targets); }) } @@ -62,10 +63,10 @@ fn nnet_and_gate_predict(b: &mut Bencher) { let layers = &[2, 1]; let criterion = BCECriterion::new(Regularization::L2(0.)); - let mut model = NeuralNet::new(layers, criterion, StochasticGD::default()); - model.train(&inputs, &targets); + let mut model = NeuralNet::mlp(layers, criterion, StochasticGD::default(), Sigmoid); + let _ = model.train(&inputs, &targets); b.iter(|| { - model.predict(&test_inputs); + let _ = model.predict(&test_inputs); }) } diff --git a/benches/examples/svm.rs b/benches/examples/svm.rs index 1f69eaf4..d6e6e8db 100644 --- a/benches/examples/svm.rs +++ b/benches/examples/svm.rs @@ -35,7 +35,7 @@ fn svm_sign_learner_train(b: &mut Bencher) { // Trainee b.iter(|| { let mut svm_mod = black_box(SVM::new(HyperTan::new(100., 0.), 0.3)); - svm_mod.train(&inputs, &targets); + let _ = svm_mod.train(&inputs, 
&targets); }); } @@ -46,8 +46,8 @@ fn svm_sign_learner_predict(b: &mut Bencher) { let test_data = (-1000..1000).filter(|&x| x % 100 == 0).map(|x| x as f64).collect::>(); let test_inputs = Matrix::new(test_data.len(), 1, test_data); let mut svm_mod = SVM::new(HyperTan::new(100., 0.), 0.3); - svm_mod.train(&inputs, &targets); + let _ = svm_mod.train(&inputs, &targets); b.iter(|| { - svm_mod.predict(&test_inputs); + let _ = svm_mod.predict(&test_inputs); }); } \ No newline at end of file diff --git a/src/learning/nnet/mod.rs b/src/learning/nnet/mod.rs index c4247403..0696a0c7 100644 --- a/src/learning/nnet/mod.rs +++ b/src/learning/nnet/mod.rs @@ -45,10 +45,9 @@ pub mod net_layer; -use linalg::{Matrix, MatrixSlice, BaseMatrix, BaseMatrixMut}; +use linalg::{Matrix, MatrixSlice, BaseMatrixMut}; use learning::{LearningResult, SupModel}; -use learning::error::{Error, ErrorKind}; use learning::toolkit::activ_fn; use learning::toolkit::activ_fn::ActivationFunc; use learning::toolkit::cost_fn; @@ -57,9 +56,6 @@ use learning::toolkit::regularization::Regularization; use learning::optim::{Optimizable, OptimAlgorithm}; use learning::optim::grad_desc::StochasticGD; -use rand::thread_rng; -use rand::distributions::{Sample, range}; - use std::fmt::Debug; use std::iter::IntoIterator; @@ -305,22 +301,6 @@ impl BaseNeuralNet { self } - /// Creates initial weights for all neurons in the network. - fn create_weights(layer_sizes: &[usize]) -> Vec { - let mut between = range::Range::new(0f64, 1f64); - let mut rng = thread_rng(); - layer_sizes.windows(2) - .flat_map(|w| { - let l_in = w[0] + 1; - let l_out = w[1]; - let eps_init = (6f64 / (l_in + l_out) as f64).sqrt(); - (0..l_in * l_out) - .map(|_i| (between.sample(&mut rng) * 2f64 * eps_init) - eps_init) - .collect::>() - }) - .collect() - } - /// Gets matrix of weights for the specified layer for the weights. 
fn get_layer_weights(&self, weights: &[f64], idx: usize) -> MatrixSlice { debug_assert!(idx < self.layers.len()); @@ -347,18 +327,6 @@ impl BaseNeuralNet { } } - /// Gets matrix of weights between specified layer and forward layer - /// for the base model. - fn get_net_weights(&self, idx: usize) -> MatrixSlice { - self.get_layer_weights(&self.weights[..], idx) - } - - /// Gets the weights for a layer excluding the bias weights. - fn get_non_bias_weights(&self, weights: &[f64], idx: usize) -> MatrixSlice { - let layer_weights = self.get_layer_weights(weights, idx); - layer_weights.reslice([1, 0], layer_weights.rows() - 1, layer_weights.cols()) - } - /// Compute the gradient using the back propagation algorithm. fn compute_grad(&self, weights: &[f64], @@ -426,31 +394,30 @@ impl BaseNeuralNet { /// Forward propagation of the model weights to get the outputs. fn forward_prop(&self, inputs: &Matrix) -> LearningResult> { - let mut index = 0; if self.layers.len() == 0 { return Ok(inputs.clone()); } + let mut ptr = self.weights.as_ptr(); let mut outputs = unsafe { let shape = self.layers[0].param_shape(); - let slice = MatrixSlice::from_raw_parts(self.weights.as_ptr(), + let slice = MatrixSlice::from_raw_parts(ptr, shape.0, shape.1, shape.1); + ptr = ptr.offset(self.layers[0].num_params() as isize); self.layers[0].forward(inputs, slice) }; for layer in self.layers.iter().skip(1) { let shape = layer.param_shape(); - - let slice = unsafe { - MatrixSlice::from_raw_parts(self.weights.as_ptr().offset(index as isize), - shape.0, - shape.1, - shape.1) - }; - - outputs = layer.forward(&outputs, slice); - index += layer.num_params(); + unsafe { + let slice = MatrixSlice::from_raw_parts(ptr, + shape.0, + shape.1, + shape.1); + outputs = layer.forward(&outputs, slice); + ptr = ptr.offset(layer.num_params() as isize); + } } Ok(outputs) } diff --git a/src/learning/nnet/net_layer.rs b/src/learning/nnet/net_layer.rs index 07b5a97c..5b1777f4 100644 --- a/src/learning/nnet/net_layer.rs +++ 
b/src/learning/nnet/net_layer.rs @@ -24,11 +24,14 @@ pub trait NetLayer : Debug { /// The default value of the parameters of this layer before training fn default_params(&self) -> Vec; - /// The number of parameters used by this layer - fn num_params(&self) -> usize; - /// The shape of the parameters used by this layer fn param_shape(&self) -> (usize, usize); + + /// The number of parameters used by this layer + fn num_params(&self) -> usize { + let shape = self.param_shape(); + shape.0 * shape.1 + } } /// Linear network layer @@ -84,19 +87,23 @@ impl NetLayer for Linear { fn back_input(&self, out_grad: &Matrix, _: &Matrix, params: MatrixSlice) -> Matrix { assert_eq!(out_grad.cols(), params.cols()); - let gradient = out_grad * ¶ms.into_matrix().transpose(); + //let gradient = out_grad * ¶ms.into_matrix().transpose(); if self.has_bias { - let columns: Vec<_> = (0..gradient.cols()-1).collect(); - gradient.select_cols(&columns) + //let columns: Vec<_> = (0..gradient.cols()-1).collect(); + //gradient.select_cols(&columns) + let rows: Vec<_> = (0..params.cols()-1).collect(); + out_grad * ¶ms.into_matrix().select_rows(&rows).transpose() } else { - gradient + //gradient + out_grad * ¶ms.into_matrix().transpose() } } fn back_params(&self, out_grad: &Matrix, input: &Matrix, _: MatrixSlice) -> Matrix { assert_eq!(input.rows(), out_grad.rows()); if self.has_bias { - input.transpose().vcat(&Matrix::::ones(1, input.rows())) * out_grad + //input.transpose().vcat(&Matrix::::ones(1, input.rows())) * out_grad + input.hcat(&Matrix::::ones(input.rows(), 1)).transpose() * out_grad } else { input.transpose() * out_grad } @@ -113,10 +120,6 @@ impl NetLayer for Linear { .collect() } - fn num_params(&self) -> usize { - self.input_size * self.output_size - } - fn param_shape(&self) -> (usize, usize) { (self.input_size, self.output_size) } @@ -125,8 +128,9 @@ impl NetLayer for Linear { impl NetLayer for T { /// Applys the activation function to each element of the input fn forward(&self, 
input: &Matrix, _: MatrixSlice) -> Matrix { - Matrix::new(input.rows(), input.cols(), - input.iter().map(|&x| T::func(x)).collect::>()) + //Matrix::new(input.rows(), input.cols(), + // input.iter().map(|&x| T::func(x)).collect::>()); + input.clone().apply(&T::func) } fn back_input(&self, out_grad: &Matrix, input: &Matrix, _: MatrixSlice) -> Matrix { @@ -143,10 +147,6 @@ impl NetLayer for T { Vec::new() } - fn num_params(&self) -> usize { - 0 - } - fn param_shape(&self) -> (usize, usize) { (0, 0) } From 59b280b0c3dff394aadd3528e6c0f57f683f31fd Mon Sep 17 00:00:00 2001 From: NivenT Date: Sun, 25 Sep 2016 16:18:04 -0700 Subject: [PATCH 14/28] Fixed doc test error --- src/learning/nnet/net_layer.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/learning/nnet/net_layer.rs b/src/learning/nnet/net_layer.rs index 5b1777f4..6948bb8d 100644 --- a/src/learning/nnet/net_layer.rs +++ b/src/learning/nnet/net_layer.rs @@ -91,7 +91,7 @@ impl NetLayer for Linear { if self.has_bias { //let columns: Vec<_> = (0..gradient.cols()-1).collect(); //gradient.select_cols(&columns) - let rows: Vec<_> = (0..params.cols()-1).collect(); + let rows: Vec<_> = (0..params.rows()-1).collect(); out_grad * ¶ms.into_matrix().select_rows(&rows).transpose() } else { //gradient From 38bd334af19e5aef245f89c657b81c5d88ca85b4 Mon Sep 17 00:00:00 2001 From: NivenT Date: Mon, 26 Sep 2016 16:23:36 -0700 Subject: [PATCH 15/28] backprop uses regularization --- src/learning/nnet/mod.rs | 37 +++++++++++++++++++--------------- src/learning/nnet/net_layer.rs | 6 +++--- 2 files changed, 24 insertions(+), 19 deletions(-) diff --git a/src/learning/nnet/mod.rs b/src/learning/nnet/mod.rs index 0696a0c7..e3deed91 100644 --- a/src/learning/nnet/mod.rs +++ b/src/learning/nnet/mod.rs @@ -46,6 +46,7 @@ pub mod net_layer; use linalg::{Matrix, MatrixSlice, BaseMatrixMut}; +use rulinalg::utils; use learning::{LearningResult, SupModel}; use learning::toolkit::activ_fn; @@ -57,7 +58,6 @@ use 
learning::optim::{Optimizable, OptimAlgorithm}; use learning::optim::grad_desc::StochasticGD; use std::fmt::Debug; -use std::iter::IntoIterator; use self::net_layer::NetLayer; @@ -337,8 +337,10 @@ impl BaseNeuralNet { unsafe { gradients.set_len(weights.len()); } - //activations[0] is input and activations[i+1] is output of layer[i] + // activations[i] is output of layer[i] let mut activations = Vec::with_capacity(self.layers.len()); + // params[i] is the weights for layer[i] + let mut params = Vec::with_capacity(self.layers.len()); // Forward propagation @@ -360,35 +362,38 @@ impl BaseNeuralNet { }; activations.push(output); + params.push(slice); index += layer.num_params(); } let output = activations.last().unwrap(); // Backward propagation - //The gradient with respect to the current layer's output + // The gradient with respect to the current layer's output let mut out_grad = self.criterion.cost_grad(&output, targets); // at this point index == weights.len() for (i, layer) in self.layers.iter().enumerate().rev() { - index -= layer.num_params(); - let shape = layer.param_shape(); - - let slice = unsafe { - MatrixSlice::from_raw_parts(weights.as_ptr().offset(index as isize), - shape.0, - shape.1, - shape.1) - }; + let activation = if i == 0 {inputs} else {&activations[i-1]}; + let mut grad_params = layer.back_params(&out_grad, activation, params[i]); + if self.criterion.is_regularized() { + utils::in_place_vec_bin_op(grad_params.mut_data(), self.criterion.reg_cost_grad(params[i]).data(), |x, &y| { + *x = *x + y + }); + } - let activation = if i == 0 {inputs} else {&activations[i-1]}; - let grad_params = layer.back_params(&out_grad, activation, slice); - out_grad = layer.back_input(&out_grad, activation, slice); + out_grad = layer.back_input(&out_grad, activation, params[i]); + index -= layer.num_params(); gradients[index..index+layer.num_params()].copy_from_slice(&grad_params.data()); } - let cost = self.criterion.cost(&output, targets); + let mut cost = 
self.criterion.cost(&output, targets); + if self.criterion.is_regularized() { + for i in 0..self.layers.len() { + cost += self.criterion.reg_cost(params[i]); + } + } (cost, gradients) } diff --git a/src/learning/nnet/net_layer.rs b/src/learning/nnet/net_layer.rs index 6948bb8d..9979011b 100644 --- a/src/learning/nnet/net_layer.rs +++ b/src/learning/nnet/net_layer.rs @@ -77,16 +77,16 @@ impl NetLayer for Linear { /// where N is the number of samples and I is the dimensionality of the input fn forward(&self, input: &Matrix, params: MatrixSlice) -> Matrix { if self.has_bias { - assert_eq!(input.cols()+1, params.rows()); + debug_assert_eq!(input.cols()+1, params.rows()); input.hcat(&Matrix::::ones(input.rows(), 1)) * ¶ms } else { - assert_eq!(input.cols(), params.rows()); + debug_assert_eq!(input.cols(), params.rows()); input * ¶ms } } fn back_input(&self, out_grad: &Matrix, _: &Matrix, params: MatrixSlice) -> Matrix { - assert_eq!(out_grad.cols(), params.cols()); + debug_assert_eq!(out_grad.cols(), params.cols()); //let gradient = out_grad * ¶ms.into_matrix().transpose(); if self.has_bias { //let columns: Vec<_> = (0..gradient.cols()-1).collect(); From 90e6a8c83b080065ba433ff03520183916da1ac8 Mon Sep 17 00:00:00 2001 From: NivenT Date: Tue, 4 Oct 2016 11:16:33 -0700 Subject: [PATCH 16/28] Added error handling to neural nets --- src/learning/nnet/mod.rs | 21 +++++++++++++++------ src/learning/nnet/net_layer.rs | 28 ++++++++++++++++++---------- 2 files changed, 33 insertions(+), 16 deletions(-) diff --git a/src/learning/nnet/mod.rs b/src/learning/nnet/mod.rs index e3deed91..3f4600c1 100644 --- a/src/learning/nnet/mod.rs +++ b/src/learning/nnet/mod.rs @@ -49,6 +49,7 @@ use linalg::{Matrix, MatrixSlice, BaseMatrixMut}; use rulinalg::utils; use learning::{LearningResult, SupModel}; +use learning::error::{Error, ErrorKind}; use learning::toolkit::activ_fn; use learning::toolkit::activ_fn::ActivationFunc; use learning::toolkit::cost_fn; @@ -277,7 +278,11 @@ impl 
BaseNeuralNet { /// Create a multilayer perceptron with the specified layer sizes. fn mlp<'a, U>(layer_sizes: &[usize], criterion: T, activ_fn: U) -> BaseNeuralNet where U: ActivationFunc + 'static { - let mut mlp = BaseNeuralNet::new(criterion); + let mut mlp = BaseNeuralNet { + layers: Vec::with_capacity(2*(layer_sizes.len()-1)), + weights: Vec::new(), + criterion: criterion + }; for shape in layer_sizes.windows(2) { mlp.add(Box::new(net_layer::Linear::new(shape[0], shape[1]))); mlp.add(Box::new(activ_fn.clone())); @@ -356,9 +361,9 @@ impl BaseNeuralNet { }; let output = if i == 0 { - layer.forward(inputs, slice) + layer.forward(inputs, slice).unwrap() } else { - layer.forward(activations.last().unwrap(), slice) + layer.forward(activations.last().unwrap(), slice).unwrap() }; activations.push(output); @@ -374,8 +379,8 @@ impl BaseNeuralNet { // at this point index == weights.len() for (i, layer) in self.layers.iter().enumerate().rev() { let activation = if i == 0 {inputs} else {&activations[i-1]}; - let mut grad_params = layer.back_params(&out_grad, activation, params[i]); + let mut grad_params = layer.back_params(&out_grad, activation, params[i]); if self.criterion.is_regularized() { utils::in_place_vec_bin_op(grad_params.mut_data(), self.criterion.reg_cost_grad(params[i]).data(), |x, &y| { *x = *x + y @@ -411,7 +416,7 @@ impl BaseNeuralNet { shape.1, shape.1); ptr = ptr.offset(self.layers[0].num_params() as isize); - self.layers[0].forward(inputs, slice) + try!(self.layers[0].forward(inputs, slice)) }; for layer in self.layers.iter().skip(1) { let shape = layer.param_shape(); @@ -420,7 +425,11 @@ impl BaseNeuralNet { shape.0, shape.1, shape.1); - outputs = layer.forward(&outputs, slice); + outputs = match layer.forward(&outputs, slice) { + Ok(act) => act, + Err(_) => {return Err(Error::new(ErrorKind::InvalidParameters, + "The network's layers do not line up correctly."))} + }; ptr = ptr.offset(layer.num_params() as isize); } } diff --git 
a/src/learning/nnet/net_layer.rs b/src/learning/nnet/net_layer.rs index 9979011b..61e31210 100644 --- a/src/learning/nnet/net_layer.rs +++ b/src/learning/nnet/net_layer.rs @@ -2,6 +2,8 @@ use linalg::{Matrix, MatrixSlice, BaseMatrix, BaseMatrixMut}; +use learning::LearningResult; +use learning::error::{Error, ErrorKind}; use learning::toolkit::activ_fn::ActivationFunc; use rand::thread_rng; @@ -13,7 +15,7 @@ use std::fmt::Debug; /// Trait for neural net layers pub trait NetLayer : Debug { /// The result of propogating data forward through this layer - fn forward(&self, input: &Matrix, params: MatrixSlice) -> Matrix; + fn forward(&self, input: &Matrix, params: MatrixSlice) -> LearningResult>; /// The gradient of the output of this layer with respect to its input fn back_input(&self, out_grad: &Matrix, input: &Matrix, params: MatrixSlice) -> Matrix; @@ -75,13 +77,19 @@ impl NetLayer for Linear { /// /// input should have dimensions N x I /// where N is the number of samples and I is the dimensionality of the input - fn forward(&self, input: &Matrix, params: MatrixSlice) -> Matrix { + fn forward(&self, input: &Matrix, params: MatrixSlice) -> LearningResult> { if self.has_bias { - debug_assert_eq!(input.cols()+1, params.rows()); - input.hcat(&Matrix::::ones(input.rows(), 1)) * ¶ms + if input.cols()+1 != params.rows() { + Err(Error::new(ErrorKind::InvalidData, "The input had the wrong number of columns")) + } else { + Ok(input.hcat(&Matrix::::ones(input.rows(), 1)) * ¶ms) + } } else { - debug_assert_eq!(input.cols(), params.rows()); - input * ¶ms + if input.cols() != params.rows() { + Err(Error::new(ErrorKind::InvalidData, "The input had the wrong number of columns")) + } else { + Ok(input * ¶ms) + } } } @@ -100,7 +108,7 @@ impl NetLayer for Linear { } fn back_params(&self, out_grad: &Matrix, input: &Matrix, _: MatrixSlice) -> Matrix { - assert_eq!(input.rows(), out_grad.rows()); + debug_assert_eq!(input.rows(), out_grad.rows()); if self.has_bias { 
//input.transpose().vcat(&Matrix::::ones(1, input.rows())) * out_grad input.hcat(&Matrix::::ones(input.rows(), 1)).transpose() * out_grad @@ -127,10 +135,10 @@ impl NetLayer for Linear { impl NetLayer for T { /// Applys the activation function to each element of the input - fn forward(&self, input: &Matrix, _: MatrixSlice) -> Matrix { + fn forward(&self, input: &Matrix, _: MatrixSlice) -> LearningResult> { //Matrix::new(input.rows(), input.cols(), - // input.iter().map(|&x| T::func(x)).collect::>()); - input.clone().apply(&T::func) + // input.iter().map(|&x| T::func(x)).collect::>()) + Ok(input.clone().apply(&T::func)) } fn back_input(&self, out_grad: &Matrix, input: &Matrix, _: MatrixSlice) -> Matrix { From 9d056292d130546a6f3dcd6ee7bd6fe77eb3823f Mon Sep 17 00:00:00 2001 From: NivenT Date: Fri, 7 Oct 2016 22:28:36 -0700 Subject: [PATCH 17/28] Improve neural network performance --- src/learning/nnet/mod.rs | 22 +++++++++++----------- src/learning/nnet/net_layer.rs | 29 +++++++++++++---------------- 2 files changed, 24 insertions(+), 27 deletions(-) diff --git a/src/learning/nnet/mod.rs b/src/learning/nnet/mod.rs index 3f4600c1..093a0027 100644 --- a/src/learning/nnet/mod.rs +++ b/src/learning/nnet/mod.rs @@ -342,7 +342,7 @@ impl BaseNeuralNet { unsafe { gradients.set_len(weights.len()); } - // activations[i] is output of layer[i] + // activations[i] is the output of layer[i] let mut activations = Vec::with_capacity(self.layers.len()); // params[i] is the weights for layer[i] let mut params = Vec::with_capacity(self.layers.len()); @@ -370,30 +370,30 @@ impl BaseNeuralNet { params.push(slice); index += layer.num_params(); } - let output = activations.last().unwrap(); + let output = &activations[activations.len()-1]; // Backward propagation - + // The gradient with respect to the current layer's output - let mut out_grad = self.criterion.cost_grad(&output, targets); + let mut out_grad = self.criterion.cost_grad(output, targets); // at this point index == 
weights.len() for (i, layer) in self.layers.iter().enumerate().rev() { let activation = if i == 0 {inputs} else {&activations[i-1]}; + index -= layer.num_params(); - let mut grad_params = layer.back_params(&out_grad, activation, params[i]); + let grad_params = &mut gradients[index..index+layer.num_params()]; + grad_params.copy_from_slice(&layer.back_params(&out_grad, activation, params[i]).data()); + if self.criterion.is_regularized() { - utils::in_place_vec_bin_op(grad_params.mut_data(), self.criterion.reg_cost_grad(params[i]).data(), |x, &y| { + utils::in_place_vec_bin_op(grad_params, self.criterion.reg_cost_grad(params[i]).data(), |x, &y| { *x = *x + y }); } - + out_grad = layer.back_input(&out_grad, activation, params[i]); - - index -= layer.num_params(); - gradients[index..index+layer.num_params()].copy_from_slice(&grad_params.data()); } - let mut cost = self.criterion.cost(&output, targets); + let mut cost = self.criterion.cost(output, targets); if self.criterion.is_regularized() { for i in 0..self.layers.len() { cost += self.criterion.reg_cost(params[i]); diff --git a/src/learning/nnet/net_layer.rs b/src/learning/nnet/net_layer.rs index 61e31210..ce37afa7 100644 --- a/src/learning/nnet/net_layer.rs +++ b/src/learning/nnet/net_layer.rs @@ -1,6 +1,7 @@ //!Neural Network Layers use linalg::{Matrix, MatrixSlice, BaseMatrix, BaseMatrixMut}; +use rulinalg::utils; use learning::LearningResult; use learning::error::{Error, ErrorKind}; @@ -82,7 +83,7 @@ impl NetLayer for Linear { if input.cols()+1 != params.rows() { Err(Error::new(ErrorKind::InvalidData, "The input had the wrong number of columns")) } else { - Ok(input.hcat(&Matrix::::ones(input.rows(), 1)) * ¶ms) + Ok(&input.hcat(&Matrix::ones(input.rows(), 1)) * ¶ms) } } else { if input.cols() != params.rows() { @@ -95,25 +96,21 @@ impl NetLayer for Linear { fn back_input(&self, out_grad: &Matrix, _: &Matrix, params: MatrixSlice) -> Matrix { debug_assert_eq!(out_grad.cols(), params.cols()); - //let gradient = 
out_grad * ¶ms.into_matrix().transpose(); + let gradient = out_grad * ¶ms.transpose(); if self.has_bias { - //let columns: Vec<_> = (0..gradient.cols()-1).collect(); - //gradient.select_cols(&columns) - let rows: Vec<_> = (0..params.rows()-1).collect(); - out_grad * ¶ms.into_matrix().select_rows(&rows).transpose() + let columns: Vec<_> = (0..gradient.cols()-1).collect(); + gradient.select_cols(&columns) } else { - //gradient - out_grad * ¶ms.into_matrix().transpose() + gradient } } fn back_params(&self, out_grad: &Matrix, input: &Matrix, _: MatrixSlice) -> Matrix { debug_assert_eq!(input.rows(), out_grad.rows()); if self.has_bias { - //input.transpose().vcat(&Matrix::::ones(1, input.rows())) * out_grad - input.hcat(&Matrix::::ones(input.rows(), 1)).transpose() * out_grad + &input.hcat(&Matrix::ones(input.rows(), 1)).transpose() * out_grad } else { - input.transpose() * out_grad + &input.transpose() * out_grad } } @@ -136,15 +133,15 @@ impl NetLayer for Linear { impl NetLayer for T { /// Applys the activation function to each element of the input fn forward(&self, input: &Matrix, _: MatrixSlice) -> LearningResult> { - //Matrix::new(input.rows(), input.cols(), - // input.iter().map(|&x| T::func(x)).collect::>()) Ok(input.clone().apply(&T::func)) } fn back_input(&self, out_grad: &Matrix, input: &Matrix, _: MatrixSlice) -> Matrix { - let in_grad = Matrix::new(input.rows(), input.cols(), - input.iter().map(|&x| T::func_grad(x)).collect::>()); - out_grad.elemul(&in_grad) + let mut in_grad = input.clone(); + utils::in_place_vec_bin_op(in_grad.mut_data(), out_grad.data(), |x, &y| { + *x = T::func_grad(*x) * y + }); + in_grad } fn back_params(&self, _: &Matrix, _: &Matrix, _: MatrixSlice) -> Matrix { From d555bef66ed85af0ba7f4fd52464f4bc634838ac Mon Sep 17 00:00:00 2001 From: NivenT Date: Fri, 7 Oct 2016 22:30:44 -0700 Subject: [PATCH 18/28] removed ActFunc from Criterion --- src/learning/nnet/mod.rs | 18 ++---------------- 1 file changed, 2 insertions(+), 16 deletions(-) 
diff --git a/src/learning/nnet/mod.rs b/src/learning/nnet/mod.rs index 093a0027..4cc42afa 100644 --- a/src/learning/nnet/mod.rs +++ b/src/learning/nnet/mod.rs @@ -45,7 +45,7 @@ pub mod net_layer; -use linalg::{Matrix, MatrixSlice, BaseMatrixMut}; +use linalg::{Matrix, MatrixSlice}; use rulinalg::utils; use learning::{LearningResult, SupModel}; @@ -383,7 +383,7 @@ impl BaseNeuralNet { let grad_params = &mut gradients[index..index+layer.num_params()]; grad_params.copy_from_slice(&layer.back_params(&out_grad, activation, params[i]).data()); - + if self.criterion.is_regularized() { utils::in_place_vec_bin_op(grad_params, self.criterion.reg_cost_grad(params[i]).data(), |x, &y| { *x = *x + y @@ -457,21 +457,9 @@ impl Optimizable for BaseNeuralNet { /// /// Specifies an activation function and a cost function. pub trait Criterion { - /// The activation function for the criterion. - type ActFunc: ActivationFunc + Debug; /// The cost function for the criterion. type Cost: CostFunc>; - /// The activation function applied to a matrix. - fn activate(&self, mat: Matrix) -> Matrix { - mat.apply(&Self::ActFunc::func) - } - - /// The gradient of the activation function applied to a matrix. - fn grad_activ(&self, mat: Matrix) -> Matrix { - mat.apply(&Self::ActFunc::func_grad) - } - /// The cost function. /// /// Returns a scalar cost. 
@@ -534,7 +522,6 @@ pub struct BCECriterion { } impl Criterion for BCECriterion { - type ActFunc = activ_fn::Sigmoid; type Cost = cost_fn::CrossEntropyError; fn regularization(&self) -> Regularization { @@ -576,7 +563,6 @@ pub struct MSECriterion { } impl Criterion for MSECriterion { - type ActFunc = activ_fn::Linear; type Cost = cost_fn::MeanSqError; fn regularization(&self) -> Regularization { From 999524e5e88d1a8ba6164ed554bf6ca0cbd4b2f7 Mon Sep 17 00:00:00 2001 From: Johann Tuffe Date: Sat, 8 Oct 2016 16:57:21 +0800 Subject: [PATCH 19/28] Replace select_cols with simpler sub_slice.into plus some reformatting --- src/learning/nnet/mod.rs | 35 ++--- src/learning/nnet/net_layer.rs | 243 +++++++++++++++++---------------- 2 files changed, 137 insertions(+), 141 deletions(-) diff --git a/src/learning/nnet/mod.rs b/src/learning/nnet/mod.rs index 4cc42afa..13dd2cca 100644 --- a/src/learning/nnet/mod.rs +++ b/src/learning/nnet/mod.rs @@ -217,9 +217,9 @@ impl NeuralNet /// net.add_layers(linear_sig); /// ``` pub fn add_layers<'a, U>(&'a mut self, layers: U) -> &'a mut NeuralNet - where U: IntoIterator> { - self.base.add_layers(layers); - self + where U: IntoIterator> { + self.base.add_layers(layers); + self } /// Gets matrix of weights between specified layer and forward layer. @@ -247,7 +247,7 @@ impl NeuralNet /// Base Neural Network struct /// -/// This struct cannot be instantianated and is used internally only. +/// This struct cannot be instantiated and is used internally only. #[derive(Debug)] pub struct BaseNeuralNet { layers: Vec>, @@ -299,11 +299,12 @@ impl BaseNeuralNet { /// Adds multiple layers to the end of the network fn add_layers<'a, U>(&'a mut self, layers: U) -> &'a mut BaseNeuralNet - where U: IntoIterator> { - for layer in layers { - self.add(layer); - } - self + where U: IntoIterator> + { + for layer in layers { + self.add(layer); + } + self } /// Gets matrix of weights for the specified layer for the weights. 
@@ -311,17 +312,11 @@ impl BaseNeuralNet { debug_assert!(idx < self.layers.len()); // Check that the weights are the right size. - let mut full_size = 0usize; - for l in &self.layers { - full_size += l.num_params(); - } + let full_size: usize = self.layers.iter().map(|l| l.num_params()).sum(); debug_assert_eq!(full_size, weights.len()); - let mut start = 0usize; - for l in &self.layers[..idx] { - start += l.num_params(); - } + let start: usize = self.layers.iter().take(idx).map(|l| l.num_params()).sum(); let shape = self.layers[idx].param_shape(); unsafe { @@ -361,9 +356,9 @@ impl BaseNeuralNet { }; let output = if i == 0 { - layer.forward(inputs, slice).unwrap() + layer.forward(inputs, slice).unwrap() } else { - layer.forward(activations.last().unwrap(), slice).unwrap() + layer.forward(activations.last().unwrap(), slice).unwrap() }; activations.push(output); @@ -404,7 +399,7 @@ impl BaseNeuralNet { /// Forward propagation of the model weights to get the outputs. fn forward_prop(&self, inputs: &Matrix) -> LearningResult> { - if self.layers.len() == 0 { + if self.layers.is_empty() { return Ok(inputs.clone()); } diff --git a/src/learning/nnet/net_layer.rs b/src/learning/nnet/net_layer.rs index ce37afa7..cbf6e2be 100644 --- a/src/learning/nnet/net_layer.rs +++ b/src/learning/nnet/net_layer.rs @@ -15,26 +15,26 @@ use std::fmt::Debug; /// Trait for neural net layers pub trait NetLayer : Debug { - /// The result of propogating data forward through this layer - fn forward(&self, input: &Matrix, params: MatrixSlice) -> LearningResult>; - - /// The gradient of the output of this layer with respect to its input - fn back_input(&self, out_grad: &Matrix, input: &Matrix, params: MatrixSlice) -> Matrix; - - /// The gradient of the output of this layer with respect to its parameters - fn back_params(&self, out_grad: &Matrix, input: &Matrix, params: MatrixSlice) -> Matrix; - - /// The default value of the parameters of this layer before training - fn default_params(&self) -> Vec; 
- - /// The shape of the parameters used by this layer - fn param_shape(&self) -> (usize, usize); - - /// The number of parameters used by this layer - fn num_params(&self) -> usize { - let shape = self.param_shape(); - shape.0 * shape.1 - } + /// The result of propogating data forward through this layer + fn forward(&self, input: &Matrix, params: MatrixSlice) -> LearningResult>; + + /// The gradient of the output of this layer with respect to its input + fn back_input(&self, out_grad: &Matrix, input: &Matrix, params: MatrixSlice) -> Matrix; + + /// The gradient of the output of this layer with respect to its parameters + fn back_params(&self, out_grad: &Matrix, input: &Matrix, params: MatrixSlice) -> Matrix; + + /// The default value of the parameters of this layer before training + fn default_params(&self) -> Vec; + + /// The shape of the parameters used by this layer + fn param_shape(&self) -> (usize, usize); + + /// The number of parameters used by this layer + fn num_params(&self) -> usize { + let shape = self.param_shape(); + shape.0 * shape.1 + } } /// Linear network layer @@ -44,115 +44,116 @@ pub trait NetLayer : Debug { /// The parameters are a matrix of weights of size I x O /// where O is the dimensionality of the output and I the dimensionality of the input #[derive(Debug, Clone, Copy)] -pub struct Linear { - /// The number of dimensions of the input - input_size: usize, - /// The number of dimensions of the output - output_size: usize, - /// Whether or not to include a bias term - has_bias: bool, +pub struct Linear { + /// The number of dimensions of the input + input_size: usize, + /// The number of dimensions of the output + output_size: usize, + /// Whether or not to include a bias term + has_bias: bool, } impl Linear { - /// Construct a new Linear layer - pub fn new(input_size: usize, output_size: usize) -> Linear { - Linear { - input_size: input_size + 1, - output_size: output_size, - has_bias: true - } - } - - /// Construct a Linear layer with a 
bias term - pub fn without_bias(input_size: usize, output_size: usize) -> Linear { - Linear { - input_size: input_size, - output_size: output_size, - has_bias: false - } - } + /// Construct a new Linear layer + pub fn new(input_size: usize, output_size: usize) -> Linear { + Linear { + input_size: input_size + 1, + output_size: output_size, + has_bias: true + } + } + + /// Construct a Linear layer with a bias term + pub fn without_bias(input_size: usize, output_size: usize) -> Linear { + Linear { + input_size: input_size, + output_size: output_size, + has_bias: false + } + } } impl NetLayer for Linear { - /// Computes a matrix product - /// - /// input should have dimensions N x I - /// where N is the number of samples and I is the dimensionality of the input - fn forward(&self, input: &Matrix, params: MatrixSlice) -> LearningResult> { - if self.has_bias { - if input.cols()+1 != params.rows() { - Err(Error::new(ErrorKind::InvalidData, "The input had the wrong number of columns")) - } else { - Ok(&input.hcat(&Matrix::ones(input.rows(), 1)) * ¶ms) - } - } else { - if input.cols() != params.rows() { - Err(Error::new(ErrorKind::InvalidData, "The input had the wrong number of columns")) - } else { - Ok(input * ¶ms) - } - } - } - - fn back_input(&self, out_grad: &Matrix, _: &Matrix, params: MatrixSlice) -> Matrix { - debug_assert_eq!(out_grad.cols(), params.cols()); - let gradient = out_grad * ¶ms.transpose(); - if self.has_bias { - let columns: Vec<_> = (0..gradient.cols()-1).collect(); - gradient.select_cols(&columns) - } else { - gradient - } - } - - fn back_params(&self, out_grad: &Matrix, input: &Matrix, _: MatrixSlice) -> Matrix { - debug_assert_eq!(input.rows(), out_grad.rows()); - if self.has_bias { - &input.hcat(&Matrix::ones(input.rows(), 1)).transpose() * out_grad - } else { - &input.transpose() * out_grad - } - } - - /// Initializes weights using Xavier initialization - /// - /// weights drawn from gaussian distribution with 0 mean and variance 
2/(input_size+output_size) - fn default_params(&self) -> Vec { - let mut distro = Normal::new(0.0, (2.0/(self.input_size+self.output_size) as f64).sqrt()); - let mut rng = thread_rng(); - - (0..self.input_size*self.output_size).map(|_| distro.sample(&mut rng)) - .collect() - } - - fn param_shape(&self) -> (usize, usize) { - (self.input_size, self.output_size) - } + /// Computes a matrix product + /// + /// input should have dimensions N x I + /// where N is the number of samples and I is the dimensionality of the input + fn forward(&self, input: &Matrix, params: MatrixSlice) -> LearningResult> { + if self.has_bias { + if input.cols()+1 != params.rows() { + Err(Error::new(ErrorKind::InvalidData, "The input had the wrong number of columns")) + } else { + Ok(&input.hcat(&Matrix::ones(input.rows(), 1)) * ¶ms) + } + } else { + if input.cols() != params.rows() { + Err(Error::new(ErrorKind::InvalidData, "The input had the wrong number of columns")) + } else { + Ok(input * ¶ms) + } + } + } + + fn back_input(&self, out_grad: &Matrix, _: &Matrix, params: MatrixSlice) -> Matrix { + debug_assert_eq!(out_grad.cols(), params.cols()); + let gradient = out_grad * ¶ms.transpose(); + if self.has_bias { + let rows = gradient.rows(); + let cols = gradient.cols() - 1; + gradient.sub_slice([0, 0], rows, cols).into() + } else { + gradient + } + } + + fn back_params(&self, out_grad: &Matrix, input: &Matrix, _: MatrixSlice) -> Matrix { + debug_assert_eq!(input.rows(), out_grad.rows()); + if self.has_bias { + &input.hcat(&Matrix::ones(input.rows(), 1)).transpose() * out_grad + } else { + &input.transpose() * out_grad + } + } + + /// Initializes weights using Xavier initialization + /// + /// weights drawn from gaussian distribution with 0 mean and variance 2/(input_size+output_size) + fn default_params(&self) -> Vec { + let mut distro = Normal::new(0.0, (2.0/(self.input_size+self.output_size) as f64).sqrt()); + let mut rng = thread_rng(); + + (0..self.input_size*self.output_size).map(|_| 
distro.sample(&mut rng)) + .collect() + } + + fn param_shape(&self) -> (usize, usize) { + (self.input_size, self.output_size) + } } impl NetLayer for T { - /// Applys the activation function to each element of the input - fn forward(&self, input: &Matrix, _: MatrixSlice) -> LearningResult> { - Ok(input.clone().apply(&T::func)) - } - - fn back_input(&self, out_grad: &Matrix, input: &Matrix, _: MatrixSlice) -> Matrix { - let mut in_grad = input.clone(); - utils::in_place_vec_bin_op(in_grad.mut_data(), out_grad.data(), |x, &y| { + /// Applys the activation function to each element of the input + fn forward(&self, input: &Matrix, _: MatrixSlice) -> LearningResult> { + Ok(input.clone().apply(&T::func)) + } + + fn back_input(&self, out_grad: &Matrix, input: &Matrix, _: MatrixSlice) -> Matrix { + let mut in_grad = input.clone(); + utils::in_place_vec_bin_op(in_grad.mut_data(), out_grad.data(), |x, &y| { *x = T::func_grad(*x) * y }); - in_grad - } - - fn back_params(&self, _: &Matrix, _: &Matrix, _: MatrixSlice) -> Matrix { - Matrix::new(0, 0, Vec::new()) - } - - fn default_params(&self) -> Vec { - Vec::new() - } - - fn param_shape(&self) -> (usize, usize) { - (0, 0) - } -} \ No newline at end of file + in_grad + } + + fn back_params(&self, _: &Matrix, _: &Matrix, _: MatrixSlice) -> Matrix { + Matrix::new(0, 0, Vec::new()) + } + + fn default_params(&self) -> Vec { + Vec::new() + } + + fn param_shape(&self) -> (usize, usize) { + (0, 0) + } +} From af179955331483efb053c0faf991c47046f2ca1e Mon Sep 17 00:00:00 2001 From: Johann Tuffe Date: Mon, 10 Oct 2016 00:48:00 +0800 Subject: [PATCH 20/28] extend_from_slice instead of append --- src/learning/nnet/mod.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/learning/nnet/mod.rs b/src/learning/nnet/mod.rs index 13dd2cca..a9ff43a8 100644 --- a/src/learning/nnet/mod.rs +++ b/src/learning/nnet/mod.rs @@ -292,7 +292,7 @@ impl BaseNeuralNet { /// Adds the specified layer to the end of the network fn add<'a>(&'a 
mut self, layer: Box) -> &'a mut BaseNeuralNet { - self.weights.append(&mut layer.default_params()); + self.weights.extend_from_slice(&layer.default_params()); self.layers.push(layer); self } From 4c7ed37c3ad83b00ab2b52de29ccb8c037c71a6b Mon Sep 17 00:00:00 2001 From: NivenT Date: Fri, 14 Oct 2016 10:50:57 -0700 Subject: [PATCH 21/28] bias appended to beginning of inputs --- src/learning/nnet/net_layer.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/learning/nnet/net_layer.rs b/src/learning/nnet/net_layer.rs index cbf6e2be..3bc70001 100644 --- a/src/learning/nnet/net_layer.rs +++ b/src/learning/nnet/net_layer.rs @@ -83,7 +83,7 @@ impl NetLayer for Linear { if input.cols()+1 != params.rows() { Err(Error::new(ErrorKind::InvalidData, "The input had the wrong number of columns")) } else { - Ok(&input.hcat(&Matrix::ones(input.rows(), 1)) * ¶ms) + Ok(&Matrix::ones(input.rows(), 1).hcat(input) * ¶ms) } } else { if input.cols() != params.rows() { @@ -100,7 +100,7 @@ impl NetLayer for Linear { if self.has_bias { let rows = gradient.rows(); let cols = gradient.cols() - 1; - gradient.sub_slice([0, 0], rows, cols).into() + gradient.sub_slice([0, 1], rows, cols).into() } else { gradient } @@ -109,7 +109,7 @@ impl NetLayer for Linear { fn back_params(&self, out_grad: &Matrix, input: &Matrix, _: MatrixSlice) -> Matrix { debug_assert_eq!(input.rows(), out_grad.rows()); if self.has_bias { - &input.hcat(&Matrix::ones(input.rows(), 1)).transpose() * out_grad + &Matrix::ones(input.rows(), 1).hcat(input).transpose() * out_grad } else { &input.transpose() * out_grad } From fcca7b72c3898689d3eb20fe90b7ca947796d65f Mon Sep 17 00:00:00 2001 From: NivenT Date: Wed, 26 Oct 2016 02:06:25 -0700 Subject: [PATCH 22/28] Compute regularization cost/grad in one step --- src/learning/nnet/mod.rs | 16 +++++++--------- src/learning/nnet/net_layer.rs | 2 +- 2 files changed, 8 insertions(+), 10 deletions(-) diff --git a/src/learning/nnet/mod.rs b/src/learning/nnet/mod.rs 
index a9ff43a8..1161bd71 100644 --- a/src/learning/nnet/mod.rs +++ b/src/learning/nnet/mod.rs @@ -378,21 +378,19 @@ impl BaseNeuralNet { let grad_params = &mut gradients[index..index+layer.num_params()]; grad_params.copy_from_slice(&layer.back_params(&out_grad, activation, params[i]).data()); - - if self.criterion.is_regularized() { - utils::in_place_vec_bin_op(grad_params, self.criterion.reg_cost_grad(params[i]).data(), |x, &y| { - *x = *x + y - }); - } out_grad = layer.back_input(&out_grad, activation, params[i]); } let mut cost = self.criterion.cost(output, targets); if self.criterion.is_regularized() { - for i in 0..self.layers.len() { - cost += self.criterion.reg_cost(params[i]); - } + let all_params = unsafe { + MatrixSlice::from_raw_parts(weights.as_ptr(), weights.len(), 1, 1) + }; + utils::in_place_vec_bin_op(&mut gradients, + self.criterion.reg_cost_grad(all_params).data(), + |x, &y| *x = *x + y); + cost += self.criterion.reg_cost(all_params); } (cost, gradients) } diff --git a/src/learning/nnet/net_layer.rs b/src/learning/nnet/net_layer.rs index 3bc70001..b9229c86 100644 --- a/src/learning/nnet/net_layer.rs +++ b/src/learning/nnet/net_layer.rs @@ -132,7 +132,7 @@ impl NetLayer for Linear { } impl NetLayer for T { - /// Applys the activation function to each element of the input + /// Applies the activation function to each element of the input fn forward(&self, input: &Matrix, _: MatrixSlice) -> LearningResult> { Ok(input.clone().apply(&T::func)) } From 66f53feb138388eeef7233827ccdbe5548e68dce Mon Sep 17 00:00:00 2001 From: NivenT Date: Tue, 8 Nov 2016 00:00:26 -0800 Subject: [PATCH 23/28] Removed useless 'a and & --- src/learning/nnet/mod.rs | 6 +++--- src/learning/nnet/net_layer.rs | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/learning/nnet/mod.rs b/src/learning/nnet/mod.rs index 1161bd71..269269e9 100644 --- a/src/learning/nnet/mod.rs +++ b/src/learning/nnet/mod.rs @@ -276,7 +276,7 @@ impl BaseNeuralNet { } /// Create a 
multilayer perceptron with the specified layer sizes. - fn mlp<'a, U>(layer_sizes: &[usize], criterion: T, activ_fn: U) -> BaseNeuralNet + fn mlp(layer_sizes: &[usize], criterion: T, activ_fn: U) -> BaseNeuralNet where U: ActivationFunc + 'static { let mut mlp = BaseNeuralNet { layers: Vec::with_capacity(2*(layer_sizes.len()-1)), @@ -365,7 +365,7 @@ impl BaseNeuralNet { params.push(slice); index += layer.num_params(); } - let output = &activations[activations.len()-1]; + let output = activations.last().unwrap(); // Backward propagation @@ -377,7 +377,7 @@ impl BaseNeuralNet { index -= layer.num_params(); let grad_params = &mut gradients[index..index+layer.num_params()]; - grad_params.copy_from_slice(&layer.back_params(&out_grad, activation, params[i]).data()); + grad_params.copy_from_slice(layer.back_params(&out_grad, activation, params[i]).data()); out_grad = layer.back_input(&out_grad, activation, params[i]); } diff --git a/src/learning/nnet/net_layer.rs b/src/learning/nnet/net_layer.rs index b9229c86..c963f036 100644 --- a/src/learning/nnet/net_layer.rs +++ b/src/learning/nnet/net_layer.rs @@ -93,7 +93,7 @@ impl NetLayer for Linear { } } } - + fn back_input(&self, out_grad: &Matrix, _: &Matrix, params: MatrixSlice) -> Matrix { debug_assert_eq!(out_grad.cols(), params.cols()); let gradient = out_grad * ¶ms.transpose(); From ccef5d38079f9dfbedb47d99bfca0613828c9027 Mon Sep 17 00:00:00 2001 From: NivenT Date: Fri, 6 Jan 2017 00:43:10 -0800 Subject: [PATCH 24/28] removed useless Debug's --- src/learning/nnet/mod.rs | 4 +--- src/learning/nnet/net_layer.rs | 4 ++-- 2 files changed, 3 insertions(+), 5 deletions(-) diff --git a/src/learning/nnet/mod.rs b/src/learning/nnet/mod.rs index 269269e9..22311a8d 100644 --- a/src/learning/nnet/mod.rs +++ b/src/learning/nnet/mod.rs @@ -58,8 +58,6 @@ use learning::toolkit::regularization::Regularization; use learning::optim::{Optimizable, OptimAlgorithm}; use learning::optim::grad_desc::StochasticGD; -use std::fmt::Debug; - use 
self::net_layer::NetLayer; /// Neural Network Model @@ -259,7 +257,7 @@ pub struct BaseNeuralNet { impl BaseNeuralNet { /// Creates a base neural network with the specified layer sizes. fn default(layer_sizes: &[usize], activ_fn: U) -> BaseNeuralNet - where U: ActivationFunc + Debug + 'static { + where U: ActivationFunc + 'static { BaseNeuralNet::mlp(layer_sizes, BCECriterion::default(), activ_fn) } } diff --git a/src/learning/nnet/net_layer.rs b/src/learning/nnet/net_layer.rs index c963f036..2ecfb276 100644 --- a/src/learning/nnet/net_layer.rs +++ b/src/learning/nnet/net_layer.rs @@ -1,4 +1,4 @@ -//!Neural Network Layers +//! Neural Network Layers use linalg::{Matrix, MatrixSlice, BaseMatrix, BaseMatrixMut}; use rulinalg::utils; @@ -131,7 +131,7 @@ impl NetLayer for Linear { } } -impl NetLayer for T { +impl NetLayer for T { /// Applies the activation function to each element of the input fn forward(&self, input: &Matrix, _: MatrixSlice) -> LearningResult> { Ok(input.clone().apply(&T::func)) From c236101414a27570730bdfbdd26f43da988ea251 Mon Sep 17 00:00:00 2001 From: NivenT Date: Fri, 6 Jan 2017 12:58:22 -0800 Subject: [PATCH 25/28] back_* functions now take the layer's output as an input --- src/learning/nnet/mod.rs | 5 +++-- src/learning/nnet/net_layer.rs | 31 +++++++++++++++++-------------- src/learning/toolkit/activ_fn.rs | 20 ++++++++++++++++++++ 3 files changed, 40 insertions(+), 16 deletions(-) diff --git a/src/learning/nnet/mod.rs b/src/learning/nnet/mod.rs index 22311a8d..430316dc 100644 --- a/src/learning/nnet/mod.rs +++ b/src/learning/nnet/mod.rs @@ -372,12 +372,13 @@ impl BaseNeuralNet { // at this point index == weights.len() for (i, layer) in self.layers.iter().enumerate().rev() { let activation = if i == 0 {inputs} else {&activations[i-1]}; + let result = &activations[i]; index -= layer.num_params(); let grad_params = &mut gradients[index..index+layer.num_params()]; - grad_params.copy_from_slice(layer.back_params(&out_grad, activation, 
params[i]).data()); + grad_params.copy_from_slice(layer.back_params(&out_grad, activation, result, params[i]).data()); - out_grad = layer.back_input(&out_grad, activation, params[i]); + out_grad = layer.back_input(&out_grad, activation, result, params[i]); } let mut cost = self.criterion.cost(output, targets); diff --git a/src/learning/nnet/net_layer.rs b/src/learning/nnet/net_layer.rs index 2ecfb276..1278b099 100644 --- a/src/learning/nnet/net_layer.rs +++ b/src/learning/nnet/net_layer.rs @@ -1,7 +1,6 @@ //! Neural Network Layers -use linalg::{Matrix, MatrixSlice, BaseMatrix, BaseMatrixMut}; -use rulinalg::utils; +use linalg::{Matrix, MatrixSlice, BaseMatrix}; use learning::LearningResult; use learning::error::{Error, ErrorKind}; @@ -19,10 +18,10 @@ pub trait NetLayer : Debug { fn forward(&self, input: &Matrix, params: MatrixSlice) -> LearningResult>; /// The gradient of the output of this layer with respect to its input - fn back_input(&self, out_grad: &Matrix, input: &Matrix, params: MatrixSlice) -> Matrix; + fn back_input(&self, out_grad: &Matrix, input: &Matrix, output: &Matrix, params: MatrixSlice) -> Matrix; /// The gradient of the output of this layer with respect to its parameters - fn back_params(&self, out_grad: &Matrix, input: &Matrix, params: MatrixSlice) -> Matrix; + fn back_params(&self, out_grad: &Matrix, input: &Matrix, output: &Matrix, params: MatrixSlice) -> Matrix; /// The default value of the parameters of this layer before training fn default_params(&self) -> Vec; @@ -94,7 +93,7 @@ impl NetLayer for Linear { } } - fn back_input(&self, out_grad: &Matrix, _: &Matrix, params: MatrixSlice) -> Matrix { + fn back_input(&self, out_grad: &Matrix, _: &Matrix, _: &Matrix, params: MatrixSlice) -> Matrix { debug_assert_eq!(out_grad.cols(), params.cols()); let gradient = out_grad * ¶ms.transpose(); if self.has_bias { @@ -106,7 +105,7 @@ impl NetLayer for Linear { } } - fn back_params(&self, out_grad: &Matrix, input: &Matrix, _: MatrixSlice) -> Matrix { + 
fn back_params(&self, out_grad: &Matrix, input: &Matrix, _: &Matrix, _: MatrixSlice) -> Matrix { debug_assert_eq!(input.rows(), out_grad.rows()); if self.has_bias { &Matrix::ones(input.rows(), 1).hcat(input).transpose() * out_grad @@ -134,18 +133,22 @@ impl NetLayer for Linear { impl NetLayer for T { /// Applies the activation function to each element of the input fn forward(&self, input: &Matrix, _: MatrixSlice) -> LearningResult> { - Ok(input.clone().apply(&T::func)) + let mut output = Vec::with_capacity(input.rows()*input.cols()); + for val in input.data() { + output.push(T::func(*val)); + } + Ok(Matrix::new(input.rows(), input.cols(), output)) } - fn back_input(&self, out_grad: &Matrix, input: &Matrix, _: MatrixSlice) -> Matrix { - let mut in_grad = input.clone(); - utils::in_place_vec_bin_op(in_grad.mut_data(), out_grad.data(), |x, &y| { - *x = T::func_grad(*x) * y - }); - in_grad + fn back_input(&self, out_grad: &Matrix, _: &Matrix, output: &Matrix, _: MatrixSlice) -> Matrix { + let mut in_grad = Vec::with_capacity(output.rows()*output.cols()); + for (y, g) in output.data().iter().zip(out_grad.data()) { + in_grad.push(T::func_grad_from_output(*y) * g); + } + Matrix::new(output.rows(), output.cols(), in_grad) } - fn back_params(&self, _: &Matrix, _: &Matrix, _: MatrixSlice) -> Matrix { + fn back_params(&self, _: &Matrix, _: &Matrix, _: &Matrix, _: MatrixSlice) -> Matrix { Matrix::new(0, 0, Vec::new()) } diff --git a/src/learning/toolkit/activ_fn.rs b/src/learning/toolkit/activ_fn.rs index e8e76fc1..8529898e 100644 --- a/src/learning/toolkit/activ_fn.rs +++ b/src/learning/toolkit/activ_fn.rs @@ -18,6 +18,10 @@ pub trait ActivationFunc: Clone + Debug { /// The gradient of the activation function. fn func_grad(x: f64) -> f64; + /// The gradient of the activation function calculated using the output of the function. + /// Calculates f'(x) given f(x) as an input + fn func_grad_from_output(y: f64) -> f64; + /// The inverse of the activation function. 
fn func_inv(x: f64) -> f64; } @@ -41,6 +45,10 @@ impl ActivationFunc for Sigmoid { Self::func(x) * (1f64 - Self::func(x)) } + fn func_grad_from_output(y: f64) -> f64 { + y * (1f64 - y) + } + fn func_inv(x: f64) -> f64 { (x / (1f64 - x)).ln() } @@ -59,6 +67,10 @@ impl ActivationFunc for Linear { 1f64 } + fn func_grad_from_output(_: f64) -> f64 { + 1f64 + } + fn func_inv(x: f64) -> f64 { x } @@ -77,6 +89,10 @@ impl ActivationFunc for Exp { Self::func(x) } + fn func_grad_from_output(y: f64) -> f64 { + y + } + fn func_inv(x: f64) -> f64 { x.ln() } @@ -96,6 +112,10 @@ impl ActivationFunc for Tanh { 1.0 - y*y } + fn func_grad_from_output(y: f64) -> f64 { + 1.0 - y*y + } + fn func_inv(x: f64) -> f64 { 0.5*((1.0+x)/(1.0-x)).ln() } From 775f7eb817a3f6c21bfed41ce48e9a86f92eda0b Mon Sep 17 00:00:00 2001 From: NivenT Date: Sat, 21 Jan 2017 18:39:42 -0800 Subject: [PATCH 26/28] Fixed typo --- src/learning/nnet/net_layer.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/learning/nnet/net_layer.rs b/src/learning/nnet/net_layer.rs index 1278b099..c0e2c3ce 100644 --- a/src/learning/nnet/net_layer.rs +++ b/src/learning/nnet/net_layer.rs @@ -62,7 +62,7 @@ impl Linear { } } - /// Construct a Linear layer with a bias term + /// Construct a Linear layer without a bias term pub fn without_bias(input_size: usize, output_size: usize) -> Linear { Linear { input_size: input_size, From 95a74c5e10b960317fe09a63bd53f5fdaab3efb4 Mon Sep 17 00:00:00 2001 From: James Lucas Date: Tue, 21 Feb 2017 10:14:53 -0500 Subject: [PATCH 27/28] Minor cleanup and removing a matrix clone --- src/learning/nnet/mod.rs | 8 +++----- src/learning/nnet/net_layer.rs | 35 ++++++++++++++++++++++++++++------ 2 files changed, 32 insertions(+), 11 deletions(-) diff --git a/src/learning/nnet/mod.rs b/src/learning/nnet/mod.rs index 430316dc..80856a2e 100644 --- a/src/learning/nnet/mod.rs +++ b/src/learning/nnet/mod.rs @@ -37,10 +37,10 @@ //! //! 
The neural networks are specified via a criterion - similar to //! [Torch](https://github.com/torch/nn/blob/master/doc/criterion.md). -//! The criterions combine an activation function and a cost function. +//! The criterions specify a cost function and any regularization. //! //! You can define your own criterion by implementing the `Criterion` -//! trait with a concrete `ActivationFunc` and `CostFunc`. +//! trait with a concrete `CostFunc`. pub mod net_layer; @@ -62,7 +62,7 @@ use self::net_layer::NetLayer; /// Neural Network Model /// -/// The Neural Network struct specifies a Criterion and +/// The Neural Network struct specifies a `Criterion` and /// a gradient descent algorithm. #[derive(Debug)] pub struct NeuralNet @@ -204,8 +204,6 @@ impl NeuralNet /// use rusty_machine::learning::toolkit::activ_fn::Sigmoid; /// use rusty_machine::learning::optim::grad_desc::StochasticGD; /// - /// use std::clone::Clone; - /// /// // Create a new neural net /// let mut net = NeuralNet::new(BCECriterion::default(), StochasticGD::default()); /// diff --git a/src/learning/nnet/net_layer.rs b/src/learning/nnet/net_layer.rs index 1278b099..a9f734d8 100644 --- a/src/learning/nnet/net_layer.rs +++ b/src/learning/nnet/net_layer.rs @@ -40,8 +40,8 @@ pub trait NetLayer : Debug { /// /// Represents a fully connected layer with optional bias term /// -/// The parameters are a matrix of weights of size I x O -/// where O is the dimensionality of the output and I the dimensionality of the input +/// The parameters are a matrix of weights of size I x N +/// where N is the dimensionality of the output and I the dimensionality of the input #[derive(Debug, Clone, Copy)] pub struct Linear { /// The number of dimensions of the input @@ -62,7 +62,7 @@ impl Linear { } } - /// Construct a Linear layer with a bias term + /// Construct a Linear layer without a bias term pub fn without_bias(input_size: usize, output_size: usize) -> Linear { Linear { input_size: input_size, @@ -72,6 +72,31 @@ impl 
Linear { } } +fn remove_first_col(mat: Matrix) -> Matrix +{ + let rows = mat.rows(); + let cols = mat.cols(); + let mut data = mat.into_vec(); + + let len = data.len(); + let mut del = 0; + { + let v = &mut *data; + + for i in 0..len { + if i % cols == 0 { + del += 1; + } else if del > 0 { + v.swap(i - del, i); + } + } + } + if del > 0 { + data.truncate(len - del); + } + Matrix::new(rows, cols - 1, data) +} + impl NetLayer for Linear { /// Computes a matrix product /// @@ -97,9 +122,7 @@ impl NetLayer for Linear { debug_assert_eq!(out_grad.cols(), params.cols()); let gradient = out_grad * &params.transpose(); if self.has_bias { - let rows = gradient.rows(); - let cols = gradient.cols() - 1; - gradient.sub_slice([0, 1], rows, cols).into() + remove_first_col(gradient) } else { gradient } From 34ea5d3e6a3bbda0679ff8e5d94891cca6f0a6ca Mon Sep 17 00:00:00 2001 From: NivenT Date: Fri, 24 Feb 2017 22:23:02 -0800 Subject: [PATCH 28/28] small changes --- src/learning/nnet/mod.rs | 33 ++++++++++++++++++--------------- src/learning/nnet/net_layer.rs | 2 +- 2 files changed, 19 insertions(+), 16 deletions(-) diff --git a/src/learning/nnet/mod.rs b/src/learning/nnet/mod.rs index 80856a2e..5d0356ff 100644 --- a/src/learning/nnet/mod.rs +++ b/src/learning/nnet/mod.rs @@ -398,30 +398,33 @@ impl BaseNeuralNet { return Ok(inputs.clone()); } - let mut ptr = self.weights.as_ptr(); let mut outputs = unsafe { let shape = self.layers[0].param_shape(); - let slice = MatrixSlice::from_raw_parts(ptr, + let slice = MatrixSlice::from_raw_parts(self.weights.as_ptr(), shape.0, shape.1, shape.1); - ptr = ptr.offset(self.layers[0].num_params() as isize); try!(self.layers[0].forward(inputs, slice)) }; + + let mut index = self.layers[0].num_params(); for layer in self.layers.iter().skip(1) { let shape = layer.param_shape(); - unsafe { - let slice = MatrixSlice::from_raw_parts(ptr, - shape.0, - shape.1, - shape.1); - outputs = match layer.forward(&outputs, slice) { - Ok(act) => act, - Err(_) => {return
Err(Error::new(ErrorKind::InvalidParameters, - "The network's layers do not line up correctly."))} - }; - ptr = ptr.offset(layer.num_params() as isize); - } + + let slice = unsafe { + MatrixSlice::from_raw_parts(self.weights.as_ptr().offset(index as isize), + shape.0, + shape.1, + shape.1) + }; + + outputs = match layer.forward(&outputs, slice) { + Ok(act) => act, + Err(_) => {return Err(Error::new(ErrorKind::InvalidParameters, + "The network's layers do not line up correctly."))} + }; + + index += layer.num_params(); } Ok(outputs) } diff --git a/src/learning/nnet/net_layer.rs b/src/learning/nnet/net_layer.rs index a9f734d8..8239d7ab 100644 --- a/src/learning/nnet/net_layer.rs +++ b/src/learning/nnet/net_layer.rs @@ -87,7 +87,7 @@ fn remove_first_col(mat: Matrix) -> Matrix if i % cols == 0 { del += 1; } else if del > 0 { - v.swap(i - del, i); + v[i - del] = v[i]; } } }