diff --git a/benches/examples/nnet.rs b/benches/examples/nnet.rs index 0c1848da..d67e434d 100644 --- a/benches/examples/nnet.rs +++ b/benches/examples/nnet.rs @@ -5,6 +5,7 @@ use std::vec::Vec; use rusty_machine::learning::nnet::{NeuralNet, BCECriterion}; use rusty_machine::learning::toolkit::regularization::Regularization; +use rusty_machine::learning::toolkit::activ_fn::Sigmoid; use rusty_machine::learning::optim::grad_desc::StochasticGD; use rusty_machine::linalg::Matrix; @@ -51,7 +52,7 @@ fn nnet_and_gate_train(b: &mut Bencher) { let criterion = BCECriterion::new(Regularization::L2(0.)); b.iter(|| { - let mut model = black_box(NeuralNet::new(layers, criterion, StochasticGD::default())); + let mut model = black_box(NeuralNet::mlp(layers, criterion, StochasticGD::default(), Sigmoid)); let _ = black_box(model.train(&inputs, &targets).unwrap()); }) } @@ -62,7 +63,7 @@ fn nnet_and_gate_predict(b: &mut Bencher) { let layers = &[2, 1]; let criterion = BCECriterion::new(Regularization::L2(0.)); - let mut model = NeuralNet::new(layers, criterion, StochasticGD::default()); + let mut model = NeuralNet::mlp(layers, criterion, StochasticGD::default(), Sigmoid); let _ = model.train(&inputs, &targets); b.iter(|| { diff --git a/examples/nnet-and_gate.rs b/examples/nnet-and_gate.rs index b4c6b156..9fd7ac5e 100644 --- a/examples/nnet-and_gate.rs +++ b/examples/nnet-and_gate.rs @@ -6,6 +6,7 @@ use std::vec::Vec; use rusty_machine::learning::nnet::{NeuralNet, BCECriterion}; use rusty_machine::learning::toolkit::regularization::Regularization; +use rusty_machine::learning::toolkit::activ_fn::Sigmoid; use rusty_machine::learning::optim::grad_desc::StochasticGD; use rusty_machine::linalg::Matrix; @@ -41,7 +42,9 @@ fn main() { let layers = &[2, 1]; let criterion = BCECriterion::new(Regularization::L2(0.)); - let mut model = NeuralNet::new(layers, criterion, StochasticGD::default()); + // Create a multilayer perceptron with an input layer of size 2 and output layer of size 1 + // Uses 
a Sigmoid activation function and uses Stochastic gradient descent for training + let mut model = NeuralNet::mlp(layers, criterion, StochasticGD::default(), Sigmoid); println!("Training..."); // Our train function returns a Result<(), E> diff --git a/src/learning/nnet.rs b/src/learning/nnet/mod.rs similarity index 51% rename from src/learning/nnet.rs rename to src/learning/nnet/mod.rs index 3d926465..5d0356ff 100644 --- a/src/learning/nnet.rs +++ b/src/learning/nnet/mod.rs @@ -7,6 +7,7 @@ //! ``` //! use rusty_machine::learning::nnet::{NeuralNet, BCECriterion}; //! use rusty_machine::learning::toolkit::regularization::Regularization; +//! use rusty_machine::learning::toolkit::activ_fn::Sigmoid; //! use rusty_machine::learning::optim::grad_desc::StochasticGD; //! use rusty_machine::linalg::Matrix; //! use rusty_machine::learning::SupModel; @@ -22,8 +23,8 @@ //! // Choose the BCE criterion with L2 regularization (`lambda=0.1`). //! let criterion = BCECriterion::new(Regularization::L2(0.1)); //! -//! // We will just use the default stochastic gradient descent. -//! let mut model = NeuralNet::new(layers, criterion, StochasticGD::default()); +//! // We will create a multilayer perceptron and just use the default stochastic gradient descent. +//! let mut model = NeuralNet::mlp(layers, criterion, StochasticGD::default(), Sigmoid); //! //! // Train the model! //! model.train(&inputs, &targets).unwrap(); @@ -36,12 +37,16 @@ //! //! The neural networks are specified via a criterion - similar to //! [Torch](https://github.com/torch/nn/blob/master/doc/criterion.md). -//! The criterions combine an activation function and a cost function. +//! The criterions specify a cost function and any regularization. //! //! You can define your own criterion by implementing the `Criterion` -//! trait with a concrete `ActivationFunc` and `CostFunc`. +//! trait with a concrete `CostFunc`. 
-use linalg::{Matrix, MatrixSlice, BaseMatrix, BaseMatrixMut}; + +pub mod net_layer; + +use linalg::{Matrix, MatrixSlice}; +use rulinalg::utils; use learning::{LearningResult, SupModel}; use learning::error::{Error, ErrorKind}; @@ -53,28 +58,27 @@ use learning::toolkit::regularization::Regularization; use learning::optim::{Optimizable, OptimAlgorithm}; use learning::optim::grad_desc::StochasticGD; -use rand::thread_rng; -use rand::distributions::{Sample, range}; +use self::net_layer::NetLayer; /// Neural Network Model /// -/// The Neural Network struct specifies a Criterion and +/// The Neural Network struct specifies a `Criterion` and /// a gradient descent algorithm. #[derive(Debug)] -pub struct NeuralNet<'a, T, A> +pub struct NeuralNet where T: Criterion, - A: OptimAlgorithm> + A: OptimAlgorithm> { - base: BaseNeuralNet<'a, T>, + base: BaseNeuralNet, alg: A, } /// Supervised learning for the Neural Network. /// /// The model is trained using back propagation. -impl<'a, T, A> SupModel, Matrix> for NeuralNet<'a, T, A> +impl SupModel, Matrix> for NeuralNet where T: Criterion, - A: OptimAlgorithm> + A: OptimAlgorithm> { /// Predict neural network output using forward propagation. fn predict(&self, inputs: &Matrix) -> LearningResult> { @@ -89,7 +93,7 @@ impl<'a, T, A> SupModel, Matrix> for NeuralNet<'a, T, A> } } -impl<'a> NeuralNet<'a, BCECriterion, StochasticGD> { +impl NeuralNet { /// Creates a neural network with the specified layer sizes. /// /// The layer sizes slice should include the input, hidden layers, and output layer sizes. 
@@ -108,17 +112,36 @@ impl<'a> NeuralNet<'a, BCECriterion, StochasticGD> { /// ``` pub fn default(layer_sizes: &[usize]) -> NeuralNet { NeuralNet { - base: BaseNeuralNet::default(layer_sizes), + base: BaseNeuralNet::default(layer_sizes, activ_fn::Sigmoid), alg: StochasticGD::default(), } } } -impl<'a, T, A> NeuralNet<'a, T, A> +impl NeuralNet where T: Criterion, - A: OptimAlgorithm> + A: OptimAlgorithm> { - /// Create a new neural network with the specified layer sizes. + /// Create a new neural network with no layers + /// + /// # Examples + /// + /// ``` + /// use rusty_machine::learning::nnet::BCECriterion; + /// use rusty_machine::learning::nnet::NeuralNet; + /// use rusty_machine::learning::optim::grad_desc::StochasticGD; + /// + /// // Create an empty neural net + /// let mut net = NeuralNet::new(BCECriterion::default(), StochasticGD::default()); + /// ``` + pub fn new(criterion: T, alg: A) -> NeuralNet { + NeuralNet { + base: BaseNeuralNet::new(criterion), + alg: alg, + } + } + + /// Create a multilayer perceptron with the specified layer sizes. /// /// The layer sizes slice should include the input, hidden layers, and output layer sizes. /// The type of activation function must be specified. @@ -130,19 +153,71 @@ impl<'a, T, A> NeuralNet<'a, T, A> /// ``` /// use rusty_machine::learning::nnet::BCECriterion; /// use rusty_machine::learning::nnet::NeuralNet; + /// use rusty_machine::learning::toolkit::activ_fn::Sigmoid; /// use rusty_machine::learning::optim::grad_desc::StochasticGD; /// /// // Create a neural net with 4 layers, 3 neurons in each. 
/// let layers = &[3; 4]; - /// let mut net = NeuralNet::new(layers, BCECriterion::default(), StochasticGD::default()); + /// let mut net = NeuralNet::mlp(layers, BCECriterion::default(), StochasticGD::default(), Sigmoid); /// ``` - pub fn new(layer_sizes: &'a [usize], criterion: T, alg: A) -> NeuralNet<'a, T, A> { + pub fn mlp(layer_sizes: &[usize], criterion: T, alg: A, activ_fn: U) -> NeuralNet + where U: ActivationFunc + 'static { NeuralNet { - base: BaseNeuralNet::new(layer_sizes, criterion), + base: BaseNeuralNet::mlp(layer_sizes, criterion, activ_fn), alg: alg, } } + /// Adds the specified layer to the end of the network + /// + /// # Examples + /// + /// ``` + /// use rusty_machine::linalg::BaseMatrix; + /// use rusty_machine::learning::nnet::BCECriterion; + /// use rusty_machine::learning::nnet::NeuralNet; + /// use rusty_machine::learning::nnet::net_layer::Linear; + /// use rusty_machine::learning::optim::grad_desc::StochasticGD; + /// + /// // Create a new neural net + /// let mut net = NeuralNet::new(BCECriterion::default(), StochasticGD::default()); + /// + /// // Give net an input layer of size 3, hidden layer of size 4, and output layer of size 5 + /// // This net will not apply any activation function to the Linear layer outputs + /// net.add(Box::new(Linear::new(3, 4))) + /// .add(Box::new(Linear::new(4, 5))); + /// ``` + pub fn add<'a>(&'a mut self, layer: Box) -> &'a mut NeuralNet { + self.base.add(layer); + self + } + + /// Adds multiple layers to the end of the network + /// + /// # Examples + /// + /// ``` + /// use rusty_machine::linalg::BaseMatrix; + /// use rusty_machine::learning::nnet::BCECriterion; + /// use rusty_machine::learning::nnet::NeuralNet; + /// use rusty_machine::learning::nnet::net_layer::{NetLayer, Linear}; + /// use rusty_machine::learning::toolkit::activ_fn::Sigmoid; + /// use rusty_machine::learning::optim::grad_desc::StochasticGD; + /// + /// // Create a new neural net + /// let mut net = 
NeuralNet::new(BCECriterion::default(), StochasticGD::default()); + /// + /// let linear_sig: Vec> = vec![Box::new(Linear::new(5, 5)), Box::new(Sigmoid)]; + /// + /// // Give net a layer of size 5, followed by a Sigmoid activation function + /// net.add_layers(linear_sig); + /// ``` + pub fn add_layers<'a, U>(&'a mut self, layers: U) -> &'a mut NeuralNet + where U: IntoIterator> { + self.base.add_layers(layers); + self + } + /// Gets matrix of weights between specified layer and forward layer. /// /// # Examples @@ -168,86 +243,84 @@ impl<'a, T, A> NeuralNet<'a, T, A> /// Base Neural Network struct /// -/// This struct cannot be instantianated and is used internally only. +/// This struct cannot be instantiated and is used internally only. #[derive(Debug)] -pub struct BaseNeuralNet<'a, T: Criterion> { - layer_sizes: &'a [usize], +pub struct BaseNeuralNet { + layers: Vec>, weights: Vec, criterion: T, } -impl<'a> BaseNeuralNet<'a, BCECriterion> { +impl BaseNeuralNet { /// Creates a base neural network with the specified layer sizes. - fn default(layer_sizes: &[usize]) -> BaseNeuralNet { - BaseNeuralNet::new(layer_sizes, BCECriterion::default()) + fn default(layer_sizes: &[usize], activ_fn: U) -> BaseNeuralNet + where U: ActivationFunc + 'static { + BaseNeuralNet::mlp(layer_sizes, BCECriterion::default(), activ_fn) } } -impl<'a, T: Criterion> BaseNeuralNet<'a, T> { - /// Create a new base neural network with the specified layer sizes. - fn new(layer_sizes: &[usize], criterion: T) -> BaseNeuralNet { +impl BaseNeuralNet { + /// Create a base neural network with no layers + fn new(criterion: T) -> BaseNeuralNet { BaseNeuralNet { - layer_sizes: layer_sizes, - weights: BaseNeuralNet::::create_weights(layer_sizes), - criterion: criterion, + layers: Vec::new(), + weights: Vec::new(), + criterion: criterion } + } + + /// Create a multilayer perceptron with the specified layer sizes. 
+ fn mlp(layer_sizes: &[usize], criterion: T, activ_fn: U) -> BaseNeuralNet + where U: ActivationFunc + 'static { + let mut mlp = BaseNeuralNet { + layers: Vec::with_capacity(2*(layer_sizes.len()-1)), + weights: Vec::new(), + criterion: criterion + }; + for shape in layer_sizes.windows(2) { + mlp.add(Box::new(net_layer::Linear::new(shape[0], shape[1]))); + mlp.add(Box::new(activ_fn.clone())); + } + mlp + } + + /// Adds the specified layer to the end of the network + fn add<'a>(&'a mut self, layer: Box) -> &'a mut BaseNeuralNet { + self.weights.extend_from_slice(&layer.default_params()); + self.layers.push(layer); + self } - /// Creates initial weights for all neurons in the network. - fn create_weights(layer_sizes: &[usize]) -> Vec { - let mut between = range::Range::new(0f64, 1f64); - let mut rng = thread_rng(); - layer_sizes.windows(2) - .flat_map(|w| { - let l_in = w[0] + 1; - let l_out = w[1]; - let eps_init = (6f64 / (l_in + l_out) as f64).sqrt(); - (0..l_in * l_out) - .map(|_i| (between.sample(&mut rng) * 2f64 * eps_init) - eps_init) - .collect::>() - }) - .collect() + /// Adds multiple layers to the end of the network + fn add_layers<'a, U>(&'a mut self, layers: U) -> &'a mut BaseNeuralNet + where U: IntoIterator> + { + for layer in layers { + self.add(layer); + } + self } - /// Gets matrix of weights between specified layer and forward layer for the weights. + /// Gets the matrix of weights for the specified layer from the given weights. fn get_layer_weights(&self, weights: &[f64], idx: usize) -> MatrixSlice { - debug_assert!(idx < self.layer_sizes.len() - 1); + debug_assert!(idx < self.layers.len()); // Check that the weights are the right size. 
- let mut full_size = 0usize; - for l in 0..self.layer_sizes.len() - 1 { - full_size += (self.layer_sizes[l] + 1) * self.layer_sizes[l + 1]; - } + let full_size: usize = self.layers.iter().map(|l| l.num_params()).sum(); debug_assert_eq!(full_size, weights.len()); - let mut start = 0usize; - - for l in 0..idx { - start += (self.layer_sizes[l] + 1) * self.layer_sizes[l + 1] - } + let start: usize = self.layers.iter().take(idx).map(|l| l.num_params()).sum(); + let shape = self.layers[idx].param_shape(); unsafe { MatrixSlice::from_raw_parts(weights.as_ptr().offset(start as isize), - self.layer_sizes[idx] + 1, - self.layer_sizes[idx + 1], - self.layer_sizes[idx + 1]) + shape.0, + shape.1, + shape.1) } - - } - - /// Gets matrix of weights between specified layer and forward layer - /// for the base model. - fn get_net_weights(&self, idx: usize) -> MatrixSlice { - self.get_layer_weights(&self.weights[..], idx) - } - - /// Gets the weights for a layer excluding the bias weights. - fn get_non_bias_weights(&self, weights: &[f64], idx: usize) -> MatrixSlice { - let layer_weights = self.get_layer_weights(weights, idx); - layer_weights.sub_slice([1, 0], layer_weights.rows() - 1, layer_weights.cols()) } /// Compute the gradient using the back propagation algorithm. 
@@ -256,118 +329,110 @@ impl<'a, T: Criterion> BaseNeuralNet<'a, T> { inputs: &Matrix, targets: &Matrix) -> (f64, Vec) { - assert_eq!(inputs.cols(), self.layer_sizes[0]); - - let mut forward_weights = Vec::with_capacity(self.layer_sizes.len() - 1); - let mut activations = Vec::with_capacity(self.layer_sizes.len()); - - let net_data = Matrix::ones(inputs.rows(), 1).hcat(inputs); - - activations.push(net_data.clone()); + let mut gradients = Vec::with_capacity(weights.len()); + unsafe { + gradients.set_len(weights.len()); + } + // activations[i] is the output of layer[i] + let mut activations = Vec::with_capacity(self.layers.len()); + // params[i] is the weights for layer[i] + let mut params = Vec::with_capacity(self.layers.len()); // Forward propagation - { - let mut z = net_data * self.get_layer_weights(weights, 0); - forward_weights.push(z.clone()); - - for l in 1..self.layer_sizes.len() - 1 { - let mut a = self.criterion.activate(z.clone()); - let ones = Matrix::ones(a.rows(), 1); - - a = ones.hcat(&a); - - z = &a * self.get_layer_weights(weights, l); - activations.push(a); - forward_weights.push(z.clone()); - } - - activations.push(self.criterion.activate(z)); + + let mut index = 0; + for (i, layer) in self.layers.iter().enumerate() { + let shape = layer.param_shape(); + + let slice = unsafe { + MatrixSlice::from_raw_parts(weights.as_ptr().offset(index as isize), + shape.0, + shape.1, + shape.1) + }; + + let output = if i == 0 { + layer.forward(inputs, slice).unwrap() + } else { + layer.forward(activations.last().unwrap(), slice).unwrap() + }; + + activations.push(output); + params.push(slice); + index += layer.num_params(); } + let output = activations.last().unwrap(); - let mut deltas = Vec::with_capacity(self.layer_sizes.len() - 1); // Backward propagation - { - let z = forward_weights[self.layer_sizes.len() - 2].clone(); - let g = self.criterion.grad_activ(z); - - // Take GRAD_cost to compute this delta. 
- let mut delta = self.criterion - .cost_grad(&activations[self.layer_sizes.len() - 1], targets) - .elemul(&g); - - deltas.push(delta.clone()); - - for l in (1..self.layer_sizes.len() - 1).rev() { - let mut z = forward_weights[l - 1].clone(); - let ones = Matrix::ones(z.rows(), 1); - z = ones.hcat(&z); - - let g = self.criterion.grad_activ(z); - delta = (delta * Matrix::from(self.get_layer_weights(weights, l)).transpose()) - .elemul(&g); - - let non_one_rows = &(1..delta.cols()).collect::>()[..]; - delta = delta.select_cols(non_one_rows); - deltas.push(delta.clone()); - } + + // The gradient with respect to the current layer's output + let mut out_grad = self.criterion.cost_grad(output, targets); + // at this point index == weights.len() + for (i, layer) in self.layers.iter().enumerate().rev() { + let activation = if i == 0 {inputs} else {&activations[i-1]}; + let result = &activations[i]; + index -= layer.num_params(); + + let grad_params = &mut gradients[index..index+layer.num_params()]; + grad_params.copy_from_slice(layer.back_params(&out_grad, activation, result, params[i]).data()); + + out_grad = layer.back_input(&out_grad, activation, result, params[i]); } - let mut gradients = Vec::with_capacity(weights.len()); - - for (l, activ_item) in activations.iter().take(self.layer_sizes.len() - 1).enumerate() { - // Compute the gradient - let mut g = deltas[self.layer_sizes.len() - 2 - l].transpose() * activ_item; - - // Add the regularized gradient - if self.criterion.is_regularized() { - let layer = l; - let non_bias_weights = self.get_non_bias_weights(weights, layer); - let zeros = Matrix::zeros(1, non_bias_weights.cols()); - g += zeros.vcat(&self.criterion.reg_cost_grad(non_bias_weights)); - } - - gradients.append(&mut (g / inputs.rows() as f64).into_vec()); - } - - // Compute the cost - let mut cost = self.criterion.cost(&activations[activations.len() - 1], targets); - - // Add the regularized cost + let mut cost = self.criterion.cost(output, targets); if 
self.criterion.is_regularized() { - for i in 0..self.layer_sizes.len() - 1 { - cost += self.criterion.reg_cost(self.get_non_bias_weights(weights, i)); - } + let all_params = unsafe { + MatrixSlice::from_raw_parts(weights.as_ptr(), weights.len(), 1, 1) + }; + utils::in_place_vec_bin_op(&mut gradients, + self.criterion.reg_cost_grad(all_params).data(), + |x, &y| *x = *x + y); + cost += self.criterion.reg_cost(all_params); } - (cost, gradients) } /// Forward propagation of the model weights to get the outputs. fn forward_prop(&self, inputs: &Matrix) -> LearningResult> { - if inputs.cols() != self.layer_sizes[0] { - Err(Error::new(ErrorKind::InvalidData, - "The input data dimensions must match the first layer.")) - } else { - let net_data = Matrix::ones(inputs.rows(), 1).hcat(inputs); - - let mut z = net_data * self.get_net_weights(0); - let mut a = self.criterion.activate(z.clone()); - - for l in 1..self.layer_sizes.len() - 1 { - let ones = Matrix::ones(a.rows(), 1); - a = ones.hcat(&a); - z = a * self.get_net_weights(l); - a = self.criterion.activate(z.clone()); - } - - Ok(a) + if self.layers.is_empty() { + return Ok(inputs.clone()); + } + + let mut outputs = unsafe { + let shape = self.layers[0].param_shape(); + let slice = MatrixSlice::from_raw_parts(self.weights.as_ptr(), + shape.0, + shape.1, + shape.1); + try!(self.layers[0].forward(inputs, slice)) + }; + + let mut index = self.layers[0].num_params(); + for layer in self.layers.iter().skip(1) { + let shape = layer.param_shape(); + + let slice = unsafe { + MatrixSlice::from_raw_parts(self.weights.as_ptr().offset(index as isize), + shape.0, + shape.1, + shape.1) + }; + + outputs = match layer.forward(&outputs, slice) { + Ok(act) => act, + Err(_) => {return Err(Error::new(ErrorKind::InvalidParameters, + "The network's layers do not line up correctly."))} + }; + + index += layer.num_params(); } + Ok(outputs) } } /// Compute the gradient of the Neural Network using the /// back propagation algorithm. 
-impl<'a, T: Criterion> Optimizable for BaseNeuralNet<'a, T> { +impl Optimizable for BaseNeuralNet { type Inputs = Matrix; type Targets = Matrix; @@ -385,21 +450,9 @@ impl<'a, T: Criterion> Optimizable for BaseNeuralNet<'a, T> { /// /// Specifies an activation function and a cost function. pub trait Criterion { - /// The activation function for the criterion. - type ActFunc: ActivationFunc; /// The cost function for the criterion. type Cost: CostFunc>; - /// The activation function applied to a matrix. - fn activate(&self, mat: Matrix) -> Matrix { - mat.apply(&Self::ActFunc::func) - } - - /// The gradient of the activation function applied to a matrix. - fn grad_activ(&self, mat: Matrix) -> Matrix { - mat.apply(&Self::ActFunc::func_grad) - } - /// The cost function. /// /// Returns a scalar cost. @@ -462,7 +515,6 @@ pub struct BCECriterion { } impl Criterion for BCECriterion { - type ActFunc = activ_fn::Sigmoid; type Cost = cost_fn::CrossEntropyError; fn regularization(&self) -> Regularization { @@ -504,7 +556,6 @@ pub struct MSECriterion { } impl Criterion for MSECriterion { - type ActFunc = activ_fn::Linear; type Cost = cost_fn::MeanSqError; fn regularization(&self) -> Regularization { diff --git a/src/learning/nnet/net_layer.rs b/src/learning/nnet/net_layer.rs new file mode 100644 index 00000000..8239d7ab --- /dev/null +++ b/src/learning/nnet/net_layer.rs @@ -0,0 +1,185 @@ +//! 
Neural Network Layers + +use linalg::{Matrix, MatrixSlice, BaseMatrix}; + +use learning::LearningResult; +use learning::error::{Error, ErrorKind}; +use learning::toolkit::activ_fn::ActivationFunc; + +use rand::thread_rng; +use rand::distributions::Sample; +use rand::distributions::normal::Normal; + +use std::fmt::Debug; + +/// Trait for neural net layers +pub trait NetLayer : Debug { + /// The result of propagating data forward through this layer + fn forward(&self, input: &Matrix, params: MatrixSlice) -> LearningResult>; + + /// The gradient of the output of this layer with respect to its input + fn back_input(&self, out_grad: &Matrix, input: &Matrix, output: &Matrix, params: MatrixSlice) -> Matrix; + + /// The gradient of the output of this layer with respect to its parameters + fn back_params(&self, out_grad: &Matrix, input: &Matrix, output: &Matrix, params: MatrixSlice) -> Matrix; + + /// The default value of the parameters of this layer before training + fn default_params(&self) -> Vec; + + /// The shape of the parameters used by this layer + fn param_shape(&self) -> (usize, usize); + + /// The number of parameters used by this layer + fn num_params(&self) -> usize { + let shape = self.param_shape(); + shape.0 * shape.1 + } +} + +/// Linear network layer +/// +/// Represents a fully connected layer with optional bias term +/// +/// The parameters are a matrix of weights of size I x N +/// where N is the dimensionality of the output and I the dimensionality of the input +#[derive(Debug, Clone, Copy)] +pub struct Linear { + /// The number of dimensions of the input + input_size: usize, + /// The number of dimensions of the output + output_size: usize, + /// Whether or not to include a bias term + has_bias: bool, +} + +impl Linear { + /// Construct a new Linear layer + pub fn new(input_size: usize, output_size: usize) -> Linear { + Linear { + input_size: input_size + 1, + output_size: output_size, + has_bias: true + } + } + + /// Construct a Linear layer without 
a bias term + pub fn without_bias(input_size: usize, output_size: usize) -> Linear { + Linear { + input_size: input_size, + output_size: output_size, + has_bias: false + } + } +} + +fn remove_first_col(mat: Matrix) -> Matrix +{ + let rows = mat.rows(); + let cols = mat.cols(); + let mut data = mat.into_vec(); + + let len = data.len(); + let mut del = 0; + { + let v = &mut *data; + + for i in 0..len { + if i % cols == 0 { + del += 1; + } else if del > 0 { + v[i - del] = v[i]; + } + } + } + if del > 0 { + data.truncate(len - del); + } + Matrix::new(rows, cols - 1, data) +} + +impl NetLayer for Linear { + /// Computes a matrix product + /// + /// input should have dimensions N x I + /// where N is the number of samples and I is the dimensionality of the input + fn forward(&self, input: &Matrix, params: MatrixSlice) -> LearningResult> { + if self.has_bias { + if input.cols()+1 != params.rows() { + Err(Error::new(ErrorKind::InvalidData, "The input had the wrong number of columns")) + } else { + Ok(&Matrix::ones(input.rows(), 1).hcat(input) * ¶ms) + } + } else { + if input.cols() != params.rows() { + Err(Error::new(ErrorKind::InvalidData, "The input had the wrong number of columns")) + } else { + Ok(input * ¶ms) + } + } + } + + fn back_input(&self, out_grad: &Matrix, _: &Matrix, _: &Matrix, params: MatrixSlice) -> Matrix { + debug_assert_eq!(out_grad.cols(), params.cols()); + let gradient = out_grad * ¶ms.transpose(); + if self.has_bias { + remove_first_col(gradient) + } else { + gradient + } + } + + fn back_params(&self, out_grad: &Matrix, input: &Matrix, _: &Matrix, _: MatrixSlice) -> Matrix { + debug_assert_eq!(input.rows(), out_grad.rows()); + if self.has_bias { + &Matrix::ones(input.rows(), 1).hcat(input).transpose() * out_grad + } else { + &input.transpose() * out_grad + } + } + + /// Initializes weights using Xavier initialization + /// + /// weights drawn from gaussian distribution with 0 mean and variance 2/(input_size+output_size) + fn default_params(&self) -> 
Vec { + let mut distro = Normal::new(0.0, (2.0/(self.input_size+self.output_size) as f64).sqrt()); + let mut rng = thread_rng(); + + (0..self.input_size*self.output_size).map(|_| distro.sample(&mut rng)) + .collect() + } + + fn param_shape(&self) -> (usize, usize) { + (self.input_size, self.output_size) + } +} + +impl NetLayer for T { + /// Applies the activation function to each element of the input + fn forward(&self, input: &Matrix, _: MatrixSlice) -> LearningResult> { + let mut output = Vec::with_capacity(input.rows()*input.cols()); + for val in input.data() { + output.push(T::func(*val)); + } + Ok(Matrix::new(input.rows(), input.cols(), output)) + } + + fn back_input(&self, out_grad: &Matrix, _: &Matrix, output: &Matrix, _: MatrixSlice) -> Matrix { + let mut in_grad = Vec::with_capacity(output.rows()*output.cols()); + for (y, g) in output.data().iter().zip(out_grad.data()) { + in_grad.push(T::func_grad_from_output(*y) * g); + } + Matrix::new(output.rows(), output.cols(), in_grad) + } + + fn back_params(&self, _: &Matrix, _: &Matrix, _: &Matrix, _: MatrixSlice) -> Matrix { + Matrix::new(0, 0, Vec::new()) + } + + fn default_params(&self) -> Vec { + Vec::new() + } + + fn param_shape(&self) -> (usize, usize) { + (0, 0) + } +} diff --git a/src/learning/toolkit/activ_fn.rs b/src/learning/toolkit/activ_fn.rs index 0ffe4569..8529898e 100644 --- a/src/learning/toolkit/activ_fn.rs +++ b/src/learning/toolkit/activ_fn.rs @@ -8,14 +8,20 @@ //! You can also create your own custom activation Functions for use in your models. //! Just create a unit struct implementing the `ActivationFunc` trait. +use std::fmt::Debug; + /// Trait for activation functions in models. -pub trait ActivationFunc { +pub trait ActivationFunc: Clone + Debug { /// The activation function. fn func(x: f64) -> f64; /// The gradient of the activation function. fn func_grad(x: f64) -> f64; + /// The gradient of the activation function calculated using the output of the function. 
+ /// Calculates f'(x) given f(x) as an input + fn func_grad_from_output(y: f64) -> f64; + /// The inverse of the activation function. fn func_inv(x: f64) -> f64; } @@ -39,6 +45,10 @@ impl ActivationFunc for Sigmoid { Self::func(x) * (1f64 - Self::func(x)) } + fn func_grad_from_output(y: f64) -> f64 { + y * (1f64 - y) + } + fn func_inv(x: f64) -> f64 { (x / (1f64 - x)).ln() } @@ -57,6 +67,10 @@ impl ActivationFunc for Linear { 1f64 } + fn func_grad_from_output(_: f64) -> f64 { + 1f64 + } + fn func_inv(x: f64) -> f64 { x } @@ -75,7 +89,34 @@ impl ActivationFunc for Exp { Self::func(x) } + fn func_grad_from_output(y: f64) -> f64 { + y + } + fn func_inv(x: f64) -> f64 { x.ln() } } + +/// Hyperbolic tangent activation function +#[derive(Clone, Copy, Debug)] +pub struct Tanh; + +impl ActivationFunc for Tanh { + fn func(x: f64) -> f64 { + x.tanh() + } + + fn func_grad(x: f64) -> f64 { + let y = x.tanh(); + 1.0 - y*y + } + + fn func_grad_from_output(y: f64) -> f64 { + 1.0 - y*y + } + + fn func_inv(x: f64) -> f64 { + 0.5*((1.0+x)/(1.0-x)).ln() + } +} \ No newline at end of file