From 611c76f723405fe40ed1ba5c871d5e88b617aed3 Mon Sep 17 00:00:00 2001 From: Feynman Liang Date: Mon, 7 Sep 2015 13:00:32 -0700 Subject: [PATCH 01/10] Documentation and indentation fixes --- .../org/apache/spark/ml/ann/BreezeUtil.scala | 7 +- .../scala/org/apache/spark/ml/ann/Layer.scala | 199 +++++++++--------- 2 files changed, 106 insertions(+), 100 deletions(-) diff --git a/mllib/src/main/scala/org/apache/spark/ml/ann/BreezeUtil.scala b/mllib/src/main/scala/org/apache/spark/ml/ann/BreezeUtil.scala index 7429f9d652ac5..c05097cd8d58a 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/ann/BreezeUtil.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/ann/BreezeUtil.scala @@ -21,7 +21,7 @@ import breeze.linalg.{DenseMatrix => BDM, DenseVector => BDV} import com.github.fommil.netlib.BLAS.{getInstance => NativeBLAS} /** - * In-place DGEMM and DGEMV for Breeze + * In-place DGEMM and DGEMV for Breeze. */ private[ann] object BreezeUtil { @@ -29,7 +29,7 @@ private[ann] object BreezeUtil { private def transposeString(a: BDM[Double]): String = if (a.isTranspose) "T" else "N" /** - * DGEMM: C := alpha * A * B + beta * C + * DGEMM: C := alpha * A * B + beta * C. * @param alpha alpha * @param a A * @param b B @@ -37,7 +37,6 @@ private[ann] object BreezeUtil { * @param c C */ def dgemm(alpha: Double, a: BDM[Double], b: BDM[Double], beta: Double, c: BDM[Double]): Unit = { - // TODO: add code if matrices isTranspose!!! require(a.cols == b.rows, "A & B Dimension mismatch!") require(a.rows == c.rows, "A & C Dimension mismatch!") require(b.cols == c.cols, "A & C Dimension mismatch!") @@ -47,7 +46,7 @@ private[ann] object BreezeUtil { } /** - * DGEMV: y := alpha * A * x + beta * y + * DGEMV: y := alpha * A * x + beta * y. 
* @param alpha alpha * @param a A * @param x x diff --git a/mllib/src/main/scala/org/apache/spark/ml/ann/Layer.scala b/mllib/src/main/scala/org/apache/spark/ml/ann/Layer.scala index b5258ff348477..1ff093adfb72d 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/ann/Layer.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/ann/Layer.scala @@ -33,7 +33,7 @@ import org.apache.spark.util.random.XORShiftRandom */ private[ann] trait Layer extends Serializable { /** - * Returns the instance of the layer based on weights provided + * Returns the instance of the layer based on weights provided. * @param weights vector with layer weights * @param position position of weights in the vector * @return the layer model @@ -41,7 +41,7 @@ private[ann] trait Layer extends Serializable { def getInstance(weights: Vector, position: Int): LayerModel /** - * Returns the instance of the layer with random generated weights + * Returns the instance of the layer with random generated weights. * @param seed seed * @return the layer model */ @@ -55,27 +55,29 @@ private[ann] trait Layer extends Serializable { */ private[ann] trait LayerModel extends Serializable { /** - * number of weights + * Number of weights. */ val size: Int /** - * Evaluates the data (process the data through the layer) + * Evaluates the data (process the data through the layer). * @param data data * @return processed data */ def eval(data: BDM[Double]): BDM[Double] /** - * Computes the delta for back propagation + * Computes the delta (gradient of the objective function with respect to weighted inputs) for back + * propagation. * @param nextDelta delta of the next layer * @param input input data - * @return delta + * @return delta for this layer */ def prevDelta(nextDelta: BDM[Double], input: BDM[Double]): BDM[Double] /** - * Computes the gradient + * Computes the gradient of the objective function with respect to this layer's weights and + * biases. 
* @param delta delta for this layer * @param input input data * @return gradient @@ -83,14 +85,14 @@ private[ann] trait LayerModel extends Serializable { def grad(delta: BDM[Double], input: BDM[Double]): Array[Double] /** - * Returns weights for the layer in a single vector + * Returns weights for the layer in a single vector. * @return layer weights */ def weights(): Vector } /** - * Layer properties of affine transformations, that is y=A*x+b + * Layer properties of affine transformations, that is y=A*x+b. * @param numIn number of inputs * @param numOut number of outputs */ @@ -106,35 +108,41 @@ private[ann] class AffineLayer(val numIn: Int, val numOut: Int) extends Layer { } /** - * Model of Affine layer y=A*x+b + * Model of Affine layer y=A*x+b. * @param w weights (matrix A) * @param b bias (vector b) */ private[ann] class AffineLayerModel private(w: BDM[Double], b: BDV[Double]) extends LayerModel { val size = w.size + b.length - val gwb = new Array[Double](size) - private lazy val gw: BDM[Double] = new BDM[Double](w.rows, w.cols, gwb) - private lazy val gb: BDV[Double] = new BDV[Double](gwb, w.size) + val gwb = new Array[Double](size) // gradient with respect to weights + biases + private lazy val gw: BDM[Double] = new BDM[Double](w.rows, w.cols, gwb) // gradient wrt weights + private lazy val gb: BDV[Double] = new BDV[Double](gwb, w.size) // gradient wrt biases private var z: BDM[Double] = null private var d: BDM[Double] = null private var ones: BDV[Double] = null override def eval(data: BDM[Double]): BDM[Double] = { - if (z == null || z.cols != data.cols) z = new BDM[Double](w.rows, data.cols) + if (z == null || z.cols != data.cols) { + z = new BDM[Double](w.rows, data.cols) + } z(::, *) := b BreezeUtil.dgemm(1.0, w, data, 1.0, z) z } override def prevDelta(nextDelta: BDM[Double], input: BDM[Double]): BDM[Double] = { - if (d == null || d.cols != nextDelta.cols) d = new BDM[Double](w.cols, nextDelta.cols) + if (d == null || d.cols != nextDelta.cols) { + d = 
new BDM[Double](w.cols, nextDelta.cols) + } BreezeUtil.dgemm(1.0, w.t, nextDelta, 0.0, d) d } override def grad(delta: BDM[Double], input: BDM[Double]): Array[Double] = { BreezeUtil.dgemm(1.0 / input.cols, delta, input.t, 0.0, gw) - if (ones == null || ones.length != delta.cols) ones = BDV.ones[Double](delta.cols) + if (ones == null || ones.length != delta.cols) { + ones = BDV.ones[Double](delta.cols) + } BreezeUtil.dgemv(1.0 / input.cols, delta, ones, 0.0, gb) gwb } @@ -143,12 +151,12 @@ private[ann] class AffineLayerModel private(w: BDM[Double], b: BDV[Double]) exte } /** - * Fabric for Affine layer models + * Fabric for Affine layer models. */ private[ann] object AffineLayerModel { /** - * Creates a model of Affine layer + * Creates a model of Affine layer. * @param layer layer properties * @param weights vector with weights * @param position position of weights in the vector @@ -160,7 +168,7 @@ private[ann] object AffineLayerModel { } /** - * Creates a model of Affine layer + * Creates a model of Affine layer. * @param layer layer properties * @param seed seed * @return model of Affine layer @@ -171,7 +179,7 @@ private[ann] object AffineLayerModel { } /** - * Unrolls the weights from the vector + * Unrolls the weights from the vector. * @param weights vector with weights * @param position position of weights for this layer * @param numIn number of layer inputs @@ -179,10 +187,10 @@ private[ann] object AffineLayerModel { * @return matrix A and vector b */ def unroll( - weights: Vector, - position: Int, - numIn: Int, - numOut: Int): (BDM[Double], BDV[Double]) = { + weights: Vector, + position: Int, + numIn: Int, + numOut: Int): (BDM[Double], BDV[Double]) = { val weightsCopy = weights.toArray // TODO: the array is not copied to BDMs, make sure this is OK! val a = new BDM[Double](numOut, numIn, weightsCopy, position) @@ -191,7 +199,7 @@ private[ann] object AffineLayerModel { } /** - * Roll the layer weights into a vector + * Roll the layer weights into a vector. 
* @param a matrix A * @param b vector b * @return vector of weights @@ -205,7 +213,7 @@ private[ann] object AffineLayerModel { } /** - * Generate random weights for the layer + * Generate random weights for the layer. * @param numIn number of inputs * @param numOut number of outputs * @param seed seed @@ -220,26 +228,26 @@ private[ann] object AffineLayerModel { } /** - * Trait for functions and their derivatives for functional layers + * Trait for functions and their derivatives for functional layers. */ private[ann] trait ActivationFunction extends Serializable { /** - * Implements a function + * Implements in-place application of a function. * @param x input data - * @param y output data + * @param y output data, mutated to hold the result of applying the function to the given input */ def eval(x: BDM[Double], y: BDM[Double]): Unit /** - * Implements a derivative of a function (needed for the back propagation) + * Implements in-place application of the derivative of a function (needed for back propagation). * @param x input data - * @param y output data + * @param y output data, mutated to hold the function's derivative value for given input */ def derivative(x: BDM[Double], y: BDM[Double]): Unit /** - * Implements a cross entropy error of a function. + * Implements the cross entropy error of a function. * Needed if the functional layer that contains this function is the output layer * of the network. * @param target target output @@ -250,7 +258,7 @@ private[ann] trait ActivationFunction extends Serializable { def crossEntropy(target: BDM[Double], output: BDM[Double], result: BDM[Double]): Double /** - * Implements a mean squared error of a function + * Implements the mean squared error of a function. 
* @param target target output * @param output computed output * @param result intermediate result @@ -260,7 +268,7 @@ private[ann] trait ActivationFunction extends Serializable { } /** - * Implements in-place application of functions + * Implements in-place application of functions. */ private[ann] object ActivationFunction { @@ -277,10 +285,10 @@ private[ann] object ActivationFunction { } def apply( - x1: BDM[Double], - x2: BDM[Double], - y: BDM[Double], - func: (Double, Double) => Double): Unit = { + x1: BDM[Double], + x2: BDM[Double], + y: BDM[Double], + func: (Double, Double) => Double): Unit = { var i = 0 while (i < x1.rows) { var j = 0 @@ -294,7 +302,7 @@ private[ann] object ActivationFunction { } /** - * Implements SoftMax activation function + * Implements Softmax activation function. */ private[ann] class SoftmaxFunction extends ActivationFunction { override def eval(x: BDM[Double], y: BDM[Double]): Unit = { @@ -327,9 +335,9 @@ private[ann] class SoftmaxFunction extends ActivationFunction { } override def crossEntropy( - output: BDM[Double], - target: BDM[Double], - result: BDM[Double]): Double = { + output: BDM[Double], + target: BDM[Double], + result: BDM[Double]): Double = { def m(o: Double, t: Double): Double = o - t ActivationFunction(output, target, result, m) -Bsum( target :* Blog(output)) / output.cols @@ -341,12 +349,12 @@ private[ann] class SoftmaxFunction extends ActivationFunction { } override def squared(output: BDM[Double], target: BDM[Double], result: BDM[Double]): Double = { - throw new UnsupportedOperationException("Sorry, squared error is not defined for SoftMax.") + throw new UnsupportedOperationException("Sorry, squared error is not defined for Softmax.") } } /** - * Implements Sigmoid activation function + * Implements Sigmoid activation function. 
*/ private[ann] class SigmoidFunction extends ActivationFunction { override def eval(x: BDM[Double], y: BDM[Double]): Unit = { @@ -355,9 +363,9 @@ private[ann] class SigmoidFunction extends ActivationFunction { } override def crossEntropy( - output: BDM[Double], - target: BDM[Double], - result: BDM[Double]): Double = { + output: BDM[Double], + target: BDM[Double], + result: BDM[Double]): Double = { def m(o: Double, t: Double): Double = o - t ActivationFunction(output, target, result, m) -Bsum(target :* Blog(output)) / output.cols @@ -380,7 +388,7 @@ private[ann] class SigmoidFunction extends ActivationFunction { } /** - * Functional layer properties, y = f(x) + * Functional layer properties, y = f(x). * @param activationFunction activation function */ private[ann] class FunctionalLayer (val activationFunction: ActivationFunction) extends Layer { @@ -437,7 +445,7 @@ private[ann] class FunctionalLayerModel private (val activationFunction: Activat } def error(output: BDM[Double], target: BDM[Double]): (BDM[Double], Double) = { - // TODO: allow user pick error + // TODO: allow user to pick error activationFunction match { case sigmoid: SigmoidFunction => squared(output, target) case softmax: SoftmaxFunction => crossEntropy(output, target) @@ -446,7 +454,7 @@ private[ann] class FunctionalLayerModel private (val activationFunction: Activat } /** - * Fabric of functional layer models + * Fabric of functional layer models. */ private[ann] object FunctionalLayerModel { def apply(layer: FunctionalLayer): FunctionalLayerModel = @@ -454,33 +462,33 @@ private[ann] object FunctionalLayerModel { } /** - * Trait for the artificial neural network (ANN) topology properties + * Trait for the artificial neural network (ANN) topology properties. 
*/ -private[ann] trait Topology extends Serializable{ +private[ann] trait Topology extends Serializable { def getInstance(weights: Vector): TopologyModel def getInstance(seed: Long): TopologyModel } /** - * Trait for ANN topology model + * Trait for ANN topology model. */ -private[ann] trait TopologyModel extends Serializable{ +private[ann] trait TopologyModel extends Serializable { /** - * Forward propagation + * Forward propagation. * @param data input data * @return array of outputs for each of the layers */ def forward(data: BDM[Double]): Array[BDM[Double]] /** - * Prediction of the model + * Prediction of the model. * @param data input data * @return prediction */ def predict(data: Vector): Vector /** - * Computes gradient for the network + * Computes gradient for the network. * @param data input data * @param target target output * @param cumGradient cumulative gradient @@ -488,18 +496,18 @@ private[ann] trait TopologyModel extends Serializable{ * @return error */ def computeGradient(data: BDM[Double], target: BDM[Double], cumGradient: Vector, - blockSize: Int): Double + blockSize: Int): Double /** - * Returns the weights of the ANN + * Returns the weights of the ANN. * @return weights */ def weights(): Vector } /** - * Feed forward ANN - * @param layers + * Feed forward ANN. + * @param layers the layers of the neural network */ private[ann] class FeedForwardTopology private(val layers: Array[Layer]) extends Topology { override def getInstance(weights: Vector): TopologyModel = FeedForwardModel(this, weights) @@ -508,11 +516,11 @@ private[ann] class FeedForwardTopology private(val layers: Array[Layer]) extends } /** - * Factory for some of the frequently-used topologies + * Factory for some of the frequently-used topologies. */ private[ml] object FeedForwardTopology { /** - * Creates a feed forward topology from the array of layers + * Creates a feed forward topology from the array of layers. 
 * @param layers array of layers * @return feed forward topology */ @@ -521,10 +529,9 @@ private[ml] object FeedForwardTopology { } /** - * Creates a multi-layer perceptron + * Creates a multi-layer perceptron. * @param layerSizes sizes of layers including input and output size - * @param softmax wether to use SoftMax or Sigmoid function for an output layer. - * Softmax is default + * @param softmax whether to use Softmax or Sigmoid function for an output layer (default: Softmax) * @return multilayer perceptron topology */ def multiLayerPerceptron(layerSizes: Array[Int], softmax: Boolean = true): FeedForwardTopology = { @@ -561,10 +568,10 @@ private[ml] class FeedForwardModel private( } override def computeGradient( - data: BDM[Double], - target: BDM[Double], - cumGradient: Vector, - realBatchSize: Int): Double = { + data: BDM[Double], + target: BDM[Double], + cumGradient: Vector, + realBatchSize: Int): Double = { val outputs = forward(data) val deltas = new Array[BDM[Double]](layerModels.length) val L = layerModels.length - 1 @@ -624,12 +631,12 @@ private[ml] class FeedForwardModel private( } /** - * Fabric for feed forward ANN models + * Fabric for feed forward ANN models. */ private[ann] object FeedForwardModel { /** - * Creates a model from a topology and weights + * Creates a model from a topology and weights. * @param topology topology * @param weights weights * @return model @@ -646,7 +653,7 @@ private[ann] object FeedForwardModel { } /** - * Creates a model given a topology and seed + * Creates a model given a topology and seed. * @param topology topology * @param seed seed for generating the weights * @return model @@ -664,7 +671,7 @@ private[ann] object FeedForwardModel { } /** - * Neural network gradient. Does nothing but call [[LayerModel.grad()]]. 
* @param topology topology * @param dataStacker data stacker */ @@ -677,10 +684,10 @@ private[ann] class ANNGradient(topology: Topology, dataStacker: DataStacker) ext } override def compute( - data: Vector, - label: Double, - weights: Vector, - cumGradient: Vector): Double = { + data: Vector, + label: Double, + weights: Vector, + cumGradient: Vector): Double = { val (input, target, realBatchSize) = dataStacker.unstack(data) val model = topology.getInstance(weights) model.computeGradient(input, target, cumGradient, realBatchSize) @@ -700,7 +707,7 @@ private[ann] class DataStacker(stackSize: Int, inputSize: Int, outputSize: Int) extends Serializable { /** - * Stacks the data + * Stacks the data. * @param data RDD of vector pairs * @return RDD of double (always zero) and vector that contains the stacked vectors */ @@ -732,7 +739,7 @@ private[ann] class DataStacker(stackSize: Int, inputSize: Int, outputSize: Int) } /** - * Unstack the stacked vectors into matrices for batch operations + * Unstack the stacked vectors into matrices for batch operations. * @param data stacked vector * @return pair of matrices holding input and output data and the real stack size */ @@ -746,16 +753,16 @@ private[ann] class DataStacker(stackSize: Int, inputSize: Int, outputSize: Int) } /** - * Simple updater + * Simple updater. 
*/ private[ann] class ANNUpdater extends Updater { override def compute( - weightsOld: Vector, - gradient: Vector, - stepSize: Double, - iter: Int, - regParam: Double): (Vector, Double) = { + weightsOld: Vector, + gradient: Vector, + stepSize: Double, + iter: Int, + regParam: Double): (Vector, Double) = { val thisIterStepSize = stepSize val brzWeights: BV[Double] = weightsOld.toBreeze.toDenseVector Baxpy(-thisIterStepSize, gradient.toBreeze, brzWeights) @@ -764,7 +771,7 @@ private[ann] class ANNUpdater extends Updater { } /** - * MLlib-style trainer class that trains a network given the data and topology + * MLlib-style trainer class that trains a network given the data and topology. * @param topology topology of ANN * @param inputSize input size * @param outputSize output size @@ -783,13 +790,13 @@ private[ml] class FeedForwardTrainer( private var optimizer: Optimizer = LBFGSOptimizer.setConvergenceTol(1e-4).setNumIterations(100) /** - * Returns weights + * Returns weights. * @return weights */ def getWeights: Vector = _weights /** - * Sets weights + * Sets weights. * @param value weights * @return trainer */ @@ -799,7 +806,7 @@ private[ml] class FeedForwardTrainer( } /** - * Sets the stack size + * Sets the stack size. * @param value stack size * @return trainer */ @@ -810,7 +817,7 @@ private[ml] class FeedForwardTrainer( } /** - * Sets the SGD optimizer + * Sets the SGD optimizer. * @return SGD optimizer */ def SGDOptimizer: GradientDescent = { @@ -820,8 +827,8 @@ private[ml] class FeedForwardTrainer( } /** - * Sets the LBFGS optimizer - * @return LBGS optimizer + * Sets the L-BFGS optimizer. + * @return L-BFGS optimizer */ def LBFGSOptimizer: LBFGS = { val lbfgs = new LBFGS(_gradient, _updater) @@ -830,7 +837,7 @@ private[ml] class FeedForwardTrainer( } /** - * Sets the updater + * Sets the updater. * @param value updater * @return trainer */ @@ -841,7 +848,7 @@ private[ml] class FeedForwardTrainer( } /** - * Sets the gradient + * Sets the gradient. 
* @param value gradient * @return trainer */ @@ -870,7 +877,7 @@ private[ml] class FeedForwardTrainer( } /** - * Trains the ANN + * Trains the ANN. * @param data RDD of input and output vector pairs * @return model */ From 7b192db010ff660da39404009ce883f903f9980a Mon Sep 17 00:00:00 2001 From: Feynman Liang Date: Mon, 7 Sep 2015 13:02:36 -0700 Subject: [PATCH 02/10] Refactors unneeded helpers --- .../scala/org/apache/spark/ml/ann/Layer.scala | 30 +++++++------------ 1 file changed, 11 insertions(+), 19 deletions(-) diff --git a/mllib/src/main/scala/org/apache/spark/ml/ann/Layer.scala b/mllib/src/main/scala/org/apache/spark/ml/ann/Layer.scala index 1ff093adfb72d..1cc7712f1129a 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/ann/Layer.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/ann/Layer.scala @@ -841,9 +841,14 @@ private[ml] class FeedForwardTrainer( * @param value updater * @return trainer */ - def setUpdater(value: Updater): FeedForwardTrainer = { - _updater = value - updateUpdater(value) + def setUpdater(updater: Updater): FeedForwardTrainer = { + _updater = updater + optimizer match { + case lbfgs: LBFGS => lbfgs.setUpdater(updater) + case sgd: GradientDescent => sgd.setUpdater(updater) + case other => throw new UnsupportedOperationException( + s"Only LBFGS and GradientDescent are supported but got ${other.getClass}.") + } this } @@ -852,28 +857,15 @@ private[ml] class FeedForwardTrainer( * @param value gradient * @return trainer */ - def setGradient(value: Gradient): FeedForwardTrainer = { - _gradient = value - updateGradient(value) - this - } - - private[this] def updateGradient(gradient: Gradient): Unit = { + def setGradient(gradient: Gradient): FeedForwardTrainer = { + _gradient = gradient optimizer match { case lbfgs: LBFGS => lbfgs.setGradient(gradient) case sgd: GradientDescent => sgd.setGradient(gradient) case other => throw new UnsupportedOperationException( s"Only LBFGS and GradientDescent are supported but got ${other.getClass}.") } - } 
- - private[this] def updateUpdater(updater: Updater): Unit = { - optimizer match { - case lbfgs: LBFGS => lbfgs.setUpdater(updater) - case sgd: GradientDescent => sgd.setUpdater(updater) - case other => throw new UnsupportedOperationException( - s"Only LBFGS and GradientDescent are supported but got ${other.getClass}.") - } + this } /** From 12169d7236caaad0db57d941cb9999f47a947b61 Mon Sep 17 00:00:00 2001 From: Feynman Liang Date: Mon, 7 Sep 2015 13:08:05 -0700 Subject: [PATCH 03/10] More doc and style fixes --- .../scala/org/apache/spark/ml/ann/Layer.scala | 46 +++++++++++-------- 1 file changed, 26 insertions(+), 20 deletions(-) diff --git a/mllib/src/main/scala/org/apache/spark/ml/ann/Layer.scala b/mllib/src/main/scala/org/apache/spark/ml/ann/Layer.scala index 1cc7712f1129a..e5309fdc0116c 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/ann/Layer.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/ann/Layer.scala @@ -27,8 +27,7 @@ import org.apache.spark.rdd.RDD import org.apache.spark.util.random.XORShiftRandom /** - * Trait that holds Layer properties, that are needed to instantiate it. - * Implements Layer instantiation. + * Trait holding Layer properties required for instantiating a [[LayerModel]]. * */ private[ann] trait Layer extends Serializable { @@ -49,9 +48,9 @@ private[ann] trait Layer extends Serializable { } /** - * Trait that holds Layer weights (or parameters). - * Implements functions needed for forward propagation, computing delta and gradient. - * Can return weights in Vector format. + * Trait holding weights (or parameters) for a layer. + * Implements functions needed for forward and back propagation. + * Can return weights in [[Vector]] format. */ private[ann] trait LayerModel extends Serializable { /** @@ -60,24 +59,23 @@ private[ann] trait LayerModel extends Serializable { val size: Int /** - * Evaluates the data (process the data through the layer). + * Evaluates the data (process the data forwards through the layer). 
* @param data data * @return processed data */ def eval(data: BDM[Double]): BDM[Double] /** - * Computes the delta (gradient of the objective function with respect to weighted inputs) for back - * propagation. - * @param nextDelta delta of the next layer + * Computes the delta (gradient of the objective function with respect to weighted inputs) for + * back propagation. + * @param nextDelta delta for the next layer * @param input input data * @return delta for this layer */ def prevDelta(nextDelta: BDM[Double], input: BDM[Double]): BDM[Double] /** - * Computes the gradient of the objective function with respect to this layer's weights and - * biases. + * Computes the gradient of the objective function with respect to this layer's weights. * @param delta delta for this layer * @param input input data * @return gradient @@ -92,7 +90,7 @@ private[ann] trait LayerModel extends Serializable { } /** - * Layer properties of affine transformations, that is y=A*x+b. + * Layer properties for affine transformations, that is y=A*x+b. 
* @param numIn number of inputs * @param numOut number of outputs */ @@ -416,13 +414,17 @@ private[ann] class FunctionalLayerModel private (val activationFunction: Activat private lazy val dg = new Array[Double](0) override def eval(data: BDM[Double]): BDM[Double] = { - if (f == null || f.cols != data.cols) f = new BDM[Double](data.rows, data.cols) + if (f == null || f.cols != data.cols) { + f = new BDM[Double](data.rows, data.cols) + } activationFunction.eval(data, f) f } override def prevDelta(nextDelta: BDM[Double], input: BDM[Double]): BDM[Double] = { - if (d == null || d.cols != nextDelta.cols) d = new BDM[Double](nextDelta.rows, nextDelta.cols) + if (d == null || d.cols != nextDelta.cols) { + d = new BDM[Double](nextDelta.rows, nextDelta.cols) + } activationFunction.derivative(input, d) d :*= nextDelta d @@ -433,13 +435,17 @@ private[ann] class FunctionalLayerModel private (val activationFunction: Activat override def weights(): Vector = Vectors.dense(new Array[Double](0)) def crossEntropy(output: BDM[Double], target: BDM[Double]): (BDM[Double], Double) = { - if (e == null || e.cols != output.cols) e = new BDM[Double](output.rows, output.cols) + if (e == null || e.cols != output.cols) { + e = new BDM[Double](output.rows, output.cols) + } val error = activationFunction.crossEntropy(output, target, e) (e, error) } def squared(output: BDM[Double], target: BDM[Double]): (BDM[Double], Double) = { - if (e == null || e.cols != output.cols) e = new BDM[Double](output.rows, output.cols) + if (e == null || e.cols != output.cols) { + e = new BDM[Double](output.rows, output.cols) + } val error = activationFunction.squared(output, target, e) (e, error) } @@ -717,8 +723,8 @@ private[ann] class DataStacker(stackSize: Int, inputSize: Int, outputSize: Int) (0.0, Vectors.fromBreeze(BDV.vertcat( v._1.toBreeze.toDenseVector, - v._2.toBreeze.toDenseVector)) - ) } + v._2.toBreeze.toDenseVector))) + } } else { data.mapPartitions { it => it.grouped(stackSize).map { seq => @@ -838,7 
+844,7 @@ private[ml] class FeedForwardTrainer( /** * Sets the updater. - * @param value updater + * @param updater updater * @return trainer */ def setUpdater(updater: Updater): FeedForwardTrainer = { @@ -854,7 +860,7 @@ private[ml] class FeedForwardTrainer( /** * Sets the gradient. - * @param value gradient + * @param gradient gradient * @return trainer */ def setGradient(gradient: Gradient): FeedForwardTrainer = { From bc52b652541f2d6c4508662af937e0a26e207651 Mon Sep 17 00:00:00 2001 From: Feynman Liang Date: Mon, 7 Sep 2015 13:39:21 -0700 Subject: [PATCH 04/10] Cleans up documentation and uses functional code --- .../scala/org/apache/spark/ml/ann/Layer.scala | 56 ++++++------------- 1 file changed, 16 insertions(+), 40 deletions(-) diff --git a/mllib/src/main/scala/org/apache/spark/ml/ann/Layer.scala b/mllib/src/main/scala/org/apache/spark/ml/ann/Layer.scala index e5309fdc0116c..caa3288343bfd 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/ann/Layer.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/ann/Layer.scala @@ -182,17 +182,17 @@ private[ann] object AffineLayerModel { * @param position position of weights for this layer * @param numIn number of layer inputs * @param numOut number of layer outputs - * @return matrix A and vector b + * @return matrix A and vector b, both views of the data in `weights` */ def unroll( weights: Vector, position: Int, numIn: Int, numOut: Int): (BDM[Double], BDV[Double]) = { - val weightsCopy = weights.toArray + val weightsArray = weights.toArray // TODO: the array is not copied to BDMs, make sure this is OK! - val a = new BDM[Double](numOut, numIn, weightsCopy, position) - val b = new BDV[Double](weightsCopy, position + (numOut * numIn), 1, numOut) + val a = new BDM[Double](numOut, numIn, weightsArray, position) + val b = new BDV[Double](weightsArray, position + (numOut * numIn), 1, numOut) (a, b) } @@ -200,14 +200,10 @@ private[ann] object AffineLayerModel { * Roll the layer weights into a vector. 
* @param a matrix A * @param b vector b - * @return vector of weights + * @return vector of weights, a copy of the data in `a` and `b` */ def roll(a: BDM[Double], b: BDV[Double]): Vector = { - val result = new Array[Double](a.size + b.length) - // TODO: make sure that we need to copy! - System.arraycopy(a.toArray, 0, result, 0, a.size) - System.arraycopy(b.toArray, 0, result, a.size, b.length) - Vectors.dense(result) + Vectors.dense(a.toArray ++ b.toArray) } /** @@ -542,7 +538,7 @@ private[ml] object FeedForwardTopology { */ def multiLayerPerceptron(layerSizes: Array[Int], softmax: Boolean = true): FeedForwardTopology = { val layers = new Array[Layer]((layerSizes.length - 1) * 2) - for(i <- 0 until layerSizes.length - 1){ + for (i <- 0 until layerSizes.length - 1) { layers(i * 2) = new AffineLayer(layerSizes(i), layerSizes(i + 1)) layers(i * 2 + 1) = if (softmax && i == layerSizes.length - 2) { @@ -588,45 +584,25 @@ private[ml] class FeedForwardModel private( } deltas(L) = new BDM[Double](0, 0) deltas(L - 1) = newE + // backwards pass (back-propagate deltas given errors) for (i <- (L - 2) to (0, -1)) { deltas(i) = layerModels(i + 1).prevDelta(deltas(i + 1), outputs(i + 1)) } - val grads = new Array[Array[Double]](layerModels.length) - for (i <- 0 until layerModels.length) { - val input = if (i==0) data else outputs(i - 1) - grads(i) = layerModels(i).grad(deltas(i), input) + // forwards pass (forward-propagate gradients given inputs) + val grads = layerModels.zipWithIndex.map { case (layer, i) => + val input = if (i == 0) data else outputs(i - 1) + layer.grad(deltas(i), input) } - // update cumGradient + // update cumulative gradients val cumGradientArray = cumGradient.toArray - var offset = 0 - // TODO: extract roll - for (i <- 0 until grads.length) { - val gradArray = grads(i) - var k = 0 - while (k < gradArray.length) { - cumGradientArray(offset + k) += gradArray(k) - k += 1 - } - offset += gradArray.length + grads.flatten.zipWithIndex.foreach { case (newGrad, i) 
=> + cumGradientArray(i) += newGrad } newError } - // TODO: do we really need to copy the weights? they should be read-only override def weights(): Vector = { - // TODO: extract roll - var size = 0 - for (i <- 0 until layerModels.length) { - size += layerModels(i).size - } - val array = new Array[Double](size) - var offset = 0 - for (i <- 0 until layerModels.length) { - val layerWeights = layerModels(i).weights().toArray - System.arraycopy(layerWeights, 0, array, offset, layerWeights.length) - offset += layerWeights.length - } - Vectors.dense(array) + Vectors.dense(layerModels.flatMap(_.weights().toArray)) } override def predict(data: Vector): Vector = { From 84f8bea0f4a3b24f8aae6eadd98d7166311f951d Mon Sep 17 00:00:00 2001 From: Feynman Liang Date: Mon, 7 Sep 2015 14:27:53 -0700 Subject: [PATCH 05/10] Vectorizes linalg using ufuncs and vector ops --- .../scala/org/apache/spark/ml/ann/Layer.scala | 115 +++++++++--------- 1 file changed, 57 insertions(+), 58 deletions(-) diff --git a/mllib/src/main/scala/org/apache/spark/ml/ann/Layer.scala b/mllib/src/main/scala/org/apache/spark/ml/ann/Layer.scala index caa3288343bfd..0187243463b40 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/ann/Layer.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/ann/Layer.scala @@ -17,8 +17,8 @@ package org.apache.spark.ml.ann -import breeze.linalg.{*, DenseMatrix => BDM, DenseVector => BDV, Vector => BV, axpy => Baxpy, - sum => Bsum} +import breeze.generic.{MappingUFunc, UFunc} +import breeze.linalg.{DenseMatrix => BDM, DenseVector => BDV, Vector => BV, axpy => Baxpy, sum => Bsum, max => Bmax, *} import breeze.numerics.{log => Blog, sigmoid => Bsigmoid} import org.apache.spark.mllib.linalg.{Vector, Vectors} @@ -203,6 +203,7 @@ private[ann] object AffineLayerModel { * @return vector of weights, a copy of the data in `a` and `b` */ def roll(a: BDM[Double], b: BDV[Double]): Vector = { + // TODO: the array is copied to Vector, make sure this is necessary! 
Vectors.dense(a.toArray ++ b.toArray) } @@ -266,32 +267,18 @@ private[ann] trait ActivationFunction extends Serializable { */ private[ann] object ActivationFunction { - def apply(x: BDM[Double], y: BDM[Double], func: Double => Double): Unit = { - var i = 0 - while (i < x.rows) { - var j = 0 - while (j < x.cols) { - y(i, j) = func(x(i, j)) - j += 1 - } - i += 1 - } + def apply(x: BDM[Double], y: BDM[Double], func: UFunc with MappingUFunc)( + implicit impl: func.Impl[BDM[Double], BDM[Double]]): Unit = { + y := func(x) } def apply( x1: BDM[Double], x2: BDM[Double], y: BDM[Double], - func: (Double, Double) => Double): Unit = { - var i = 0 - while (i < x1.rows) { - var j = 0 - while (j < x1.cols) { - y(i, j) = func(x1(i, j), x2(i, j)) - j += 1 - } - i += 1 - } + func: UFunc with MappingUFunc)( + implicit impl: func.Impl2[BDM[Double], BDM[Double], BDM[Double]]): Unit = { + y := func(x1, x2) } } @@ -300,31 +287,11 @@ private[ann] object ActivationFunction { */ private[ann] class SoftmaxFunction extends ActivationFunction { override def eval(x: BDM[Double], y: BDM[Double]): Unit = { - var j = 0 - // find max value to make sure later that exponent is computable - while (j < x.cols) { - var i = 0 - var max = Double.MinValue - while (i < x.rows) { - if (x(i, j) > max) { - max = x(i, j) - } - i += 1 - } - var sum = 0.0 - i = 0 - while (i < x.rows) { - val res = Math.exp(x(i, j) - max) - y(i, j) = res - sum += res - i += 1 - } - i = 0 - while (i < x.rows) { - y(i, j) /= sum - i += 1 - } - j += 1 + (0 until x.cols).foreach { j => + // find max value to scale and prevent overflow during exp + val maxVal = Bmax(x(::,j)) + y(::,j) := x(::,j).map { xVal => Math.exp(xVal) - maxVal } + y(::,j) :/= Bsum(y(::,j)) } } @@ -332,16 +299,27 @@ private[ann] class SoftmaxFunction extends ActivationFunction { output: BDM[Double], target: BDM[Double], result: BDM[Double]): Double = { - def m(o: Double, t: Double): Double = o - t ActivationFunction(output, target, result, m) -Bsum( target :* 
Blog(output)) / output.cols } + private object m extends UFunc with MappingUFunc { + implicit val implDoubleDouble: Impl2[Double, Double, Double] = + new Impl2[Double, Double, Double] { + def apply(o: Double, t: Double): Double = o - t + } + } + override def derivative(x: BDM[Double], y: BDM[Double]): Unit = { - def sd(z: Double): Double = (1 - z) * z ActivationFunction(x, y, sd) } + private object sd extends UFunc with MappingUFunc { + implicit val implDouble: Impl[Double, Double] = new Impl[Double, Double] { + def apply(z: Double) = (1 - z) * z + } + } + override def squared(output: BDM[Double], target: BDM[Double], result: BDM[Double]): Double = { throw new UnsupportedOperationException("Sorry, squared error is not defined for Softmax.") } @@ -352,33 +330,54 @@ private[ann] class SoftmaxFunction extends ActivationFunction { */ private[ann] class SigmoidFunction extends ActivationFunction { override def eval(x: BDM[Double], y: BDM[Double]): Unit = { - def s(z: Double): Double = Bsigmoid(z) ActivationFunction(x, y, s) } + private object s extends UFunc with MappingUFunc { + implicit val implDouble: Impl[Double, Double] = new Impl[Double, Double] { + def apply(z: Double): Double = Bsigmoid(z) + } + } + override def crossEntropy( output: BDM[Double], target: BDM[Double], result: BDM[Double]): Double = { - def m(o: Double, t: Double): Double = o - t ActivationFunction(output, target, result, m) -Bsum(target :* Blog(output)) / output.cols } + private object m extends UFunc with MappingUFunc { + implicit val implDoubleDouble: Impl2[Double, Double, Double] = + new Impl2[Double, Double, Double] { + def apply(o: Double, t: Double): Double = o - t + } + } + override def derivative(x: BDM[Double], y: BDM[Double]): Unit = { - def sd(z: Double): Double = (1 - z) * z ActivationFunction(x, y, sd) } + private object sd extends UFunc with MappingUFunc { + implicit val implDouble: Impl[Double, Double] = + new Impl[Double, Double] { + def apply(z: Double) = (1 - z) * z + } + } + 
override def squared(output: BDM[Double], target: BDM[Double], result: BDM[Double]): Double = { - // TODO: make it readable - def m(o: Double, t: Double): Double = (o - t) ActivationFunction(output, target, result, m) - val e = Bsum(result :* result) / 2 / output.cols - def m2(x: Double, o: Double) = x * (o - o * o) + val e = (Bsum(result :* result) / 2) / output.cols ActivationFunction(result, output, result, m2) e } + + private object m2 extends UFunc with MappingUFunc { + implicit val implDoubleDouble: Impl2[Double, Double, Double] = + new Impl2[Double, Double, Double] { + def apply(x: Double, o: Double): Double = x * (o - o * o) + } + } } /** @@ -582,13 +581,13 @@ private[ml] class FeedForwardModel private( case _ => throw new UnsupportedOperationException("Non-functional layer not supported at the top") } + // backward pass (back-propagate deltas given errors) deltas(L) = new BDM[Double](0, 0) deltas(L - 1) = newE - // backwards pass (back-propagate deltas given errors) for (i <- (L - 2) to (0, -1)) { deltas(i) = layerModels(i + 1).prevDelta(deltas(i + 1), outputs(i + 1)) } - // forwards pass (forward-propagate gradients given inputs) + // forward pass (forward-propagate gradients given inputs) val grads = layerModels.zipWithIndex.map { case (layer, i) => val input = if (i == 0) data else outputs(i - 1) layer.grad(deltas(i), input) From 1f8ef665a176e0b31feaf22bb06bc32b5f050192 Mon Sep 17 00:00:00 2001 From: Feynman Liang Date: Mon, 7 Sep 2015 19:35:10 -0700 Subject: [PATCH 06/10] Cleans up typos in BreezeUtil --- mllib/src/main/scala/org/apache/spark/ml/ann/BreezeUtil.scala | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/mllib/src/main/scala/org/apache/spark/ml/ann/BreezeUtil.scala b/mllib/src/main/scala/org/apache/spark/ml/ann/BreezeUtil.scala index c05097cd8d58a..5ab775f8db332 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/ann/BreezeUtil.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/ann/BreezeUtil.scala @@ -39,7 +39,7 @@ 
private[ann] object BreezeUtil { def dgemm(alpha: Double, a: BDM[Double], b: BDM[Double], beta: Double, c: BDM[Double]): Unit = { require(a.cols == b.rows, "A & B Dimension mismatch!") require(a.rows == c.rows, "A & C Dimension mismatch!") - require(b.cols == c.cols, "A & C Dimension mismatch!") + require(b.cols == c.cols, "B & C Dimension mismatch!") NativeBLAS.dgemm(transposeString(a), transposeString(b), c.rows, c.cols, a.cols, alpha, a.data, a.offset, a.majorStride, b.data, b.offset, b.majorStride, beta, c.data, c.offset, c.rows) @@ -54,7 +54,7 @@ private[ann] object BreezeUtil { * @param y y */ def dgemv(alpha: Double, a: BDM[Double], x: BDV[Double], beta: Double, y: BDV[Double]): Unit = { - require(a.cols == x.length, "A & b Dimension mismatch!") + require(a.cols == x.length, "A & x Dimension mismatch!") NativeBLAS.dgemv(transposeString(a), a.rows, a.cols, alpha, a.data, a.offset, a.majorStride, x.data, x.offset, x.stride, beta, y.data, y.offset, y.stride) From 22ba174f1880fb99fe778bd2fafc54f0afe3ba01 Mon Sep 17 00:00:00 2001 From: Feynman Liang Date: Mon, 7 Sep 2015 19:38:09 -0700 Subject: [PATCH 07/10] Fixes style issues --- .../scala/org/apache/spark/ml/ann/Layer.scala | 28 +++++++++---------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/mllib/src/main/scala/org/apache/spark/ml/ann/Layer.scala b/mllib/src/main/scala/org/apache/spark/ml/ann/Layer.scala index 0187243463b40..24471d2f29350 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/ann/Layer.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/ann/Layer.scala @@ -289,9 +289,9 @@ private[ann] class SoftmaxFunction extends ActivationFunction { override def eval(x: BDM[Double], y: BDM[Double]): Unit = { (0 until x.cols).foreach { j => // find max value to scale and prevent overflow during exp - val maxVal = Bmax(x(::,j)) - y(::,j) := x(::,j).map { xVal => Math.exp(xVal) - maxVal } - y(::,j) :/= Bsum(y(::,j)) + val maxVal = Bmax(x(::, j)) + y(::, j) := x(::, j).map { xVal => 
Math.exp(xVal) - maxVal } + y(::, j) :/= Bsum(y(::, j)) } } @@ -299,11 +299,11 @@ private[ann] class SoftmaxFunction extends ActivationFunction { output: BDM[Double], target: BDM[Double], result: BDM[Double]): Double = { - ActivationFunction(output, target, result, m) + ActivationFunction(output, target, result, OutputMinusTarget) -Bsum( target :* Blog(output)) / output.cols } - private object m extends UFunc with MappingUFunc { + private object OutputMinusTarget extends UFunc with MappingUFunc { implicit val implDoubleDouble: Impl2[Double, Double, Double] = new Impl2[Double, Double, Double] { def apply(o: Double, t: Double): Double = o - t @@ -311,10 +311,10 @@ private[ann] class SoftmaxFunction extends ActivationFunction { } override def derivative(x: BDM[Double], y: BDM[Double]): Unit = { - ActivationFunction(x, y, sd) + ActivationFunction(x, y, SoftmaxDerivative) } - private object sd extends UFunc with MappingUFunc { + private object SoftmaxDerivative extends UFunc with MappingUFunc { implicit val implDouble: Impl[Double, Double] = new Impl[Double, Double] { def apply(z: Double) = (1 - z) * z } @@ -330,10 +330,10 @@ private[ann] class SoftmaxFunction extends ActivationFunction { */ private[ann] class SigmoidFunction extends ActivationFunction { override def eval(x: BDM[Double], y: BDM[Double]): Unit = { - ActivationFunction(x, y, s) + ActivationFunction(x, y, Sigmoid) } - private object s extends UFunc with MappingUFunc { + private object Sigmoid extends UFunc with MappingUFunc { implicit val implDouble: Impl[Double, Double] = new Impl[Double, Double] { def apply(z: Double): Double = Bsigmoid(z) } @@ -343,11 +343,11 @@ private[ann] class SigmoidFunction extends ActivationFunction { output: BDM[Double], target: BDM[Double], result: BDM[Double]): Double = { - ActivationFunction(output, target, result, m) + ActivationFunction(output, target, result, OutputMinusTarget) -Bsum(target :* Blog(output)) / output.cols } - private object m extends UFunc with MappingUFunc 
{ + private object OutputMinusTarget extends UFunc with MappingUFunc { implicit val implDoubleDouble: Impl2[Double, Double, Double] = new Impl2[Double, Double, Double] { def apply(o: Double, t: Double): Double = o - t @@ -355,10 +355,10 @@ private[ann] class SigmoidFunction extends ActivationFunction { } override def derivative(x: BDM[Double], y: BDM[Double]): Unit = { - ActivationFunction(x, y, sd) + ActivationFunction(x, y, SigmoidDerivative) } - private object sd extends UFunc with MappingUFunc { + private object SigmoidDerivative extends UFunc with MappingUFunc { implicit val implDouble: Impl[Double, Double] = new Impl[Double, Double] { def apply(z: Double) = (1 - z) * z @@ -366,7 +366,7 @@ private[ann] class SigmoidFunction extends ActivationFunction { } override def squared(output: BDM[Double], target: BDM[Double], result: BDM[Double]): Double = { - ActivationFunction(output, target, result, m) + ActivationFunction(output, target, result, OutputMinusTarget) val e = (Bsum(result :* result) / 2) / output.cols ActivationFunction(result, output, result, m2) e From abdba81135154aab3065fe8316ca51b1cd03885f Mon Sep 17 00:00:00 2001 From: Feynman Liang Date: Mon, 7 Sep 2015 22:29:42 -0700 Subject: [PATCH 08/10] Fixes style errors --- mllib/src/main/scala/org/apache/spark/ml/ann/Layer.scala | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/mllib/src/main/scala/org/apache/spark/ml/ann/Layer.scala b/mllib/src/main/scala/org/apache/spark/ml/ann/Layer.scala index 24471d2f29350..be50ba821784a 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/ann/Layer.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/ann/Layer.scala @@ -368,11 +368,11 @@ private[ann] class SigmoidFunction extends ActivationFunction { override def squared(output: BDM[Double], target: BDM[Double], result: BDM[Double]): Double = { ActivationFunction(output, target, result, OutputMinusTarget) val e = (Bsum(result :* result) / 2) / output.cols - ActivationFunction(result, output, 
result, m2) + ActivationFunction(result, output, result, ResultTimesOutputMinusOutputSquared) e } - private object m2 extends UFunc with MappingUFunc { + private object ResultTimesOutputMinusOutputSquared extends UFunc with MappingUFunc { implicit val implDoubleDouble: Impl2[Double, Double, Double] = new Impl2[Double, Double, Double] { def apply(x: Double, o: Double): Double = x * (o - o * o) From f6731ff10a5bf671d1d727cf590bccd7fb6e13c1 Mon Sep 17 00:00:00 2001 From: Feynman Liang Date: Tue, 8 Sep 2015 10:09:33 -0700 Subject: [PATCH 09/10] Reverts noops and fixes unit tests --- .../scala/org/apache/spark/ml/ann/Layer.scala | 39 ++++++++++++------- 1 file changed, 24 insertions(+), 15 deletions(-) diff --git a/mllib/src/main/scala/org/apache/spark/ml/ann/Layer.scala b/mllib/src/main/scala/org/apache/spark/ml/ann/Layer.scala index be50ba821784a..f0f2357fbf3b8 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/ann/Layer.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/ann/Layer.scala @@ -288,9 +288,10 @@ private[ann] object ActivationFunction { private[ann] class SoftmaxFunction extends ActivationFunction { override def eval(x: BDM[Double], y: BDM[Double]): Unit = { (0 until x.cols).foreach { j => - // find max value to scale and prevent overflow during exp + // subtract max value to prevent overflow during exp + // does not affect correctness since we normalize right after val maxVal = Bmax(x(::, j)) - y(::, j) := x(::, j).map { xVal => Math.exp(xVal) - maxVal } + y(::, j) := breeze.numerics.exp(x(::, j) - maxVal) y(::, j) :/= Bsum(y(::, j)) } } @@ -819,34 +820,42 @@ private[ml] class FeedForwardTrainer( /** * Sets the updater. 
- * @param updater updater + * @param value updater * @return trainer */ - def setUpdater(updater: Updater): FeedForwardTrainer = { - _updater = updater - optimizer match { - case lbfgs: LBFGS => lbfgs.setUpdater(updater) - case sgd: GradientDescent => sgd.setUpdater(updater) - case other => throw new UnsupportedOperationException( - s"Only LBFGS and GradientDescent are supported but got ${other.getClass}.") - } + def setUpdater(value: Updater): FeedForwardTrainer = { + _updater = value + updateUpdater(value) this } /** * Sets the gradient. - * @param gradient gradient + * @param value gradient * @return trainer */ - def setGradient(gradient: Gradient): FeedForwardTrainer = { - _gradient = gradient + def setGradient(value: Gradient): FeedForwardTrainer = { + _gradient = value + updateGradient(value) + this + } + + private[this] def updateGradient(gradient: Gradient): Unit = { optimizer match { case lbfgs: LBFGS => lbfgs.setGradient(gradient) case sgd: GradientDescent => sgd.setGradient(gradient) case other => throw new UnsupportedOperationException( s"Only LBFGS and GradientDescent are supported but got ${other.getClass}.") } - this + } + + private[this] def updateUpdater(updater: Updater): Unit = { + optimizer match { + case lbfgs: LBFGS => lbfgs.setUpdater(updater) + case sgd: GradientDescent => sgd.setUpdater(updater) + case other => throw new UnsupportedOperationException( + s"Only LBFGS and GradientDescent are supported but got ${other.getClass}.") + } } /** From f56e2d5301f10f23c985defa63a4461a9e8d0f1b Mon Sep 17 00:00:00 2001 From: Feynman Liang Date: Fri, 11 Sep 2015 12:10:45 -0700 Subject: [PATCH 10/10] Reverts flatten/flatMap changes --- .../scala/org/apache/spark/ml/ann/Layer.scala | 27 ++++++++++++++++--- 1 file changed, 24 insertions(+), 3 deletions(-) diff --git a/mllib/src/main/scala/org/apache/spark/ml/ann/Layer.scala b/mllib/src/main/scala/org/apache/spark/ml/ann/Layer.scala index f0f2357fbf3b8..f9fcc47076a45 100644 --- 
a/mllib/src/main/scala/org/apache/spark/ml/ann/Layer.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/ann/Layer.scala @@ -595,14 +595,35 @@ private[ml] class FeedForwardModel private( } // update cumulative gradients val cumGradientArray = cumGradient.toArray - grads.flatten.zipWithIndex.foreach { case (newGrad, i) => - cumGradientArray(i) += newGrad + var offset = 0 + // TODO: extract roll + for (i <- 0 until grads.length) { + val gradArray = grads(i) + var k = 0 + while (k < gradArray.length) { + cumGradientArray(offset + k) += gradArray(k) + k += 1 + } + offset += gradArray.length } newError } + // TODO: do we really need to copy the weights? they should be read-only override def weights(): Vector = { - Vectors.dense(layerModels.flatMap(_.weights().toArray)) + // TODO: extract roll + var size = 0 + for (i <- 0 until layerModels.length) { + size += layerModels(i).size + } + val array = new Array[Double](size) + var offset = 0 + for (i <- 0 until layerModels.length) { + val layerWeights = layerModels(i).weights().toArray + System.arraycopy(layerWeights, 0, array, offset, layerWeights.length) + offset += layerWeights.length + } + Vectors.dense(array) } override def predict(data: Vector): Vector = {