Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
88 changes: 66 additions & 22 deletions tmva/tmva/inc/TMVA/DNN/CNN/ConvLayer.h
Original file line number Diff line number Diff line change
Expand Up @@ -46,21 +46,35 @@ class TConvLayer : public VGeneralLayer<Architecture_t> {
using Scalar_t = typename Architecture_t::Scalar_t;

private:
bool inline isInteger(Scalar_t x) const { return x == floor(x); }

/* Calculate the output dimension of the convolutional layer */
size_t calculateDimension(size_t imgDim, size_t fltDim, size_t padding, size_t stride);

/* Calculate the number of pixels in a single receptive field */
size_t inline calculateNLocalViewPixels(size_t depth, size_t height, size_t width) { return depth * height * width; }

/* Calculate the number of receptive fields in an image given the filter and image sizes */
size_t calculateNLocalViews(size_t inputHeight, size_t filterHeight, size_t paddingHeight, size_t strideRows,
size_t inputWidth, size_t filterWidth, size_t paddingWidth, size_t strideCols);

protected:
size_t fFilterDepth; ///< The depth of the filter.
size_t fFilterHeight; ///< The height of the filter.
size_t fFilterWidth; ///< The width of the filter.

size_t fStrideRows; ///< The number of row pixels to slide the filter each step.
size_t fStrideCols; ///< The number of column pixels to slide the filter each step.

size_t fPaddingHeight; ///< The number of zero layers added top and bottom of the input.
size_t fPaddingWidth; ///< The number of zero layers left and right of the input.

size_t fNLocalViewPixels; ///< The number of pixels in one local image view.
size_t fNLocalViews; ///< The number of local views in one image.

Scalar_t fDropoutProbability; ///< Probability that an input is active.

private:
size_t fPaddingHeight; ///< The number of zero layers added top and bottom of the input.
size_t fPaddingWidth; ///< The number of zero layers left and right of the input.

std::vector<Matrix_t> fDerivatives; ///< First fDerivatives of the activations of this layer.

std::vector<int> fForwardIndices; ///< Vector of indices used for a fast Im2Col in forward pass
Expand All @@ -71,11 +85,10 @@ class TConvLayer : public VGeneralLayer<Architecture_t> {
ERegularization fReg; ///< The regularization method.
Scalar_t fWeightDecay; ///< The weight decay.


public:
/*! Constructor. */
TConvLayer(size_t BatchSize, size_t InputDepth, size_t InputHeight, size_t InputWidth, size_t Depth, size_t Height,
size_t Width, size_t WeightsNRows, size_t WeightsNCols, size_t BiasesNRows, size_t BiasesNCols,
size_t OutputNSlices, size_t OutputNRows, size_t OutputNCols, EInitialization Init, size_t FilterDepth,
TConvLayer(size_t BatchSize, size_t InputDepth, size_t InputHeight, size_t InputWidth, size_t Depth, EInitialization Init,
size_t FilterHeight, size_t FilterWidth, size_t StrideRows, size_t StrideCols, size_t PaddingHeight,
size_t PaddingWidth, Scalar_t DropoutProbability, EActivationFunction f, ERegularization Reg,
Scalar_t WeightDecay);
Expand Down Expand Up @@ -145,23 +158,32 @@ class TConvLayer : public VGeneralLayer<Architecture_t> {
//______________________________________________________________________________
template <typename Architecture_t>
TConvLayer<Architecture_t>::TConvLayer(size_t batchSize, size_t inputDepth, size_t inputHeight, size_t inputWidth,
size_t depth, size_t height, size_t width, size_t weightsNRows,
size_t weightsNCols, size_t biasesNRows, size_t biasesNCols,
size_t outputNSlices, size_t outputNRows, size_t outputNCols,
EInitialization init, size_t filterDepth, size_t filterHeight,
size_t filterWidth, size_t strideRows, size_t strideCols, size_t paddingHeight,
size_t paddingWidth, Scalar_t dropoutProbability, EActivationFunction f,
ERegularization reg, Scalar_t weightDecay)
: VGeneralLayer<Architecture_t>(batchSize, inputDepth, inputHeight, inputWidth, depth, height, width, 1,
weightsNRows, weightsNCols, 1, biasesNRows, biasesNCols, outputNSlices, outputNRows,
outputNCols, init),
fFilterDepth(filterDepth), fFilterHeight(filterHeight), fFilterWidth(filterWidth), fStrideRows(strideRows),
fStrideCols(strideCols), fPaddingHeight(paddingHeight), fPaddingWidth(paddingWidth),
fNLocalViewPixels(filterDepth * filterHeight * filterWidth), fNLocalViews(height * width),
fDropoutProbability(dropoutProbability), fDerivatives(), fF(f), fReg(reg), fWeightDecay(weightDecay)
size_t depth, EInitialization init, size_t filterHeight, size_t filterWidth,
size_t strideRows, size_t strideCols, size_t paddingHeight, size_t paddingWidth,
Scalar_t dropoutProbability, EActivationFunction f, ERegularization reg,
Scalar_t weightDecay)
: VGeneralLayer<Architecture_t>(batchSize, inputDepth, inputHeight, inputWidth, depth,
calculateDimension(inputHeight, filterHeight, paddingHeight, strideRows),
calculateDimension(inputWidth, filterWidth, paddingWidth, strideCols),
1, depth, calculateNLocalViewPixels(inputDepth, filterHeight, filterWidth),
1, depth, 1, batchSize, depth,
calculateNLocalViews(inputHeight, filterHeight, paddingHeight, strideRows,
inputWidth, filterWidth, paddingWidth, strideCols),
init),
fFilterDepth(inputDepth), fFilterHeight(filterHeight), fFilterWidth(filterWidth), fStrideRows(strideRows),
fStrideCols(strideCols), fNLocalViewPixels(calculateNLocalViewPixels(inputDepth, filterHeight, filterWidth)),
fNLocalViews(calculateNLocalViews(inputHeight, filterHeight, paddingHeight, strideRows,
inputWidth, filterWidth, paddingWidth, strideCols)),
fDropoutProbability(dropoutProbability), fPaddingHeight(paddingHeight), fPaddingWidth(paddingWidth),
fDerivatives(), fF(f), fReg(reg), fWeightDecay(weightDecay)
{
for (size_t i = 0; i < outputNSlices; i++) {
fDerivatives.emplace_back(outputNRows, outputNCols);
/** Each element in the vector is a `T_Matrix` representing an event, therefore `vec.size() == batchSize`.
* Cells in these matrices are distributed in the following manner:
* Each row represents a single feature map, therefore we have `nRows == depth`.
* Each column represents a single pixel in that feature map, therefore we have `nCols == nLocalViews`.
**/
for (size_t i = 0; i < batchSize; i++) {
fDerivatives.emplace_back(depth, fNLocalViews);
}
}

Expand Down Expand Up @@ -351,6 +373,28 @@ void TConvLayer<Architecture_t>::ReadWeightsFromXML(void *parent)
this->ReadMatrixXML(parent,"Biases", this -> GetBiasesAt(0));
}

template <typename Architecture_t>
/*! Computes the spatial output dimension of the layer for one axis:
 *  (imgDim - fltDim + 2 * padding) / stride + 1.
 *  Aborts via Fatal() when the hyper parameters do not tile the image exactly
 *  (non-integer result) or produce a non-positive dimension. */
size_t TConvLayer<Architecture_t>::calculateDimension(size_t imgDim, size_t fltDim, size_t padding, size_t stride)
{
   // Promote to floating point BEFORE dividing: with the previous all-size_t
   // expression the division truncated, so isInteger() was always true and
   // incompatible hyper parameters were silently accepted. The Scalar_t cast on
   // the first operand also prevents size_t underflow when fltDim > imgDim + 2*padding.
   Scalar_t dimension = (((Scalar_t)imgDim - fltDim + 2 * padding) / stride) + 1;
   if (!isInteger(dimension) || dimension <= 0) {
      // %zu is the correct printf conversion for size_t (%d was undefined behavior here).
      Fatal("calculateDimension", "Not compatible hyper parameters for layer - (imageDim, filterDim, padding, stride) %zu , %zu , %zu , %zu",
            imgDim, fltDim, padding, stride);
   }

   return (size_t)dimension;
}

template <typename Architecture_t>
/*! Computes the number of receptive-field positions (local views) in one image:
 *  the product of the output height and output width as returned by
 *  calculateDimension() for each axis. */
size_t TConvLayer<Architecture_t>::calculateNLocalViews(size_t inputHeight, size_t filterHeight, size_t paddingHeight,
                                                        size_t strideRows, size_t inputWidth, size_t filterWidth,
                                                        size_t paddingWidth, size_t strideCols)
{
   // Keep size_t throughout: the previous int locals mixed signed/unsigned
   // arithmetic and could overflow for large images before the implicit
   // widening of the product back to size_t.
   size_t height = calculateDimension(inputHeight, filterHeight, paddingHeight, strideRows);
   size_t width = calculateDimension(inputWidth, filterWidth, paddingWidth, strideCols);

   return height * width;
}

} // namespace CNN
} // namespace DNN
Expand Down
78 changes: 26 additions & 52 deletions tmva/tmva/inc/TMVA/DNN/CNN/MaxPoolLayer.h
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@

#include "TMatrix.h"

#include "TMVA/DNN/GeneralLayer.h"
#include "TMVA/DNN/CNN/ConvLayer.h"
#include "TMVA/DNN/Functions.h"

#include <iostream>
Expand All @@ -43,38 +43,30 @@ namespace CNN {
Generic Max Pooling Layer class.

This generic Max Pooling Layer Class represents a pooling layer of
a CNN. It inherits all of the properties of the generic virtual base class
VGeneralLayer. In addition to that, it contains a matrix of winning units.
a CNN. It inherits all of the properties of the convolutional layer
TConvLayer, but it overrides the propagation methods. In a sense, max pooling
can be seen as non-linear convolution: a filter slides over the input and produces
one element as a function of the elements within the receptive field.
In addition to that, it contains a matrix of winning units.

The height and width of the weights and biases is set to 0, since this
layer does not contain any weights.

*/
template <typename Architecture_t>
class TMaxPoolLayer : public VGeneralLayer<Architecture_t> {
class TMaxPoolLayer : public TConvLayer<Architecture_t> {

public:
using Matrix_t = typename Architecture_t::Matrix_t;
using Scalar_t = typename Architecture_t::Scalar_t;
using Matrix_t = typename Architecture_t::Matrix_t;
using Scalar_t = typename Architecture_t::Scalar_t;

private:
std::vector<Matrix_t> indexMatrix; ///< Matrix of indices for the backward pass.

size_t fFrameHeight; ///< The height of the frame.
size_t fFrameWidth; ///< The width of the frame.

size_t fStrideRows; ///< The number of row pixels to slide the filter each step.
size_t fStrideCols; ///< The number of column pixels to slide the filter each step.

size_t fNLocalViewPixels; ///< The number of pixels in one local image view.
size_t fNLocalViews; ///< The number of local views in one image.

Scalar_t fDropoutProbability; ///< Probability that an input is active.

public:
/*! Constructor. */
TMaxPoolLayer(size_t BatchSize, size_t InputDepth, size_t InputHeight, size_t InputWidth, size_t Height,
size_t Width, size_t OutputNSlices, size_t OutputNRows, size_t OutputNCols, size_t FrameHeight,
size_t FrameWidth, size_t StrideRows, size_t StrideCols, Scalar_t DropoutProbability);
TMaxPoolLayer(size_t BatchSize, size_t InputDepth, size_t InputHeight, size_t InputWidth, size_t FilterHeight,
size_t FilterWidth, size_t StrideRows, size_t StrideCols, Scalar_t DropoutProbability);

/*! Copy the max pooling layer provided as a pointer */
TMaxPoolLayer(TMaxPoolLayer<Architecture_t> *layer);
Expand Down Expand Up @@ -104,37 +96,25 @@ class TMaxPoolLayer : public VGeneralLayer<Architecture_t> {
/*! Read the information and the weights about the layer from XML node. */
virtual void ReadWeightsFromXML(void *parent);


/*! Prints the info about the layer. */
void Print() const;

/*! Getters */
const std::vector<Matrix_t> &GetIndexMatrix() const { return indexMatrix; }
std::vector<Matrix_t> &GetIndexMatrix() { return indexMatrix; }

size_t GetFrameHeight() const { return fFrameHeight; }
size_t GetFrameWidth() const { return fFrameWidth; }

size_t GetStrideRows() const { return fStrideRows; }
size_t GetStrideCols() const { return fStrideCols; }

size_t GetNLocalViewPixels() const { return fNLocalViewPixels; }
size_t GetNLocalViews() const { return fNLocalViews; }

Scalar_t GetDropoutProbability() const { return fDropoutProbability; }
};

//______________________________________________________________________________
template <typename Architecture_t>
TMaxPoolLayer<Architecture_t>::TMaxPoolLayer(size_t batchSize, size_t inputDepth, size_t inputHeight, size_t inputWidth,
size_t height, size_t width, size_t outputNSlices, size_t outputNRows,
size_t outputNCols, size_t frameHeight, size_t frameWidth,
size_t strideRows, size_t strideCols, Scalar_t dropoutProbability)
: VGeneralLayer<Architecture_t>(batchSize, inputDepth, inputHeight, inputWidth, inputDepth, height, width, 0, 0, 0,
0, 0, 0, outputNSlices, outputNRows, outputNCols, EInitialization::kZero),
indexMatrix(), fFrameHeight(frameHeight), fFrameWidth(frameWidth), fStrideRows(strideRows),
fStrideCols(strideCols), fNLocalViewPixels(inputDepth * frameHeight * frameWidth), fNLocalViews(height * width),
fDropoutProbability(dropoutProbability)
size_t filterHeight, size_t filterWidth, size_t strideRows,
size_t strideCols, Scalar_t dropoutProbability)

: TConvLayer<Architecture_t>(batchSize, inputDepth, inputHeight, inputWidth, inputDepth, EInitialization::kZero,
filterHeight, filterWidth, strideRows, strideCols, 0, 0, dropoutProbability,
EActivationFunction::kIdentity, ERegularization::kNone, 0),
indexMatrix()
{
for (size_t i = 0; i < this->GetBatchSize(); i++) {
indexMatrix.emplace_back(this->GetDepth(), this->GetNLocalViews());
Expand All @@ -144,10 +124,7 @@ TMaxPoolLayer<Architecture_t>::TMaxPoolLayer(size_t batchSize, size_t inputDepth
//______________________________________________________________________________
template <typename Architecture_t>
TMaxPoolLayer<Architecture_t>::TMaxPoolLayer(TMaxPoolLayer<Architecture_t> *layer)
: VGeneralLayer<Architecture_t>(layer), indexMatrix(), fFrameHeight(layer->GetFrameHeight()),
fFrameWidth(layer->GetFrameWidth()), fStrideRows(layer->GetStrideRows()), fStrideCols(layer->GetStrideCols()),
fNLocalViewPixels(layer->GetNLocalViewPixels()), fNLocalViews(layer->GetNLocalViews()),
fDropoutProbability(layer->GetDropoutProbability())
: TConvLayer<Architecture_t>(layer), indexMatrix()
{
for (size_t i = 0; i < layer->GetBatchSize(); i++) {
indexMatrix.emplace_back(layer->GetDepth(), layer->GetNLocalViews());
Expand All @@ -157,10 +134,7 @@ TMaxPoolLayer<Architecture_t>::TMaxPoolLayer(TMaxPoolLayer<Architecture_t> *laye
//______________________________________________________________________________
template <typename Architecture_t>
TMaxPoolLayer<Architecture_t>::TMaxPoolLayer(const TMaxPoolLayer &layer)
: VGeneralLayer<Architecture_t>(layer), indexMatrix(), fFrameHeight(layer.fFrameHeight),
fFrameWidth(layer.fFrameWidth), fStrideRows(layer.fStrideRows), fStrideCols(layer.fStrideCols),
fNLocalViewPixels(layer.fNLocalViewPixels), fNLocalViews(layer.fNLocalViews),
fDropoutProbability(layer.fDropoutProbability)
: TConvLayer<Architecture_t>(layer), indexMatrix()
{
for (size_t i = 0; i < layer.fBatchSize; i++) {
indexMatrix.emplace_back(layer.fDepth, layer.fNLocalViews);
Expand All @@ -184,7 +158,7 @@ auto TMaxPoolLayer<Architecture_t>::Forward(std::vector<Matrix_t> &input, bool a
}

Architecture_t::Downsample(this->GetOutputAt(i), indexMatrix[i], input[i], this->GetInputHeight(),
this->GetInputWidth(), this->GetFrameHeight(), this->GetFrameWidth(),
this->GetInputWidth(), this->GetFilterHeight(), this->GetFilterWidth(),
this->GetStrideRows(), this->GetStrideCols());
}
}
Expand All @@ -209,8 +183,8 @@ auto TMaxPoolLayer<Architecture_t>::Print() const -> void
std::cout << " H = " << this->GetHeight() << " , ";
std::cout << " D = " << this->GetDepth() << " ) ";

std::cout << "\t Frame ( W = " << this->GetFrameWidth() << " , ";
std::cout << " H = " << this->GetFrameHeight() << " ) ";
std::cout << "\t Filter ( W = " << this->GetFilterWidth() << " , ";
std::cout << " H = " << this->GetFilterHeight() << " ) ";

if (this->GetOutput().size() > 0) {
std::cout << "\tOutput = ( " << this->GetOutput().size() << " , " << this->GetOutput()[0].GetNrows() << " , " << this->GetOutput()[0].GetNcols() << " ) ";
Expand All @@ -225,8 +199,8 @@ void TMaxPoolLayer<Architecture_t>::AddWeightsXMLTo(void *parent)
auto layerxml = gTools().xmlengine().NewChild(parent, 0, "MaxPoolLayer");

// write maxpool layer info
gTools().xmlengine().NewAttr(layerxml, 0, "FrameHeight", gTools().StringFromInt(this->GetFrameHeight()));
gTools().xmlengine().NewAttr(layerxml, 0, "FrameWidth", gTools().StringFromInt(this->GetFrameWidth()));
gTools().xmlengine().NewAttr(layerxml, 0, "FilterHeight", gTools().StringFromInt(this->GetFilterHeight()));
gTools().xmlengine().NewAttr(layerxml, 0, "FilterWidth", gTools().StringFromInt(this->GetFilterWidth()));
gTools().xmlengine().NewAttr(layerxml, 0, "StrideRows", gTools().StringFromInt(this->GetStrideRows()));
gTools().xmlengine().NewAttr(layerxml, 0, "StrideCols", gTools().StringFromInt(this->GetStrideCols()));

Expand Down
Loading