From 73fa77a97bebd715336ddd0aaf1356daf04a1f27 Mon Sep 17 00:00:00 2001 From: Emmanouil Stergiadis Date: Wed, 16 May 2018 13:59:32 +0200 Subject: [PATCH 1/4] Simplified the Convolutional Layer's API by eliminating redundant constructor arguments --- tmva/tmva/inc/TMVA/DNN/CNN/ConvLayer.h | 72 ++++++++++++++++++++------ tmva/tmva/inc/TMVA/DNN/DeepNet.h | 25 ++------- 2 files changed, 59 insertions(+), 38 deletions(-) diff --git a/tmva/tmva/inc/TMVA/DNN/CNN/ConvLayer.h b/tmva/tmva/inc/TMVA/DNN/CNN/ConvLayer.h index 73b6eda106636..ec62423e084d1 100644 --- a/tmva/tmva/inc/TMVA/DNN/CNN/ConvLayer.h +++ b/tmva/tmva/inc/TMVA/DNN/CNN/ConvLayer.h @@ -45,6 +45,19 @@ class TConvLayer : public VGeneralLayer { using Matrix_t = typename Architecture_t::Matrix_t; using Scalar_t = typename Architecture_t::Scalar_t; +private: + bool inline isInteger(Scalar_t x) const { return x == floor(x); } + + /* Calculate the output dimension of the convolutional layer */ + size_t calculateDimension(int imgDim, int fltDim, int padding, int stride); + + /* Calculate the number of pixels in a single receptive field */ + size_t inline calculateNLocalViewPixels(int depth, int height, int width) { return depth * height * width; } + + /* Calculate the number of receptive fields in an image given the filter and image sizes */ + size_t calculateNLocalViews(int inputHeight, int filterHeight, int paddingHeight, int strideRows, int inputWidth, + int filterWidth, int paddingWidth, int strideCols); + private: size_t fFilterDepth; ///< The depth of the filter. size_t fFilterHeight; ///< The height of the filter. @@ -71,11 +84,10 @@ class TConvLayer : public VGeneralLayer { ERegularization fReg; ///< The regularization method. Scalar_t fWeightDecay; ///< The weight decay. + public: /*! Constructor. 
*/ - TConvLayer(size_t BatchSize, size_t InputDepth, size_t InputHeight, size_t InputWidth, size_t Depth, size_t Height, - size_t Width, size_t WeightsNRows, size_t WeightsNCols, size_t BiasesNRows, size_t BiasesNCols, - size_t OutputNSlices, size_t OutputNRows, size_t OutputNCols, EInitialization Init, size_t FilterDepth, + TConvLayer(size_t BatchSize, size_t InputDepth, size_t InputHeight, size_t InputWidth, size_t Depth, EInitialization Init, size_t FilterHeight, size_t FilterWidth, size_t StrideRows, size_t StrideCols, size_t PaddingHeight, size_t PaddingWidth, Scalar_t DropoutProbability, EActivationFunction f, ERegularization Reg, Scalar_t WeightDecay); @@ -145,23 +157,27 @@ class TConvLayer : public VGeneralLayer { //______________________________________________________________________________ template TConvLayer::TConvLayer(size_t batchSize, size_t inputDepth, size_t inputHeight, size_t inputWidth, - size_t depth, size_t height, size_t width, size_t weightsNRows, - size_t weightsNCols, size_t biasesNRows, size_t biasesNCols, - size_t outputNSlices, size_t outputNRows, size_t outputNCols, - EInitialization init, size_t filterDepth, size_t filterHeight, - size_t filterWidth, size_t strideRows, size_t strideCols, size_t paddingHeight, - size_t paddingWidth, Scalar_t dropoutProbability, EActivationFunction f, - ERegularization reg, Scalar_t weightDecay) - : VGeneralLayer(batchSize, inputDepth, inputHeight, inputWidth, depth, height, width, 1, - weightsNRows, weightsNCols, 1, biasesNRows, biasesNCols, outputNSlices, outputNRows, - outputNCols, init), - fFilterDepth(filterDepth), fFilterHeight(filterHeight), fFilterWidth(filterWidth), fStrideRows(strideRows), + size_t depth, EInitialization init, size_t filterHeight, size_t filterWidth, + size_t strideRows, size_t strideCols, size_t paddingHeight, size_t paddingWidth, + Scalar_t dropoutProbability, EActivationFunction f, ERegularization reg, + Scalar_t weightDecay) + : VGeneralLayer(batchSize, inputDepth, 
inputHeight, inputWidth, depth, + calculateDimension(inputHeight, filterHeight, paddingHeight, strideRows), + calculateDimension(inputWidth, filterWidth, paddingWidth, strideCols), + 1, depth, calculateNLocalViewPixels(inputDepth, filterHeight, filterWidth), + 1, depth, 1, batchSize, depth, + calculateNLocalViews(inputHeight, filterHeight, paddingHeight, strideRows, + inputWidth, filterWidth, paddingWidth, strideCols), + init), + fFilterDepth(inputDepth), fFilterHeight(filterHeight), fFilterWidth(filterWidth), fStrideRows(strideRows), fStrideCols(strideCols), fPaddingHeight(paddingHeight), fPaddingWidth(paddingWidth), - fNLocalViewPixels(filterDepth * filterHeight * filterWidth), fNLocalViews(height * width), + fNLocalViewPixels(calculateNLocalViewPixels(inputDepth, filterHeight, filterWidth)), + fNLocalViews(calculateNLocalViews(inputHeight, filterHeight, paddingHeight, strideRows, + inputWidth, filterWidth, paddingWidth, strideCols)), fDropoutProbability(dropoutProbability), fDerivatives(), fF(f), fReg(reg), fWeightDecay(weightDecay) { - for (size_t i = 0; i < outputNSlices; i++) { - fDerivatives.emplace_back(outputNRows, outputNCols); + for (size_t i = 0; i < batchSize; i++) { + fDerivatives.emplace_back(depth, fNLocalViews); } } @@ -351,6 +367,28 @@ void TConvLayer::ReadWeightsFromXML(void *parent) this->ReadMatrixXML(parent,"Biases", this -> GetBiasesAt(0)); } +template +size_t TConvLayer::calculateDimension(int imgDim, int fltDim, int padding, int stride) +{ + Scalar_t dimension = ((imgDim - fltDim + 2 * padding) / stride) + 1; + if (!isInteger(dimension) || dimension <= 0) { + Fatal("calculateDimension", "Not compatible hyper parameters for layer - (imageDim, filterDim, padding, stride) %d , %d , %d , %d", + imgDim, fltDim, padding, stride); + } + + return (size_t)dimension; +} + +template +size_t TConvLayer::calculateNLocalViews(int inputHeight, int filterHeight, int paddingHeight, + int strideRows, int inputWidth, int filterWidth, + int paddingWidth, int 
strideCols) +{ + int height = calculateDimension(inputHeight, filterHeight, paddingHeight, strideRows); + int width = calculateDimension(inputWidth, filterWidth, paddingWidth, strideCols); + + return height * width; +} } // namespace CNN } // namespace DNN diff --git a/tmva/tmva/inc/TMVA/DNN/DeepNet.h b/tmva/tmva/inc/TMVA/DNN/DeepNet.h index 386e5b71cc5a5..3cf7792e191bf 100644 --- a/tmva/tmva/inc/TMVA/DNN/DeepNet.h +++ b/tmva/tmva/inc/TMVA/DNN/DeepNet.h @@ -380,8 +380,8 @@ auto TDeepNet::calculateDimension(int imgDim, int fltDi { Scalar_t dimension = ((imgDim - fltDim + 2 * padding) / stride) + 1; if (!isInteger(dimension) || dimension <= 0) { - this->Print(); - int iLayer = fLayers.size(); + this->Print(); + int iLayer = fLayers.size(); Fatal("calculateDimension","Not compatible hyper parameters for layer %d - (imageDim, filterDim, padding, stride) %d , %d , %d , %d", iLayer, imgDim, fltDim, padding, stride); // std::cout << " calculateDimension - Not compatible hyper parameters (imgDim, fltDim, padding, stride)" @@ -405,16 +405,6 @@ TConvLayer *TDeepNet::AddConvLayer(size size_t inputDepth; size_t inputHeight; size_t inputWidth; - size_t height; - size_t width; - size_t filterDepth; - size_t weightsNRows = depth; - size_t weightsNCols; - size_t biasesNRows = depth; - size_t biasesNCols = 1; - size_t outputNSlices = this->GetBatchSize(); - size_t outputNRows = depth; - size_t outputNCols; EInitialization init = this->GetInitialization(); ERegularization reg = this->GetRegularization(); Scalar_t decay = this->GetWeightDecay(); @@ -430,19 +420,12 @@ TConvLayer *TDeepNet::AddConvLayer(size inputWidth = lastLayer->GetWidth(); } - height = calculateDimension(inputHeight, filterHeight, paddingHeight, strideRows); - width = calculateDimension(inputWidth, filterWidth, paddingWidth, strideCols); - - filterDepth = inputDepth; - weightsNCols = filterDepth * filterHeight * filterWidth; - outputNCols = height * width; // Create the conv layer TConvLayer *convLayer = new 
TConvLayer( - batchSize, inputDepth, inputHeight, inputWidth, depth, height, width, weightsNRows, weightsNCols, biasesNRows, - biasesNCols, outputNSlices, outputNRows, outputNCols, init, filterDepth, filterHeight, filterWidth, strideRows, - strideCols, paddingHeight, paddingWidth, dropoutProbability, f, reg, decay); + batchSize, inputDepth, inputHeight, inputWidth, depth, init, filterHeight, filterWidth, strideRows, + strideCols, paddingHeight, paddingWidth, dropoutProbability, f, reg, decay); fLayers.push_back(convLayer); return convLayer; From a730f72b2102fb90ca3241ed5bdc5d7cb659a245 Mon Sep 17 00:00:00 2001 From: Emmanouil Stergiadis Date: Wed, 16 May 2018 15:24:43 +0200 Subject: [PATCH 2/4] Simplified the MaxPooling Layer's API by eliminating redundant constructor arguments --- tmva/tmva/inc/TMVA/DNN/CNN/MaxPoolLayer.h | 55 +++++++++++++++++++---- tmva/tmva/inc/TMVA/DNN/DeepNet.h | 15 +------ 2 files changed, 49 insertions(+), 21 deletions(-) diff --git a/tmva/tmva/inc/TMVA/DNN/CNN/MaxPoolLayer.h b/tmva/tmva/inc/TMVA/DNN/CNN/MaxPoolLayer.h index 98ad5843e6bb1..4959bf3ebcf5a 100644 --- a/tmva/tmva/inc/TMVA/DNN/CNN/MaxPoolLayer.h +++ b/tmva/tmva/inc/TMVA/DNN/CNN/MaxPoolLayer.h @@ -56,6 +56,19 @@ class TMaxPoolLayer : public VGeneralLayer { using Matrix_t = typename Architecture_t::Matrix_t; using Scalar_t = typename Architecture_t::Scalar_t; +private: + bool inline isInteger(Scalar_t x) const { return x == floor(x); } + + /* Calculate the output dimension of the convolutional layer */ + size_t calculateDimension(int imgDim, int fltDim, int stride); + + /* Calculate the number of pixels in a single receptive field */ + size_t inline calculateNLocalViewPixels(int depth, int height, int width) { return depth * height * width; } + + /* Calculate the number of receptive fields in an image given the filter and image sizes */ + size_t calculateNLocalViews(int inputHeight, int filterHeight, int strideRows, + int inputWidth, int filterWidth, int strideCols); + private: 
std::vector indexMatrix; ///< Matrix of indices for the backward pass. @@ -72,8 +85,7 @@ class TMaxPoolLayer : public VGeneralLayer { public: /*! Constructor. */ - TMaxPoolLayer(size_t BatchSize, size_t InputDepth, size_t InputHeight, size_t InputWidth, size_t Height, - size_t Width, size_t OutputNSlices, size_t OutputNRows, size_t OutputNCols, size_t FrameHeight, + TMaxPoolLayer(size_t BatchSize, size_t InputDepth, size_t InputHeight, size_t InputWidth, size_t FrameHeight, size_t FrameWidth, size_t StrideRows, size_t StrideCols, Scalar_t DropoutProbability); /*! Copy the max pooling layer provided as a pointer */ @@ -127,13 +139,18 @@ class TMaxPoolLayer : public VGeneralLayer { //______________________________________________________________________________ template TMaxPoolLayer::TMaxPoolLayer(size_t batchSize, size_t inputDepth, size_t inputHeight, size_t inputWidth, - size_t height, size_t width, size_t outputNSlices, size_t outputNRows, - size_t outputNCols, size_t frameHeight, size_t frameWidth, - size_t strideRows, size_t strideCols, Scalar_t dropoutProbability) - : VGeneralLayer(batchSize, inputDepth, inputHeight, inputWidth, inputDepth, height, width, 0, 0, 0, - 0, 0, 0, outputNSlices, outputNRows, outputNCols, EInitialization::kZero), + size_t frameHeight, size_t frameWidth, size_t strideRows, + size_t strideCols, Scalar_t dropoutProbability) + : VGeneralLayer(batchSize, inputDepth, inputHeight, inputWidth, inputDepth, + calculateDimension(inputHeight, frameHeight, strideRows), + calculateDimension(inputWidth, frameWidth, strideCols), + 0, 0, 0, 0, 0, 0, batchSize, inputDepth /* I suspect this should be 1 instead */, + calculateNLocalViews(inputHeight, frameHeight, strideRows, + inputWidth, frameWidth, strideCols), + EInitialization::kZero), indexMatrix(), fFrameHeight(frameHeight), fFrameWidth(frameWidth), fStrideRows(strideRows), - fStrideCols(strideCols), fNLocalViewPixels(inputDepth * frameHeight * frameWidth), fNLocalViews(height * width), + 
fStrideCols(strideCols), fNLocalViewPixels(calculateNLocalViewPixels(inputDepth, frameHeight, frameWidth)), + fNLocalViews(calculateNLocalViews(inputHeight, frameHeight, strideRows, inputWidth, frameWidth, strideCols)), fDropoutProbability(dropoutProbability) { for (size_t i = 0; i < this->GetBatchSize(); i++) { @@ -239,6 +256,28 @@ void TMaxPoolLayer::ReadWeightsFromXML(void * /*parent */) // all info is read before - nothing to do } +template +size_t TMaxPoolLayer::calculateDimension(int imgDim, int fltDim, int stride) +{ + Scalar_t dimension = ((imgDim - fltDim) / stride) + 1; + if (!isInteger(dimension) || dimension <= 0) { + Fatal("calculateDimension", "Not compatible hyper parameters for layer - (imageDim, filterDim, stride) %d , %d , %d", + imgDim, fltDim, stride); + } + + return (size_t)dimension; +} + +template +size_t TMaxPoolLayer::calculateNLocalViews(int inputHeight, int filterHeight, int strideRows, + int inputWidth, int filterWidth, int strideCols) +{ + int height = calculateDimension(inputHeight, filterHeight, strideRows); + int width = calculateDimension(inputWidth, filterWidth, strideCols); + + return height * width; +} + } // namespace CNN } // namespace DNN } // namespace TMVA diff --git a/tmva/tmva/inc/TMVA/DNN/DeepNet.h b/tmva/tmva/inc/TMVA/DNN/DeepNet.h index 3cf7792e191bf..ac24af70d3074 100644 --- a/tmva/tmva/inc/TMVA/DNN/DeepNet.h +++ b/tmva/tmva/inc/TMVA/DNN/DeepNet.h @@ -448,11 +448,6 @@ TMaxPoolLayer *TDeepNet::AddMaxPoolLaye size_t inputDepth; size_t inputHeight; size_t inputWidth; - size_t height; - size_t width; - size_t outputNSlices = this->GetBatchSize(); - size_t outputNRows; - size_t outputNCols; if (fLayers.size() == 0) { inputDepth = this->GetInputDepth(); @@ -465,15 +460,9 @@ TMaxPoolLayer *TDeepNet::AddMaxPoolLaye inputWidth = lastLayer->GetWidth(); } - height = calculateDimension(inputHeight, frameHeight, 0, strideRows); - width = calculateDimension(inputWidth, frameWidth, 0, strideCols); - - outputNRows = inputDepth; - 
outputNCols = height * width; - TMaxPoolLayer *maxPoolLayer = new TMaxPoolLayer( - batchSize, inputDepth, inputHeight, inputWidth, height, width, outputNSlices, outputNRows, outputNCols, - frameHeight, frameWidth, strideRows, strideCols, dropoutProbability); + batchSize, inputDepth, inputHeight, inputWidth, frameHeight, frameWidth, + strideRows, strideCols, dropoutProbability); // But this creates a copy or what? fLayers.push_back(maxPoolLayer); From 57a10e2e77c0935d47d8fab7a43a85a13238f2d8 Mon Sep 17 00:00:00 2001 From: Emmanouil Stergiadis Date: Thu, 17 May 2018 10:18:05 +0200 Subject: [PATCH 3/4] Modified the inheritance schema: Pooling now inherits from Conv layer. Renamed frame* to filter* for more standardized conventions --- tmva/tmva/inc/TMVA/DNN/CNN/ConvLayer.h | 9 +- tmva/tmva/inc/TMVA/DNN/CNN/MaxPoolLayer.h | 115 +++++----------------- tmva/tmva/src/MethodDL.cxx | 24 ++--- 3 files changed, 42 insertions(+), 106 deletions(-) diff --git a/tmva/tmva/inc/TMVA/DNN/CNN/ConvLayer.h b/tmva/tmva/inc/TMVA/DNN/CNN/ConvLayer.h index ec62423e084d1..ff31d784acc31 100644 --- a/tmva/tmva/inc/TMVA/DNN/CNN/ConvLayer.h +++ b/tmva/tmva/inc/TMVA/DNN/CNN/ConvLayer.h @@ -58,7 +58,7 @@ class TConvLayer : public VGeneralLayer { size_t calculateNLocalViews(int inputHeight, int filterHeight, int paddingHeight, int strideRows, int inputWidth, int filterWidth, int paddingWidth, int strideCols); -private: +protected: size_t fFilterDepth; ///< The depth of the filter. size_t fFilterHeight; ///< The height of the filter. size_t fFilterWidth; ///< The width of the filter. @@ -66,14 +66,15 @@ class TConvLayer : public VGeneralLayer { size_t fStrideRows; ///< The number of row pixels to slid the filter each step. size_t fStrideCols; ///< The number of column pixels to slid the filter each step. - size_t fPaddingHeight; ///< The number of zero layers added top and bottom of the input. - size_t fPaddingWidth; ///< The number of zero layers left and right of the input. 
- size_t fNLocalViewPixels; ///< The number of pixels in one local image view. size_t fNLocalViews; ///< The number of local views in one image. Scalar_t fDropoutProbability; ///< Probability that an input is active. +private: + size_t fPaddingHeight; ///< The number of zero layers added top and bottom of the input. + size_t fPaddingWidth; ///< The number of zero layers left and right of the input. + std::vector fDerivatives; ///< First fDerivatives of the activations of this layer. std::vector fForwardIndices; ///< Vector of indices used for a fast Im2Col in forward pass diff --git a/tmva/tmva/inc/TMVA/DNN/CNN/MaxPoolLayer.h b/tmva/tmva/inc/TMVA/DNN/CNN/MaxPoolLayer.h index 4959bf3ebcf5a..6678a8da8525f 100644 --- a/tmva/tmva/inc/TMVA/DNN/CNN/MaxPoolLayer.h +++ b/tmva/tmva/inc/TMVA/DNN/CNN/MaxPoolLayer.h @@ -29,7 +29,7 @@ #include "TMatrix.h" -#include "TMVA/DNN/GeneralLayer.h" +#include "TMVA/DNN/CNN/ConvLayer.h" #include "TMVA/DNN/Functions.h" #include @@ -43,50 +43,30 @@ namespace CNN { Generic Max Pooling Layer class. This generic Max Pooling Layer Class represents a pooling layer of - a CNN. It inherits all of the properties of the generic virtual base class - VGeneralLayer. In addition to that, it contains a matrix of winning units. + a CNN. It inherits all of the properties of the convolutional layer + TConvLayer, but it overrides the propagation methods. In a sense, max pooling + can be seen as non-linear convolution: a filter slides over the input and produces + one element as a function of the elements within the receptive field. + In addition to that, it contains a matrix of winning units. The height and width of the weights and biases is set to 0, since this layer does not contain any weights. 
*/ template -class TMaxPoolLayer : public VGeneralLayer { -public: - using Matrix_t = typename Architecture_t::Matrix_t; - using Scalar_t = typename Architecture_t::Scalar_t; - -private: - bool inline isInteger(Scalar_t x) const { return x == floor(x); } - - /* Calculate the output dimension of the convolutional layer */ - size_t calculateDimension(int imgDim, int fltDim, int stride); +class TMaxPoolLayer : public TConvLayer { - /* Calculate the number of pixels in a single receptive field */ - size_t inline calculateNLocalViewPixels(int depth, int height, int width) { return depth * height * width; } - - /* Calculate the number of receptive fields in an image given the filter and image sizes */ - size_t calculateNLocalViews(int inputHeight, int filterHeight, int strideRows, - int inputWidth, int filterWidth, int strideCols); +public: + using Matrix_t = typename Architecture_t::Matrix_t; + using Scalar_t = typename Architecture_t::Scalar_t; private: std::vector indexMatrix; ///< Matrix of indices for the backward pass. - size_t fFrameHeight; ///< The height of the frame. - size_t fFrameWidth; ///< The width of the frame. - - size_t fStrideRows; ///< The number of row pixels to slid the filter each step. - size_t fStrideCols; ///< The number of column pixels to slid the filter each step. - - size_t fNLocalViewPixels; ///< The number of pixels in one local image view. - size_t fNLocalViews; ///< The number of local views in one image. - - Scalar_t fDropoutProbability; ///< Probability that an input is active. - public: /*! Constructor. */ - TMaxPoolLayer(size_t BatchSize, size_t InputDepth, size_t InputHeight, size_t InputWidth, size_t FrameHeight, - size_t FrameWidth, size_t StrideRows, size_t StrideCols, Scalar_t DropoutProbability); + TMaxPoolLayer(size_t BatchSize, size_t InputDepth, size_t InputHeight, size_t InputWidth, size_t FilterHeight, + size_t FilterWidth, size_t StrideRows, size_t StrideCols, Scalar_t DropoutProbability); /*! 
Copy the max pooling layer provided as a pointer */ TMaxPoolLayer(TMaxPoolLayer *layer); @@ -116,7 +96,6 @@ class TMaxPoolLayer : public VGeneralLayer { /*! Read the information and the weights about the layer from XML node. */ virtual void ReadWeightsFromXML(void *parent); - /*! Prints the info about the layer. */ void Print() const; @@ -124,34 +103,18 @@ class TMaxPoolLayer : public VGeneralLayer { const std::vector &GetIndexMatrix() const { return indexMatrix; } std::vector &GetIndexMatrix() { return indexMatrix; } - size_t GetFrameHeight() const { return fFrameHeight; } - size_t GetFrameWidth() const { return fFrameWidth; } - - size_t GetStrideRows() const { return fStrideRows; } - size_t GetStrideCols() const { return fStrideCols; } - - size_t GetNLocalViewPixels() const { return fNLocalViewPixels; } - size_t GetNLocalViews() const { return fNLocalViews; } - - Scalar_t GetDropoutProbability() const { return fDropoutProbability; } }; //______________________________________________________________________________ template TMaxPoolLayer::TMaxPoolLayer(size_t batchSize, size_t inputDepth, size_t inputHeight, size_t inputWidth, - size_t frameHeight, size_t frameWidth, size_t strideRows, + size_t filterHeight, size_t filterWidth, size_t strideRows, size_t strideCols, Scalar_t dropoutProbability) - : VGeneralLayer(batchSize, inputDepth, inputHeight, inputWidth, inputDepth, - calculateDimension(inputHeight, frameHeight, strideRows), - calculateDimension(inputWidth, frameWidth, strideCols), - 0, 0, 0, 0, 0, 0, batchSize, inputDepth /* I suspect this should be 1 instead */, - calculateNLocalViews(inputHeight, frameHeight, strideRows, - inputWidth, frameWidth, strideCols), - EInitialization::kZero), - indexMatrix(), fFrameHeight(frameHeight), fFrameWidth(frameWidth), fStrideRows(strideRows), - fStrideCols(strideCols), fNLocalViewPixels(calculateNLocalViewPixels(inputDepth, frameHeight, frameWidth)), - fNLocalViews(calculateNLocalViews(inputHeight, frameHeight, 
strideRows, inputWidth, frameWidth, strideCols)), - fDropoutProbability(dropoutProbability) + + : TConvLayer(batchSize, inputDepth, inputHeight, inputWidth, inputDepth, EInitialization::kZero, + filterHeight, filterWidth, strideRows, strideCols, 0, 0, dropoutProbability, + EActivationFunction::kIdentity, ERegularization::kNone, 0), + indexMatrix() { for (size_t i = 0; i < this->GetBatchSize(); i++) { indexMatrix.emplace_back(this->GetDepth(), this->GetNLocalViews()); @@ -161,10 +124,7 @@ TMaxPoolLayer::TMaxPoolLayer(size_t batchSize, size_t inputDepth //______________________________________________________________________________ template TMaxPoolLayer::TMaxPoolLayer(TMaxPoolLayer *layer) - : VGeneralLayer(layer), indexMatrix(), fFrameHeight(layer->GetFrameHeight()), - fFrameWidth(layer->GetFrameWidth()), fStrideRows(layer->GetStrideRows()), fStrideCols(layer->GetStrideCols()), - fNLocalViewPixels(layer->GetNLocalViewPixels()), fNLocalViews(layer->GetNLocalViews()), - fDropoutProbability(layer->GetDropoutProbability()) + : TConvLayer(layer), indexMatrix() { for (size_t i = 0; i < layer->GetBatchSize(); i++) { indexMatrix.emplace_back(layer->GetDepth(), layer->GetNLocalViews()); @@ -174,10 +134,7 @@ TMaxPoolLayer::TMaxPoolLayer(TMaxPoolLayer *laye //______________________________________________________________________________ template TMaxPoolLayer::TMaxPoolLayer(const TMaxPoolLayer &layer) - : VGeneralLayer(layer), indexMatrix(), fFrameHeight(layer.fFrameHeight), - fFrameWidth(layer.fFrameWidth), fStrideRows(layer.fStrideRows), fStrideCols(layer.fStrideCols), - fNLocalViewPixels(layer.fNLocalViewPixels), fNLocalViews(layer.fNLocalViews), - fDropoutProbability(layer.fDropoutProbability) + : TConvLayer(layer), indexMatrix() { for (size_t i = 0; i < layer.fBatchSize; i++) { indexMatrix.emplace_back(layer.fDepth, layer.fNLocalViews); @@ -201,7 +158,7 @@ auto TMaxPoolLayer::Forward(std::vector &input, bool a } Architecture_t::Downsample(this->GetOutputAt(i), 
indexMatrix[i], input[i], this->GetInputHeight(), - this->GetInputWidth(), this->GetFrameHeight(), this->GetFrameWidth(), + this->GetInputWidth(), this->GetFilterHeight(), this->GetFilterWidth(), this->GetStrideRows(), this->GetStrideCols()); } } @@ -226,8 +183,8 @@ auto TMaxPoolLayer::Print() const -> void std::cout << " H = " << this->GetHeight() << " , "; std::cout << " D = " << this->GetDepth() << " ) "; - std::cout << "\t Frame ( W = " << this->GetFrameWidth() << " , "; - std::cout << " H = " << this->GetFrameHeight() << " ) "; + std::cout << "\t Filter ( W = " << this->GetFilterWidth() << " , "; + std::cout << " H = " << this->GetFilterHeight() << " ) "; if (this->GetOutput().size() > 0) { std::cout << "\tOutput = ( " << this->GetOutput().size() << " , " << this->GetOutput()[0].GetNrows() << " , " << this->GetOutput()[0].GetNcols() << " ) "; @@ -242,8 +199,8 @@ void TMaxPoolLayer::AddWeightsXMLTo(void *parent) auto layerxml = gTools().xmlengine().NewChild(parent, 0, "MaxPoolLayer"); // write maxpool layer info - gTools().xmlengine().NewAttr(layerxml, 0, "FrameHeight", gTools().StringFromInt(this->GetFrameHeight())); - gTools().xmlengine().NewAttr(layerxml, 0, "FrameWidth", gTools().StringFromInt(this->GetFrameWidth())); + gTools().xmlengine().NewAttr(layerxml, 0, "FilterHeight", gTools().StringFromInt(this->GetFilterHeight())); + gTools().xmlengine().NewAttr(layerxml, 0, "FilterWidth", gTools().StringFromInt(this->GetFilterWidth())); gTools().xmlengine().NewAttr(layerxml, 0, "StrideRows", gTools().StringFromInt(this->GetStrideRows())); gTools().xmlengine().NewAttr(layerxml, 0, "StrideCols", gTools().StringFromInt(this->GetStrideCols())); @@ -256,28 +213,6 @@ void TMaxPoolLayer::ReadWeightsFromXML(void * /*parent */) // all info is read before - nothing to do } -template -size_t TMaxPoolLayer::calculateDimension(int imgDim, int fltDim, int stride) -{ - Scalar_t dimension = ((imgDim - fltDim) / stride) + 1; - if (!isInteger(dimension) || dimension <= 0) { - 
Fatal("calculateDimension", "Not compatible hyper parameters for layer - (imageDim, filterDim, stride) %d , %d , %d", - imgDim, fltDim, stride); - } - - return (size_t)dimension; -} - -template -size_t TMaxPoolLayer::calculateNLocalViews(int inputHeight, int filterHeight, int strideRows, - int inputWidth, int filterWidth, int strideCols) -{ - int height = calculateDimension(inputHeight, filterHeight, strideRows); - int width = calculateDimension(inputWidth, filterWidth, strideCols); - - return height * width; -} - } // namespace CNN } // namespace DNN } // namespace TMVA diff --git a/tmva/tmva/src/MethodDL.cxx b/tmva/tmva/src/MethodDL.cxx index 66f1e95a5613b..aaaac417b7adf 100644 --- a/tmva/tmva/src/MethodDL.cxx +++ b/tmva/tmva/src/MethodDL.cxx @@ -660,8 +660,8 @@ void MethodDL::ParseMaxPoolLayer(DNN::TDeepNet &deepNet TString delim) { - int frameHeight = 0; - int frameWidth = 0; + int filterHeight = 0; + int filterWidth = 0; int strideRows = 0; int strideCols = 0; @@ -673,15 +673,15 @@ void MethodDL::ParseMaxPoolLayer(DNN::TDeepNet &deepNet for (; token != nullptr; token = (TObjString *)nextToken()) { switch (idxToken) { - case 1: // frame height + case 1: // filter height { TString strFrmHeight(token->GetString()); - frameHeight = strFrmHeight.Atoi(); + filterHeight = strFrmHeight.Atoi(); } break; - case 2: // frame width + case 2: // filter width { TString strFrmWidth(token->GetString()); - frameWidth = strFrmWidth.Atoi(); + filterWidth = strFrmWidth.Atoi(); } break; case 3: // stride in rows { @@ -699,10 +699,10 @@ void MethodDL::ParseMaxPoolLayer(DNN::TDeepNet &deepNet // Add the Max pooling layer // TMaxPoolLayer *maxPoolLayer = - deepNet.AddMaxPoolLayer(frameHeight, frameWidth, strideRows, strideCols); + deepNet.AddMaxPoolLayer(filterHeight, filterWidth, strideRows, strideCols); // Add the same layer to fNet - fNet->AddMaxPoolLayer(frameHeight, frameWidth, strideRows, strideCols); + fNet->AddMaxPoolLayer(filterHeight, filterWidth, strideRows, strideCols); 
//TMaxPoolLayer *copyMaxPoolLayer = new TMaxPoolLayer(*maxPoolLayer); @@ -1469,14 +1469,14 @@ void MethodDL::ReadWeightsFromXML(void * rootXML) else if (layerName == "MaxPoolLayer") { // read maxpool layer info - size_t frameHeight, frameWidth = 0; + size_t filterHeight, filterWidth = 0; size_t strideRows, strideCols = 0; - gTools().ReadAttr(layerXML, "FrameHeight", frameHeight); - gTools().ReadAttr(layerXML, "FrameWidth", frameWidth); + gTools().ReadAttr(layerXML, "FilterHeight", filterHeight); + gTools().ReadAttr(layerXML, "FilterWidth", filterWidth); gTools().ReadAttr(layerXML, "StrideRows", strideRows); gTools().ReadAttr(layerXML, "StrideCols", strideCols); - fNet->AddMaxPoolLayer(frameHeight, frameWidth, strideRows, strideCols); + fNet->AddMaxPoolLayer(filterHeight, filterWidth, strideRows, strideCols); } else if (layerName == "ReshapeLayer") { From 3cc2f8f8c204c73c9d07e705e03da25490d9c59d Mon Sep 17 00:00:00 2001 From: Emmanouil Stergiadis Date: Fri, 22 Jun 2018 10:53:35 +0200 Subject: [PATCH 4/4] Applied code review corrections --- tmva/tmva/inc/TMVA/DNN/CNN/ConvLayer.h | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/tmva/tmva/inc/TMVA/DNN/CNN/ConvLayer.h b/tmva/tmva/inc/TMVA/DNN/CNN/ConvLayer.h index ff31d784acc31..4eb6ef7048eb3 100644 --- a/tmva/tmva/inc/TMVA/DNN/CNN/ConvLayer.h +++ b/tmva/tmva/inc/TMVA/DNN/CNN/ConvLayer.h @@ -49,14 +49,14 @@ class TConvLayer : public VGeneralLayer { bool inline isInteger(Scalar_t x) const { return x == floor(x); } /* Calculate the output dimension of the convolutional layer */ - size_t calculateDimension(int imgDim, int fltDim, int padding, int stride); + size_t calculateDimension(size_t imgDim, size_t fltDim, size_t padding, size_t stride); /* Calculate the number of pixels in a single receptive field */ - size_t inline calculateNLocalViewPixels(int depth, int height, int width) { return depth * height * width; } + size_t inline calculateNLocalViewPixels(size_t depth, size_t height, 
size_t width) { return depth * height * width; } /* Calculate the number of receptive fields in an image given the filter and image sizes */ - size_t calculateNLocalViews(int inputHeight, int filterHeight, int paddingHeight, int strideRows, int inputWidth, - int filterWidth, int paddingWidth, int strideCols); + size_t calculateNLocalViews(size_t inputHeight, size_t filterHeight, size_t paddingHeight, size_t strideRows, + size_t inputWidth, size_t filterWidth, size_t paddingWidth, size_t strideCols); protected: size_t fFilterDepth; ///< The depth of the filter. @@ -171,12 +171,17 @@ TConvLayer::TConvLayer(size_t batchSize, size_t inputDepth, size inputWidth, filterWidth, paddingWidth, strideCols), init), fFilterDepth(inputDepth), fFilterHeight(filterHeight), fFilterWidth(filterWidth), fStrideRows(strideRows), - fStrideCols(strideCols), fPaddingHeight(paddingHeight), fPaddingWidth(paddingWidth), - fNLocalViewPixels(calculateNLocalViewPixels(inputDepth, filterHeight, filterWidth)), + fStrideCols(strideCols), fNLocalViewPixels(calculateNLocalViewPixels(inputDepth, filterHeight, filterWidth)), fNLocalViews(calculateNLocalViews(inputHeight, filterHeight, paddingHeight, strideRows, inputWidth, filterWidth, paddingWidth, strideCols)), - fDropoutProbability(dropoutProbability), fDerivatives(), fF(f), fReg(reg), fWeightDecay(weightDecay) + fDropoutProbability(dropoutProbability), fPaddingHeight(paddingHeight), fPaddingWidth(paddingWidth), + fDerivatives(), fF(f), fReg(reg), fWeightDecay(weightDecay) { + /** Each element in the vector is a `Matrix_t` representing an event, therefore `vec.size() == batchSize`. + * Cells in these matrices are distributed in the following manner: + * Each row represents a single feature map, therefore we have `nRows == depth`. + * Each column represents a single pixel in that feature map, therefore we have `nCols == nLocalViews`. + **/ for (size_t i = 0; i < batchSize; i++) { fDerivatives.emplace_back(depth, fNLocalViews); }