diff --git a/Makefile b/Makefile index e42c75ee1e8..ca5fff2c4c7 100644 --- a/Makefile +++ b/Makefile @@ -86,27 +86,37 @@ CUDA_LIB_DIR := $(CUDA_DIR)/lib64 $(CUDA_DIR)/lib MKL_INCLUDE_DIR := $(MKL_DIR)/include MKL_LIB_DIR := $(MKL_DIR)/lib $(MKL_DIR)/lib/intel64 -INCLUDE_DIRS += ./src ./include $(CUDA_INCLUDE_DIR) $(MKL_INCLUDE_DIR) -LIBRARY_DIRS += $(CUDA_LIB_DIR) $(MKL_LIB_DIR) +INCLUDE_DIRS += ./src ./include $(CUDA_INCLUDE_DIR) +LIBRARY_DIRS += $(CUDA_LIB_DIR) LIBRARIES := cudart cublas curand \ - mkl_rt \ pthread \ - glog protobuf leveldb \ - snappy \ + glog protobuf leveldb snappy \ boost_system \ hdf5_hl hdf5 \ opencv_core opencv_highgui opencv_imgproc PYTHON_LIBRARIES := boost_python python2.7 WARNINGS := -Wall -COMMON_FLAGS := -DNDEBUG -O2 $(foreach includedir,$(INCLUDE_DIRS),-I$(includedir)) +COMMON_FLAGS := -DNDEBUG -O2 + +# MKL switch (default = non-MKL) +USE_MKL ?= 0 +ifeq ($(USE_MKL), 1) + LIBRARIES += mkl_rt + COMMON_FLAGS += -DUSE_MKL + INCLUDE_DIRS += $(MKL_INCLUDE_DIR) + LIBRARY_DIRS += $(MKL_LIB_DIR) +else + LIBRARIES += cblas atlas +endif + +COMMON_FLAGS += $(foreach includedir,$(INCLUDE_DIRS),-I$(includedir)) CXXFLAGS += -pthread -fPIC $(COMMON_FLAGS) NVCCFLAGS := -ccbin=$(CXX) -Xcompiler -fPIC $(COMMON_FLAGS) LDFLAGS += $(foreach librarydir,$(LIBRARY_DIRS),-L$(librarydir)) \ $(foreach library,$(LIBRARIES),-l$(library)) PYTHON_LDFLAGS := $(LDFLAGS) $(foreach library,$(PYTHON_LIBRARIES),-l$(library)) - ############################## # Define build targets ############################## @@ -210,6 +220,10 @@ $(BUILD_DIR)/src/gtest/%.o: src/gtest/%.cpp $(CXX) $< $(CXXFLAGS) -c -o $@ @echo +$(BUILD_DIR)/src/$(PROJECT)/%.cuo: src/$(PROJECT)/%.cu + $(CUDA_DIR)/bin/nvcc $(NVCCFLAGS) $(CUDA_ARCH) -c $< -o $@ + @echo + $(BUILD_DIR)/src/$(PROJECT)/layers/%.cuo: src/$(PROJECT)/layers/%.cu $(CUDA_DIR)/bin/nvcc $(NVCCFLAGS) $(CUDA_ARCH) -c $< -o $@ @echo diff --git a/Makefile.config.example b/Makefile.config.example index cec85e0a7f7..95656dd0ad1 100644 --- a/Makefile.config.example +++ b/Makefile.config.example @@ -10,6 +10,8 @@ CUDA_ARCH := -gencode arch=compute_20,code=sm_20 \ -gencode arch=compute_30,code=sm_30 \ -gencode arch=compute_35,code=sm_35 +# MKL switch: set to 1 for MKL +USE_MKL := 0 # MKL directory contains include/ and lib/ directions that we need. MKL_DIR := /opt/intel/mkl diff --git a/docs/feature_extraction.md b/docs/feature_extraction.md new file mode 100644 index 00000000000..7671fffa5a8 --- /dev/null +++ b/docs/feature_extraction.md @@ -0,0 +1,67 @@ +--- +layout: default +title: Caffe +--- + +Extracting Features +=================== + +In this tutorial, we will extract features using a pre-trained model. +Follow instructions for [setting up caffe](installation.html) and for [getting](getting_pretrained_models.html) the pre-trained ImageNet model. +If you need detailed information about the tools below, please consult their source code, in which additional documentation is usually provided. + +Select data to run on +--------------------- + +We'll make a temporary folder to store things into. + + mkdir examples/_temp + +Generate a list of the files to process. +We're going to use the images that ship with caffe. 
+ + find `pwd`/examples/images -type f -exec echo {} \; > examples/_temp/file_list.txt + +The `ImagesLayer` we'll use expects labels after each filename, so let's add a 0 to the end of each line. + + sed -i "s/$/ 0/" examples/_temp/file_list.txt + +Define the Feature Extraction Network Architecture +-------------------------------------------------- + +In practice, subtracting the mean image from a dataset significantly improves classification accuracy. +Download the mean image of the ILSVRC dataset. + + data/ilsvrc12/get_ilsvrc_aux.sh + +We will use `data/ilsvrc12/imagenet_mean.binaryproto` in the network definition prototxt. + +Let's copy and modify the network definition. +We'll be using the `ImagesLayer`, which will load and resize images for us. + + cp examples/feature_extraction/imagenet_val.prototxt examples/_temp + +Edit `examples/_temp/imagenet_val.prototxt` to use the correct paths for your setup (replace `$CAFFE_DIR`). + +Extract Features +---------------- + +Now everything necessary is in place. + + build/tools/extract_features.bin models/caffe_reference_imagenet_model examples/_temp/imagenet_val.prototxt fc7 examples/_temp/features 10 + +The name of the feature blob that you extract is `fc7`, which represents the highest-level feature of the reference model. +We can use any other layer as well, such as `conv5` or `pool3`. + +The last parameter above is the number of data mini-batches. + +The features are stored in the LevelDB `examples/_temp/features`, ready for access by other code. + +If you'd like to use the Python wrapper for extracting features, check out the [layer visualization notebook](http://nbviewer.ipython.org/github/BVLC/caffe/blob/master/examples/filter_visualization.ipynb). + +Clean Up +-------- + +Let's remove the temporary directory now. + + rm -r examples/_temp diff --git a/docs/index.md b/docs/index.md index 3db9dbaf20e..98c266c668a 100644 --- a/docs/index.md +++ b/docs/index.md @@ -33,6 +33,7 @@ Even in CPU mode, computing predictions on an image takes only 20 ms when images * [LeNet / MNIST Demo](/mnist.html): end-to-end training and testing of LeNet on MNIST. * [CIFAR-10 Demo](/cifar10.html): training and testing on the CIFAR-10 data. * [Training ImageNet](/imagenet_training.html): end-to-end training of an ImageNet classifier. +* [Feature extraction with C++](/feature_extraction.html): feature extraction using a pre-trained model * [Running Pretrained ImageNet \[notebook\]][pretrained_imagenet]: run classification with the pretrained ImageNet model using the Python interface. * [Running Detection \[notebook\]][imagenet_detection]: run a pretrained model as a detector. * [Visualizing Features and Filters \[notebook\]][visualizing_filters]: trained filters and an example image, viewed layer-by-layer.
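As a quick sanity check on the tutorial above, here is a minimal sketch of the "other code" that might read the extracted features back out of the LevelDB. It assumes `extract_features.bin` serializes one `caffe::Datum` per mini-batch item (with the feature values in `float_data`), and the file name `read_features.cpp` is purely illustrative, not part of this change; link it against leveldb, protobuf, and the generated caffe protos for your setup.

    // read_features.cpp -- illustrative sketch only.
    // Walks the feature LevelDB written by extract_features.bin and prints
    // the dimensionality of each stored feature vector.
    #include <iostream>
    #include <string>
    #include "leveldb/db.h"
    #include "caffe/proto/caffe.pb.h"

    int main() {
      leveldb::DB* db;
      leveldb::Options options;
      options.create_if_missing = false;
      leveldb::Status status =
          leveldb::DB::Open(options, "examples/_temp/features", &db);
      if (!status.ok()) {
        std::cerr << "Failed to open feature DB: " << status.ToString() << std::endl;
        return 1;
      }
      leveldb::Iterator* it = db->NewIterator(leveldb::ReadOptions());
      for (it->SeekToFirst(); it->Valid(); it->Next()) {
        caffe::Datum datum;
        datum.ParseFromString(it->value().ToString());
        // For fc7 each entry should hold a 4096-dimensional feature vector.
        std::cout << it->key().ToString() << ": " << datum.float_data_size()
                  << " values" << std::endl;
      }
      delete it;
      delete db;
      return 0;
    }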
diff --git a/examples/feature_extraction/imagenet_val.prototxt b/examples/feature_extraction/imagenet_val.prototxt new file mode 100644 index 00000000000..c7b26509125 --- /dev/null +++ b/examples/feature_extraction/imagenet_val.prototxt @@ -0,0 +1,247 @@ +name: "CaffeNet" +layers { + layer { + name: "data" + type: "images" + source: "$CAFFE_DIR/examples/_temp/file_list.txt" + meanfile: "$CAFFE_DIR/data/ilsvrc12/imagenet_mean.binaryproto" + batchsize: 50 + new_height: 256 + new_width: 256 + mirror: false + cropsize: 227 + } + top: "data" + top: "label" +} +layers { + layer { + name: "conv1" + type: "conv" + num_output: 96 + kernelsize: 11 + stride: 4 + } + bottom: "data" + top: "conv1" +} +layers { + layer { + name: "relu1" + type: "relu" + } + bottom: "conv1" + top: "conv1" +} +layers { + layer { + name: "pool1" + type: "pool" + pool: MAX + kernelsize: 3 + stride: 2 + } + bottom: "conv1" + top: "pool1" +} +layers { + layer { + name: "norm1" + type: "lrn" + local_size: 5 + alpha: 0.0001 + beta: 0.75 + } + bottom: "pool1" + top: "norm1" +} +layers { + layer { + name: "conv2" + type: "conv" + num_output: 256 + group: 2 + kernelsize: 5 + pad: 2 + } + bottom: "norm1" + top: "conv2" +} +layers { + layer { + name: "relu2" + type: "relu" + } + bottom: "conv2" + top: "conv2" +} +layers { + layer { + name: "pool2" + type: "pool" + pool: MAX + kernelsize: 3 + stride: 2 + } + bottom: "conv2" + top: "pool2" +} +layers { + layer { + name: "norm2" + type: "lrn" + local_size: 5 + alpha: 0.0001 + beta: 0.75 + } + bottom: "pool2" + top: "norm2" +} +layers { + layer { + name: "conv3" + type: "conv" + num_output: 384 + kernelsize: 3 + pad: 1 + } + bottom: "norm2" + top: "conv3" +} +layers { + layer { + name: "relu3" + type: "relu" + } + bottom: "conv3" + top: "conv3" +} +layers { + layer { + name: "conv4" + type: "conv" + num_output: 384 + group: 2 + kernelsize: 3 + pad: 1 + } + bottom: "conv3" + top: "conv4" +} +layers { + layer { + name: "relu4" + type: "relu" + } + bottom: "conv4" + top: "conv4" +} +layers { + layer { + name: "conv5" + type: "conv" + num_output: 256 + group: 2 + kernelsize: 3 + pad: 1 + } + bottom: "conv4" + top: "conv5" +} +layers { + layer { + name: "relu5" + type: "relu" + } + bottom: "conv5" + top: "conv5" +} +layers { + layer { + name: "pool5" + type: "pool" + kernelsize: 3 + pool: MAX + stride: 2 + } + bottom: "conv5" + top: "pool5" +} +layers { + layer { + name: "fc6" + type: "innerproduct" + num_output: 4096 + } + bottom: "pool5" + top: "fc6" +} +layers { + layer { + name: "relu6" + type: "relu" + } + bottom: "fc6" + top: "fc6" +} +layers { + layer { + name: "drop6" + type: "dropout" + dropout_ratio: 0.5 + } + bottom: "fc6" + top: "fc6" +} +layers { + layer { + name: "fc7" + type: "innerproduct" + num_output: 4096 + } + bottom: "fc6" + top: "fc7" +} +layers { + layer { + name: "relu7" + type: "relu" + } + bottom: "fc7" + top: "fc7" +} +layers { + layer { + name: "drop7" + type: "dropout" + dropout_ratio: 0.5 + } + bottom: "fc7" + top: "fc7" +} +layers { + layer { + name: "fc8" + type: "innerproduct" + num_output: 1000 + } + bottom: "fc7" + top: "fc8" +} +layers { + layer { + name: "prob" + type: "softmax" + } + bottom: "fc8" + top: "prob" +} +layers { + layer { + name: "accuracy" + type: "accuracy" + } + bottom: "prob" + bottom: "label" + top: "accuracy" +} diff --git a/include/caffe/blob.hpp b/include/caffe/blob.hpp index f31d3b0f693..75cc3c67288 100644 --- a/include/caffe/blob.hpp +++ b/include/caffe/blob.hpp @@ -27,6 +27,14 @@ class Blob { inline int count() const {return 
count_; } inline int offset(const int n, const int c = 0, const int h = 0, const int w = 0) const { + CHECK_GE(n, 0); + CHECK_LE(n, num_); + CHECK_GE(channels_, 0); + CHECK_LE(c, channels_); + CHECK_GE(height_, 0); + CHECK_LE(h, height_); + CHECK_GE(width_, 0); + CHECK_LE(w, width_); return ((n * channels_ + c) * height_ + h) * width_ + w; } // Copy from source. If copy_diff is false, we copy the data; if copy_diff diff --git a/include/caffe/common.hpp b/include/caffe/common.hpp index 96ba58c2716..5344139c551 100644 --- a/include/caffe/common.hpp +++ b/include/caffe/common.hpp @@ -1,4 +1,4 @@ -// Copyright 2013 Yangqing Jia +// Copyright 2014 BVLC and contributors. #ifndef CAFFE_COMMON_HPP_ #define CAFFE_COMMON_HPP_ @@ -7,28 +7,8 @@ #include #include #include -// cuda driver types -#include +#include // cuda driver types #include -#include - -// various checks for different function calls. -#define CUDA_CHECK(condition) CHECK_EQ((condition), cudaSuccess) -#define CUBLAS_CHECK(condition) CHECK_EQ((condition), CUBLAS_STATUS_SUCCESS) -#define CURAND_CHECK(condition) CHECK_EQ((condition), CURAND_STATUS_SUCCESS) -#define VSL_CHECK(condition) CHECK_EQ((condition), VSL_STATUS_OK) - -#define CUDA_KERNEL_LOOP(i, n) \ - for (int i = blockIdx.x * blockDim.x + threadIdx.x; \ - i < (n); \ - i += blockDim.x * gridDim.x) - -// After a kernel is executed, this will check the error and if there is one, -// exit loudly. -#define CUDA_POST_KERNEL_CHECK \ - if (cudaSuccess != cudaPeekAtLastError()) \ - LOG(FATAL) << "Cuda kernel failed. Error: " \ - << cudaGetErrorString(cudaPeekAtLastError()) // Disable the copy and assignment operator for a class. #define DISABLE_COPY_AND_ASSIGN(classname) \ @@ -45,6 +25,23 @@ private:\ // is executed we will see a fatal log. #define NOT_IMPLEMENTED LOG(FATAL) << "Not Implemented Yet" +// CUDA: various checks for different function calls. +#define CUDA_CHECK(condition) CHECK_EQ((condition), cudaSuccess) +#define CUBLAS_CHECK(condition) CHECK_EQ((condition), CUBLAS_STATUS_SUCCESS) +#define CURAND_CHECK(condition) CHECK_EQ((condition), CURAND_STATUS_SUCCESS) + +// CUDA: grid stride looping +#define CUDA_KERNEL_LOOP(i, n) \ + for (int i = blockIdx.x * blockDim.x + threadIdx.x; \ + i < (n); \ + i += blockDim.x * gridDim.x) + +// CUDA: check for error after kernel execution and exit loudly if there is one. +#define CUDA_POST_KERNEL_CHECK \ + if (cudaSuccess != cudaPeekAtLastError()) \ + LOG(FATAL) << "Cuda kernel failed. Error: " \ + << cudaGetErrorString(cudaPeekAtLastError()) + namespace caffe { @@ -53,20 +50,6 @@ namespace caffe { using boost::shared_ptr; -// We will use 1024 threads per block, which requires cuda sm_2x or above. -#if __CUDA_ARCH__ >= 200 - const int CAFFE_CUDA_NUM_THREADS = 1024; -#else - const int CAFFE_CUDA_NUM_THREADS = 512; -#endif - - - -inline int CAFFE_GET_BLOCKS(const int N) { - return (N + CAFFE_CUDA_NUM_THREADS - 1) / CAFFE_CUDA_NUM_THREADS; -} - - // A singleton class to hold common caffe stuff, such as the handler that // caffe is going to use for cublas, curand, etc. class Caffe { @@ -81,15 +64,32 @@ class Caffe { enum Brew { CPU, GPU }; enum Phase { TRAIN, TEST }; - // The getters for the variables. - // Returns the cublas handle. + + // This random number generator facade hides boost and CUDA rng + // implementation from one another (for cross-platform compatibility). 
+ class RNG { + public: + RNG(); + explicit RNG(unsigned int seed); + ~RNG(); + RNG(const RNG&); + RNG& operator=(const RNG&); + const void* generator() const; + void* generator(); + private: + class Generator; + Generator* generator_; + }; + + // Getters for boost rng, curand, and cublas handles + inline static RNG &rng_stream() { + return Get().random_generator_; + } inline static cublasHandle_t cublas_handle() { return Get().cublas_handle_; } - // Returns the curand generator. inline static curandGenerator_t curand_generator() { return Get().curand_generator_; } - // Returns the MKL random stream. - inline static VSLStreamStatePtr vsl_stream() { return Get().vsl_stream_; } + // Returns the mode: running on CPU or GPU. inline static Brew mode() { return Get().mode_; } // Returns the phase: TRAIN or TEST. @@ -102,7 +102,7 @@ class Caffe { inline static void set_mode(Brew mode) { Get().mode_ = mode; } // Sets the phase. inline static void set_phase(Phase phase) { Get().phase_ = phase; } - // Sets the random seed of both MKL and curand + // Sets the random seed of both boost and curand static void set_random_seed(const unsigned int seed); // Sets the device. Since we have cublas and curand stuff, set device also // requires us to reset those values. @@ -113,7 +113,8 @@ class Caffe { protected: cublasHandle_t cublas_handle_; curandGenerator_t curand_generator_; - VSLStreamStatePtr vsl_stream_; + RNG random_generator_; + Brew mode_; Phase phase_; static shared_ptr singleton_; @@ -126,6 +127,21 @@ class Caffe { }; +// CUDA: thread number configuration. +// Use 1024 threads per block, which requires cuda sm_2x or above, +// or fall back to attempt compatibility (best of luck to you). +#if __CUDA_ARCH__ >= 200 + const int CAFFE_CUDA_NUM_THREADS = 1024; +#else + const int CAFFE_CUDA_NUM_THREADS = 512; +#endif + +// CUDA: number of blocks for threads. +inline int CAFFE_GET_BLOCKS(const int N) { + return (N + CAFFE_CUDA_NUM_THREADS - 1) / CAFFE_CUDA_NUM_THREADS; +} + + } // namespace caffe #endif // CAFFE_COMMON_HPP_ diff --git a/include/caffe/filler.hpp b/include/caffe/filler.hpp index 5b934a331e3..7c1002245d0 100644 --- a/include/caffe/filler.hpp +++ b/include/caffe/filler.hpp @@ -7,7 +7,6 @@ #ifndef CAFFE_FILLER_HPP #define CAFFE_FILLER_HPP -#include #include #include "caffe/common.hpp" diff --git a/include/caffe/layer.hpp b/include/caffe/layer.hpp index a0cb487e50d..6aaab6fe1b3 100644 --- a/include/caffe/layer.hpp +++ b/include/caffe/layer.hpp @@ -7,6 +7,7 @@ #include "caffe/blob.hpp" #include "caffe/common.hpp" #include "caffe/proto/caffe.pb.h" +#include "caffe/regularizer.hpp" using std::vector; @@ -28,6 +29,12 @@ class Layer { blobs_[i]->FromProto(layer_param_.blobs(i)); } } + if (layer_param_.regularizer_size() > 0) { + regularizers_.resize(layer_param_.regularizer_size()); + for (int i = 0; i < layer_param_.regularizer_size(); ++i) { + regularizers_[i].reset(GetRegularizer(param.regularizer(i))); + } + } } virtual ~Layer() {} // SetUp: your function should implement this. @@ -37,9 +44,9 @@ class Layer { // Forward and backward wrappers. You should implement the cpu and // gpu specific implementations instead, and should not change these // functions. 
- inline void Forward(const vector*>& bottom, + inline Dtype Forward(const vector*>& bottom, vector*>* top); - inline Dtype Backward(const vector*>& top, + inline void Backward(const vector*>& top, const bool propagate_down, vector*>* bottom); @@ -58,28 +65,30 @@ class Layer { LayerParameter layer_param_; // The vector that stores the parameters as a set of blobs. vector > > blobs_; + // The vector that stores the regularizers. + vector > > regularizers_; // Forward functions - virtual void Forward_cpu(const vector*>& bottom, + virtual Dtype Forward_cpu(const vector*>& bottom, vector*>* top) = 0; // If no gpu code is provided, we will simply use cpu code. - virtual void Forward_gpu(const vector*>& bottom, + virtual Dtype Forward_gpu(const vector*>& bottom, vector*>* top) { // LOG(WARNING) << "Using CPU code as backup."; - Forward_cpu(bottom, top); + return Forward_cpu(bottom, top); } // Backward functions: the backward function will compute the gradients for // any parameters and also for the bottom blobs if propagate_down is true. // It will return the loss produced from this layer. - virtual Dtype Backward_cpu(const vector*>& top, + virtual void Backward_cpu(const vector*>& top, const bool propagate_down, vector*>* bottom) = 0; - virtual Dtype Backward_gpu(const vector*>& top, + virtual void Backward_gpu(const vector*>& top, const bool propagate_down, vector*>* bottom) { // LOG(WARNING) << "Using CPU code as backup."; - return Backward_cpu(top, propagate_down, bottom); + Backward_cpu(top, propagate_down, bottom); } DISABLE_COPY_AND_ASSIGN(Layer); @@ -89,29 +98,38 @@ class Layer { // gpu specific implementations instead, and should not change these // functions. template -inline void Layer::Forward(const vector*>& bottom, +inline Dtype Layer::Forward(const vector*>& bottom, vector*>* top) { + Dtype loss; switch (Caffe::mode()) { case Caffe::CPU: - Forward_cpu(bottom, top); + loss = Forward_cpu(bottom, top); break; case Caffe::GPU: - Forward_gpu(bottom, top); + loss = Forward_gpu(bottom, top); break; default: - LOG(FATAL) << "Unknown caffe mode."; + LOG(FATAL) << "Unknown caffe mode " << Caffe::mode(); } + if (layer_param_.regularizer_size() > 0) { + for (int i = 0; i < layer_param_.regularizer_size(); ++i) { + loss += regularizers_[i]->Regularize(bottom[0]); + } + } + return loss; } template -inline Dtype Layer::Backward(const vector*>& top, +inline void Layer::Backward(const vector*>& top, const bool propagate_down, vector*>* bottom) { switch (Caffe::mode()) { case Caffe::CPU: - return Backward_cpu(top, propagate_down, bottom); + Backward_cpu(top, propagate_down, bottom); + break; case Caffe::GPU: - return Backward_gpu(top, propagate_down, bottom); + Backward_gpu(top, propagate_down, bottom); + break; default: LOG(FATAL) << "Unknown caffe mode."; } diff --git a/include/caffe/net.hpp b/include/caffe/net.hpp index b5a57b3c5a4..81fe25d2854 100644 --- a/include/caffe/net.hpp +++ b/include/caffe/net.hpp @@ -31,21 +31,24 @@ class Net { // Run forward with the input blobs already fed separately. You can get the // input blobs using input_blobs(). - const vector*>& ForwardPrefilled(); + const vector*>& ForwardPrefilled(Dtype* loss = NULL); // Run forward using a set of bottom blobs, and return the result. 
- const vector*>& Forward(const vector* > & bottom); + const vector*>& Forward(const vector* > & bottom, + Dtype* loss = NULL); // Run forward using a serialized BlobProtoVector and return the result // as a serialized BlobProtoVector - string Forward(const string& input_blob_protos); + string Forward(const string& input_blob_protos, Dtype* loss = NULL); // The network backward should take no input and output, since it solely // computes the gradient w.r.t the parameters, and the data has already // been provided during the forward pass. - Dtype Backward(); + void Backward(); Dtype ForwardBackward(const vector* > & bottom) { - Forward(bottom); - return Backward(); + Dtype loss; + Forward(bottom, &loss); + Backward(); + return loss; } // Updates the network weights based on the diff values computed. @@ -82,6 +85,13 @@ class Net { inline int num_outputs() { return net_output_blobs_.size(); } inline vector*>& input_blobs() { return net_input_blobs_; } inline vector*>& output_blobs() { return net_output_blobs_; } + // has_blob and blob_by_name are inspired by + // https://github.com/kencoken/caffe/commit/f36e71569455c9fbb4bf8a63c2d53224e32a4e7b + // Access intermediary computation layers, testing with centre image only + bool has_blob(const string& blob_name); + const shared_ptr > blob_by_name(const string& blob_name); + bool has_layer(const string& layer_name); + const shared_ptr > layer_by_name(const string& layer_name); protected: // Function to get misc parameters, e.g. the learning rate multiplier and @@ -91,11 +101,13 @@ class Net { // Individual layers in the net vector > > layers_; vector layer_names_; + map layer_names_index_; vector layer_need_backward_; // blobs stores the blobs that store intermediate results between the // layers. vector > > blobs_; vector blob_names_; + map blob_names_index_; vector blob_need_backward_; // bottom_vecs stores the vectors containing the input for each layer. 
// They don't actually host the blobs (blobs_ does), so we simply store diff --git a/include/caffe/regularizer.hpp b/include/caffe/regularizer.hpp new file mode 100644 index 00000000000..ac524aec28e --- /dev/null +++ b/include/caffe/regularizer.hpp @@ -0,0 +1,75 @@ +// Copyright 2014 kloudkl@github + +#ifndef CAFFE_REGULARIZER_HPP_ +#define CAFFE_REGULARIZER_HPP_ + +#include "caffe/blob.hpp" +#include "caffe/common.hpp" +#include "caffe/proto/caffe.pb.h" + +namespace caffe { + +template +class Regularizer { + public: + explicit Regularizer(const RegularizerParameter& param) + : coeff_(Dtype(param.coeff())) { + if (coeff_ < 0) { + LOG(FATAL)<< + "Regularizer coefficient must be greater than or equal to zero"; + } + } + + virtual ~Regularizer() { + } + + virtual Dtype Regularize(Blob* bottom); + virtual Dtype Regularize_cpu(Blob* bottom) = 0; + virtual Dtype Regularize_gpu(Blob* bottom) = 0; + + inline Dtype coeff() { + return coeff_; + } + inline void set_coeff(const Dtype coeff) { + coeff_ = coeff; + } + + protected: + // the weight regularization coefficient + Dtype coeff_; + DISABLE_COPY_AND_ASSIGN(Regularizer); +}; + +#define MAKE_SIMPLE_REGULARIZER_CLASS(type) \ +template \ +class type##Regularizer : public Regularizer { \ + /* NOLINT_NEXT_LINE(whitespace/indent) */ \ + public: \ + type##Regularizer(const RegularizerParameter& param) \ + : Regularizer(param) { \ + } \ + \ + virtual ~type##Regularizer() { \ + } \ + \ + virtual Dtype Regularize_cpu(Blob* bottom); \ + virtual Dtype Regularize_gpu(Blob* bottom); \ + \ + /* NOLINT_NEXT_LINE(whitespace/indent) */ \ + protected: \ + DISABLE_COPY_AND_ASSIGN(type##Regularizer); \ +} + +MAKE_SIMPLE_REGULARIZER_CLASS(L1); +MAKE_SIMPLE_REGULARIZER_CLASS(L2); +MAKE_SIMPLE_REGULARIZER_CLASS(MaxNorm); + +#define REG_TYPE(type) REG_TYPE_PASTE(type) +#define REG_TYPE_PASTE(type) RegularizerParameter_RegularizerType_##type + +template +Regularizer* GetRegularizer(const RegularizerParameter& param); + +} // namespace caffe + +#endif // CAFFE_REGULARIZER_HPP_ diff --git a/include/caffe/util/io.hpp b/include/caffe/util/io.hpp index 7bf78977d6d..e5405727ee4 100644 --- a/include/caffe/util/io.hpp +++ b/include/caffe/util/io.hpp @@ -15,6 +15,8 @@ using std::string; using ::google::protobuf::Message; +#define HDF5_NUM_DIMS 4 + namespace caffe { void ReadProtoFromTextFile(const char* filename, @@ -60,6 +62,10 @@ void hdf5_load_nd_dataset( hid_t file_id, const char* dataset_name_, int min_dim, int max_dim, Blob* blob); +template +void hdf5_save_nd_dataset( + const hid_t file_id, const string dataset_name, const Blob& blob); + } // namespace caffe #endif // CAFFE_UTIL_IO_H_ diff --git a/include/caffe/util/math_functions.hpp b/include/caffe/util/math_functions.hpp index e9e2db8f274..81097ef9774 100644 --- a/include/caffe/util/math_functions.hpp +++ b/include/caffe/util/math_functions.hpp @@ -1,10 +1,14 @@ // Copyright 2013 Yangqing Jia +// Copyright 2014 kloudkl@github #ifndef CAFFE_UTIL_MATH_FUNCTIONS_H_ #define CAFFE_UTIL_MATH_FUNCTIONS_H_ -#include #include +#include // for signbit +#include // for std::fabs + +#include "caffe/util/mkl_alternate.hpp" namespace caffe { @@ -44,7 +48,7 @@ void caffe_gpu_axpy(const int N, const Dtype alpha, const Dtype* X, Dtype* Y); template -void caffe_axpby(const int N, const Dtype alpha, const Dtype* X, +void caffe_cpu_axpby(const int N, const Dtype alpha, const Dtype* X, const Dtype beta, Dtype* Y); template @@ -84,6 +88,9 @@ void caffe_div(const int N, const Dtype* a, const Dtype* b, Dtype* y); template void caffe_powx(const 
int n, const Dtype* a, const Dtype b, Dtype* y); +template +Dtype caffe_nextafter(const Dtype b); + template void caffe_vRngUniform(const int n, Dtype* r, const Dtype a, const Dtype b); @@ -91,6 +98,9 @@ template void caffe_vRngGaussian(const int n, Dtype* r, const Dtype a, const Dtype sigma); +template +void caffe_vRngBernoulli(const int n, Dtype* r, const double p); + template void caffe_exp(const int n, const Dtype* a, Dtype* y); @@ -100,6 +110,91 @@ Dtype caffe_cpu_dot(const int n, const Dtype* x, const Dtype* y); template void caffe_gpu_dot(const int n, const Dtype* x, const Dtype* y, Dtype* out); +template +int caffe_hamming_distance(const int n, const Dtype* x, const Dtype* y); + +// Returns the sum of the absolute values of the elements of vector x +template +Dtype caffe_cpu_asum(const int n, const Dtype* x); + +template +void caffe_gpu_asum(const int n, const Dtype* x, Dtype* y); + +// the branchless, type-safe version from +// http://stackoverflow.com/questions/1903954/is-there-a-standard-sign-function-signum-sgn-in-c-c +template +inline char caffe_sign(Dtype val) { + return (Dtype(0) < val) - (val < Dtype(0)); +} + +// The following two macros are modifications of DEFINE_VSL_UNARY_FUNC +// in include/caffe/util/mkl_alternate.hpp authored by @Rowland Depp. +// Please refer to commit 7e8ef25c7 of the boost-eigen branch. +// Git cherry picking that commit caused a conflict hard to resolve and +// copying that file in convenient for code reviewing. +// So they have to be pasted here temporarily. +#define DEFINE_CAFFE_CPU_UNARY_FUNC(name, operation) \ + template \ + void caffe_cpu_##name(const int n, const Dtype* x, Dtype* y) { \ + CHECK_GT(n, 0); CHECK(x); CHECK(y); \ + for (int i = 0; i < n; ++i) { \ + operation; \ + } \ + } + +#define INSTANTIATE_CAFFE_CPU_UNARY_FUNC(name) \ + template <> \ + void caffe_cpu_##name(const int n, const float* x, float* y); \ + template <> \ + void caffe_cpu_##name(const int n, const double* x, double* y) + + +#define DEFINE_AND_INSTANTIATE_GPU_UNARY_FUNC(name, operation) \ +template \ +__global__ void name##_kernel(const int n, const Dtype* x, Dtype* y) { \ + int index = threadIdx.x + blockIdx.x * blockDim.x; \ + if (index < n) { \ + operation; \ + } \ +} \ +template <> \ +void caffe_gpu_##name(const int n, const float* x, float* y) { \ + /* NOLINT_NEXT_LINE(whitespace/operators) */ \ + name##_kernel<<>>( \ + n, x, y); \ +} \ +template <> \ +void caffe_gpu_##name(const int n, const double* x, double* y) { \ + /* NOLINT_NEXT_LINE(whitespace/operators) */ \ + name##_kernel<<>>( \ + n, x, y); \ +} + +// output is 1 for the positives, 0 for zero, and -1 for the negatives +DEFINE_CAFFE_CPU_UNARY_FUNC(sign, y[i] = caffe_sign(x[i])); + +template +void caffe_gpu_sign(const int n, const Dtype* x, Dtype* y); + +// This returns a nonzero value if the input has its sign bit set. 
+// The name sngbit is meant to avoid conflicts with std::signbit in the macro +using std::signbit; +DEFINE_CAFFE_CPU_UNARY_FUNC(sgnbit, y[i] = signbit(x[i])); + +template +void caffe_gpu_sgnbit(const int n, const Dtype* x, Dtype* y); + +DEFINE_CAFFE_CPU_UNARY_FUNC(fabs, y[i] = std::fabs(x[i])); + +template +void caffe_gpu_fabs(const int n, const Dtype* x, Dtype* y); + +template +void caffe_cpu_scale(const int n, const Dtype alpha, const Dtype *x, Dtype* y); + +template +void caffe_gpu_scale(const int n, const Dtype alpha, const Dtype *x, Dtype* y); + } // namespace caffe diff --git a/include/caffe/util/mkl_alternate.hpp b/include/caffe/util/mkl_alternate.hpp new file mode 100644 index 00000000000..39038dd148e --- /dev/null +++ b/include/caffe/util/mkl_alternate.hpp @@ -0,0 +1,97 @@ +// Copyright 2013 Rowland Depp + +#ifndef CAFFE_UTIL_MKL_ALTERNATE_H_ +#define CAFFE_UTIL_MKL_ALTERNATE_H_ + +#ifdef USE_MKL + +#include + +#else // If use MKL, simply include the MKL header + +extern "C" { +#include +} +#include + +// Functions that caffe uses but are not present if MKL is not linked. + +// A simple way to define the vsl unary functions. The operation should +// be in the form e.g. y[i] = sqrt(a[i]) +#define DEFINE_VSL_UNARY_FUNC(name, operation) \ + template \ + void v##name(const int n, const Dtype* a, Dtype* y) { \ + CHECK_GT(n, 0); CHECK(a); CHECK(y); \ + for (int i = 0; i < n; ++i) { operation; } \ + } \ + inline void vs##name( \ + const int n, const float* a, float* y) { \ + v##name(n, a, y); \ + } \ + inline void vd##name( \ + const int n, const double* a, double* y) { \ + v##name(n, a, y); \ + } + +DEFINE_VSL_UNARY_FUNC(Sqr, y[i] = a[i] * a[i]); +DEFINE_VSL_UNARY_FUNC(Exp, y[i] = exp(a[i])); + +// A simple way to define the vsl unary functions with singular parameter b. +// The operation should be in the form e.g. y[i] = pow(a[i], b) +#define DEFINE_VSL_UNARY_FUNC_WITH_PARAM(name, operation) \ + template \ + void v##name(const int n, const Dtype* a, const Dtype b, Dtype* y) { \ + CHECK_GT(n, 0); CHECK(a); CHECK(y); \ + for (int i = 0; i < n; ++i) { operation; } \ + } \ + inline void vs##name( \ + const int n, const float* a, const float b, float* y) { \ + v##name(n, a, b, y); \ + } \ + inline void vd##name( \ + const int n, const double* a, const float b, double* y) { \ + v##name(n, a, b, y); \ + } + +DEFINE_VSL_UNARY_FUNC_WITH_PARAM(Powx, y[i] = pow(a[i], b)); + +// A simple way to define the vsl binary functions. The operation should +// be in the form e.g. y[i] = a[i] + b[i] +#define DEFINE_VSL_BINARY_FUNC(name, operation) \ + template \ + void v##name(const int n, const Dtype* a, const Dtype* b, Dtype* y) { \ + CHECK_GT(n, 0); CHECK(a); CHECK(b); CHECK(y); \ + for (int i = 0; i < n; ++i) { operation; } \ + } \ + inline void vs##name( \ + const int n, const float* a, const float* b, float* y) { \ + v##name(n, a, b, y); \ + } \ + inline void vd##name( \ + const int n, const double* a, const double* b, double* y) { \ + v##name(n, a, b, y); \ + } + +DEFINE_VSL_BINARY_FUNC(Add, y[i] = a[i] + b[i]); +DEFINE_VSL_BINARY_FUNC(Sub, y[i] = a[i] - b[i]); +DEFINE_VSL_BINARY_FUNC(Mul, y[i] = a[i] * b[i]); +DEFINE_VSL_BINARY_FUNC(Div, y[i] = a[i] / b[i]); + +// In addition, MKL comes with an additional function axpby that is not present +// in standard blas. We will simply use a two-step (inefficient, of course) way +// to mimic that. 
+inline void cblas_saxpby(const int N, const float alpha, const float* X, + const int incX, const float beta, float* Y, + const int incY) { + cblas_sscal(N, beta, Y, incY); + cblas_saxpy(N, alpha, X, incX, Y, incY); +} +inline void cblas_daxpby(const int N, const double alpha, const double* X, + const int incX, const double beta, double* Y, + const int incY) { + cblas_dscal(N, beta, Y, incY); + cblas_daxpy(N, alpha, X, incX, Y, incY); +} + +#endif // USE_MKL +#endif // CAFFE_UTIL_MKL_ALTERNATE_H_ diff --git a/include/caffe/util/rng.hpp b/include/caffe/util/rng.hpp new file mode 100644 index 00000000000..8151a9a6f67 --- /dev/null +++ b/include/caffe/util/rng.hpp @@ -0,0 +1,19 @@ +// Copyright 2014 BVLC and contributors. + +#ifndef CAFFE_RNG_CPP_HPP_ +#define CAFFE_RNG_CPP_HPP_ + +#include +#include "caffe/common.hpp" + +namespace caffe { + + typedef boost::mt19937 rng_t; + inline rng_t& caffe_rng() { + Caffe::RNG &generator = Caffe::rng_stream(); + return *(caffe::rng_t*) generator.generator(); + } + +} // namespace caffe + +#endif // CAFFE_RNG_HPP_ diff --git a/include/caffe/vision_layers.hpp b/include/caffe/vision_layers.hpp index 90e2caa664f..0d5bf4e79a7 100644 --- a/include/caffe/vision_layers.hpp +++ b/include/caffe/vision_layers.hpp @@ -14,6 +14,10 @@ #include "caffe/layer.hpp" #include "caffe/proto/caffe.pb.h" +#include "caffe/regularizer.hpp" + +#define HDF5_DATA_DATASET_NAME "data" +#define HDF5_DATA_LABEL_NAME "label" namespace caffe { @@ -37,14 +41,14 @@ class ReLULayer : public NeuronLayer { : NeuronLayer(param) {} protected: - virtual void Forward_cpu(const vector*>& bottom, + virtual Dtype Forward_cpu(const vector*>& bottom, vector*>* top); - virtual void Forward_gpu(const vector*>& bottom, + virtual Dtype Forward_gpu(const vector*>& bottom, vector*>* top); - virtual Dtype Backward_cpu(const vector*>& top, + virtual void Backward_cpu(const vector*>& top, const bool propagate_down, vector*>* bottom); - virtual Dtype Backward_gpu(const vector*>& top, + virtual void Backward_gpu(const vector*>& top, const bool propagate_down, vector*>* bottom); }; @@ -55,14 +59,14 @@ class TanHLayer : public NeuronLayer { : NeuronLayer(param) {} protected: - virtual void Forward_cpu(const vector*>& bottom, + virtual Dtype Forward_cpu(const vector*>& bottom, vector*>* top); - virtual void Forward_gpu(const vector*>& bottom, + virtual Dtype Forward_gpu(const vector*>& bottom, vector*>* top); - virtual Dtype Backward_cpu(const vector*>& top, + virtual void Backward_cpu(const vector*>& top, const bool propagate_down, vector*>* bottom); - virtual Dtype Backward_gpu(const vector*>& top, + virtual void Backward_gpu(const vector*>& top, const bool propagate_down, vector*>* bottom); }; @@ -73,14 +77,14 @@ class SigmoidLayer : public NeuronLayer { : NeuronLayer(param) {} protected: - virtual void Forward_cpu(const vector*>& bottom, + virtual Dtype Forward_cpu(const vector*>& bottom, vector*>* top); - virtual void Forward_gpu(const vector*>& bottom, + virtual Dtype Forward_gpu(const vector*>& bottom, vector*>* top); - virtual Dtype Backward_cpu(const vector*>& top, + virtual void Backward_cpu(const vector*>& top, const bool propagate_down, vector*>* bottom); - virtual Dtype Backward_gpu(const vector*>& top, + virtual void Backward_gpu(const vector*>& top, const bool propagate_down, vector*>* bottom); }; @@ -92,14 +96,14 @@ class BNLLLayer : public NeuronLayer { : NeuronLayer(param) {} protected: - virtual void Forward_cpu(const vector*>& bottom, + virtual Dtype Forward_cpu(const vector*>& bottom, vector*>* 
top); - virtual void Forward_gpu(const vector*>& bottom, + virtual Dtype Forward_gpu(const vector*>& bottom, vector*>* top); - virtual Dtype Backward_cpu(const vector*>& top, + virtual void Backward_cpu(const vector*>& top, const bool propagate_down, vector*>* bottom); - virtual Dtype Backward_gpu(const vector*>& top, + virtual void Backward_gpu(const vector*>& top, const bool propagate_down, vector*>* bottom); }; @@ -113,14 +117,14 @@ class DropoutLayer : public NeuronLayer { vector*>* top); protected: - virtual void Forward_cpu(const vector*>& bottom, + virtual Dtype Forward_cpu(const vector*>& bottom, vector*>* top); - virtual void Forward_gpu(const vector*>& bottom, + virtual Dtype Forward_gpu(const vector*>& bottom, vector*>* top); - virtual Dtype Backward_cpu(const vector*>& top, + virtual void Backward_cpu(const vector*>& top, const bool propagate_down, vector*>* bottom); - virtual Dtype Backward_gpu(const vector*>& top, + virtual void Backward_gpu(const vector*>& top, const bool propagate_down, vector*>* bottom); shared_ptr rand_vec_; float threshold_; @@ -138,13 +142,13 @@ class SplitLayer : public Layer { vector*>* top); protected: - virtual void Forward_cpu(const vector*>& bottom, + virtual Dtype Forward_cpu(const vector*>& bottom, vector*>* top); - virtual void Forward_gpu(const vector*>& bottom, + virtual Dtype Forward_gpu(const vector*>& bottom, vector*>* top); - virtual Dtype Backward_cpu(const vector*>& top, + virtual void Backward_cpu(const vector*>& top, const bool propagate_down, vector*>* bottom); - virtual Dtype Backward_gpu(const vector*>& top, + virtual void Backward_gpu(const vector*>& top, const bool propagate_down, vector*>* bottom); int count_; }; @@ -159,13 +163,13 @@ class FlattenLayer : public Layer { vector*>* top); protected: - virtual void Forward_cpu(const vector*>& bottom, + virtual Dtype Forward_cpu(const vector*>& bottom, vector*>* top); - virtual void Forward_gpu(const vector*>& bottom, + virtual Dtype Forward_gpu(const vector*>& bottom, vector*>* top); - virtual Dtype Backward_cpu(const vector*>& top, + virtual void Backward_cpu(const vector*>& top, const bool propagate_down, vector*>* bottom); - virtual Dtype Backward_gpu(const vector*>& top, + virtual void Backward_gpu(const vector*>& top, const bool propagate_down, vector*>* bottom); int count_; }; @@ -180,14 +184,14 @@ class InnerProductLayer : public Layer { vector*>* top); protected: - virtual void Forward_cpu(const vector*>& bottom, + virtual Dtype Forward_cpu(const vector*>& bottom, vector*>* top); - virtual void Forward_gpu(const vector*>& bottom, + virtual Dtype Forward_gpu(const vector*>& bottom, vector*>* top); - virtual Dtype Backward_cpu(const vector*>& top, + virtual void Backward_cpu(const vector*>& top, const bool propagate_down, vector*>* bottom); - virtual Dtype Backward_gpu(const vector*>& top, + virtual void Backward_gpu(const vector*>& top, const bool propagate_down, vector*>* bottom); int M_; int K_; @@ -206,13 +210,13 @@ class PaddingLayer : public Layer { vector*>* top); protected: - virtual void Forward_cpu(const vector*>& bottom, + virtual Dtype Forward_cpu(const vector*>& bottom, vector*>* top); - virtual void Forward_gpu(const vector*>& bottom, + virtual Dtype Forward_gpu(const vector*>& bottom, vector*>* top); - virtual Dtype Backward_cpu(const vector*>& top, + virtual void Backward_cpu(const vector*>& top, const bool propagate_down, vector*>* bottom); - virtual Dtype Backward_gpu(const vector*>& top, + virtual void Backward_gpu(const vector*>& top, const bool 
propagate_down, vector*>* bottom); unsigned int PAD_; int NUM_; @@ -233,13 +237,13 @@ class LRNLayer : public Layer { vector*>* top); protected: - virtual void Forward_cpu(const vector*>& bottom, + virtual Dtype Forward_cpu(const vector*>& bottom, vector*>* top); - virtual void Forward_gpu(const vector*>& bottom, + virtual Dtype Forward_gpu(const vector*>& bottom, vector*>* top); - virtual Dtype Backward_cpu(const vector*>& top, + virtual void Backward_cpu(const vector*>& top, const bool propagate_down, vector*>* bottom); - virtual Dtype Backward_gpu(const vector*>& top, + virtual void Backward_gpu(const vector*>& top, const bool propagate_down, vector*>* bottom); // scale_ stores the intermediate summing results Blob scale_; @@ -263,13 +267,13 @@ class Im2colLayer : public Layer { vector*>* top); protected: - virtual void Forward_cpu(const vector*>& bottom, + virtual Dtype Forward_cpu(const vector*>& bottom, vector*>* top); - virtual void Forward_gpu(const vector*>& bottom, + virtual Dtype Forward_gpu(const vector*>& bottom, vector*>* top); - virtual Dtype Backward_cpu(const vector*>& top, + virtual void Backward_cpu(const vector*>& top, const bool propagate_down, vector*>* bottom); - virtual Dtype Backward_gpu(const vector*>& top, + virtual void Backward_gpu(const vector*>& top, const bool propagate_down, vector*>* bottom); int KSIZE_; int STRIDE_; @@ -288,13 +292,13 @@ class PoolingLayer : public Layer { vector*>* top); protected: - virtual void Forward_cpu(const vector*>& bottom, + virtual Dtype Forward_cpu(const vector*>& bottom, vector*>* top); - virtual void Forward_gpu(const vector*>& bottom, + virtual Dtype Forward_gpu(const vector*>& bottom, vector*>* top); - virtual Dtype Backward_cpu(const vector*>& top, + virtual void Backward_cpu(const vector*>& top, const bool propagate_down, vector*>* bottom); - virtual Dtype Backward_gpu(const vector*>& top, + virtual void Backward_gpu(const vector*>& top, const bool propagate_down, vector*>* bottom); int KSIZE_; int STRIDE_; @@ -316,13 +320,13 @@ class ConvolutionLayer : public Layer { vector*>* top); protected: - virtual void Forward_cpu(const vector*>& bottom, + virtual Dtype Forward_cpu(const vector*>& bottom, vector*>* top); - virtual void Forward_gpu(const vector*>& bottom, + virtual Dtype Forward_gpu(const vector*>& bottom, vector*>* top); - virtual Dtype Backward_cpu(const vector*>& top, + virtual void Backward_cpu(const vector*>& top, const bool propagate_down, vector*>* bottom); - virtual Dtype Backward_gpu(const vector*>& top, + virtual void Backward_gpu(const vector*>& top, const bool propagate_down, vector*>* bottom); Blob col_bob_; @@ -352,13 +356,13 @@ class ConcatLayer : public Layer { vector*>* top); protected: - virtual void Forward_cpu(const vector*>& bottom, + virtual Dtype Forward_cpu(const vector*>& bottom, vector*>* top); - virtual void Forward_gpu(const vector*>& bottom, + virtual Dtype Forward_gpu(const vector*>& bottom, vector*>* top); - virtual Dtype Backward_cpu(const vector*>& top, + virtual void Backward_cpu(const vector*>& top, const bool propagate_down, vector*>* bottom); - virtual Dtype Backward_gpu(const vector*>& top, + virtual void Backward_gpu(const vector*>& top, const bool propagate_down, vector*>* bottom); Blob col_bob_; @@ -387,14 +391,14 @@ class DataLayer : public Layer { vector*>* top); protected: - virtual void Forward_cpu(const vector*>& bottom, + virtual Dtype Forward_cpu(const vector*>& bottom, vector*>* top); - virtual void Forward_gpu(const vector*>& bottom, + virtual Dtype 
Forward_gpu(const vector*>& bottom, vector*>* top); - virtual Dtype Backward_cpu(const vector*>& top, - const bool propagate_down, vector*>* bottom); - virtual Dtype Backward_gpu(const vector*>& top, - const bool propagate_down, vector*>* bottom); + virtual void Backward_cpu(const vector*>& top, + const bool propagate_down, vector*>* bottom) { return; } + virtual void Backward_gpu(const vector*>& top, + const bool propagate_down, vector*>* bottom) { return; } shared_ptr db_; shared_ptr iter_; @@ -425,14 +429,14 @@ class ImagesLayer : public Layer { vector*>* top); protected: - virtual void Forward_cpu(const vector*>& bottom, + virtual Dtype Forward_cpu(const vector*>& bottom, vector*>* top); - virtual void Forward_gpu(const vector*>& bottom, + virtual Dtype Forward_gpu(const vector*>& bottom, vector*>* top); - virtual Dtype Backward_cpu(const vector*>& top, - const bool propagate_down, vector*>* bottom); - virtual Dtype Backward_gpu(const vector*>& top, - const bool propagate_down, vector*>* bottom); + virtual void Backward_cpu(const vector*>& top, + const bool propagate_down, vector*>* bottom) { return; } + virtual void Backward_gpu(const vector*>& top, + const bool propagate_down, vector*>* bottom) { return; } vector > lines_; int lines_id_; @@ -457,13 +461,13 @@ class HDF5DataLayer : public Layer { vector*>* top); protected: - virtual void Forward_cpu(const vector*>& bottom, + virtual Dtype Forward_cpu(const vector*>& bottom, vector*>* top); - virtual void Forward_gpu(const vector*>& bottom, + virtual Dtype Forward_gpu(const vector*>& bottom, vector*>* top); - virtual Dtype Backward_cpu(const vector*>& top, + virtual void Backward_cpu(const vector*>& top, const bool propagate_down, vector*>* bottom); - virtual Dtype Backward_gpu(const vector*>& top, + virtual void Backward_gpu(const vector*>& top, const bool propagate_down, vector*>* bottom); virtual void load_hdf5_file_data(const char* filename); @@ -477,6 +481,33 @@ class HDF5DataLayer : public Layer { }; +template +class HDF5OutputLayer : public Layer { + public: + explicit HDF5OutputLayer(const LayerParameter& param); + virtual ~HDF5OutputLayer(); + virtual void SetUp(const vector*>& bottom, + vector*>* top); + inline std::string file_name() const { return file_name_; } + + protected: + virtual Dtype Forward_cpu(const vector*>& bottom, + vector*>* top); + virtual Dtype Forward_gpu(const vector*>& bottom, + vector*>* top); + virtual void Backward_cpu(const vector*>& top, + const bool propagate_down, vector*>* bottom); + virtual void Backward_gpu(const vector*>& top, + const bool propagate_down, vector*>* bottom); + virtual void SaveBlobs(); + + std::string file_name_; + hid_t file_id_; + Blob data_blob_; + Blob label_blob_; +}; + + template class SoftmaxLayer : public Layer { public: @@ -486,13 +517,13 @@ class SoftmaxLayer : public Layer { vector*>* top); protected: - virtual void Forward_cpu(const vector*>& bottom, + virtual Dtype Forward_cpu(const vector*>& bottom, vector*>* top); - virtual void Forward_gpu(const vector*>& bottom, + virtual Dtype Forward_gpu(const vector*>& bottom, vector*>* top); - virtual Dtype Backward_cpu(const vector*>& top, + virtual void Backward_cpu(const vector*>& top, const bool propagate_down, vector*>* bottom); - virtual Dtype Backward_gpu(const vector*>& top, + virtual void Backward_gpu(const vector*>& top, const bool propagate_down, vector*>* bottom); // sum_multiplier is just used to carry out sum using blas @@ -513,13 +544,13 @@ class MultinomialLogisticLossLayer : public Layer { protected: // The 
loss layer will do nothing during forward - all computation are // carried out in the backward pass. - virtual void Forward_cpu(const vector*>& bottom, - vector*>* top) { return; } - virtual void Forward_gpu(const vector*>& bottom, - vector*>* top) { return; } - virtual Dtype Backward_cpu(const vector*>& top, + virtual Dtype Forward_cpu(const vector*>& bottom, + vector*>* top); + // virtual Dtype Forward_gpu(const vector*>& bottom, + // vector*>* top); + virtual void Backward_cpu(const vector*>& top, const bool propagate_down, vector*>* bottom); - // virtual Dtype Backward_gpu(const vector*>& top, + // virtual void Backward_gpu(const vector*>& top, // const bool propagate_down, vector*>* bottom); }; @@ -534,13 +565,13 @@ class InfogainLossLayer : public Layer { protected: // The loss layer will do nothing during forward - all computation are // carried out in the backward pass. - virtual void Forward_cpu(const vector*>& bottom, - vector*>* top) { return; } - virtual void Forward_gpu(const vector*>& bottom, - vector*>* top) { return; } - virtual Dtype Backward_cpu(const vector*>& top, + virtual Dtype Forward_cpu(const vector*>& bottom, + vector*>* top); + // virtual Dtype Forward_gpu(const vector*>& bottom, + // vector*>* top); + virtual void Backward_cpu(const vector*>& top, const bool propagate_down, vector*>* bottom); - // virtual Dtype Backward_gpu(const vector*>& top, + // virtual void Backward_gpu(const vector*>& top, // const bool propagate_down, vector*>* bottom); Blob infogain_; @@ -561,13 +592,13 @@ class SoftmaxWithLossLayer : public Layer { vector*>* top); protected: - virtual void Forward_cpu(const vector*>& bottom, + virtual Dtype Forward_cpu(const vector*>& bottom, vector*>* top); - virtual void Forward_gpu(const vector*>& bottom, + virtual Dtype Forward_gpu(const vector*>& bottom, vector*>* top); - virtual Dtype Backward_cpu(const vector*>& top, + virtual void Backward_cpu(const vector*>& top, const bool propagate_down, vector*>* bottom); - virtual Dtype Backward_gpu(const vector*>& top, + virtual void Backward_gpu(const vector*>& top, const bool propagate_down, vector*>* bottom); shared_ptr > softmax_layer_; @@ -590,17 +621,46 @@ class EuclideanLossLayer : public Layer { protected: // The loss layer will do nothing during forward - all computation are // carried out in the backward pass. - virtual void Forward_cpu(const vector*>& bottom, - vector*>* top) { return; } - virtual void Forward_gpu(const vector*>& bottom, - vector*>* top) { return; } - virtual Dtype Backward_cpu(const vector*>& top, + virtual Dtype Forward_cpu(const vector*>& bottom, + vector*>* top); + // virtual Dtype Forward_gpu(const vector*>& bottom, + // vector*>* top); + virtual void Backward_cpu(const vector*>& top, const bool propagate_down, vector*>* bottom); - // virtual Dtype Backward_gpu(const vector*>& top, + // virtual void Backward_gpu(const vector*>& top, // const bool propagate_down, vector*>* bottom); Blob difference_; }; +// The most natural places should the Regularizer subclasses +// be used are in the Layer::Backward* methods. +// The most beneficial use case is to succinctly test this layer +// following the practice in test_regularizer_as_loss_layer.cpp +// instead of testing the Regularizers for every other kind of layer +// which would be combination explosion. +// If you do want to use this layer as an independent layer in a network model, +// be cautious that it may incur unnecessary extra memory usage compared +// with the recommended method. 
+template +class RegularizerAsLossLayer : public Layer { + public: + explicit RegularizerAsLossLayer(const LayerParameter& param); + virtual void SetUp(const vector*>& bottom, + vector*>* top); + + protected: + virtual Dtype Forward_cpu(const vector*>& bottom, + vector*>* top); + virtual Dtype Forward_gpu(const vector*>& bottom, + vector*>* top); + virtual void Backward_cpu(const vector*>& top, + const bool propagate_down, vector*>* bottom); + virtual void Backward_gpu(const vector*>& top, + const bool propagate_down, vector*>* bottom); + + vector > > regularizers_; + int num_regularizers_; +}; template class AccuracyLayer : public Layer { @@ -611,13 +671,12 @@ class AccuracyLayer : public Layer { vector*>* top); protected: - virtual void Forward_cpu(const vector*>& bottom, + virtual Dtype Forward_cpu(const vector*>& bottom, vector*>* top); // The accuracy layer should not be used to compute backward operations. - virtual Dtype Backward_cpu(const vector*>& top, + virtual void Backward_cpu(const vector*>& top, const bool propagate_down, vector*>* bottom) { NOT_IMPLEMENTED; - return Dtype(0.); } }; @@ -638,14 +697,14 @@ class WindowDataLayer : public Layer { vector*>* top); protected: - virtual void Forward_cpu(const vector*>& bottom, + virtual Dtype Forward_cpu(const vector*>& bottom, vector*>* top); - virtual void Forward_gpu(const vector*>& bottom, + virtual Dtype Forward_gpu(const vector*>& bottom, vector*>* top); - virtual Dtype Backward_cpu(const vector*>& top, - const bool propagate_down, vector*>* bottom); - virtual Dtype Backward_gpu(const vector*>& top, - const bool propagate_down, vector*>* bottom); + virtual void Backward_cpu(const vector*>& top, + const bool propagate_down, vector*>* bottom) { return; } + virtual void Backward_gpu(const vector*>& top, + const bool propagate_down, vector*>* bottom) { return; } pthread_t thread_; shared_ptr > prefetch_data_; diff --git a/src/caffe/common.cpp b/src/caffe/common.cpp index f47173afcae..59cbc56b61c 100644 --- a/src/caffe/common.cpp +++ b/src/caffe/common.cpp @@ -1,15 +1,17 @@ -// Copyright 2013 Yangqing Jia +// Copyright 2014 BVLC and contributors. #include #include #include "caffe/common.hpp" +#include "caffe/util/rng.hpp" namespace caffe { shared_ptr Caffe::singleton_; +// curand seeding int64_t cluster_seedgen(void) { int64_t s, seed, pid; pid = getpid(); @@ -21,7 +23,8 @@ int64_t cluster_seedgen(void) { Caffe::Caffe() : mode_(Caffe::CPU), phase_(Caffe::TRAIN), cublas_handle_(NULL), - curand_generator_(NULL), vsl_stream_(NULL) { + curand_generator_(NULL), + random_generator_() { // Try to create a cublas handler, and report an error if failed (but we will // keep the program running as one might just want to run CPU code). if (cublasCreate(&cublas_handle_) != CUBLAS_STATUS_SUCCESS) { @@ -34,13 +37,6 @@ Caffe::Caffe() != CURAND_STATUS_SUCCESS) { LOG(ERROR) << "Cannot create Curand generator. Curand won't be available."; } - // Try to create a vsl stream. This should almost always work, but we will - // check it anyway. - if (vslNewStream(&vsl_stream_, VSL_BRNG_MT19937, - cluster_seedgen()) != VSL_STATUS_OK) { - LOG(ERROR) << "Cannot create vsl stream. 
VSL random number generator " - << "won't be available."; - } } Caffe::~Caffe() { @@ -48,7 +44,6 @@ Caffe::~Caffe() { if (curand_generator_) { CURAND_CHECK(curandDestroyGenerator(curand_generator_)); } - if (vsl_stream_) VSL_CHECK(vslDeleteStream(&vsl_stream_)); } void Caffe::set_random_seed(const unsigned int seed) { @@ -64,9 +59,8 @@ void Caffe::set_random_seed(const unsigned int seed) { } else { LOG(ERROR) << "Curand not available. Skipping setting the curand seed."; } - // VSL seed - VSL_CHECK(vslDeleteStream(&(Get().vsl_stream_))); - VSL_CHECK(vslNewStream(&(Get().vsl_stream_), VSL_BRNG_MT19937, seed)); + // RNG seed + Get().random_generator_ = RNG(seed); } void Caffe::SetDevice(const int device_id) { @@ -120,4 +114,37 @@ void Caffe::DeviceQuery() { return; } + +class Caffe::RNG::Generator { + public: + caffe::rng_t rng; +}; + +Caffe::RNG::RNG() +: generator_(new Generator) { } + +Caffe::RNG::RNG(unsigned int seed) +: generator_(new Generator) { + generator_->rng = caffe::rng_t(seed); +} + +Caffe::RNG::~RNG() { delete generator_; } + +Caffe::RNG::RNG(const RNG& other) : generator_(new Generator) { + *generator_ = *other.generator_; +} + +Caffe::RNG& Caffe::RNG::operator=(const RNG& other) { + *generator_ = *other.generator_; + return *this; +} + +void* Caffe::RNG::generator() { + return &generator_->rng; +} + +const void* Caffe::RNG::generator() const { + return &generator_->rng; +} + } // namespace caffe diff --git a/src/caffe/layer_factory.cpp b/src/caffe/layer_factory.cpp index 54e90d21034..efc7c0b64f6 100644 --- a/src/caffe/layer_factory.cpp +++ b/src/caffe/layer_factory.cpp @@ -37,6 +37,8 @@ Layer* GetLayer(const LayerParameter& param) { return new FlattenLayer(param); } else if (type == "hdf5_data") { return new HDF5DataLayer(param); + } else if (type == "hdf5_output") { + return new HDF5OutputLayer(param); } else if (type == "images") { return new ImagesLayer(param); } else if (type == "im2col") { @@ -53,6 +55,8 @@ Layer* GetLayer(const LayerParameter& param) { return new PaddingLayer(param); } else if (type == "pool") { return new PoolingLayer(param); + } else if (type == "regularizer_as_loss") { + return new RegularizerAsLossLayer(param); } else if (type == "relu") { return new ReLULayer(param); } else if (type == "sigmoid") { diff --git a/src/caffe/layers/bnll_layer.cpp b/src/caffe/layers/bnll_layer.cpp index b769a35212a..e7a4fba2d67 100644 --- a/src/caffe/layers/bnll_layer.cpp +++ b/src/caffe/layers/bnll_layer.cpp @@ -13,7 +13,7 @@ namespace caffe { const float kBNLL_THRESHOLD = 50.; template -void BNLLLayer::Forward_cpu(const vector*>& bottom, +Dtype BNLLLayer::Forward_cpu(const vector*>& bottom, vector*>* top) { const Dtype* bottom_data = bottom[0]->cpu_data(); Dtype* top_data = (*top)[0]->mutable_cpu_data(); @@ -23,10 +23,11 @@ void BNLLLayer::Forward_cpu(const vector*>& bottom, bottom_data[i] + log(1. + exp(-bottom_data[i])) : log(1. 
+ exp(bottom_data[i])); } + return Dtype(0); } template -Dtype BNLLLayer::Backward_cpu(const vector*>& top, +void BNLLLayer::Backward_cpu(const vector*>& top, const bool propagate_down, vector*>* bottom) { if (propagate_down) { @@ -40,7 +41,6 @@ Dtype BNLLLayer::Backward_cpu(const vector*>& top, bottom_diff[i] = top_diff[i] * expval / (expval + 1.); } } - return Dtype(0); } diff --git a/src/caffe/layers/bnll_layer.cu b/src/caffe/layers/bnll_layer.cu index 1fd200894c3..7252c0222c0 100644 --- a/src/caffe/layers/bnll_layer.cu +++ b/src/caffe/layers/bnll_layer.cu @@ -22,7 +22,7 @@ __global__ void BNLLForward(const int n, const Dtype* in, Dtype* out) { } template -void BNLLLayer::Forward_gpu(const vector*>& bottom, +Dtype BNLLLayer::Forward_gpu(const vector*>& bottom, vector*>* top) { const Dtype* bottom_data = bottom[0]->gpu_data(); Dtype* top_data = (*top)[0]->mutable_gpu_data(); @@ -31,6 +31,7 @@ void BNLLLayer::Forward_gpu(const vector*>& bottom, BNLLForward<<>>( count, bottom_data, top_data); CUDA_POST_KERNEL_CHECK; + return Dtype(0); } template @@ -43,7 +44,7 @@ __global__ void BNLLBackward(const int n, const Dtype* in_diff, } template -Dtype BNLLLayer::Backward_gpu(const vector*>& top, +void BNLLLayer::Backward_gpu(const vector*>& top, const bool propagate_down, vector*>* bottom) { if (propagate_down) { @@ -56,7 +57,6 @@ Dtype BNLLLayer::Backward_gpu(const vector*>& top, count, top_diff, bottom_data, bottom_diff); CUDA_POST_KERNEL_CHECK; } - return Dtype(0); } INSTANTIATE_CLASS(BNLLLayer); diff --git a/src/caffe/layers/concat_layer.cpp b/src/caffe/layers/concat_layer.cpp index dc949c14010..e65451061b0 100644 --- a/src/caffe/layers/concat_layer.cpp +++ b/src/caffe/layers/concat_layer.cpp @@ -42,7 +42,7 @@ void ConcatLayer::SetUp(const vector*>& bottom, } template -void ConcatLayer::Forward_cpu(const vector*>& bottom, +Dtype ConcatLayer::Forward_cpu(const vector*>& bottom, vector*>* top) { Dtype* top_data = (*top)[0]->mutable_cpu_data(); if (concat_dim_== 0) { @@ -69,10 +69,11 @@ void ConcatLayer::Forward_cpu(const vector*>& bottom, LOG(FATAL) << "concat_dim along dim" << concat_dim_ << " not implemented yet"; } + return Dtype(0.); } template -Dtype ConcatLayer::Backward_cpu(const vector*>& top, +void ConcatLayer::Backward_cpu(const vector*>& top, const bool propagate_down, vector*>* bottom) { const Dtype* top_diff = top[0]->cpu_diff(); if (concat_dim_ == 0) { @@ -100,7 +101,6 @@ Dtype ConcatLayer::Backward_cpu(const vector*>& top, LOG(FATAL) << "concat_dim along dim" << concat_dim_ << " not implemented yet"; } - return Dtype(0.); } INSTANTIATE_CLASS(ConcatLayer); diff --git a/src/caffe/layers/concat_layer.cu b/src/caffe/layers/concat_layer.cu index 616a5e61683..8a20cea64cf 100644 --- a/src/caffe/layers/concat_layer.cu +++ b/src/caffe/layers/concat_layer.cu @@ -9,7 +9,7 @@ namespace caffe { template -void ConcatLayer::Forward_gpu(const vector*>& bottom, +Dtype ConcatLayer::Forward_gpu(const vector*>& bottom, vector*>* top) { Dtype* top_data = (*top)[0]->mutable_gpu_data(); if (concat_dim_ == 0) { @@ -36,10 +36,11 @@ void ConcatLayer::Forward_gpu(const vector*>& bottom, LOG(FATAL) << "concat_dim along dim" << concat_dim_ << " not implemented yet"; } + return Dtype(0.); } template -Dtype ConcatLayer::Backward_gpu(const vector*>& top, +void ConcatLayer::Backward_gpu(const vector*>& top, const bool propagate_down, vector*>* bottom) { const Dtype* top_diff = top[0]->gpu_diff(); if (concat_dim_ == 0) { @@ -67,7 +68,6 @@ Dtype ConcatLayer::Backward_gpu(const vector*>& top, LOG(FATAL) << 
"concat_dim along dim" << concat_dim_ << " not implemented yet"; } - return Dtype(0.); } INSTANTIATE_CLASS(ConcatLayer); diff --git a/src/caffe/layers/conv_layer.cpp b/src/caffe/layers/conv_layer.cpp index 64a652a8e1d..cb1bca6579c 100644 --- a/src/caffe/layers/conv_layer.cpp +++ b/src/caffe/layers/conv_layer.cpp @@ -77,7 +77,7 @@ void ConvolutionLayer::SetUp(const vector*>& bottom, template -void ConvolutionLayer::Forward_cpu(const vector*>& bottom, +Dtype ConvolutionLayer::Forward_cpu(const vector*>& bottom, vector*>* top) { const Dtype* bottom_data = bottom[0]->cpu_data(); Dtype* top_data = (*top)[0]->mutable_cpu_data(); @@ -104,10 +104,11 @@ void ConvolutionLayer::Forward_cpu(const vector*>& bottom, (Dtype)1., top_data + (*top)[0]->offset(n)); } } + return Dtype(0.); } template -Dtype ConvolutionLayer::Backward_cpu(const vector*>& top, +void ConvolutionLayer::Backward_cpu(const vector*>& top, const bool propagate_down, vector*>* bottom) { const Dtype* top_diff = top[0]->cpu_diff(); const Dtype* weight = this->blobs_[0]->cpu_data(); @@ -159,7 +160,6 @@ Dtype ConvolutionLayer::Backward_cpu(const vector*>& top, bottom_diff + (*bottom)[0]->offset(n)); } } - return Dtype(0.); } INSTANTIATE_CLASS(ConvolutionLayer); diff --git a/src/caffe/layers/conv_layer.cu b/src/caffe/layers/conv_layer.cu index a7f56faa97b..f8f605584d1 100644 --- a/src/caffe/layers/conv_layer.cu +++ b/src/caffe/layers/conv_layer.cu @@ -11,7 +11,7 @@ namespace caffe { template -void ConvolutionLayer::Forward_gpu(const vector*>& bottom, +Dtype ConvolutionLayer::Forward_gpu(const vector*>& bottom, vector*>* top) { const Dtype* bottom_data = bottom[0]->gpu_data(); Dtype* top_data = (*top)[0]->mutable_gpu_data(); @@ -38,10 +38,11 @@ void ConvolutionLayer::Forward_gpu(const vector*>& bottom, (Dtype)1., top_data + (*top)[0]->offset(n)); } } + return Dtype(0.); } template -Dtype ConvolutionLayer::Backward_gpu(const vector*>& top, +void ConvolutionLayer::Backward_gpu(const vector*>& top, const bool propagate_down, vector*>* bottom) { const Dtype* top_diff = top[0]->gpu_diff(); const Dtype* weight = this->blobs_[0]->gpu_data(); @@ -95,7 +96,6 @@ Dtype ConvolutionLayer::Backward_gpu(const vector*>& top, bottom_diff + (*bottom)[0]->offset(n)); } } - return Dtype(0.); } diff --git a/src/caffe/layers/data_layer.cpp b/src/caffe/layers/data_layer.cpp index cc03cdbf0b7..f2ff7ff1d93 100644 --- a/src/caffe/layers/data_layer.cpp +++ b/src/caffe/layers/data_layer.cpp @@ -213,7 +213,7 @@ void DataLayer::SetUp(const vector*>& bottom, } template -void DataLayer::Forward_cpu(const vector*>& bottom, +Dtype DataLayer::Forward_cpu(const vector*>& bottom, vector*>* top) { // First, join the thread CHECK(!pthread_join(thread_, NULL)) << "Pthread joining failed."; @@ -225,12 +225,6 @@ void DataLayer::Forward_cpu(const vector*>& bottom, // Start a new prefetch thread CHECK(!pthread_create(&thread_, NULL, DataLayerPrefetch, reinterpret_cast(this))) << "Pthread execution failed."; -} - -// The backward operations are dummy - they do not carry any computation. 
-template -Dtype DataLayer::Backward_cpu(const vector*>& top, - const bool propagate_down, vector*>* bottom) { return Dtype(0.); } diff --git a/src/caffe/layers/data_layer.cu b/src/caffe/layers/data_layer.cu index 946f30f3b7f..57a375ea205 100644 --- a/src/caffe/layers/data_layer.cu +++ b/src/caffe/layers/data_layer.cu @@ -16,7 +16,7 @@ using std::string; namespace caffe { template -void DataLayer::Forward_gpu(const vector*>& bottom, +Dtype DataLayer::Forward_gpu(const vector*>& bottom, vector*>* top) { // First, join the thread CHECK(!pthread_join(thread_, NULL)) << "Pthread joining failed."; @@ -30,12 +30,6 @@ void DataLayer::Forward_gpu(const vector*>& bottom, // Start a new prefetch thread CHECK(!pthread_create(&thread_, NULL, DataLayerPrefetch, reinterpret_cast(this))) << "Pthread execution failed."; -} - -// The backward operations are dummy - they do not carry any computation. -template -Dtype DataLayer::Backward_gpu(const vector*>& top, - const bool propagate_down, vector*>* bottom) { return Dtype(0.); } diff --git a/src/caffe/layers/dropout_layer.cpp b/src/caffe/layers/dropout_layer.cpp index f480853cdf3..f07547ad81a 100644 --- a/src/caffe/layers/dropout_layer.cpp +++ b/src/caffe/layers/dropout_layer.cpp @@ -3,6 +3,7 @@ #include #include "caffe/common.hpp" +#include "caffe/util/math_functions.hpp" #include "caffe/layer.hpp" #include "caffe/syncedmem.hpp" #include "caffe/vision_layers.hpp" @@ -23,7 +24,7 @@ void DropoutLayer::SetUp(const vector*>& bottom, } template -void DropoutLayer::Forward_cpu(const vector*>& bottom, +Dtype DropoutLayer::Forward_cpu(const vector*>& bottom, vector*>* top) { const Dtype* bottom_data = bottom[0]->cpu_data(); Dtype* top_data = (*top)[0]->mutable_cpu_data(); @@ -31,18 +32,18 @@ void DropoutLayer::Forward_cpu(const vector*>& bottom, const int count = bottom[0]->count(); if (Caffe::phase() == Caffe::TRAIN) { // Create random numbers - viRngBernoulli(VSL_RNG_METHOD_BERNOULLI_ICDF, Caffe::vsl_stream(), - count, mask, 1. - threshold_); + caffe_vRngBernoulli(count, mask, 1. 
- threshold_); for (int i = 0; i < count; ++i) { top_data[i] = bottom_data[i] * mask[i] * scale_; } } else { memcpy(top_data, bottom_data, bottom[0]->count() * sizeof(Dtype)); } + return Dtype(0); } template -Dtype DropoutLayer::Backward_cpu(const vector*>& top, +void DropoutLayer::Backward_cpu(const vector*>& top, const bool propagate_down, vector*>* bottom) { CHECK(Caffe::phase() == Caffe::TRAIN); @@ -55,7 +56,6 @@ Dtype DropoutLayer::Backward_cpu(const vector*>& top, bottom_diff[i] = top_diff[i] * mask[i] * scale_; } } - return Dtype(0); } diff --git a/src/caffe/layers/dropout_layer.cu b/src/caffe/layers/dropout_layer.cu index 0b38ae2a576..dc1f3cf8740 100644 --- a/src/caffe/layers/dropout_layer.cu +++ b/src/caffe/layers/dropout_layer.cu @@ -24,7 +24,7 @@ __global__ void DropoutForward(const int n, const Dtype* in, } template -void DropoutLayer::Forward_gpu(const vector*>& bottom, +Dtype DropoutLayer::Forward_gpu(const vector*>& bottom, vector*>* top) { const Dtype* bottom_data = bottom[0]->gpu_data(); Dtype* top_data = (*top)[0]->mutable_gpu_data(); @@ -42,6 +42,7 @@ void DropoutLayer::Forward_gpu(const vector*>& bottom, CUDA_CHECK(cudaMemcpy(top_data, bottom_data, count * sizeof(Dtype), cudaMemcpyDeviceToDevice)); } + return Dtype(0); } template @@ -54,7 +55,7 @@ __global__ void DropoutBackward(const int n, const Dtype* in_diff, } template -Dtype DropoutLayer::Backward_gpu(const vector*>& top, +void DropoutLayer::Backward_gpu(const vector*>& top, const bool propagate_down, vector*>* bottom) { CHECK(Caffe::phase() == Caffe::TRAIN); @@ -68,7 +69,6 @@ Dtype DropoutLayer::Backward_gpu(const vector*>& top, count, top_diff, mask, uint_thres_, scale_, bottom_diff); CUDA_POST_KERNEL_CHECK; } - return Dtype(0); } INSTANTIATE_CLASS(DropoutLayer); diff --git a/src/caffe/layers/flatten_layer.cpp b/src/caffe/layers/flatten_layer.cpp index 9e17a8200c1..d8d5c4b6053 100644 --- a/src/caffe/layers/flatten_layer.cpp +++ b/src/caffe/layers/flatten_layer.cpp @@ -22,20 +22,20 @@ void FlattenLayer::SetUp(const vector*>& bottom, } template -void FlattenLayer::Forward_cpu(const vector*>& bottom, +Dtype FlattenLayer::Forward_cpu(const vector*>& bottom, vector*>* top) { const Dtype* bottom_data = bottom[0]->cpu_data(); Dtype* top_data = (*top)[0]->mutable_cpu_data(); caffe_copy(count_, bottom_data, top_data); + return Dtype(0.); } template -Dtype FlattenLayer::Backward_cpu(const vector*>& top, +void FlattenLayer::Backward_cpu(const vector*>& top, const bool propagate_down, vector*>* bottom) { const Dtype* top_diff = top[0]->cpu_diff(); Dtype* bottom_diff = (*bottom)[0]->mutable_cpu_diff(); caffe_copy(count_, top_diff, bottom_diff); - return Dtype(0.); } INSTANTIATE_CLASS(FlattenLayer); diff --git a/src/caffe/layers/flatten_layer.cu b/src/caffe/layers/flatten_layer.cu index 571e22e2417..fa1e6aa3141 100644 --- a/src/caffe/layers/flatten_layer.cu +++ b/src/caffe/layers/flatten_layer.cu @@ -9,20 +9,20 @@ namespace caffe { template -void FlattenLayer::Forward_gpu(const vector*>& bottom, +Dtype FlattenLayer::Forward_gpu(const vector*>& bottom, vector*>* top) { const Dtype* bottom_data = bottom[0]->gpu_data(); Dtype* top_data = (*top)[0]->mutable_gpu_data(); caffe_gpu_copy(count_, bottom_data, top_data); + return Dtype(0.); } template -Dtype FlattenLayer::Backward_gpu(const vector*>& top, +void FlattenLayer::Backward_gpu(const vector*>& top, const bool propagate_down, vector*>* bottom) { const Dtype* top_diff = top[0]->gpu_diff(); Dtype* bottom_diff = (*bottom)[0]->mutable_gpu_diff(); caffe_gpu_copy(count_, top_diff, 
bottom_diff); - return Dtype(0.); } INSTANTIATE_CLASS(FlattenLayer); diff --git a/src/caffe/layers/hdf5_data_layer.cpp b/src/caffe/layers/hdf5_data_layer.cpp index e5b17fedb20..3f87dbc512e 100644 --- a/src/caffe/layers/hdf5_data_layer.cpp +++ b/src/caffe/layers/hdf5_data_layer.cpp @@ -89,7 +89,7 @@ void HDF5DataLayer::SetUp(const vector*>& bottom, } template -void HDF5DataLayer::Forward_cpu(const vector*>& bottom, +Dtype HDF5DataLayer::Forward_cpu(const vector*>& bottom, vector*>* top) { const int batchsize = this->layer_param_.batchsize(); const int data_count = (*top)[0]->count() / (*top)[0]->num(); @@ -118,14 +118,13 @@ void HDF5DataLayer::Forward_cpu(const vector*>& bottom, &label_blob_.cpu_data()[current_row_ * label_data_count], sizeof(Dtype) * label_data_count); } + return Dtype(0.); } // The backward operations are dummy - they do not carry any computation. template -Dtype HDF5DataLayer::Backward_cpu(const vector*>& top, - const bool propagate_down, vector*>* bottom) { - return Dtype(0.); -} +void HDF5DataLayer::Backward_cpu(const vector*>& top, + const bool propagate_down, vector*>* bottom) { } INSTANTIATE_CLASS(HDF5DataLayer); diff --git a/src/caffe/layers/hdf5_data_layer.cu b/src/caffe/layers/hdf5_data_layer.cu index bed7f35a156..261d404d551 100644 --- a/src/caffe/layers/hdf5_data_layer.cu +++ b/src/caffe/layers/hdf5_data_layer.cu @@ -20,7 +20,7 @@ using std::string; namespace caffe { template -void HDF5DataLayer::Forward_gpu(const vector*>& bottom, +Dtype HDF5DataLayer::Forward_gpu(const vector*>& bottom, vector*>* top) { const int batchsize = this->layer_param_.batchsize(); const int data_count = (*top)[0]->count() / (*top)[0]->num(); @@ -53,12 +53,12 @@ void HDF5DataLayer::Forward_gpu(const vector*>& bottom, sizeof(Dtype) * label_data_count, cudaMemcpyHostToDevice)); } + return Dtype(0.); } template -Dtype HDF5DataLayer::Backward_gpu(const vector*>& top, +void HDF5DataLayer::Backward_gpu(const vector*>& top, const bool propagate_down, vector*>* bottom) { - return Dtype(0.); } INSTANTIATE_CLASS(HDF5DataLayer); diff --git a/src/caffe/layers/hdf5_output_layer.cpp b/src/caffe/layers/hdf5_output_layer.cpp new file mode 100644 index 00000000000..f8433c16680 --- /dev/null +++ b/src/caffe/layers/hdf5_output_layer.cpp @@ -0,0 +1,88 @@ +// Copyright 2014 BVLC and contributors. +/* +Contributors: +- kloudkl@github, 2014. 
+*/ + +#include + +#include "hdf5.h" +#include "hdf5_hl.h" + +#include "caffe/blob.hpp" +#include "caffe/common.hpp" +#include "caffe/layer.hpp" +#include "caffe/util/io.hpp" +#include "caffe/vision_layers.hpp" + +namespace caffe { +using std::vector; + +template +HDF5OutputLayer::HDF5OutputLayer(const LayerParameter& param) + : Layer(param), + file_name_(param.hdf5_output_param().file_name()) { + /* create a HDF5 file */ + file_id_ = H5Fcreate(file_name_.c_str(), H5F_ACC_TRUNC, H5P_DEFAULT, + H5P_DEFAULT); + CHECK_GE(file_id_, 0) << "Failed to open HDF5 file" << file_name_; +} + +template +HDF5OutputLayer::~HDF5OutputLayer() { + herr_t status = H5Fclose(file_id_); + CHECK_GE(status, 0) << "Failed to close HDF5 file " << file_name_; +} + +template +void HDF5OutputLayer::SaveBlobs() { + // TODO: no limit on the number of blobs + LOG(INFO) << "Saving HDF5 file" << file_name_; + CHECK_EQ(data_blob_.num(), label_blob_.num()) << + "data blob and label blob must have the same batch size"; + hdf5_save_nd_dataset(file_id_, HDF5_DATA_DATASET_NAME, data_blob_); + hdf5_save_nd_dataset(file_id_, HDF5_DATA_LABEL_NAME, label_blob_); + LOG(INFO) << "Successfully saved " << data_blob_.num() << " rows"; +} + +template +void HDF5OutputLayer::SetUp(const vector*>& bottom, + vector*>* top) { + // TODO: no limit on the number of blobs + CHECK_EQ(bottom.size(), 2) << "HDF5OutputLayer takes two blobs as input."; + CHECK_EQ(top->size(), 0) << "HDF5OutputLayer takes no output blobs."; +} + +template +Dtype HDF5OutputLayer::Forward_cpu(const vector*>& bottom, + vector*>* top) { + CHECK_GE(bottom.size(), 2); + CHECK_EQ(bottom[0]->num(), bottom[1]->num()); + data_blob_.Reshape(bottom[0]->num(), bottom[0]->channels(), + bottom[0]->height(), bottom[0]->width()); + label_blob_.Reshape(bottom[1]->num(), bottom[1]->channels(), + bottom[1]->height(), bottom[1]->width()); + const int data_datum_dim = bottom[0]->count() / bottom[0]->num(); + const int label_datum_dim = bottom[1]->count() / bottom[1]->num(); + + for (int i = 0; i < bottom[0]->num(); ++i) { + memcpy(&data_blob_.mutable_cpu_data()[i * data_datum_dim], + &bottom[0]->cpu_data()[i * data_datum_dim], + sizeof(Dtype) * data_datum_dim); + memcpy(&label_blob_.mutable_cpu_data()[i * label_datum_dim], + &bottom[1]->cpu_data()[i * label_datum_dim], + sizeof(Dtype) * label_datum_dim); + } + SaveBlobs(); + return Dtype(0.); +} + +template +void HDF5OutputLayer::Backward_cpu(const vector*>& top, + const bool propagate_down, vector*>* bottom) { + return; +} + +INSTANTIATE_CLASS(HDF5OutputLayer); + +} // namespace caffe diff --git a/src/caffe/layers/hdf5_output_layer.cu b/src/caffe/layers/hdf5_output_layer.cu new file mode 100644 index 00000000000..b5d10888653 --- /dev/null +++ b/src/caffe/layers/hdf5_output_layer.cu @@ -0,0 +1,53 @@ +// Copyright 2014 BVLC and contributors. +/* +Contributors: +- kloudkl@github, 2014. 
+*/ + +#include + +#include "hdf5.h" +#include "hdf5_hl.h" + +#include "caffe/blob.hpp" +#include "caffe/common.hpp" +#include "caffe/layer.hpp" +#include "caffe/util/io.hpp" +#include "caffe/vision_layers.hpp" + +namespace caffe { +using std::vector; + +template +Dtype HDF5OutputLayer::Forward_gpu(const vector*>& bottom, + vector*>* top) { + CHECK_GE(bottom.size(), 2); + CHECK_EQ(bottom[0]->num(), bottom[1]->num()); + data_blob_.Reshape(bottom[0]->num(), bottom[0]->channels(), + bottom[0]->height(), bottom[0]->width()); + label_blob_.Reshape(bottom[1]->num(), bottom[1]->channels(), + bottom[1]->height(), bottom[1]->width()); + const int data_datum_dim = bottom[0]->count() / bottom[0]->num(); + const int label_datum_dim = bottom[1]->count() / bottom[1]->num(); + + for (int i = 0; i < bottom[0]->num(); ++i) { + CUDA_CHECK(cudaMemcpy(&data_blob_.mutable_cpu_data()[i * data_datum_dim], + &bottom[0]->gpu_data()[i * data_datum_dim], + sizeof(Dtype) * data_datum_dim, cudaMemcpyDeviceToHost)); + CUDA_CHECK(cudaMemcpy(&label_blob_.mutable_cpu_data()[i * label_datum_dim], + &bottom[1]->gpu_data()[i * label_datum_dim], + sizeof(Dtype) * label_datum_dim, cudaMemcpyDeviceToHost)); + } + SaveBlobs(); + return Dtype(0.); +} + +template +void HDF5OutputLayer::Backward_gpu(const vector*>& top, + const bool propagate_down, vector*>* bottom) { + return; +} + +INSTANTIATE_CLASS(HDF5OutputLayer); + +} // namespace caffe diff --git a/src/caffe/layers/im2col_layer.cpp b/src/caffe/layers/im2col_layer.cpp index e711713b895..a01bfb7c21c 100644 --- a/src/caffe/layers/im2col_layer.cpp +++ b/src/caffe/layers/im2col_layer.cpp @@ -26,7 +26,7 @@ void Im2colLayer::SetUp(const vector*>& bottom, } template -void Im2colLayer::Forward_cpu(const vector*>& bottom, +Dtype Im2colLayer::Forward_cpu(const vector*>& bottom, vector*>* top) { const Dtype* bottom_data = bottom[0]->cpu_data(); Dtype* top_data = (*top)[0]->mutable_cpu_data(); @@ -34,10 +34,11 @@ void Im2colLayer::Forward_cpu(const vector*>& bottom, im2col_cpu(bottom_data + bottom[0]->offset(n), CHANNELS_, HEIGHT_, WIDTH_, KSIZE_, PAD_, STRIDE_, top_data + (*top)[0]->offset(n)); } + return Dtype(0.); } template -Dtype Im2colLayer::Backward_cpu(const vector*>& top, +void Im2colLayer::Backward_cpu(const vector*>& top, const bool propagate_down, vector*>* bottom) { const Dtype* top_diff = top[0]->cpu_diff(); Dtype* bottom_diff = (*bottom)[0]->mutable_cpu_diff(); @@ -45,7 +46,6 @@ Dtype Im2colLayer::Backward_cpu(const vector*>& top, col2im_cpu(top_diff + top[0]->offset(n), CHANNELS_, HEIGHT_, WIDTH_, KSIZE_, PAD_, STRIDE_, bottom_diff + (*bottom)[0]->offset(n)); } - return Dtype(0.); } INSTANTIATE_CLASS(Im2colLayer); diff --git a/src/caffe/layers/im2col_layer.cu b/src/caffe/layers/im2col_layer.cu index 2d949b12296..64731cc53d8 100644 --- a/src/caffe/layers/im2col_layer.cu +++ b/src/caffe/layers/im2col_layer.cu @@ -10,7 +10,7 @@ namespace caffe { template -void Im2colLayer::Forward_gpu(const vector*>& bottom, +Dtype Im2colLayer::Forward_gpu(const vector*>& bottom, vector*>* top) { const Dtype* bottom_data = bottom[0]->gpu_data(); Dtype* top_data = (*top)[0]->mutable_gpu_data(); @@ -18,10 +18,11 @@ void Im2colLayer::Forward_gpu(const vector*>& bottom, im2col_gpu(bottom_data + bottom[0]->offset(n), CHANNELS_, HEIGHT_, WIDTH_, KSIZE_, PAD_, STRIDE_, top_data + (*top)[0]->offset(n)); } + return Dtype(0.); } template -Dtype Im2colLayer::Backward_gpu(const vector*>& top, +void Im2colLayer::Backward_gpu(const vector*>& top, const bool propagate_down, vector*>* bottom) { const Dtype* 
top_diff = top[0]->gpu_diff(); Dtype* bottom_diff = (*bottom)[0]->mutable_gpu_diff(); @@ -29,7 +30,6 @@ Dtype Im2colLayer::Backward_gpu(const vector*>& top, col2im_gpu(top_diff + top[0]->offset(n), CHANNELS_, HEIGHT_, WIDTH_, KSIZE_, PAD_, STRIDE_, bottom_diff + (*bottom)[0]->offset(n)); } - return Dtype(0.); } diff --git a/src/caffe/layers/images_layer.cpp b/src/caffe/layers/images_layer.cpp index e750e01b266..6208a9e7fa6 100644 --- a/src/caffe/layers/images_layer.cpp +++ b/src/caffe/layers/images_layer.cpp @@ -233,7 +233,7 @@ void ImagesLayer::SetUp(const vector*>& bottom, } template -void ImagesLayer::Forward_cpu(const vector*>& bottom, +Dtype ImagesLayer::Forward_cpu(const vector*>& bottom, vector*>* top) { // First, join the thread CHECK(!pthread_join(thread_, NULL)) << "Pthread joining failed."; @@ -245,10 +245,11 @@ void ImagesLayer::Forward_cpu(const vector*>& bottom, // Start a new prefetch thread CHECK(!pthread_create(&thread_, NULL, ImagesLayerPrefetch, reinterpret_cast(this))) << "Pthread execution failed."; + return Dtype(0.); } template -void ImagesLayer::Forward_gpu(const vector*>& bottom, +Dtype ImagesLayer::Forward_gpu(const vector*>& bottom, vector*>* top) { // First, join the thread CHECK(!pthread_join(thread_, NULL)) << "Pthread joining failed."; @@ -262,18 +263,6 @@ void ImagesLayer::Forward_gpu(const vector*>& bottom, // Start a new prefetch thread CHECK(!pthread_create(&thread_, NULL, ImagesLayerPrefetch, reinterpret_cast(this))) << "Pthread execution failed."; -} - -// The backward operations are dummy - they do not carry any computation. -template -Dtype ImagesLayer::Backward_cpu(const vector*>& top, - const bool propagate_down, vector*>* bottom) { - return Dtype(0.); -} - -template -Dtype ImagesLayer::Backward_gpu(const vector*>& top, - const bool propagate_down, vector*>* bottom) { return Dtype(0.); } diff --git a/src/caffe/layers/inner_product_layer.cpp b/src/caffe/layers/inner_product_layer.cpp index 6987a787ed3..6ea228fefdd 100644 --- a/src/caffe/layers/inner_product_layer.cpp +++ b/src/caffe/layers/inner_product_layer.cpp @@ -1,8 +1,5 @@ // Copyright 2013 Yangqing Jia - -#include - #include #include "caffe/blob.hpp" @@ -61,7 +58,7 @@ void InnerProductLayer::SetUp(const vector*>& bottom, } template -void InnerProductLayer::Forward_cpu(const vector*>& bottom, +Dtype InnerProductLayer::Forward_cpu(const vector*>& bottom, vector*>* top) { const Dtype* bottom_data = bottom[0]->cpu_data(); Dtype* top_data = (*top)[0]->mutable_cpu_data(); @@ -73,10 +70,11 @@ void InnerProductLayer::Forward_cpu(const vector*>& bottom, reinterpret_cast(bias_multiplier_->cpu_data()), this->blobs_[1]->cpu_data(), (Dtype)1., top_data); } + return Dtype(0); } template -Dtype InnerProductLayer::Backward_cpu(const vector*>& top, +void InnerProductLayer::Backward_cpu(const vector*>& top, const bool propagate_down, vector*>* bottom) { const Dtype* top_diff = top[0]->cpu_diff(); @@ -96,7 +94,6 @@ Dtype InnerProductLayer::Backward_cpu(const vector*>& top, top_diff, this->blobs_[0]->cpu_data(), (Dtype)0., (*bottom)[0]->mutable_cpu_diff()); } - return Dtype(0); } INSTANTIATE_CLASS(InnerProductLayer); diff --git a/src/caffe/layers/inner_product_layer.cu b/src/caffe/layers/inner_product_layer.cu index c7c3e2a99fd..37463b5a971 100644 --- a/src/caffe/layers/inner_product_layer.cu +++ b/src/caffe/layers/inner_product_layer.cu @@ -1,7 +1,5 @@ // Copyright 2013 Yangqing Jia - -#include #include #include @@ -16,7 +14,7 @@ namespace caffe { template -void InnerProductLayer::Forward_gpu(const vector*>& 
bottom, +Dtype InnerProductLayer::Forward_gpu(const vector*>& bottom, vector*>* top) { const Dtype* bottom_data = bottom[0]->gpu_data(); Dtype* top_data = (*top)[0]->mutable_gpu_data(); @@ -28,10 +26,11 @@ void InnerProductLayer::Forward_gpu(const vector*>& bottom, reinterpret_cast(bias_multiplier_->gpu_data()), this->blobs_[1]->gpu_data(), (Dtype)1., top_data); } + return Dtype(0); } template -Dtype InnerProductLayer::Backward_gpu(const vector*>& top, +void InnerProductLayer::Backward_gpu(const vector*>& top, const bool propagate_down, vector*>* bottom) { const Dtype* top_diff = top[0]->gpu_diff(); @@ -51,7 +50,6 @@ Dtype InnerProductLayer::Backward_gpu(const vector*>& top, top_diff, this->blobs_[0]->gpu_data(), (Dtype)0., (*bottom)[0]->mutable_gpu_diff()); } - return Dtype(0); } INSTANTIATE_CLASS(InnerProductLayer); diff --git a/src/caffe/layers/loss_layer.cpp b/src/caffe/layers/loss_layer.cpp index 1c4303d9bd4..ef0074d5454 100644 --- a/src/caffe/layers/loss_layer.cpp +++ b/src/caffe/layers/loss_layer.cpp @@ -28,9 +28,24 @@ void MultinomialLogisticLossLayer::SetUp( CHECK_EQ(bottom[1]->width(), 1); } +template +Dtype MultinomialLogisticLossLayer::Forward_cpu( + const vector*>& bottom, vector*>* top) { + const Dtype* bottom_data = bottom[0]->cpu_data(); + const Dtype* bottom_label = bottom[1]->cpu_data(); + int num = bottom[0]->num(); + int dim = bottom[0]->count() / bottom[0]->num(); + Dtype loss = 0; + for (int i = 0; i < num; ++i) { + int label = static_cast(bottom_label[i]); + Dtype prob = max(bottom_data[i * dim + label], Dtype(kLOG_THRESHOLD)); + loss -= log(prob); + } + return loss / num; +} template -Dtype MultinomialLogisticLossLayer::Backward_cpu( +void MultinomialLogisticLossLayer::Backward_cpu( const vector*>& top, const bool propagate_down, vector*>* bottom) { const Dtype* bottom_data = (*bottom)[0]->cpu_data(); @@ -39,18 +54,13 @@ Dtype MultinomialLogisticLossLayer::Backward_cpu( int num = (*bottom)[0]->num(); int dim = (*bottom)[0]->count() / (*bottom)[0]->num(); memset(bottom_diff, 0, sizeof(Dtype) * (*bottom)[0]->count()); - Dtype loss = 0; for (int i = 0; i < num; ++i) { int label = static_cast(bottom_label[i]); Dtype prob = max(bottom_data[i * dim + label], Dtype(kLOG_THRESHOLD)); - loss -= log(prob); - bottom_diff[i * dim + label] = - 1. / prob / num; + bottom_diff[i * dim + label] = -1. 
/ prob / num; } - return loss / num; } -// TODO: implement the GPU version for multinomial loss - template void InfogainLossLayer::SetUp( @@ -72,7 +82,27 @@ void InfogainLossLayer::SetUp( template -Dtype InfogainLossLayer::Backward_cpu(const vector*>& top, +Dtype InfogainLossLayer::Forward_cpu(const vector*>& bottom, + vector*>* top) { + const Dtype* bottom_data = bottom[0]->cpu_data(); + const Dtype* bottom_label = bottom[1]->cpu_data(); + const Dtype* infogain_mat = infogain_.cpu_data(); + int num = bottom[0]->num(); + int dim = bottom[0]->count() / bottom[0]->num(); + CHECK_EQ(infogain_.height(), dim); + Dtype loss = 0; + for (int i = 0; i < num; ++i) { + int label = static_cast(bottom_label[i]); + for (int j = 0; j < dim; ++j) { + Dtype prob = max(bottom_data[i * dim + j], Dtype(kLOG_THRESHOLD)); + loss -= infogain_mat[label * dim + j] * log(prob); + } + } + return loss / num; +} + +template +void InfogainLossLayer::Backward_cpu(const vector*>& top, const bool propagate_down, vector*>* bottom) { const Dtype* bottom_data = (*bottom)[0]->cpu_data(); @@ -82,16 +112,13 @@ Dtype InfogainLossLayer::Backward_cpu(const vector*>& top, int num = (*bottom)[0]->num(); int dim = (*bottom)[0]->count() / (*bottom)[0]->num(); CHECK_EQ(infogain_.height(), dim); - Dtype loss = 0; for (int i = 0; i < num; ++i) { int label = static_cast(bottom_label[i]); for (int j = 0; j < dim; ++j) { Dtype prob = max(bottom_data[i * dim + j], Dtype(kLOG_THRESHOLD)); - loss -= infogain_mat[label * dim + j] * log(prob); bottom_diff[i * dim + j] = - infogain_mat[label * dim + j] / prob / num; } } - return loss / num; } @@ -110,18 +137,25 @@ void EuclideanLossLayer::SetUp( } template -Dtype EuclideanLossLayer::Backward_cpu(const vector*>& top, - const bool propagate_down, vector*>* bottom) { - int count = (*bottom)[0]->count(); - int num = (*bottom)[0]->num(); - caffe_sub(count, (*bottom)[0]->cpu_data(), (*bottom)[1]->cpu_data(), +Dtype EuclideanLossLayer::Forward_cpu(const vector*>& bottom, + vector*>* top) { + int count = bottom[0]->count(); + int num = bottom[0]->num(); + caffe_sub(count, bottom[0]->cpu_data(), bottom[1]->cpu_data(), difference_.mutable_cpu_data()); Dtype loss = caffe_cpu_dot( count, difference_.cpu_data(), difference_.cpu_data()) / num / Dtype(2); + return loss; +} + +template +void EuclideanLossLayer::Backward_cpu(const vector*>& top, + const bool propagate_down, vector*>* bottom) { + int count = (*bottom)[0]->count(); + int num = (*bottom)[0]->num(); // Compute the gradient - caffe_axpby(count, Dtype(1) / num, difference_.cpu_data(), Dtype(0), + caffe_cpu_axpby(count, Dtype(1) / num, difference_.cpu_data(), Dtype(0), (*bottom)[0]->mutable_cpu_diff()); - return loss; } template @@ -138,7 +172,7 @@ void AccuracyLayer::SetUp( } template -void AccuracyLayer::Forward_cpu(const vector*>& bottom, +Dtype AccuracyLayer::Forward_cpu(const vector*>& bottom, vector*>* top) { Dtype accuracy = 0; Dtype logprob = 0; @@ -166,6 +200,8 @@ void AccuracyLayer::Forward_cpu(const vector*>& bottom, // LOG(INFO) << "Accuracy: " << accuracy; (*top)[0]->mutable_cpu_data()[0] = accuracy / num; (*top)[0]->mutable_cpu_data()[1] = logprob / num; + // Accuracy layer should not be used as a loss function. 
+ return Dtype(0); } INSTANTIATE_CLASS(MultinomialLogisticLossLayer); diff --git a/src/caffe/layers/lrn_layer.cpp b/src/caffe/layers/lrn_layer.cpp index 36dbe41ea8c..698debab6a6 100644 --- a/src/caffe/layers/lrn_layer.cpp +++ b/src/caffe/layers/lrn_layer.cpp @@ -28,7 +28,7 @@ void LRNLayer::SetUp(const vector*>& bottom, } template -void LRNLayer::Forward_cpu(const vector*>& bottom, +Dtype LRNLayer::Forward_cpu(const vector*>& bottom, vector*>* top) { const Dtype* bottom_data = bottom[0]->cpu_data(); Dtype* top_data = (*top)[0]->mutable_cpu_data(); @@ -72,10 +72,12 @@ void LRNLayer::Forward_cpu(const vector*>& bottom, // In the end, compute output caffe_powx(scale_.count(), scale_data, -beta_, top_data); caffe_mul(scale_.count(), top_data, bottom_data, top_data); + + return Dtype(0.); } template -Dtype LRNLayer::Backward_cpu(const vector*>& top, +void LRNLayer::Backward_cpu(const vector*>& top, const bool propagate_down, vector*>* bottom) { const Dtype* top_diff = top[0]->cpu_diff(); const Dtype* top_data = top[0]->cpu_data(); @@ -126,7 +128,6 @@ Dtype LRNLayer::Backward_cpu(const vector*>& top, padded_ratio_data + padded_ratio.offset(0, c), accum_ratio_data); } } - return Dtype(0.); } INSTANTIATE_CLASS(LRNLayer); diff --git a/src/caffe/layers/lrn_layer.cu b/src/caffe/layers/lrn_layer.cu index 028aa8fa47e..1dcd0c087c0 100644 --- a/src/caffe/layers/lrn_layer.cu +++ b/src/caffe/layers/lrn_layer.cu @@ -65,7 +65,7 @@ __global__ void LRNComputeOutput(const int nthreads, const Dtype* in, } template -void LRNLayer::Forward_gpu(const vector*>& bottom, +Dtype LRNLayer::Forward_gpu(const vector*>& bottom, vector*>* top) { // First, compute scale const Dtype* bottom_data = bottom[0]->gpu_data(); @@ -84,6 +84,7 @@ void LRNLayer::Forward_gpu(const vector*>& bottom, LRNComputeOutput<<>>( n_threads, bottom_data, scale_data, -beta_, top_data); CUDA_POST_KERNEL_CHECK; + return Dtype(0.); } @@ -149,7 +150,7 @@ __global__ void LRNComputeDiff(const int nthreads, const Dtype* bottom_data, } template -Dtype LRNLayer::Backward_gpu(const vector*>& top, +void LRNLayer::Backward_gpu(const vector*>& top, const bool propagate_down, vector*>* bottom) { int n_threads = num_ * height_ * width_; // NOLINT_NEXT_LINE(whitespace/operators) @@ -158,7 +159,6 @@ Dtype LRNLayer::Backward_gpu(const vector*>& top, scale_.gpu_data(), top[0]->gpu_diff(), num_, channels_, height_, width_, size_, -beta_, Dtype(2. 
* alpha_ * beta_ / size_), (*bottom)[0]->mutable_gpu_diff()); - return Dtype(0.); } diff --git a/src/caffe/layers/padding_layer.cpp b/src/caffe/layers/padding_layer.cpp index 4cb67df0dcf..658cc6ab16c 100644 --- a/src/caffe/layers/padding_layer.cpp +++ b/src/caffe/layers/padding_layer.cpp @@ -29,7 +29,7 @@ void PaddingLayer::SetUp(const vector*>& bottom, } template -void PaddingLayer::Forward_cpu(const vector*>& bottom, +Dtype PaddingLayer::Forward_cpu(const vector*>& bottom, vector*>* top) { Dtype* top_data = (*top)[0]->mutable_cpu_data(); const Dtype* bottom_data = bottom[0]->cpu_data(); @@ -47,10 +47,11 @@ void PaddingLayer::Forward_cpu(const vector*>& bottom, } } } + return Dtype(0.); } template -Dtype PaddingLayer::Backward_cpu(const vector*>& top, +void PaddingLayer::Backward_cpu(const vector*>& top, const bool propagate_down, vector*>* bottom) { const Dtype* top_diff = top[0]->cpu_diff(); Dtype* bottom_diff = (*bottom)[0]->mutable_cpu_diff(); @@ -66,7 +67,6 @@ Dtype PaddingLayer::Backward_cpu(const vector*>& top, } } } - return Dtype(0.); } INSTANTIATE_CLASS(PaddingLayer); diff --git a/src/caffe/layers/padding_layer.cu b/src/caffe/layers/padding_layer.cu index 7ec28a9e30f..d476df501fd 100644 --- a/src/caffe/layers/padding_layer.cu +++ b/src/caffe/layers/padding_layer.cu @@ -27,7 +27,7 @@ __global__ void PaddingForward(const int count, const Dtype* in, Dtype* out, } template -void PaddingLayer::Forward_gpu(const vector*>& bottom, +Dtype PaddingLayer::Forward_gpu(const vector*>& bottom, vector*>* top) { const Dtype* bottom_data = bottom[0]->gpu_data(); Dtype* top_data = (*top)[0]->mutable_gpu_data(); @@ -39,6 +39,7 @@ void PaddingLayer::Forward_gpu(const vector*>& bottom, count, bottom_data, top_data, NUM_, CHANNEL_, HEIGHT_IN_, WIDTH_IN_, PAD_); CUDA_POST_KERNEL_CHECK; + return Dtype(0); } template @@ -61,7 +62,7 @@ __global__ void PaddingBackward(const int count, const Dtype* in, Dtype* out, } template -Dtype PaddingLayer::Backward_gpu(const vector*>& top, +void PaddingLayer::Backward_gpu(const vector*>& top, const bool propagate_down, vector*>* bottom) { if (propagate_down) { @@ -74,7 +75,6 @@ Dtype PaddingLayer::Backward_gpu(const vector*>& top, PAD_); CUDA_POST_KERNEL_CHECK; } - return Dtype(0); } INSTANTIATE_CLASS(PaddingLayer); diff --git a/src/caffe/layers/pooling_layer.cpp b/src/caffe/layers/pooling_layer.cpp index ce30e842c58..3fd421cd640 100644 --- a/src/caffe/layers/pooling_layer.cpp +++ b/src/caffe/layers/pooling_layer.cpp @@ -39,7 +39,7 @@ void PoolingLayer::SetUp(const vector*>& bottom, // TODO(Yangqing): Is there a faster way to do pooling in the channel-first // case? 
template -void PoolingLayer::Forward_cpu(const vector*>& bottom, +Dtype PoolingLayer::Forward_cpu(const vector*>& bottom, vector*>* top) { const Dtype* bottom_data = bottom[0]->cpu_data(); Dtype* top_data = (*top)[0]->mutable_cpu_data(); @@ -111,13 +111,14 @@ void PoolingLayer::Forward_cpu(const vector*>& bottom, default: LOG(FATAL) << "Unknown pooling method."; } + return Dtype(0.); } template -Dtype PoolingLayer::Backward_cpu(const vector*>& top, +void PoolingLayer::Backward_cpu(const vector*>& top, const bool propagate_down, vector*>* bottom) { if (!propagate_down) { - return Dtype(0.); + return; } const Dtype* top_diff = top[0]->cpu_diff(); const Dtype* top_data = top[0]->cpu_data(); @@ -188,7 +189,6 @@ Dtype PoolingLayer::Backward_cpu(const vector*>& top, default: LOG(FATAL) << "Unknown pooling method."; } - return Dtype(0.); } diff --git a/src/caffe/layers/pooling_layer.cu b/src/caffe/layers/pooling_layer.cu index 357a392976d..63b4d0dbad7 100644 --- a/src/caffe/layers/pooling_layer.cu +++ b/src/caffe/layers/pooling_layer.cu @@ -135,7 +135,7 @@ __global__ void StoPoolForwardTest(const int nthreads, template -void PoolingLayer::Forward_gpu(const vector*>& bottom, +Dtype PoolingLayer::Forward_gpu(const vector*>& bottom, vector*>* top) { const Dtype* bottom_data = bottom[0]->gpu_data(); Dtype* top_data = (*top)[0]->mutable_gpu_data(); @@ -179,6 +179,7 @@ void PoolingLayer::Forward_gpu(const vector*>& bottom, LOG(FATAL) << "Unknown pooling method."; } CUDA_POST_KERNEL_CHECK; + return Dtype(0.); } template @@ -277,10 +278,10 @@ __global__ void StoPoolBackward(const int nthreads, template -Dtype PoolingLayer::Backward_gpu(const vector*>& top, +void PoolingLayer::Backward_gpu(const vector*>& top, const bool propagate_down, vector*>* bottom) { if (!propagate_down) { - return Dtype(0.); + return; } const Dtype* top_diff = top[0]->gpu_diff(); Dtype* bottom_diff = (*bottom)[0]->mutable_gpu_diff(); @@ -311,7 +312,6 @@ Dtype PoolingLayer::Backward_gpu(const vector*>& top, LOG(FATAL) << "Unknown pooling method."; } CUDA_POST_KERNEL_CHECK; - return Dtype(0.); } diff --git a/src/caffe/layers/regularizer_as_loss_layer.cpp b/src/caffe/layers/regularizer_as_loss_layer.cpp new file mode 100644 index 00000000000..684725e2312 --- /dev/null +++ b/src/caffe/layers/regularizer_as_loss_layer.cpp @@ -0,0 +1,64 @@ +// Copyright 2014 kloudkl@github + +#include + +#include "caffe/blob.hpp" +#include "caffe/layer.hpp" +#include "caffe/vision_layers.hpp" +#include "caffe/util/math_functions.hpp" + +namespace caffe { +using std::vector; + +template +RegularizerAsLossLayer::RegularizerAsLossLayer( + const LayerParameter& param) + : Layer(param), + num_regularizers_(param.regularizer_size()) { + if (num_regularizers_ > 0) { + regularizers_.resize(num_regularizers_); + for (int i = 0; i < num_regularizers_; ++i) { + regularizers_[i].reset(GetRegularizer(param.regularizer(i))); + } + } +} + +template +void RegularizerAsLossLayer::SetUp(const vector*>& bottom, + vector*>* top) { + CHECK_EQ(bottom.size(), 1)<< + "RegularizerAsLossLayer takes one blob as input."; + CHECK_EQ(top->size(), 0) << + "RegularizerAsLossLayer takes no blob as output."; +} + +template +Dtype RegularizerAsLossLayer::Forward_cpu( + const vector*>& bottom, vector*>* top) { + Blob* bottom_data = bottom[0]; + if (bottom_data->count() > 0) { + memset(bottom_data->mutable_cpu_diff(), 0, + bottom_data->count() * sizeof(Dtype)); + Dtype loss = 0; + for (int i = 0; i < num_regularizers_; ++i) { + loss += regularizers_[i]->Regularize_cpu(bottom_data); + } + 
int num = bottom_data->num(); + // Scale down gradient + caffe_scal(bottom_data->count(), Dtype(1) / num, + bottom_data->mutable_cpu_diff()); + return loss / num; + } + return Dtype(0); +} + +template +void RegularizerAsLossLayer::Backward_cpu( + const vector*>& top, const bool propagate_down, + vector*>* bottom) { + return; +} + +INSTANTIATE_CLASS(RegularizerAsLossLayer); + +} // namespace caffe diff --git a/src/caffe/layers/regularizer_as_loss_layer.cu b/src/caffe/layers/regularizer_as_loss_layer.cu new file mode 100644 index 00000000000..45fd68eb7fd --- /dev/null +++ b/src/caffe/layers/regularizer_as_loss_layer.cu @@ -0,0 +1,43 @@ +// Copyright 2014 kloudkl@github + +#include + +#include "caffe/blob.hpp" +#include "caffe/layer.hpp" +#include "caffe/vision_layers.hpp" +#include "caffe/util/math_functions.hpp" + +namespace caffe { +using std::vector; + +template +Dtype RegularizerAsLossLayer::Forward_gpu( + const vector*>& bottom, vector*>* top) { + Blob* bottom_data = bottom[0]; + if (bottom_data->count() > 0) { + CUDA_CHECK( + cudaMemset(bottom_data->mutable_gpu_diff(), 0, + bottom_data->count() * sizeof(Dtype))); + Dtype loss = 0; + for (int i = 0; i < num_regularizers_; ++i) { + loss += regularizers_[i]->Regularize_gpu(bottom_data); + } + int num = bottom_data->num(); + // Scale down gradient + caffe_gpu_scal(bottom_data->count(), Dtype(1) / num, + bottom_data->mutable_gpu_diff()); + return loss / num; + } + return Dtype(0); +} + +template +void RegularizerAsLossLayer::Backward_gpu( + const vector*>& top, const bool propagate_down, + vector*>* bottom) { + return; +} + +INSTANTIATE_CLASS(RegularizerAsLossLayer); + +} // namespace caffe diff --git a/src/caffe/layers/relu_layer.cpp b/src/caffe/layers/relu_layer.cpp index 27ae94b7cb0..18c675c98c7 100644 --- a/src/caffe/layers/relu_layer.cpp +++ b/src/caffe/layers/relu_layer.cpp @@ -11,7 +11,7 @@ using std::max; namespace caffe { template -void ReLULayer::Forward_cpu(const vector*>& bottom, +Dtype ReLULayer::Forward_cpu(const vector*>& bottom, vector*>* top) { const Dtype* bottom_data = bottom[0]->cpu_data(); Dtype* top_data = (*top)[0]->mutable_cpu_data(); @@ -19,10 +19,11 @@ void ReLULayer::Forward_cpu(const vector*>& bottom, for (int i = 0; i < count; ++i) { top_data[i] = max(bottom_data[i], Dtype(0)); } + return Dtype(0); } template -Dtype ReLULayer::Backward_cpu(const vector*>& top, +void ReLULayer::Backward_cpu(const vector*>& top, const bool propagate_down, vector*>* bottom) { if (propagate_down) { @@ -34,7 +35,6 @@ Dtype ReLULayer::Backward_cpu(const vector*>& top, bottom_diff[i] = top_diff[i] * (bottom_data[i] > 0); } } - return Dtype(0); } diff --git a/src/caffe/layers/relu_layer.cu b/src/caffe/layers/relu_layer.cu index 20a5a45e2f4..27f5da5cc89 100644 --- a/src/caffe/layers/relu_layer.cu +++ b/src/caffe/layers/relu_layer.cu @@ -18,7 +18,7 @@ __global__ void ReLUForward(const int n, const Dtype* in, Dtype* out) { } template -void ReLULayer::Forward_gpu(const vector*>& bottom, +Dtype ReLULayer::Forward_gpu(const vector*>& bottom, vector*>* top) { const Dtype* bottom_data = bottom[0]->gpu_data(); Dtype* top_data = (*top)[0]->mutable_gpu_data(); @@ -32,6 +32,7 @@ void ReLULayer::Forward_gpu(const vector*>& bottom, // << " top_data: " << (unsigned long)top_data // << " blocks: " << CAFFE_GET_BLOCKS(count) // << " threads: " << CAFFE_CUDA_NUM_THREADS; + return Dtype(0); } template @@ -43,7 +44,7 @@ __global__ void ReLUBackward(const int n, const Dtype* in_diff, } template -Dtype ReLULayer::Backward_gpu(const vector*>& top, +void 
ReLULayer::Backward_gpu(const vector*>& top, const bool propagate_down, vector*>* bottom) { if (propagate_down) { @@ -56,7 +57,6 @@ Dtype ReLULayer::Backward_gpu(const vector*>& top, count, top_diff, bottom_data, bottom_diff); CUDA_POST_KERNEL_CHECK; } - return Dtype(0); } INSTANTIATE_CLASS(ReLULayer); diff --git a/src/caffe/layers/sigmoid_layer.cpp b/src/caffe/layers/sigmoid_layer.cpp index ba6ec84e717..44897954677 100644 --- a/src/caffe/layers/sigmoid_layer.cpp +++ b/src/caffe/layers/sigmoid_layer.cpp @@ -15,7 +15,7 @@ inline Dtype sigmoid(Dtype x) { } template -void SigmoidLayer::Forward_cpu(const vector*>& bottom, +Dtype SigmoidLayer::Forward_cpu(const vector*>& bottom, vector*>* top) { const Dtype* bottom_data = bottom[0]->cpu_data(); Dtype* top_data = (*top)[0]->mutable_cpu_data(); @@ -23,10 +23,11 @@ void SigmoidLayer::Forward_cpu(const vector*>& bottom, for (int i = 0; i < count; ++i) { top_data[i] = sigmoid(bottom_data[i]); } + return Dtype(0); } template -Dtype SigmoidLayer::Backward_cpu(const vector*>& top, +void SigmoidLayer::Backward_cpu(const vector*>& top, const bool propagate_down, vector*>* bottom) { if (propagate_down) { @@ -39,7 +40,6 @@ Dtype SigmoidLayer::Backward_cpu(const vector*>& top, bottom_diff[i] = top_diff[i] * sigmoid_x * (1. - sigmoid_x); } } - return Dtype(0); } INSTANTIATE_CLASS(SigmoidLayer); diff --git a/src/caffe/layers/sigmoid_layer.cu b/src/caffe/layers/sigmoid_layer.cu index ba311f814a3..3dbdc397bee 100644 --- a/src/caffe/layers/sigmoid_layer.cu +++ b/src/caffe/layers/sigmoid_layer.cu @@ -24,7 +24,7 @@ __global__ void SigmoidForward(const int n, const Dtype* in, Dtype* out) { } template -void SigmoidLayer::Forward_gpu(const vector*>& bottom, +Dtype SigmoidLayer::Forward_gpu(const vector*>& bottom, vector*>* top) { const Dtype* bottom_data = bottom[0]->gpu_data(); Dtype* top_data = (*top)[0]->mutable_gpu_data(); @@ -38,6 +38,7 @@ void SigmoidLayer::Forward_gpu(const vector*>& bottom, // << " top_data: " << (unsigned long)top_data // << " blocks: " << CAFFE_GET_BLOCKS(count) // << " threads: " << CAFFE_CUDA_NUM_THREADS; + return Dtype(0); } template @@ -50,7 +51,7 @@ __global__ void SigmoidBackward(const int n, const Dtype* in_diff, } template -Dtype SigmoidLayer::Backward_gpu(const vector*>& top, +void SigmoidLayer::Backward_gpu(const vector*>& top, const bool propagate_down, vector*>* bottom) { if (propagate_down) { @@ -63,7 +64,6 @@ Dtype SigmoidLayer::Backward_gpu(const vector*>& top, count, top_diff, bottom_data, bottom_diff); CUDA_POST_KERNEL_CHECK; } - return Dtype(0); } INSTANTIATE_CLASS(SigmoidLayer); diff --git a/src/caffe/layers/softmax_layer.cpp b/src/caffe/layers/softmax_layer.cpp index 69e95ff6385..0d2e4572c76 100644 --- a/src/caffe/layers/softmax_layer.cpp +++ b/src/caffe/layers/softmax_layer.cpp @@ -28,7 +28,7 @@ void SoftmaxLayer::SetUp(const vector*>& bottom, } template -void SoftmaxLayer::Forward_cpu(const vector*>& bottom, +Dtype SoftmaxLayer::Forward_cpu(const vector*>& bottom, vector*>* top) { const Dtype* bottom_data = bottom[0]->cpu_data(); Dtype* top_data = (*top)[0]->mutable_cpu_data(); @@ -56,10 +56,11 @@ void SoftmaxLayer::Forward_cpu(const vector*>& bottom, for (int i = 0; i < num; ++i) { caffe_scal(dim, Dtype(1.) 
/ scale_data[i], top_data + i * dim); } + return Dtype(0); } template -Dtype SoftmaxLayer::Backward_cpu(const vector*>& top, +void SoftmaxLayer::Backward_cpu(const vector*>& top, const bool propagate_down, vector*>* bottom) { const Dtype* top_diff = top[0]->cpu_diff(); @@ -79,7 +80,6 @@ Dtype SoftmaxLayer::Backward_cpu(const vector*>& top, scale_data, sum_multiplier_.cpu_data(), 1., bottom_diff); // elementwise multiplication caffe_mul(top[0]->count(), bottom_diff, top_data, bottom_diff); - return Dtype(0); } diff --git a/src/caffe/layers/softmax_layer.cu b/src/caffe/layers/softmax_layer.cu index 2e41a1794df..5efa4909263 100644 --- a/src/caffe/layers/softmax_layer.cu +++ b/src/caffe/layers/softmax_layer.cu @@ -43,7 +43,7 @@ __global__ void kernel_exp(const int num, const Dtype* data, Dtype* out) { } template -void SoftmaxLayer::Forward_gpu(const vector*>& bottom, +Dtype SoftmaxLayer::Forward_gpu(const vector*>& bottom, vector*>* top) { const Dtype* bottom_data = bottom[0]->gpu_data(); Dtype* top_data = (*top)[0]->mutable_gpu_data(); @@ -73,11 +73,12 @@ void SoftmaxLayer::Forward_gpu(const vector*>& bottom, kernel_softmax_div<<>>( num, dim, scale_data, top_data); + return Dtype(0); } // TODO(Yangqing): implement the GPU version of softmax. template -Dtype SoftmaxLayer::Backward_gpu(const vector*>& top, +void SoftmaxLayer::Backward_gpu(const vector*>& top, const bool propagate_down, vector*>* bottom) { const Dtype* top_diff = top[0]->gpu_diff(); const Dtype* top_data = top[0]->gpu_data(); @@ -103,7 +104,6 @@ Dtype SoftmaxLayer::Backward_gpu(const vector*>& top, scale_.gpu_data(), sum_multiplier_.gpu_data(), 1., bottom_diff); // elementwise multiplication caffe_gpu_mul(top[0]->count(), bottom_diff, top_data, bottom_diff); - return Dtype(0); } INSTANTIATE_CLASS(SoftmaxLayer); diff --git a/src/caffe/layers/softmax_loss_layer.cpp b/src/caffe/layers/softmax_loss_layer.cpp index 6fdaea5a1dd..f9bd82e217a 100644 --- a/src/caffe/layers/softmax_loss_layer.cpp +++ b/src/caffe/layers/softmax_loss_layer.cpp @@ -24,33 +24,39 @@ void SoftmaxWithLossLayer::SetUp(const vector*>& bottom, } template -void SoftmaxWithLossLayer::Forward_cpu( +Dtype SoftmaxWithLossLayer::Forward_cpu( const vector*>& bottom, vector*>* top) { // The forward pass computes the softmax prob values. 
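// The value returned below is the average multinomial negative log-likelihood,
//   loss = -(1/num) * sum_i log(max(prob_i[label_i], FLT_MIN)),
// where prob_i is the softmax output for example i; clamping at FLT_MIN keeps
// the log finite when the probability of the true class underflows.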
softmax_bottom_vec_[0] = bottom[0]; softmax_layer_->Forward(softmax_bottom_vec_, &softmax_top_vec_); + const Dtype* prob_data = prob_.cpu_data(); + const Dtype* label = bottom[1]->cpu_data(); + int num = prob_.num(); + int dim = prob_.count() / num; + Dtype loss = 0; + for (int i = 0; i < num; ++i) { + loss += -log(max(prob_data[i * dim + static_cast(label[i])], + Dtype(FLT_MIN))); + } + return loss / num; } template -Dtype SoftmaxWithLossLayer::Backward_cpu(const vector*>& top, +void SoftmaxWithLossLayer::Backward_cpu(const vector*>& top, const bool propagate_down, vector*>* bottom) { - // First, compute the diff + // Compute the diff Dtype* bottom_diff = (*bottom)[0]->mutable_cpu_diff(); const Dtype* prob_data = prob_.cpu_data(); memcpy(bottom_diff, prob_data, sizeof(Dtype) * prob_.count()); const Dtype* label = (*bottom)[1]->cpu_data(); int num = prob_.num(); int dim = prob_.count() / num; - Dtype loss = 0; for (int i = 0; i < num; ++i) { bottom_diff[i * dim + static_cast(label[i])] -= 1; - loss += -log(max(prob_data[i * dim + static_cast(label[i])], - Dtype(FLT_MIN))); } // Scale down gradient caffe_scal(prob_.count(), Dtype(1) / num, bottom_diff); - return loss / num; } diff --git a/src/caffe/layers/softmax_loss_layer.cu b/src/caffe/layers/softmax_loss_layer.cu index 100393caa3d..ab7ee6ee3bb 100644 --- a/src/caffe/layers/softmax_loss_layer.cu +++ b/src/caffe/layers/softmax_loss_layer.cu @@ -13,18 +13,17 @@ using std::max; namespace caffe { template -void SoftmaxWithLossLayer::Forward_gpu( +Dtype SoftmaxWithLossLayer::Forward_gpu( const vector*>& bottom, vector*>* top) { // The forward pass computes the softmax prob values. - softmax_bottom_vec_[0] = bottom[0]; - softmax_layer_->Forward(softmax_bottom_vec_, &softmax_top_vec_); + return Forward_cpu(bottom, top); } template -Dtype SoftmaxWithLossLayer::Backward_gpu(const vector*>& top, +void SoftmaxWithLossLayer::Backward_gpu(const vector*>& top, const bool propagate_down, vector*>* bottom) { // TODO(Yangqing): implement the GPU version of softmax. 
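// Both GPU paths of this layer currently fall through to the CPU
// implementations (Forward_gpu above now just returns Forward_cpu's loss);
// once a dedicated softmax-loss kernel exists, only these two wrappers change.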
- return Backward_cpu(top, propagate_down, bottom); + Backward_cpu(top, propagate_down, bottom); } INSTANTIATE_CLASS(SoftmaxWithLossLayer); diff --git a/src/caffe/layers/split_layer.cpp b/src/caffe/layers/split_layer.cpp index f9fc461a11f..a8a240f74a6 100644 --- a/src/caffe/layers/split_layer.cpp +++ b/src/caffe/layers/split_layer.cpp @@ -28,7 +28,7 @@ void SplitLayer::SetUp(const vector*>& bottom, } template -void SplitLayer::Forward_cpu(const vector*>& bottom, +Dtype SplitLayer::Forward_cpu(const vector*>& bottom, vector*>* top) { const Dtype* bottom_data = bottom[0]->cpu_data(); for (int i = 0; i < top->size(); ++i) { @@ -38,10 +38,11 @@ void SplitLayer::Forward_cpu(const vector*>& bottom, Dtype* top_data = (*top)[i]->mutable_cpu_data(); caffe_copy(count_, bottom_data, top_data); } + return Dtype(0.); } template -Dtype SplitLayer::Backward_cpu(const vector*>& top, +void SplitLayer::Backward_cpu(const vector*>& top, const bool propagate_down, vector*>* bottom) { if (propagate_down) { const Dtype* top_diff = top[0]->cpu_diff(); @@ -58,7 +59,6 @@ Dtype SplitLayer::Backward_cpu(const vector*>& top, caffe_axpy(count_, Dtype(1.), top_diff, bottom_diff); } } - return Dtype(0.); } diff --git a/src/caffe/layers/split_layer.cu b/src/caffe/layers/split_layer.cu index 5f25a460a6a..deccf990a27 100644 --- a/src/caffe/layers/split_layer.cu +++ b/src/caffe/layers/split_layer.cu @@ -9,7 +9,7 @@ namespace caffe { template -void SplitLayer::Forward_gpu(const vector*>& bottom, +Dtype SplitLayer::Forward_gpu(const vector*>& bottom, vector*>* top) { const Dtype* bottom_data = bottom[0]->gpu_data(); for (int i = 0; i < top->size(); ++i) { @@ -19,10 +19,11 @@ void SplitLayer::Forward_gpu(const vector*>& bottom, Dtype* top_data = (*top)[i]->mutable_gpu_data(); caffe_gpu_copy(count_, bottom_data, top_data); } + return Dtype(0.); } template -Dtype SplitLayer::Backward_gpu(const vector*>& top, +void SplitLayer::Backward_gpu(const vector*>& top, const bool propagate_down, vector*>* bottom) { if (propagate_down) { const Dtype* top_diff = top[0]->gpu_diff(); @@ -39,7 +40,6 @@ Dtype SplitLayer::Backward_gpu(const vector*>& top, caffe_gpu_axpy(count_, Dtype(1.), top_diff, bottom_diff); } } - return Dtype(0.); } diff --git a/src/caffe/layers/tanh_layer.cpp b/src/caffe/layers/tanh_layer.cpp index d6f99560082..c26579234bc 100644 --- a/src/caffe/layers/tanh_layer.cpp +++ b/src/caffe/layers/tanh_layer.cpp @@ -11,7 +11,7 @@ namespace caffe { template -void TanHLayer::Forward_cpu(const vector*>& bottom, +Dtype TanHLayer::Forward_cpu(const vector*>& bottom, vector*>* top) { const Dtype* bottom_data = bottom[0]->cpu_data(); Dtype* top_data = (*top)[0]->mutable_cpu_data(); @@ -21,10 +21,11 @@ void TanHLayer::Forward_cpu(const vector*>& bottom, exp2x = exp(2*bottom_data[i]); top_data[i] = (exp2x - Dtype(1))/(exp2x + Dtype(1)); } + return Dtype(0); } template -Dtype TanHLayer::Backward_cpu(const vector*>& top, +void TanHLayer::Backward_cpu(const vector*>& top, const bool propagate_down, vector*>* bottom) { if (propagate_down) { @@ -40,7 +41,6 @@ Dtype TanHLayer::Backward_cpu(const vector*>& top, bottom_diff[i] = top_diff[i] * (1 - tanhx*tanhx); } } - return Dtype(0); } INSTANTIATE_CLASS(TanHLayer); diff --git a/src/caffe/layers/tanh_layer.cu b/src/caffe/layers/tanh_layer.cu index c1f8a29cc5c..899b841b069 100644 --- a/src/caffe/layers/tanh_layer.cu +++ b/src/caffe/layers/tanh_layer.cu @@ -19,7 +19,7 @@ __global__ void TanHForward(const int n, const Dtype* in, Dtype* out) { } template -void TanHLayer::Forward_gpu(const vector*>& 
bottom, +Dtype TanHLayer::Forward_gpu(const vector*>& bottom, vector*>* top) { const Dtype* bottom_data = bottom[0]->gpu_data(); Dtype* top_data = (*top)[0]->mutable_gpu_data(); @@ -33,6 +33,7 @@ void TanHLayer::Forward_gpu(const vector*>& bottom, // << " top_data: " << (unsigned long)top_data // << " blocks: " << CAFFE_GET_BLOCKS(count) // << " threads: " << CAFFE_CUDA_NUM_THREADS; + return Dtype(0); } template @@ -46,7 +47,7 @@ __global__ void TanHBackward(const int n, const Dtype* in_diff, } template -Dtype TanHLayer::Backward_gpu(const vector*>& top, +void TanHLayer::Backward_gpu(const vector*>& top, const bool propagate_down, vector*>* bottom) { if (propagate_down) { @@ -59,7 +60,6 @@ Dtype TanHLayer::Backward_gpu(const vector*>& top, count, top_diff, bottom_data, bottom_diff); CUDA_POST_KERNEL_CHECK; } - return Dtype(0); } INSTANTIATE_CLASS(TanHLayer); diff --git a/src/caffe/layers/window_data_layer.cpp b/src/caffe/layers/window_data_layer.cpp index 87fb54112f1..a288403f284 100644 --- a/src/caffe/layers/window_data_layer.cpp +++ b/src/caffe/layers/window_data_layer.cpp @@ -403,7 +403,7 @@ void WindowDataLayer::SetUp(const vector*>& bottom, } template -void WindowDataLayer::Forward_cpu(const vector*>& bottom, +Dtype WindowDataLayer::Forward_cpu(const vector*>& bottom, vector*>* top) { // First, join the thread CHECK(!pthread_join(thread_, NULL)) << "Pthread joining failed."; @@ -415,10 +415,11 @@ void WindowDataLayer::Forward_cpu(const vector*>& bottom, // Start a new prefetch thread CHECK(!pthread_create(&thread_, NULL, WindowDataLayerPrefetch, reinterpret_cast(this))) << "Pthread execution failed."; + return Dtype(0.); } template -void WindowDataLayer::Forward_gpu(const vector*>& bottom, +Dtype WindowDataLayer::Forward_gpu(const vector*>& bottom, vector*>* top) { // First, join the thread CHECK(!pthread_join(thread_, NULL)) << "Pthread joining failed."; @@ -432,18 +433,6 @@ void WindowDataLayer::Forward_gpu(const vector*>& bottom, // Start a new prefetch thread CHECK(!pthread_create(&thread_, NULL, WindowDataLayerPrefetch, reinterpret_cast(this))) << "Pthread execution failed."; -} - -// The backward operations are dummy - they do not carry any computation. 
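// As with DataLayer and ImagesLayer earlier in this patch, WindowDataLayer only
// produces prefetched data: its Forward_* methods report Dtype(0.) loss and the
// empty Backward_* overrides below are dropped under the void-returning interface.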
-template -Dtype WindowDataLayer::Backward_cpu(const vector*>& top, - const bool propagate_down, vector*>* bottom) { - return Dtype(0.); -} - -template -Dtype WindowDataLayer::Backward_gpu(const vector*>& top, - const bool propagate_down, vector*>* bottom) { return Dtype(0.); } diff --git a/src/caffe/net.cpp b/src/caffe/net.cpp index 1837b0768ae..3018285cd76 100644 --- a/src/caffe/net.cpp +++ b/src/caffe/net.cpp @@ -162,6 +162,12 @@ void Net::Init(const NetParameter& in_param) { LOG(INFO) << "This network produces output " << *it; net_output_blobs_.push_back(blobs_[blob_name_to_idx[*it]].get()); } + for (size_t i = 0; i < blob_names_.size(); ++i) { + blob_names_index_[blob_names_[i]] = i; + } + for (size_t i = 0; i < layer_names_.size(); ++i) { + layer_names_index_[layer_names_[i]] = i; + } GetLearningRateAndWeightDecay(); LOG(INFO) << "Network initialization done."; LOG(INFO) << "Memory required for Data " << memory_used*sizeof(Dtype); @@ -207,27 +213,32 @@ void Net::GetLearningRateAndWeightDecay() { } template -const vector*>& Net::ForwardPrefilled() { +const vector*>& Net::ForwardPrefilled(Dtype* loss) { + if (loss != NULL) { + *loss = Dtype(0.); + } for (int i = 0; i < layers_.size(); ++i) { // LOG(ERROR) << "Forwarding " << layer_names_[i]; - layers_[i]->Forward(bottom_vecs_[i], &top_vecs_[i]); + Dtype layer_loss = layers_[i]->Forward(bottom_vecs_[i], &top_vecs_[i]); + if (loss != NULL) { + *loss += layer_loss; + } } return net_output_blobs_; } template const vector*>& Net::Forward( - const vector*> & bottom) { + const vector*> & bottom, Dtype* loss) { // Copy bottom to internal bottom for (int i = 0; i < bottom.size(); ++i) { net_input_blobs_[i]->CopyFrom(*bottom[i]); } - return ForwardPrefilled(); + return ForwardPrefilled(loss); } - template -string Net::Forward(const string& input_blob_protos) { +string Net::Forward(const string& input_blob_protos, Dtype* loss) { BlobProtoVector blob_proto_vec; if (net_input_blobs_.size()) { blob_proto_vec.ParseFromString(input_blob_protos); @@ -237,7 +248,7 @@ string Net::Forward(const string& input_blob_protos) { net_input_blobs_[i]->FromProto(blob_proto_vec.blobs(i)); } } - ForwardPrefilled(); + ForwardPrefilled(loss); blob_proto_vec.Clear(); for (int i = 0; i < net_output_blobs_.size(); ++i) { net_output_blobs_[i]->ToProto(blob_proto_vec.add_blobs()); @@ -249,16 +260,12 @@ string Net::Forward(const string& input_blob_protos) { template -Dtype Net::Backward() { - Dtype loss = 0; +void Net::Backward() { for (int i = layers_.size() - 1; i >= 0; --i) { if (layer_need_backward_[i]) { - Dtype layer_loss = layers_[i]->Backward( - top_vecs_[i], true, &bottom_vecs_[i]); - loss += layer_loss; + layers_[i]->Backward(top_vecs_[i], true, &bottom_vecs_[i]); } } - return loss; } template @@ -327,6 +334,42 @@ void Net::Update() { } } +template +bool Net::has_blob(const string& blob_name) { + return blob_names_index_.find(blob_name) != blob_names_index_.end(); +} + +template +const shared_ptr > Net::blob_by_name( + const string& blob_name) { + shared_ptr > blob_ptr; + if (has_blob(blob_name)) { + blob_ptr = blobs_[blob_names_index_[blob_name]]; + } else { + blob_ptr.reset((Blob*)(NULL)); + LOG(WARNING) << "Unknown blob name " << blob_name; + } + return blob_ptr; +} + +template +bool Net::has_layer(const string& layer_name) { + return layer_names_index_.find(layer_name) != layer_names_index_.end(); +} + +template +const shared_ptr > Net::layer_by_name( + const string& layer_name) { + shared_ptr > layer_ptr; + if (has_layer(layer_name)) { + layer_ptr = 
layers_[layer_names_index_[layer_name]]; + } else { + layer_ptr.reset((Layer*)(NULL)); + LOG(WARNING) << "Unknown layer name " << layer_name; + } + return layer_ptr; +} + INSTANTIATE_CLASS(Net); } // namespace caffe diff --git a/src/caffe/proto/caffe.proto b/src/caffe/proto/caffe.proto index 5a73a4496e0..b7a2142edea 100644 --- a/src/caffe/proto/caffe.proto +++ b/src/caffe/proto/caffe.proto @@ -38,6 +38,18 @@ message FillerParameter { optional float std = 6 [default = 1]; // the std value in gaussian filler } +message RegularizerParameter { +// Weight regularizer type + enum RegularizerType { + L1 = 0; + L2 = 1; + MAX_NORM = 2; // not fully implemented yet + } + optional RegularizerType type = 1; + // Coefficent controls how strong to regularize + optional float coeff = 2 [default = 0]; +} + message LayerParameter { optional string name = 1; // the layer name optional string type = 2; // the string to specify the layer type @@ -125,6 +137,14 @@ message LayerParameter { // the other dimensions must be the same for all the bottom blobs. // By default it will concatenate blobs along the channels dimension. optional uint32 concat_dim = 65 [default = 1]; + + optional HDF5OutputParameter hdf5_output_param = 1001; + + repeated RegularizerParameter regularizer = 2001; +} + +message HDF5OutputParameter { + optional string file_name = 1; } message LayerConnection { diff --git a/src/caffe/regularizer.cpp b/src/caffe/regularizer.cpp new file mode 100644 index 00000000000..704a36b9c54 --- /dev/null +++ b/src/caffe/regularizer.cpp @@ -0,0 +1,91 @@ +// Copyright 2014 kloudkl@github + +#include // for std::abs + +#include "caffe/proto/caffe.pb.h" +#include "caffe/regularizer.hpp" +#include "caffe/util/math_functions.hpp" + +namespace caffe { + +template +Dtype Regularizer::Regularize(Blob* bottom) { + Dtype penalty = 0; + if (Caffe::mode() == Caffe::CPU) { + penalty = Regularize_cpu(bottom); + } else if (Caffe::mode() == Caffe::GPU) { + penalty = Regularize_gpu(bottom); + } else { + LOG(FATAL)<< "Unknown mode: " << Caffe::mode(); + } + return penalty; +} + +template +Dtype L1Regularizer::Regularize_cpu(Blob* bottom) { + if (this->coeff_ == 0) { + return Dtype(0.); + } + const Dtype* data = bottom->cpu_data(); + Dtype* diff = bottom->mutable_cpu_diff(); + int count = bottom->count(); + for (int c = 0; c < count; ++c) { + diff[c] += this->coeff_ * caffe_sign(data[c]); + } + Dtype penalty = caffe_cpu_asum(count, data); + return this->coeff_ * penalty; +} + +template +Dtype L2Regularizer::Regularize_cpu(Blob* bottom) { + if (this->coeff_ == 0) { + return Dtype(0); + } + const Dtype* data = bottom->cpu_data(); + Dtype* diff = bottom->mutable_cpu_diff(); + int count = bottom->count(); + caffe_axpy(count, this->coeff_ * 2., data, diff); + Dtype penalty = caffe_cpu_dot(count, data, data); + return this->coeff_ * penalty; +} + +template +Dtype MaxNormRegularizer::Regularize_cpu(Blob* bottom) { + if (this->coeff_ == 0) { + return Dtype(0); + } + const Dtype* data = bottom->cpu_data(); + Dtype* diff = bottom->mutable_cpu_diff(); + int count = bottom->count(); + Dtype penalty = 0; + // TODO: Implement MaxNormRegularizer::Regularize_cpu + return this->coeff_ * penalty; +} + +template +Regularizer* GetRegularizer(const RegularizerParameter& param) { + const RegularizerParameter_RegularizerType type = param.type(); + if (type == REG_TYPE(L1)) { + return new L1Regularizer(param); + } else if (type == REG_TYPE(L2)) { + return new L2Regularizer(param); + } else if (type == REG_TYPE(MAX_NORM)) { + return new 
MaxNormRegularizer(param); + } else { + LOG(FATAL) << "Unknown regularizer type: " << type; + } + // just to suppress old compiler warnings. + return (Regularizer*) (NULL); +} + +template Regularizer* GetRegularizer( + const RegularizerParameter& param); +template Regularizer* GetRegularizer( + const RegularizerParameter& param); + +INSTANTIATE_CLASS(Regularizer); +INSTANTIATE_CLASS(L1Regularizer); +INSTANTIATE_CLASS(L2Regularizer); +INSTANTIATE_CLASS(MaxNormRegularizer); + +} // namespace caffe diff --git a/src/caffe/regularizer.cu b/src/caffe/regularizer.cu new file mode 100644 index 00000000000..973ed4d6c17 --- /dev/null +++ b/src/caffe/regularizer.cu @@ -0,0 +1,77 @@ +// Copyright 2014 kloudkl@github + +#include // for std::abs + +#include "caffe/common.hpp" +#include "caffe/proto/caffe.pb.h" +#include "caffe/regularizer.hpp" +#include "caffe/util/math_functions.hpp" // for caffe_gpu_asum + +namespace caffe { + +template +__device__ inline int gpu_sign(const Dtype val) { + return (Dtype(0) < val) - (val < Dtype(0)); +} + +template __device__ int gpu_sign(const float val); +template __device__ int gpu_sign(const double val); + +template +__global__ void ScaleSign(const int n, const Dtype coeff, const Dtype* data, + Dtype* diff) { + CUDA_KERNEL_LOOP(index, n) { + diff[index] += coeff * gpu_sign(data[index]); + } +} + +template +Dtype L1Regularizer::Regularize_gpu(Blob* bottom) { + if (this->coeff_ == 0) { + return Dtype(0); + } + const Dtype* data = bottom->gpu_data(); + Dtype* diff = bottom->mutable_gpu_diff(); + int count = bottom->count(); + /* NOLINT_NEXT_LINE(whitespace/operators) */ + ScaleSign<<>>( + count, this->coeff_, data, diff); + CUDA_POST_KERNEL_CHECK; + Dtype penalty = 0; + caffe_gpu_asum(count, data, &penalty); + return this->coeff_ * penalty; +} + +template +Dtype L2Regularizer::Regularize_gpu(Blob* bottom) { + if (this->coeff_ == 0) { + return Dtype(0); + } + const Dtype* data = bottom->gpu_data(); + Dtype* diff = bottom->mutable_gpu_diff(); + int count = bottom->count(); + caffe_gpu_axpy(count, this->coeff_ * 2., data, diff); + Dtype penalty = 0; + caffe_gpu_dot(count, data, data, &penalty); + return this->coeff_ * penalty; +} + +template +Dtype MaxNormRegularizer::Regularize_gpu(Blob* bottom) { + if (this->coeff_ == 0) { + return Dtype(0); + } + const Dtype* data = bottom->cpu_data(); + Dtype* diff = bottom->mutable_cpu_diff(); + int count = bottom->count(); + Dtype penalty = 0; + // TODO: Implement MaxNormRegularizer::Regularize_cpu + return this->coeff_ * penalty; +} + +INSTANTIATE_CLASS(Regularizer); +INSTANTIATE_CLASS(L1Regularizer); +INSTANTIATE_CLASS(L2Regularizer); +INSTANTIATE_CLASS(MaxNormRegularizer); + +} // namespace caffe diff --git a/src/caffe/solver.cpp b/src/caffe/solver.cpp index eb024856841..fb46c4ec4f3 100644 --- a/src/caffe/solver.cpp +++ b/src/caffe/solver.cpp @@ -215,7 +215,7 @@ void SGDSolver::ComputeUpdateValue() { // Compute the value to history, and then copy them to the blob's diff. 
Dtype local_rate = rate * net_params_lr[param_id]; Dtype local_decay = weight_decay * net_params_weight_decay[param_id]; - caffe_axpby(net_params[param_id]->count(), local_rate, + caffe_cpu_axpby(net_params[param_id]->count(), local_rate, net_params[param_id]->cpu_diff(), momentum, history_[param_id]->mutable_cpu_data()); if (local_decay) { diff --git a/src/caffe/test/test_common.cpp b/src/caffe/test/test_common.cpp index 275c6e1bf73..12e7168867f 100644 --- a/src/caffe/test/test_common.cpp +++ b/src/caffe/test/test_common.cpp @@ -6,7 +6,7 @@ #include "gtest/gtest.h" #include "caffe/common.hpp" #include "caffe/syncedmem.hpp" - +#include "caffe/util/math_functions.hpp" #include "caffe/test/test_caffe_main.hpp" namespace caffe { @@ -19,10 +19,6 @@ TEST_F(CommonTest, TestCublasHandler) { EXPECT_TRUE(Caffe::cublas_handle()); } -TEST_F(CommonTest, TestVslStream) { - EXPECT_TRUE(Caffe::vsl_stream()); -} - TEST_F(CommonTest, TestBrewMode) { Caffe::set_mode(Caffe::CPU); EXPECT_EQ(Caffe::mode(), Caffe::CPU); @@ -40,18 +36,19 @@ TEST_F(CommonTest, TestRandSeedCPU) { SyncedMemory data_a(10 * sizeof(int)); SyncedMemory data_b(10 * sizeof(int)); Caffe::set_random_seed(1701); - viRngBernoulli(VSL_RNG_METHOD_BERNOULLI_ICDF, Caffe::vsl_stream(), - 10, reinterpret_cast(data_a.mutable_cpu_data()), 0.5); + caffe_vRngBernoulli(10, + reinterpret_cast(data_a.mutable_cpu_data()), 0.5); + Caffe::set_random_seed(1701); - viRngBernoulli(VSL_RNG_METHOD_BERNOULLI_ICDF, Caffe::vsl_stream(), - 10, reinterpret_cast(data_b.mutable_cpu_data()), 0.5); + caffe_vRngBernoulli(10, + reinterpret_cast(data_b.mutable_cpu_data()), 0.5); + for (int i = 0; i < 10; ++i) { EXPECT_EQ(((const int*)(data_a.cpu_data()))[i], ((const int*)(data_b.cpu_data()))[i]); } } - TEST_F(CommonTest, TestRandSeedGPU) { SyncedMemory data_a(10 * sizeof(unsigned int)); SyncedMemory data_b(10 * sizeof(unsigned int)); @@ -67,5 +64,4 @@ TEST_F(CommonTest, TestRandSeedGPU) { } } - } // namespace caffe diff --git a/src/caffe/test/test_flatten_layer.cpp b/src/caffe/test/test_flatten_layer.cpp index 41c0453696c..f241135db57 100644 --- a/src/caffe/test/test_flatten_layer.cpp +++ b/src/caffe/test/test_flatten_layer.cpp @@ -23,6 +23,7 @@ class FlattenLayerTest : public ::testing::Test { FlattenLayerTest() : blob_bottom_(new Blob(2, 3, 6, 5)), blob_top_(new Blob()) { + Caffe::set_random_seed(1701); // fill the values FillerParameter filler_param; GaussianFiller filler(filler_param); @@ -73,6 +74,8 @@ TYPED_TEST(FlattenLayerTest, TestGPU) { for (int c = 0; c < 3 * 6 * 5; ++c) { EXPECT_EQ(this->blob_top_->data_at(0, c, 0, 0), this->blob_bottom_->data_at(0, c / (6 * 5), (c / 5) % 6, c % 5)); + EXPECT_EQ(this->blob_top_->data_at(1, c, 0, 0), + this->blob_bottom_->data_at(1, c / (6 * 5), (c / 5) % 6, c % 5)); } } diff --git a/src/caffe/test/test_gradient_check_util.hpp b/src/caffe/test/test_gradient_check_util.hpp index 895e9965a9a..6e895241f44 100644 --- a/src/caffe/test/test_gradient_check_util.hpp +++ b/src/caffe/test/test_gradient_check_util.hpp @@ -84,31 +84,30 @@ void GradientChecker::CheckGradientSingle(Layer* layer, } // go through the bottom and parameter blobs // LOG(ERROR) << "Checking " << blobs_to_check.size() << " blobs."; - for (int blobid = 0; blobid < blobs_to_check.size(); ++blobid) { - Blob* current_blob = blobs_to_check[blobid]; - // LOG(ERROR) << "Blob " << blobid << ": checking " << current_blob->count() - // << " parameters."; + for (int blob_id = 0; blob_id < blobs_to_check.size(); ++blob_id) { + Blob* current_blob = blobs_to_check[blob_id]; + // 
LOG(ERROR) << "Blob " << blob_id << ": checking " + // << current_blob->count() << " parameters."; // go through the values for (int feat_id = 0; feat_id < current_blob->count(); ++feat_id) { // First, obtain the original data Caffe::set_random_seed(seed_); - layer->Forward(*bottom, top); - Dtype computed_objective = GetObjAndGradient(top, top_id, top_data_id); - // Get any additional loss from the layer - computed_objective += layer->Backward(*top, true, bottom); + // Get any loss from the layer + Dtype computed_objective = layer->Forward(*bottom, top); + // Get additional loss from the objective + computed_objective += GetObjAndGradient(top, top_id, top_data_id); + layer->Backward(*top, true, bottom); Dtype computed_gradient = current_blob->cpu_diff()[feat_id]; // compute score by adding stepsize current_blob->mutable_cpu_data()[feat_id] += stepsize_; Caffe::set_random_seed(seed_); - layer->Forward(*bottom, top); - Dtype positive_objective = GetObjAndGradient(top, top_id, top_data_id); - positive_objective += layer->Backward(*top, true, bottom); + Dtype positive_objective = layer->Forward(*bottom, top); + positive_objective += GetObjAndGradient(top, top_id, top_data_id); // compute score by subtracting stepsize current_blob->mutable_cpu_data()[feat_id] -= stepsize_ * 2; Caffe::set_random_seed(seed_); - layer->Forward(*bottom, top); - Dtype negative_objective = GetObjAndGradient(top, top_id, top_data_id); - negative_objective += layer->Backward(*top, true, bottom); + Dtype negative_objective = layer->Forward(*bottom, top); + negative_objective += GetObjAndGradient(top, top_id, top_data_id); // Recover stepsize current_blob->mutable_cpu_data()[feat_id] += stepsize_; Dtype estimated_gradient = (positive_objective - negative_objective) / @@ -123,7 +122,7 @@ void GradientChecker::CheckGradientSingle(Layer* layer, max(fabs(computed_gradient), fabs(estimated_gradient)), 1.); EXPECT_NEAR(computed_gradient, estimated_gradient, threshold_ * scale) << "debug: (top_id, top_data_id, blob_id, feat_id)=" - << top_id << "," << top_data_id << "," << blobid << "," << feat_id; + << top_id << "," << top_data_id << "," << blob_id << "," << feat_id; } // LOG(ERROR) << "Feature: " << current_blob->cpu_data()[feat_id]; // LOG(ERROR) << "computed gradient: " << computed_gradient diff --git a/src/caffe/test/test_hdf5_output_layer.cpp b/src/caffe/test/test_hdf5_output_layer.cpp new file mode 100644 index 00000000000..3cbfb3f35a8 --- /dev/null +++ b/src/caffe/test/test_hdf5_output_layer.cpp @@ -0,0 +1,127 @@ +// Copyright 2014 kloudkl@github + +#include +#include +#include + +#include "gtest/gtest.h" +#include "caffe/blob.hpp" +#include "caffe/common.hpp" +#include "caffe/util/io.hpp" +#include "caffe/vision_layers.hpp" +#include "caffe/proto/caffe.pb.h" +#include "caffe/test/test_caffe_main.hpp" + +namespace caffe { +using std::string; +using std::vector; + +extern cudaDeviceProp CAFFE_TEST_CUDA_PROP; + +template +class HDF5OutputLayerTest : public ::testing::Test { + protected: + HDF5OutputLayerTest() + : output_file_name_("/tmp/test_hdf5_output_layer-sample_data.hdf5"), + input_file_name_("src/caffe/test/test_data/sample_data.h5"), + blob_data_(new Blob()), + blob_label_(new Blob()), + num_(5), + channels_(8), + height_(5), + width_(5) { + } + virtual void SetUp() { + } + + virtual ~HDF5OutputLayerTest() { + delete blob_data_; + delete blob_label_; + } + + void CheckBlobEqual(const Blob& b1, const Blob& b2); + + string output_file_name_; + string input_file_name_; + Blob* const blob_data_; + Blob* const 
blob_label_; + vector*> blob_bottom_vec_; + vector*> blob_top_vec_; + int num_; + int channels_; + int height_; + int width_; +}; + +template +void HDF5OutputLayerTest::CheckBlobEqual( + const Blob& b1, const Blob& b2) { + EXPECT_EQ(b1.num(), b2.num()); + EXPECT_EQ(b1.channels(), b2.channels()); + EXPECT_EQ(b1.height(), b2.height()); + EXPECT_EQ(b1.width(), b2.width()); + for (int n = 0; n < b1.num(); ++n) { + for (int c = 0; c < b1.channels(); ++c) { + for (int h = 0; h < b1.height(); ++h) { + for (int w = 0; w < b1.width(); ++w) { + EXPECT_EQ(b1.data_at(n, c, h, w), b1.data_at(n, c, h, w)); + } + } + } + } +} + +typedef ::testing::Types Dtypes; +TYPED_TEST_CASE(HDF5OutputLayerTest, Dtypes); + +TYPED_TEST(HDF5OutputLayerTest, TestForward) { + LOG(INFO) << "Loading HDF5 file " << this->input_file_name_; + hid_t file_id = H5Fopen(this->input_file_name_.c_str(), H5F_ACC_RDONLY, + H5P_DEFAULT); + ASSERT_GE(file_id, 0) << "Failed to open HDF5 file" << + this->input_file_name_; + hdf5_load_nd_dataset(file_id, HDF5_DATA_DATASET_NAME, 0, 4, + this->blob_data_); + hdf5_load_nd_dataset(file_id, HDF5_DATA_LABEL_NAME, 0, 4, + this->blob_label_); + herr_t status = H5Fclose(file_id); + EXPECT_GE(status, 0) << "Failed to close HDF5 file " << + this->input_file_name_; + this->blob_bottom_vec_.push_back(this->blob_data_); + this->blob_bottom_vec_.push_back(this->blob_label_); + + Caffe::Brew modes[] = { Caffe::CPU, Caffe::GPU }; + for (int m = 0; m < 2; ++m) { + Caffe::set_mode(modes[m]); + LayerParameter param; + param.mutable_hdf5_output_param()->set_file_name(this->output_file_name_); + // This code block ensures that the layer is deconstructed and + // the output hdf5 file is closed. + { + HDF5OutputLayer layer(param); + EXPECT_EQ(layer.file_name(), this->output_file_name_); + layer.SetUp(this->blob_bottom_vec_, &this->blob_top_vec_); + layer.Forward(this->blob_bottom_vec_, &this->blob_top_vec_); + } + hid_t file_id = H5Fopen(this->output_file_name_.c_str(), H5F_ACC_RDONLY, + H5P_DEFAULT); + ASSERT_GE(file_id, 0) << "Failed to open HDF5 file" << + this->input_file_name_; + + Blob* blob_data = new Blob(); + hdf5_load_nd_dataset(file_id, HDF5_DATA_DATASET_NAME, 0, 4, + blob_data); + this->CheckBlobEqual(*(this->blob_data_), *blob_data); + + Blob* blob_label = new Blob(); + hdf5_load_nd_dataset(file_id, HDF5_DATA_LABEL_NAME, 0, 4, + blob_label); + this->CheckBlobEqual(*(this->blob_label_), *blob_label); + + herr_t status = H5Fclose(file_id); + EXPECT_GE(status, 0) << "Failed to close HDF5 file " << + this->output_file_name_; + } +} + +} // namespace caffe diff --git a/src/caffe/test/test_math_functions.cpp b/src/caffe/test/test_math_functions.cpp new file mode 100644 index 00000000000..ca059a9147c --- /dev/null +++ b/src/caffe/test/test_math_functions.cpp @@ -0,0 +1,195 @@ +// Copyright 2014 kloudkl@github + +#include // for uint32_t & uint64_t +#include +#include +#include // for std::fabs +#include // for rand_r + +#include "gtest/gtest.h" +#include "caffe/blob.hpp" +#include "caffe/common.hpp" +#include "caffe/filler.hpp" +#include "caffe/util/math_functions.hpp" + +#include "caffe/test/test_caffe_main.hpp" + +namespace caffe { + +template +class MathFunctionsTest : public ::testing::Test { + protected: + MathFunctionsTest() + : blob_bottom_(new Blob()), + blob_top_(new Blob()) { + } + + virtual void SetUp() { + Caffe::set_random_seed(1701); + this->blob_bottom_->Reshape(11, 17, 19, 23); + this->blob_top_->Reshape(11, 17, 19, 23); + // fill the values + FillerParameter filler_param; + GaussianFiller 
filler(filler_param); + filler.Fill(this->blob_bottom_); + filler.Fill(this->blob_top_); + } + + virtual ~MathFunctionsTest() { + delete blob_bottom_; + delete blob_top_; + } + // http://en.wikipedia.org/wiki/Hamming_distance + int ReferenceHammingDistance(const int n, const Dtype* x, const Dtype* y); + + Blob* const blob_bottom_; + Blob* const blob_top_; +}; + +#define REF_HAMMING_DIST(float_type, int_type) \ +template<> \ +int MathFunctionsTest::ReferenceHammingDistance(const int n, \ + const float_type* x, \ + const float_type* y) { \ + int dist = 0; \ + int_type val; \ + for (int i = 0; i < n; ++i) { \ + val = static_cast(x[i]) ^ static_cast(y[i]); \ + /* Count the number of set bits */ \ + while (val) { \ + ++dist; \ + val &= val - 1; \ + } \ + } \ + return dist; \ +} + +REF_HAMMING_DIST(float, uint32_t); +REF_HAMMING_DIST(double, uint64_t); + +typedef ::testing::Types Dtypes; +TYPED_TEST_CASE(MathFunctionsTest, Dtypes); + +TYPED_TEST(MathFunctionsTest, TestHammingDistance) { + int n = this->blob_bottom_->count(); + const TypeParam* x = this->blob_bottom_->cpu_data(); + const TypeParam* y = this->blob_top_->cpu_data(); + CHECK_EQ(this->ReferenceHammingDistance(n, x, y), + caffe_hamming_distance(n, x, y)); +} + +TYPED_TEST(MathFunctionsTest, TestAsumCPU) { + int n = this->blob_bottom_->count(); + const TypeParam* x = this->blob_bottom_->cpu_data(); + TypeParam std_asum = 0; + for (int i = 0; i < n; ++i) { + std_asum += std::fabs(x[i]); + } + TypeParam cpu_asum = caffe_cpu_asum(n, x); + CHECK_LT((cpu_asum - std_asum) / std_asum, 1e-2); +} + +TYPED_TEST(MathFunctionsTest, TestAsumGPU) { + int n = this->blob_bottom_->count(); + const TypeParam* x = this->blob_bottom_->cpu_data(); + TypeParam std_asum = 0; + for (int i = 0; i < n; ++i) { + std_asum += std::fabs(x[i]); + } + TypeParam gpu_asum; + caffe_gpu_asum(n, this->blob_bottom_->gpu_data(), &gpu_asum); + CHECK_LT((gpu_asum - std_asum) / std_asum, 1e-2); +} + +TYPED_TEST(MathFunctionsTest, TestSignCPU) { + int n = this->blob_bottom_->count(); + const TypeParam* x = this->blob_bottom_->cpu_data(); + caffe_cpu_sign(n, x, this->blob_bottom_->mutable_cpu_diff()); + const TypeParam* signs = this->blob_bottom_->cpu_diff(); + for (int i = 0; i < n; ++i) { + CHECK_EQ(signs[i], x[i] > 0 ? 1 : (x[i] < 0 ? -1 : 0)); + } +} + +TYPED_TEST(MathFunctionsTest, TestSignGPU) { + int n = this->blob_bottom_->count(); + caffe_gpu_sign(n, this->blob_bottom_->gpu_data(), + this->blob_bottom_->mutable_gpu_diff()); + const TypeParam* signs = this->blob_bottom_->cpu_diff(); + const TypeParam* x = this->blob_bottom_->cpu_data(); + for (int i = 0; i < n; ++i) { + CHECK_EQ(signs[i], x[i] > 0 ? 1 : (x[i] < 0 ? -1 : 0)); + } +} + +TYPED_TEST(MathFunctionsTest, TestSgnbitCPU) { + int n = this->blob_bottom_->count(); + const TypeParam* x = this->blob_bottom_->cpu_data(); + caffe_cpu_sgnbit(n, x, this->blob_bottom_->mutable_cpu_diff()); + const TypeParam* signbits = this->blob_bottom_->cpu_diff(); + for (int i = 0; i < n; ++i) { + CHECK_EQ(signbits[i], x[i] < 0 ? 1 : 0); + } +} + +TYPED_TEST(MathFunctionsTest, TestSgnbitGPU) { + int n = this->blob_bottom_->count(); + caffe_gpu_sgnbit(n, this->blob_bottom_->gpu_data(), + this->blob_bottom_->mutable_gpu_diff()); + const TypeParam* signbits = this->blob_bottom_->cpu_diff(); + const TypeParam* x = this->blob_bottom_->cpu_data(); + for (int i = 0; i < n; ++i) { + CHECK_EQ(signbits[i], x[i] < 0 ? 
1 : 0); + } +} + +TYPED_TEST(MathFunctionsTest, TestFabsCPU) { + int n = this->blob_bottom_->count(); + const TypeParam* x = this->blob_bottom_->cpu_data(); + caffe_cpu_fabs(n, x, this->blob_bottom_->mutable_cpu_diff()); + const TypeParam* abs_val = this->blob_bottom_->cpu_diff(); + for (int i = 0; i < n; ++i) { + CHECK_EQ(abs_val[i], x[i] > 0 ? x[i] : -x[i]); + } +} + +TYPED_TEST(MathFunctionsTest, TestFabsGPU) { + int n = this->blob_bottom_->count(); + caffe_gpu_fabs(n, this->blob_bottom_->gpu_data(), + this->blob_bottom_->mutable_gpu_diff()); + const TypeParam* abs_val = this->blob_bottom_->cpu_diff(); + const TypeParam* x = this->blob_bottom_->cpu_data(); + for (int i = 0; i < n; ++i) { + CHECK_EQ(abs_val[i], x[i] > 0 ? x[i] : -x[i]); + } +} + +TYPED_TEST(MathFunctionsTest, TestScaleCPU) { + int n = this->blob_bottom_->count(); + // NOLINT_NEXT_LINE(runtime/threadsafe_fn) + TypeParam alpha = this->blob_bottom_->cpu_diff()[rand() % + this->blob_bottom_->count()]; + caffe_cpu_scale(n, alpha, this->blob_bottom_->cpu_data(), + this->blob_bottom_->mutable_cpu_diff()); + const TypeParam* scaled = this->blob_bottom_->cpu_diff(); + const TypeParam* x = this->blob_bottom_->cpu_data(); + for (int i = 0; i < n; ++i) { + CHECK_EQ(scaled[i], x[i] * alpha); + } +} + +TYPED_TEST(MathFunctionsTest, TestScaleGPU) { + int n = this->blob_bottom_->count(); + // NOLINT_NEXT_LINE(runtime/threadsafe_fn) + TypeParam alpha = this->blob_bottom_->cpu_diff()[rand() % + this->blob_bottom_->count()]; + caffe_gpu_scale(n, alpha, this->blob_bottom_->gpu_data(), + this->blob_bottom_->mutable_gpu_diff()); + const TypeParam* scaled = this->blob_bottom_->cpu_diff(); + const TypeParam* x = this->blob_bottom_->cpu_data(); + for (int i = 0; i < n; ++i) { + CHECK_EQ(scaled[i], x[i] * alpha); + } +} + +} // namespace caffe diff --git a/src/caffe/test/test_multinomial_logistic_loss_layer.cpp b/src/caffe/test/test_multinomial_logistic_loss_layer.cpp index 5169b708520..5a61df79d89 100644 --- a/src/caffe/test/test_multinomial_logistic_loss_layer.cpp +++ b/src/caffe/test/test_multinomial_logistic_loss_layer.cpp @@ -25,6 +25,7 @@ class MultinomialLogisticLossLayerTest : public ::testing::Test { MultinomialLogisticLossLayerTest() : blob_bottom_data_(new Blob(10, 5, 1, 1)), blob_bottom_label_(new Blob(10, 1, 1, 1)) { + Caffe::set_random_seed(1701); // fill the values FillerParameter filler_param; PositiveUnitballFiller filler(filler_param); @@ -55,7 +56,7 @@ TYPED_TEST(MultinomialLogisticLossLayerTest, TestGradientCPU) { Caffe::set_mode(Caffe::CPU); MultinomialLogisticLossLayer layer(layer_param); layer.SetUp(this->blob_bottom_vec_, &this->blob_top_vec_); - GradientChecker checker(1e-2, 1e-2, 1701, 0, 0.05); + GradientChecker checker(1e-2, 2*1e-2, 1701, 0, 0.05); checker.CheckGradientSingle(&layer, &(this->blob_bottom_vec_), &(this->blob_top_vec_), 0, -1, -1); } diff --git a/src/caffe/test/test_net.cpp b/src/caffe/test/test_net.cpp new file mode 100644 index 00000000000..fd7265c47df --- /dev/null +++ b/src/caffe/test/test_net.cpp @@ -0,0 +1,148 @@ +// Copyright 2014 kloudkl@github + +#include +#include +#include +#include + +#include "gtest/gtest.h" +#include "caffe/common.hpp" +#include "caffe/net.hpp" +#include "caffe/test/test_gradient_check_util.hpp" + +#include "caffe/test/test_caffe_main.hpp" + +namespace caffe { + + +template +class NetTest : public ::testing::Test { + protected: + NetTest() : filename(NULL) { + } + + virtual void SetUp() { // Create the leveldb + filename = tmpnam(NULL); // get temp name + LOG(INFO) << "Using 
temporary leveldb " << filename; + leveldb::DB* db; + leveldb::Options options; + options.error_if_exists = true; + options.create_if_missing = true; + leveldb::Status status = leveldb::DB::Open(options, filename, &db); + CHECK(status.ok()); + for (int i = 0; i < 5; ++i) { + Datum datum; + datum.set_label(i); + datum.set_channels(2); + datum.set_height(3); + datum.set_width(4); + std::string* data = datum.mutable_data(); + for (int j = 0; j < 24; ++j) { + data->push_back((uint8_t)i); + } + std::stringstream ss; + ss << i; + db->Put(leveldb::WriteOptions(), ss.str(), datum.SerializeAsString()); + } + delete db; + + const string& proto_prefix = + "name: 'TestNetwork' " + "layers: { " + " layer { " + " name: 'data' " + " type: 'data' "; + const string& proto_suffix = + " batchsize: 1 " + " } " + " top: 'data' " + " top: 'label' " + "} " + "layers: { " + " layer { " + " name: 'innerproduct' " + " type: 'innerproduct' " + " num_output: 1000 " + " weight_filler { " + " type: 'gaussian' " + " std: 0.01 " + " } " + " bias_filler { " + " type: 'constant' " + " value: 0 " + " } " + " blobs_lr: 1. " + " blobs_lr: 2. " + " weight_decay: 1. " + " weight_decay: 0. " + " } " + " bottom: 'data' " + " top: 'innerproduct' " + "} " + "layers: { " + " layer { " + " name: 'loss' " + " type: 'softmax_loss' " + " } " + " bottom: 'innerproduct' " + " bottom: 'label' " + "} "; + proto = proto_prefix + "source: '" + string(this->filename) + + "' " + proto_suffix; + } + + virtual ~NetTest() { + } + + char* filename; + string proto; +}; + +typedef ::testing::Types Dtypes; +TYPED_TEST_CASE(NetTest, Dtypes); + +TYPED_TEST(NetTest, TestHasBlob) { + NetParameter param; + CHECK(google::protobuf::TextFormat::ParseFromString(this->proto, + ¶m)); + Net net(param); + EXPECT_TRUE(net.has_blob("data")); + EXPECT_TRUE(net.has_blob("label")); + EXPECT_TRUE(net.has_blob("innerproduct")); + EXPECT_FALSE(net.has_blob("loss")); +} + +TYPED_TEST(NetTest, TestGetBlob) { + NetParameter param; + CHECK(google::protobuf::TextFormat::ParseFromString(this->proto, + ¶m)); + Net net(param); + EXPECT_EQ(net.blob_by_name("data"), net.blobs()[0]); + EXPECT_EQ(net.blob_by_name("label"), net.blobs()[1]); + EXPECT_EQ(net.blob_by_name("innerproduct"), net.blobs()[2]); + EXPECT_FALSE(net.blob_by_name("loss")); +} + +TYPED_TEST(NetTest, TestHasLayer) { + NetParameter param; + CHECK(google::protobuf::TextFormat::ParseFromString(this->proto, + ¶m)); + Net net(param); + EXPECT_TRUE(net.has_layer("data")); + EXPECT_TRUE(net.has_layer("innerproduct")); + EXPECT_TRUE(net.has_layer("loss")); + EXPECT_FALSE(net.has_layer("label")); +} + +TYPED_TEST(NetTest, TestGetLayerByName) { + NetParameter param; + CHECK(google::protobuf::TextFormat::ParseFromString(this->proto, + ¶m)); + Net net(param); + EXPECT_EQ(net.layer_by_name("data"), net.layers()[0]); + EXPECT_EQ(net.layer_by_name("innerproduct"), net.layers()[1]); + EXPECT_EQ(net.layer_by_name("loss"), net.layers()[2]); + EXPECT_FALSE(net.layer_by_name("label")); +} + +} // namespace caffe diff --git a/src/caffe/test/test_random_number_generator.cpp b/src/caffe/test/test_random_number_generator.cpp new file mode 100644 index 00000000000..267e7731475 --- /dev/null +++ b/src/caffe/test/test_random_number_generator.cpp @@ -0,0 +1,98 @@ +// Copyright 2014 BVLC and contributors. 
+ +#include +#include +#include + +#include "gtest/gtest.h" +#include "caffe/common.hpp" +#include "caffe/syncedmem.hpp" +#include "caffe/util/math_functions.hpp" +#include "caffe/test/test_caffe_main.hpp" + +namespace caffe { + +template +class RandomNumberGeneratorTest : public ::testing::Test { + public: + virtual ~RandomNumberGeneratorTest() {} + + Dtype sample_mean(const Dtype* const seqs, const size_t sample_size) { + double sum = 0; + for (int i = 0; i < sample_size; ++i) { + sum += seqs[i]; + } + return sum / sample_size; + } + + Dtype sample_mean(const int* const seqs, const size_t sample_size) { + Dtype sum = 0; + for (int i = 0; i < sample_size; ++i) { + sum += Dtype(seqs[i]); + } + return sum / sample_size; + } + + Dtype mean_bound(const Dtype std, const size_t sample_size) { + return std/sqrt(static_cast(sample_size)); + } +}; + + +typedef ::testing::Types Dtypes; +TYPED_TEST_CASE(RandomNumberGeneratorTest, Dtypes); + + +TYPED_TEST(RandomNumberGeneratorTest, TestRngGaussian) { + size_t sample_size = 10000; + SyncedMemory data_a(sample_size * sizeof(TypeParam)); + Caffe::set_random_seed(1701); + TypeParam mu = 0; + TypeParam sigma = 1; + caffe_vRngGaussian(sample_size, + reinterpret_cast(data_a.mutable_cpu_data()), mu, sigma); + TypeParam true_mean = mu; + TypeParam true_std = sigma; + TypeParam bound = this->mean_bound(true_std, sample_size); + TypeParam empirical_mean = + this->sample_mean(reinterpret_cast(data_a.cpu_data()), + sample_size); + EXPECT_NEAR(empirical_mean, true_mean, bound); +} + + +TYPED_TEST(RandomNumberGeneratorTest, TestRngUniform) { + size_t sample_size = 10000; + SyncedMemory data_a(sample_size * sizeof(TypeParam)); + Caffe::set_random_seed(1701); + TypeParam lower = 0; + TypeParam upper = 1; + caffe_vRngUniform(sample_size, + reinterpret_cast(data_a.mutable_cpu_data()), lower, upper); + TypeParam true_mean = (lower + upper) / 2; + TypeParam true_std = (upper - lower) / sqrt(12); + TypeParam bound = this->mean_bound(true_std, sample_size); + TypeParam empirical_mean = + this->sample_mean(reinterpret_cast(data_a.cpu_data()), + sample_size); + EXPECT_NEAR(empirical_mean, true_mean, bound); +} + + +TYPED_TEST(RandomNumberGeneratorTest, TestRngBernoulli) { + size_t sample_size = 10000; + SyncedMemory data_a(sample_size * sizeof(int)); + Caffe::set_random_seed(1701); + double p = 0.3; + caffe_vRngBernoulli(sample_size, + static_cast(data_a.mutable_cpu_data()), p); + TypeParam true_mean = p; + TypeParam true_std = sqrt(p * (1 - p)); + TypeParam bound = this->mean_bound(true_std, sample_size); + TypeParam empirical_mean = + this->sample_mean((const int *)data_a.cpu_data(), sample_size); + EXPECT_NEAR(empirical_mean, true_mean, bound); +} + + +} // namespace caffe diff --git a/src/caffe/test/test_regularizer_as_loss_layer.cpp b/src/caffe/test/test_regularizer_as_loss_layer.cpp new file mode 100644 index 00000000000..fb65d200f87 --- /dev/null +++ b/src/caffe/test/test_regularizer_as_loss_layer.cpp @@ -0,0 +1,154 @@ +// Copyright 2014 kloudkl@github + +#include +#include // for memset +#include + +#include "caffe/blob.hpp" +#include "caffe/common.hpp" +#include "caffe/filler.hpp" +#include "caffe/proto/caffe.pb.h" +#include "caffe/regularizer.hpp" +#include "caffe/test/test_gradient_check_util.hpp" +#include "caffe/vision_layers.hpp" + +#include "caffe/test/test_caffe_main.hpp" +#include "gtest/gtest.h" + +namespace caffe { + +extern cudaDeviceProp CAFFE_TEST_CUDA_PROP; + +template +class RegularizationAsLossTest : public ::testing::Test { + protected: + 
RegularizationAsLossTest() + : blob_bottom_data_(new Blob(10, 5, 3, 2)) { + // fill the values + FillerParameter filler_param; + filler_param.set_std(10); + GaussianFiller filler(filler_param); + filler.Fill(this->blob_bottom_data_); + blob_bottom_vec_.push_back(blob_bottom_data_); + } + virtual ~RegularizationAsLossTest() { + delete blob_bottom_data_; + } + + void Check(const bool death_condition, + const LayerParameter& layer_param, const Dtype step_size, + const Dtype threshold, const unsigned int seed = 1701); + + Blob* const blob_bottom_data_; + vector*> blob_bottom_vec_; + vector*> blob_top_vec_; +}; + +typedef ::testing::Types Dtypes; +TYPED_TEST_CASE(RegularizationAsLossTest, Dtypes); + +// The death test only abort the current function +// http://code.google.com/p/googletest/wiki/V1_6_AdvancedGuide +// #Propagating_Fatal_Failures +// We want to test all the combinations of coefficients. +// If this subroutine is place in the test cases directly, +// the test cases cannot enumerate the combinations after the first failure. +template +void RegularizationAsLossTest::Check( + const bool is_death_condition, const LayerParameter& layer_param, + const Dtype step_size, const Dtype threshold, const unsigned int seed) { + if (is_death_condition) { + ASSERT_DEATH( + RegularizerAsLossLayer layer(layer_param), + "Regularizer coefficient must be greater than or equal to zero"); + } else { + RegularizerAsLossLayer layer(layer_param); + layer.SetUp(this->blob_bottom_vec_, &this->blob_top_vec_); + GradientChecker checker(step_size, threshold, seed); + for (int loop = 0; loop < 10; ++loop) { + checker.CheckGradientSingle(&layer, &(this->blob_bottom_vec_), + &(this->blob_top_vec_), 0, -1, -1); + } + } +} + +// ::testing::FLAGS_gtest_death_test_style = "threadsafe"; +// To suppress Google Test warning of death tests running in multiple threads +// http://code.google.com/p/googletest/wiki/AdvancedGuide#Death_Test_Styles +#define TEST_REG_LOSS_LAYER_SINGLE_TYPE(mode, regularizer) \ +TYPED_TEST(RegularizationAsLossTest, TestGradient##mode##_##regularizer) { \ + ::testing::FLAGS_gtest_death_test_style = "threadsafe"; \ + Caffe::set_mode(Caffe::mode); \ + TypeParam coeff[] = {1, 0, -1}; \ + /* Restart from failure crash is too slow. Do not test negative coeff. */ \ + int num_ceoff = 2; \ + bool is_death_condition; \ + for (int i = 0; i < num_ceoff; ++i) { \ + LayerParameter layer_param; \ + RegularizerParameter* reg_param = layer_param.add_regularizer(); \ + reg_param->set_type(REG_TYPE(regularizer)); \ + reg_param->set_coeff(coeff[i]); \ + is_death_condition = coeff[i] < 0; \ + this->Check(is_death_condition, layer_param, 1e-2, 5e-2, 1701); \ + } \ +} + +TEST_REG_LOSS_LAYER_SINGLE_TYPE(CPU, L1); +TEST_REG_LOSS_LAYER_SINGLE_TYPE(CPU, L2); +TEST_REG_LOSS_LAYER_SINGLE_TYPE(CPU, MAX_NORM); + +TEST_REG_LOSS_LAYER_SINGLE_TYPE(GPU, L1); +TEST_REG_LOSS_LAYER_SINGLE_TYPE(GPU, L2); +TEST_REG_LOSS_LAYER_SINGLE_TYPE(GPU, MAX_NORM); + +#define TEST_REGULARIZER_AS_LOSS_LAYER_TWO_TYPES(mode, regularizer_type_a, \ + regularizer_type_b) \ +TYPED_TEST(RegularizationAsLossTest, \ + TestGradient##mode##_##regularizer_type_a##_##regularizer_type_b) { \ + ::testing::FLAGS_gtest_death_test_style = "threadsafe"; \ + Caffe::set_mode(Caffe::mode); \ + TypeParam coeff[] = {1, 0, -1}; \ + /* Restart from failure crash is too slow. Do not test negative coeff. 
*/ \ + int num_ceoff = 2; \ + bool is_death_condition; \ + for (int i = 0; i < num_ceoff; ++i) { \ + for (int j = 0; j < num_ceoff; ++j) { \ + LayerParameter layer_param; \ + RegularizerParameter* reg_param; \ + reg_param = layer_param.add_regularizer(); \ + reg_param->set_type(REG_TYPE(regularizer_type_a)); \ + reg_param->set_coeff(coeff[i]); \ + reg_param = layer_param.add_regularizer(); \ + reg_param->set_type(REG_TYPE(regularizer_type_b)); \ + reg_param->set_coeff(coeff[j]); \ + is_death_condition = coeff[i] < 0 || coeff[j] < 0; \ + this->Check(is_death_condition, layer_param, 1e-2, 5e-2, 1701); \ + } \ + } \ +} + +TEST_REGULARIZER_AS_LOSS_LAYER_TWO_TYPES(CPU, L1, L1); +TEST_REGULARIZER_AS_LOSS_LAYER_TWO_TYPES(CPU, L1, L2); +TEST_REGULARIZER_AS_LOSS_LAYER_TWO_TYPES(CPU, L1, MAX_NORM); + +TEST_REGULARIZER_AS_LOSS_LAYER_TWO_TYPES(CPU, L2, L1); +TEST_REGULARIZER_AS_LOSS_LAYER_TWO_TYPES(CPU, L2, L2); +TEST_REGULARIZER_AS_LOSS_LAYER_TWO_TYPES(CPU, L2, MAX_NORM); + +TEST_REGULARIZER_AS_LOSS_LAYER_TWO_TYPES(CPU, MAX_NORM, L1); +TEST_REGULARIZER_AS_LOSS_LAYER_TWO_TYPES(CPU, MAX_NORM, L2); +TEST_REGULARIZER_AS_LOSS_LAYER_TWO_TYPES(CPU, MAX_NORM, MAX_NORM); + +TEST_REGULARIZER_AS_LOSS_LAYER_TWO_TYPES(GPU, L1, L1); +TEST_REGULARIZER_AS_LOSS_LAYER_TWO_TYPES(GPU, L1, L2); +TEST_REGULARIZER_AS_LOSS_LAYER_TWO_TYPES(GPU, L1, MAX_NORM); + +TEST_REGULARIZER_AS_LOSS_LAYER_TWO_TYPES(GPU, L2, L1); +TEST_REGULARIZER_AS_LOSS_LAYER_TWO_TYPES(GPU, L2, L2); +TEST_REGULARIZER_AS_LOSS_LAYER_TWO_TYPES(GPU, L2, MAX_NORM); + +TEST_REGULARIZER_AS_LOSS_LAYER_TWO_TYPES(GPU, MAX_NORM, L1); +TEST_REGULARIZER_AS_LOSS_LAYER_TWO_TYPES(GPU, MAX_NORM, L2); +TEST_REGULARIZER_AS_LOSS_LAYER_TWO_TYPES(GPU, MAX_NORM, MAX_NORM); + +} // namespace caffe diff --git a/src/caffe/test/test_stochastic_pooling.cpp b/src/caffe/test/test_stochastic_pooling.cpp index d60d04e8df7..aedd6f3c2f2 100644 --- a/src/caffe/test/test_stochastic_pooling.cpp +++ b/src/caffe/test/test_stochastic_pooling.cpp @@ -146,8 +146,6 @@ TYPED_TEST(StochasticPoolingLayerTest, TestStochasticGPUTestPhase) { } } - - TYPED_TEST(StochasticPoolingLayerTest, TestGradientGPU) { Caffe::set_mode(Caffe::GPU); Caffe::set_phase(Caffe::TRAIN); @@ -157,7 +155,7 @@ TYPED_TEST(StochasticPoolingLayerTest, TestGradientGPU) { layer_param.set_pool(LayerParameter_PoolMethod_STOCHASTIC); PoolingLayer layer(layer_param); - GradientChecker checker(1e-2, 1e-3); + GradientChecker checker(1e-4, 1e-2); // it is too expensive to call curand multiple times, so we don't do an // exhaustive gradient check. 
checker.CheckGradient(&layer, &(this->blob_bottom_vec_), diff --git a/src/caffe/test/test_util_blas.cpp b/src/caffe/test/test_util_blas.cpp index 3f3ff8b3a69..57f4eafce7d 100644 --- a/src/caffe/test/test_util_blas.cpp +++ b/src/caffe/test/test_util_blas.cpp @@ -3,7 +3,6 @@ #include #include "cuda_runtime.h" -#include "mkl.h" #include "cublas_v2.h" #include "gtest/gtest.h" diff --git a/src/caffe/util/io.cpp b/src/caffe/util/io.cpp index 3ac69f9744e..053d7a40d44 100644 --- a/src/caffe/util/io.cpp +++ b/src/caffe/util/io.cpp @@ -142,4 +142,30 @@ void hdf5_load_nd_dataset(hid_t file_id, const char* dataset_name_, file_id, dataset_name_, blob->mutable_cpu_data()); } +template <> +void hdf5_save_nd_dataset( + const hid_t file_id, const string dataset_name, const Blob& blob) { + hsize_t dims[HDF5_NUM_DIMS]; + dims[0] = blob.num(); + dims[1] = blob.channels(); + dims[2] = blob.height(); + dims[3] = blob.width(); + herr_t status = H5LTmake_dataset_float( + file_id, dataset_name.c_str(), HDF5_NUM_DIMS, dims, blob.cpu_data()); + CHECK_GE(status, 0) << "Failed to make float dataset " << dataset_name; +} + +template <> +void hdf5_save_nd_dataset( + const hid_t file_id, const string dataset_name, const Blob& blob) { + hsize_t dims[HDF5_NUM_DIMS]; + dims[0] = blob.num(); + dims[1] = blob.channels(); + dims[2] = blob.height(); + dims[3] = blob.width(); + herr_t status = H5LTmake_dataset_double( + file_id, dataset_name.c_str(), HDF5_NUM_DIMS, dims, blob.cpu_data()); + CHECK_GE(status, 0) << "Failed to make double dataset " << dataset_name; +} + } // namespace caffe diff --git a/src/caffe/util/math_functions.cpp b/src/caffe/util/math_functions.cpp index 60656b87093..80e420f5689 100644 --- a/src/caffe/util/math_functions.cpp +++ b/src/caffe/util/math_functions.cpp @@ -1,9 +1,14 @@ -// Copyright 2013 Yangqing Jia +// Copyright 2014 BVLC and contributors. 
-#include +#include +#include #include + +#include + #include "caffe/common.hpp" #include "caffe/util/math_functions.hpp" +#include "caffe/util/rng.hpp" namespace caffe { @@ -103,7 +108,6 @@ template <> void caffe_axpy(const int N, const double alpha, const double* X, double* Y) { cblas_daxpy(N, alpha, X, 1, Y, 1); } - template <> void caffe_gpu_axpy(const int N, const float alpha, const float* X, float* Y) { @@ -116,18 +120,6 @@ void caffe_gpu_axpy(const int N, const double alpha, const double* X, CUBLAS_CHECK(cublasDaxpy(Caffe::cublas_handle(), N, &alpha, X, 1, Y, 1)); } -template <> -void caffe_axpby(const int N, const float alpha, const float* X, - const float beta, float* Y) { - cblas_saxpby(N, alpha, X, 1, beta, Y, 1); -} - -template <> -void caffe_axpby(const int N, const double alpha, const double* X, - const double beta, double* Y) { - cblas_daxpby(N, alpha, X, 1, beta, Y, 1); -} - template <> void caffe_copy(const int N, const float* X, float* Y) { cblas_scopy(N, X, 1, Y, 1); @@ -183,82 +175,85 @@ void caffe_gpu_axpby(const int N, const double alpha, const double* X, } template <> -void caffe_sqr(const int n, const float* a, float* y) { - vsSqr(n, a, y); +void caffe_cpu_axpby(const int N, const float alpha, const float* X, + const float beta, float* Y) { + cblas_saxpby(N, alpha, X, 1, beta, Y, 1); } template <> -void caffe_sqr(const int n, const double* a, double* y) { - vdSqr(n, a, y); +void caffe_cpu_axpby(const int N, const double alpha, const double* X, + const double beta, double* Y) { + cblas_daxpby(N, alpha, X, 1, beta, Y, 1); } template <> void caffe_add(const int n, const float* a, const float* b, - float* y) { vsAdd(n, a, b, y); } + float* y) { + vsAdd(n, a, b, y); +} template <> void caffe_add(const int n, const double* a, const double* b, - double* y) { vdAdd(n, a, b, y); } + double* y) { + vdAdd(n, a, b, y); +} template <> void caffe_sub(const int n, const float* a, const float* b, - float* y) { vsSub(n, a, b, y); } + float* y) { + vsSub(n, a, b, y); +} template <> void caffe_sub(const int n, const double* a, const double* b, - double* y) { vdSub(n, a, b, y); } + double* y) { + vdSub(n, a, b, y); +} template <> void caffe_mul(const int n, const float* a, const float* b, - float* y) { vsMul(n, a, b, y); } + float* y) { + vsMul(n, a, b, y); +} template <> void caffe_mul(const int n, const double* a, const double* b, - double* y) { vdMul(n, a, b, y); } + double* y) { + vdMul(n, a, b, y); +} template <> void caffe_div(const int n, const float* a, const float* b, - float* y) { vsDiv(n, a, b, y); } + float* y) { + vsDiv(n, a, b, y); +} template <> void caffe_div(const int n, const double* a, const double* b, - double* y) { vdDiv(n, a, b, y); } + double* y) { + vdDiv(n, a, b, y); +} template <> void caffe_powx(const int n, const float* a, const float b, - float* y) { vsPowx(n, a, b, y); } - -template <> -void caffe_powx(const int n, const double* a, const double b, - double* y) { vdPowx(n, a, b, y); } - -template <> -void caffe_vRngUniform(const int n, float* r, - const float a, const float b) { - VSL_CHECK(vsRngUniform(VSL_RNG_METHOD_UNIFORM_STD, Caffe::vsl_stream(), - n, r, a, b)); + float* y) { + vsPowx(n, a, b, y); } template <> -void caffe_vRngUniform(const int n, double* r, - const double a, const double b) { - VSL_CHECK(vdRngUniform(VSL_RNG_METHOD_UNIFORM_STD, Caffe::vsl_stream(), - n, r, a, b)); +void caffe_powx(const int n, const double* a, const double b, + double* y) { + vdPowx(n, a, b, y); } template <> -void caffe_vRngGaussian(const int n, float* r, const float 
a, - const float sigma) { - VSL_CHECK(vsRngGaussian(VSL_RNG_METHOD_GAUSSIAN_BOXMULLER, - Caffe::vsl_stream(), n, r, a, sigma)); +void caffe_sqr(const int n, const float* a, float* y) { + vsSqr(n, a, y); } - template <> -void caffe_vRngGaussian(const int n, double* r, const double a, - const double sigma) { - VSL_CHECK(vdRngGaussian(VSL_RNG_METHOD_GAUSSIAN_BOXMULLER, - Caffe::vsl_stream(), n, r, a, sigma)); +void caffe_sqr(const int n, const double* a, double* y) { + vdSqr(n, a, y); } template <> @@ -271,6 +266,86 @@ void caffe_exp(const int n, const double* a, double* y) { vdExp(n, a, y); } +template +Dtype caffe_nextafter(const Dtype b) { + return boost::math::nextafter( + b, std::numeric_limits::max()); +} + +template +float caffe_nextafter(const float b); + +template +double caffe_nextafter(const double b); + +template +void caffe_vRngUniform(const int n, Dtype* r, + const Dtype a, const Dtype b) { + CHECK_GE(n, 0); + CHECK(r); + CHECK_LE(a, b); + + boost::uniform_real random_distribution( + a, caffe_nextafter(b)); + boost::variate_generator > variate_generator( + caffe_rng(), random_distribution); + + for (int i = 0; i < n; ++i) { + r[i] = variate_generator(); + } +} + +template +void caffe_vRngUniform(const int n, float* r, + const float a, const float b); +template +void caffe_vRngUniform(const int n, double* r, + const double a, const double b); + +template +void caffe_vRngGaussian(const int n, Dtype* r, const Dtype a, + const Dtype sigma) { + CHECK_GE(n, 0); + CHECK(r); + CHECK_GT(sigma, 0); + boost::normal_distribution random_distribution(a, sigma); + boost::variate_generator > variate_generator( + caffe_rng(), random_distribution); + + for (int i = 0; i < n; ++i) { + r[i] = variate_generator(); + } +} + +template +void caffe_vRngGaussian(const int n, float* r, const float a, + const float sigma); + +template +void caffe_vRngGaussian(const int n, double* r, const double a, + const double sigma); + +template +void caffe_vRngBernoulli(const int n, Dtype* r, const double p) { + CHECK_GE(n, 0); + CHECK(r); + CHECK_GE(p, 0); + CHECK_LE(p, 1); + boost::bernoulli_distribution random_distribution(p); + boost::variate_generator > variate_generator( + caffe_rng(), random_distribution); + + for (int i = 0; i < n; ++i) { + r[i] = variate_generator(); + } +} + +template +void caffe_vRngBernoulli(const int n, int* r, const double p); + template <> float caffe_cpu_dot(const int n, const float* x, const float* y) { return cblas_sdot(n, x, 1, y, 1); @@ -293,4 +368,78 @@ void caffe_gpu_dot(const int n, const double* x, const double* y, CUBLAS_CHECK(cublasDdot(Caffe::cublas_handle(), n, x, 1, y, 1, out)); } +template <> +int caffe_hamming_distance(const int n, const float* x, + const float* y) { + int dist = 0; + for (int i = 0; i < n; ++i) { + dist += __builtin_popcount(static_cast(x[i]) ^ + static_cast(y[i])); + } + return dist; +} + +template <> +int caffe_hamming_distance(const int n, const double* x, + const double* y) { + int dist = 0; + for (int i = 0; i < n; ++i) { + dist += __builtin_popcountl(static_cast(x[i]) ^ + static_cast(y[i])); + } + return dist; +} + +template <> +float caffe_cpu_asum(const int n, const float* x) { + return cblas_sasum(n, x, 1); +} + +template <> +double caffe_cpu_asum(const int n, const double* x) { + return cblas_dasum(n, x, 1); +} + +template <> +void caffe_gpu_asum(const int n, const float* x, float* y) { + CUBLAS_CHECK(cublasSasum(Caffe::cublas_handle(), n, x, 1, y)); +} + +template <> +void caffe_gpu_asum(const int n, const double* x, double* y) { + 
CUBLAS_CHECK(cublasDasum(Caffe::cublas_handle(), n, x, 1, y)); +} + +INSTANTIATE_CAFFE_CPU_UNARY_FUNC(sign); +INSTANTIATE_CAFFE_CPU_UNARY_FUNC(sgnbit); +INSTANTIATE_CAFFE_CPU_UNARY_FUNC(fabs); + +template <> +void caffe_cpu_scale(const int n, const float alpha, const float *x, + float* y) { + cblas_scopy(n, x, 1, y, 1); + cblas_sscal(n, alpha, y, 1); +} + +template <> +void caffe_cpu_scale(const int n, const double alpha, const double *x, + double* y) { + cblas_dcopy(n, x, 1, y, 1); + cblas_dscal(n, alpha, y, 1); +} + +template <> +void caffe_gpu_scale(const int n, const float alpha, const float *x, + float* y) { + CUBLAS_CHECK(cublasScopy(Caffe::cublas_handle(), n, x, 1, y, 1)); + CUBLAS_CHECK(cublasSscal(Caffe::cublas_handle(), n, &alpha, y, 1)); +} + +template <> +void caffe_gpu_scale(const int n, const double alpha, const double *x, + double* y) { + CUBLAS_CHECK(cublasDcopy(Caffe::cublas_handle(), n, x, 1, y, 1)); + CUBLAS_CHECK(cublasDscal(Caffe::cublas_handle(), n, &alpha, y, 1)); +} + } // namespace caffe diff --git a/src/caffe/util/math_functions.cu b/src/caffe/util/math_functions.cu index 5491e246c48..85753aa567a 100644 --- a/src/caffe/util/math_functions.cu +++ b/src/caffe/util/math_functions.cu @@ -1,5 +1,7 @@ // Copyright 2013 Yangqing Jia +// Copyright 2014 kloudkl@github +#include // CUDA's, not caffe's, for fabs, signbit #include #include #include @@ -33,5 +35,9 @@ void caffe_gpu_mul(const int N, const double* a, N, a, b, y); } +DEFINE_AND_INSTANTIATE_GPU_UNARY_FUNC(sign, y[index] = (Dtype(0) < x[index]) + - (x[index] < Dtype(0))); +DEFINE_AND_INSTANTIATE_GPU_UNARY_FUNC(sgnbit, y[index] = signbit(x[index])); +DEFINE_AND_INSTANTIATE_GPU_UNARY_FUNC(fabs, y[index] = fabs(x[index])); } // namespace caffe diff --git a/tools/extract_features.cpp b/tools/extract_features.cpp new file mode 100644 index 00000000000..e547db594ba --- /dev/null +++ b/tools/extract_features.cpp @@ -0,0 +1,173 @@ +// Copyright 2014 kloudkl@github + +#include // for snprintf +#include +#include +#include +#include +#include +#include + +#include "caffe/blob.hpp" +#include "caffe/common.hpp" +#include "caffe/net.hpp" +#include "caffe/vision_layers.hpp" +#include "caffe/proto/caffe.pb.h" +#include "caffe/util/io.hpp" + +using namespace caffe; // NOLINT(build/namespaces) + +template +int feature_extraction_pipeline(int argc, char** argv); + +int main(int argc, char** argv) { + return feature_extraction_pipeline(argc, argv); +// return feature_extraction_pipeline(argc, argv); +} + +template +int feature_extraction_pipeline(int argc, char** argv) { + const int num_required_args = 6; + if (argc < num_required_args) { + LOG(ERROR)<< + "This program takes in a trained network and an input data layer, and then" + " extract features of the input data produced by the net.\n" + "Usage: demo_extract_features pretrained_net_param" + " feature_extraction_proto_file extract_feature_blob_name" + " save_feature_leveldb_name num_mini_batches [CPU/GPU] [DEVICE_ID=0]"; + return 1; + } + int arg_pos = num_required_args; + + arg_pos = num_required_args; + if (argc > arg_pos && strcmp(argv[arg_pos], "GPU") == 0) { + LOG(ERROR)<< "Using GPU"; + uint device_id = 0; + if (argc > arg_pos + 1) { + device_id = atoi(argv[arg_pos + 1]); + CHECK_GE(device_id, 0); + } + LOG(ERROR) << "Using Device_id=" << device_id; + Caffe::SetDevice(device_id); + Caffe::set_mode(Caffe::GPU); + } else { + LOG(ERROR) << "Using CPU"; + Caffe::set_mode(Caffe::CPU); + } + Caffe::set_phase(Caffe::TEST); + + NetParameter pretrained_net_param; + + arg_pos = 0; 
// the name of the executable + string pretrained_binary_proto(argv[++arg_pos]); + ReadProtoFromBinaryFile(pretrained_binary_proto.c_str(), + &pretrained_net_param); + + // Expected prototxt contains at least one data layer such as + // the layer data_layer_name and one feature blob such as the + // fc7 top blob to extract features. + /* + layers { + layer { + name: "data_layer_name" + type: "data" + source: "/path/to/your/images/to/extract/feature/images_leveldb" + meanfile: "/path/to/your/image_mean.binaryproto" + batchsize: 128 + cropsize: 227 + mirror: false + } + top: "data_blob_name" + top: "label_blob_name" + } + layers { + layer { + name: "drop7" + type: "dropout" + dropout_ratio: 0.5 + } + bottom: "fc7" + top: "fc7" + } + */ + NetParameter feature_extraction_net_param; + string feature_extraction_proto(argv[++arg_pos]); + ReadProtoFromTextFile(feature_extraction_proto, + &feature_extraction_net_param); + shared_ptr > feature_extraction_net( + new Net(feature_extraction_net_param)); + feature_extraction_net->CopyTrainedLayersFrom(pretrained_net_param); + + string extract_feature_blob_name(argv[++arg_pos]); + CHECK(feature_extraction_net->has_blob(extract_feature_blob_name)) + << "Unknown feature blob name " << extract_feature_blob_name + << " in the network " << feature_extraction_proto; + + string save_feature_leveldb_name(argv[++arg_pos]); + leveldb::DB* db; + leveldb::Options options; + options.error_if_exists = true; + options.create_if_missing = true; + options.write_buffer_size = 268435456; + LOG(INFO)<< "Opening leveldb " << save_feature_leveldb_name; + leveldb::Status status = leveldb::DB::Open(options, + save_feature_leveldb_name.c_str(), + &db); + CHECK(status.ok()) << "Failed to open leveldb " << save_feature_leveldb_name; + + int num_mini_batches = atoi(argv[++arg_pos]); + + LOG(ERROR)<< "Extacting Features"; + + Datum datum; + leveldb::WriteBatch* batch = new leveldb::WriteBatch(); + const int kMaxKeyStrLength = 100; + char key_str[kMaxKeyStrLength]; + int num_bytes_of_binary_code = sizeof(Dtype); + vector*> input_vec; + int image_index = 0; + for (int batch_index = 0; batch_index < num_mini_batches; ++batch_index) { + feature_extraction_net->Forward(input_vec); + const shared_ptr > feature_blob = feature_extraction_net + ->blob_by_name(extract_feature_blob_name); + int num_features = feature_blob->num(); + int dim_features = feature_blob->count() / num_features; + Dtype* feature_blob_data; + for (int n = 0; n < num_features; ++n) { + datum.set_height(dim_features); + datum.set_width(1); + datum.set_channels(1); + datum.clear_data(); + datum.clear_float_data(); + feature_blob_data = feature_blob->mutable_cpu_data() + + feature_blob->offset(n); + for (int d = 0; d < dim_features; ++d) { + datum.add_float_data(feature_blob_data[d]); + } + string value; + datum.SerializeToString(&value); + snprintf(key_str, kMaxKeyStrLength, "%d", image_index); + batch->Put(string(key_str), value); + ++image_index; + if (image_index % 1000 == 0) { + db->Write(leveldb::WriteOptions(), batch); + LOG(ERROR)<< "Extracted features of " << image_index << + " query images."; + delete batch; + batch = new leveldb::WriteBatch(); + } + } // for (int n = 0; n < num_features; ++n) + } // for (int batch_index = 0; batch_index < num_mini_batches; ++batch_index) + // write the last batch + if (image_index % 1000 != 0) { + db->Write(leveldb::WriteOptions(), batch); + LOG(ERROR)<< "Extracted features of " << image_index << + " query images."; + } + + delete batch; + delete db; + LOG(ERROR)<< "Successfully 
extracted the features!"; + return 0; +} + diff --git a/tools/net_speed_benchmark.cpp b/tools/net_speed_benchmark.cpp index 96d40a2eb37..43f7b493671 100644 --- a/tools/net_speed_benchmark.cpp +++ b/tools/net_speed_benchmark.cpp @@ -58,9 +58,11 @@ int main(int argc, char** argv) { LOG(ERROR) << "Performing Forward"; // Note that for the speed benchmark, we will assume that the network does // not take any input blobs. - caffe_net.Forward(vector<Blob<float>*>()); + float initial_loss; + caffe_net.Forward(vector<Blob<float>*>(), &initial_loss); + LOG(ERROR) << "Initial loss: " << initial_loss; LOG(ERROR) << "Performing Backward"; - LOG(ERROR) << "Initial loss: " << caffe_net.Backward(); + caffe_net.Backward(); const vector<shared_ptr<Layer<float> > >& layers = caffe_net.layers(); vector<vector<Blob<float>*> >& bottom_vecs = caffe_net.bottom_vecs();
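
The net.cpp and net_speed_benchmark.cpp hunks above move loss reporting from `Backward()` into `Forward()`, which now sums each layer's returned loss into an optional output pointer while `Backward()` only computes gradients. A minimal sketch of the new calling convention, assuming an already constructed `Net<float>` whose data layers feed themselves, so the bottom vector can stay empty:

    #include <vector>

    #include "caffe/blob.hpp"
    #include "caffe/common.hpp"
    #include "caffe/net.hpp"

    using namespace caffe;  // NOLINT(build/namespaces)

    // Runs one forward/backward pass and reports the loss that Forward()
    // accumulates across layers into the optional output argument.
    void run_one_pass(Net<float>* net) {
      float loss = 0;
      net->Forward(std::vector<Blob<float>*>(), &loss);
      LOG(INFO) << "Loss after forward pass: " << loss;
      net->Backward();  // gradients only; no loss value is returned any more
    }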
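
The new `has_blob`/`blob_by_name` (and `has_layer`/`layer_by_name`) accessors added to net.cpp let callers look up blobs and layers by the names declared in the prototxt, which is how extract_features.cpp reaches its feature blob. A small usage sketch; the blob name passed in is whatever your own net definition declares:

    #include <string>

    #include "caffe/blob.hpp"
    #include "caffe/common.hpp"
    #include "caffe/net.hpp"

    using namespace caffe;  // NOLINT(build/namespaces)

    // Reads out a named blob, guarding against unknown names.
    void inspect_blob(Net<float>* net, const std::string& blob_name) {
      if (!net->has_blob(blob_name)) {
        LOG(WARNING) << "Net has no blob named " << blob_name;
        return;
      }
      // blob_by_name returns an empty shared_ptr (and logs a warning) for
      // unknown names, so the has_blob guard above is optional.
      const shared_ptr<Blob<float> > blob = net->blob_by_name(blob_name);
      LOG(INFO) << blob_name << " has " << blob->count() << " elements, "
                << "first value = " << blob->cpu_data()[0];
    }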
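
For reference, the arithmetic the new L1 and L2 regularizers apply to a parameter blob, written as a standalone sketch with plain arrays standing in for `Blob` data and `coeff` standing in for `RegularizerParameter.coeff`:

    #include <cmath>
    #include <cstddef>

    // L1: adds coeff * sign(w_i) to the existing gradient and returns the
    // penalty coeff * sum_i |w_i|.
    double l1_regularize(std::size_t n, double coeff,
                         const double* w, double* grad) {
      double penalty = 0;
      for (std::size_t i = 0; i < n; ++i) {
        penalty += std::fabs(w[i]);
        grad[i] += coeff * ((w[i] > 0) - (w[i] < 0));  // sign(w_i)
      }
      return coeff * penalty;
    }

    // L2: adds 2 * coeff * w_i to the existing gradient and returns the
    // penalty coeff * sum_i w_i^2.
    double l2_regularize(std::size_t n, double coeff,
                         const double* w, double* grad) {
      double penalty = 0;
      for (std::size_t i = 0; i < n; ++i) {
        penalty += w[i] * w[i];
        grad[i] += 2 * coeff * w[i];
      }
      return coeff * penalty;
    }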
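
The boost-backed `caffe_vRngUniform`, `caffe_vRngGaussian`, and `caffe_vRngBernoulli` helpers replace the MKL VSL calls and respect `Caffe::set_random_seed`, as the new random-number-generator and common tests check. A short usage sketch, assuming the float and int instantiations shown in the math_functions.cpp hunk:

    #include <vector>

    #include "caffe/common.hpp"
    #include "caffe/util/math_functions.hpp"

    using namespace caffe;  // NOLINT(build/namespaces)

    // Draws samples from the three distributions; re-seeding with the same
    // value before each call reproduces the identical sequence.
    void draw_samples() {
      const int n = 10000;
      std::vector<float> uniform(n);
      std::vector<float> gaussian(n);
      std::vector<int> bernoulli(n);
      Caffe::set_random_seed(1701);
      caffe_vRngUniform<float>(n, &uniform[0], 0.0f, 1.0f);    // U(0, 1)
      caffe_vRngGaussian<float>(n, &gaussian[0], 0.0f, 1.0f);  // N(0, 1)
      caffe_vRngBernoulli<int>(n, &bernoulli[0], 0.5);         // Bernoulli(0.5)
    }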
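
The `hdf5_save_nd_dataset` specializations added to io.cpp write a 4-D blob into an open HDF5 file, which is what the new HDF5 output layer relies on. A hypothetical sketch, assuming the matching declaration lives in `caffe/util/io.hpp`; the file path and dataset name are illustrative only:

    #include <string>

    #include "hdf5.h"
    #include "hdf5_hl.h"

    #include "caffe/blob.hpp"
    #include "caffe/common.hpp"
    #include "caffe/util/io.hpp"

    using namespace caffe;  // NOLINT(build/namespaces)

    // Writes a 4-D blob to a freshly created HDF5 file.
    void save_blob_to_hdf5(const Blob<float>& blob) {
      const std::string file_name = "/tmp/example_blob.h5";  // hypothetical path
      hid_t file_id = H5Fcreate(file_name.c_str(), H5F_ACC_TRUNC,
                                H5P_DEFAULT, H5P_DEFAULT);
      CHECK_GE(file_id, 0) << "Failed to create HDF5 file " << file_name;
      hdf5_save_nd_dataset<float>(file_id, "data", blob);
      herr_t status = H5Fclose(file_id);
      CHECK_GE(status, 0) << "Failed to close HDF5 file " << file_name;
    }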