diff --git a/examples/mnist/lenet_consolidated_solver.prototxt b/examples/mnist/lenet_consolidated_solver.prototxt
new file mode 100644
index 00000000000..07cbc211414
--- /dev/null
+++ b/examples/mnist/lenet_consolidated_solver.prototxt
@@ -0,0 +1,452 @@
+# lenet_consolidated_solver.prototxt consolidates the lenet_solver, lenet_train,
+# and lenet_test prototxts into a single file. It also adds an additional test
+# net which runs on the training set, e.g., for the purpose of comparing
+# train/test accuracy (accuracy is computed only on the test set in the included
+# LeNet example). This is mainly included as an example of using these features
+# (specify NetParameters directly in the solver, specify multiple test nets)
+# if desired.
+#
+# Carry out testing every 500 training iterations.
+test_interval: 500
+# The base learning rate, momentum and the weight decay of the network.
+base_lr: 0.01
+momentum: 0.9
+weight_decay: 0.0005
+# The learning rate policy
+lr_policy: "inv"
+gamma: 0.0001
+power: 0.75
+# Display every 100 iterations
+display: 100
+# The maximum number of iterations
+max_iter: 10000
+# snapshot intermediate results
+snapshot: 5000
+snapshot_prefix: "lenet"
+# Set a random_seed for repeatable results.
+# (For results that vary due to random initialization, comment out the below
+# line, or set to a negative integer -- e.g. "random_seed: -1")
+random_seed: 1701
+# solver mode: CPU or GPU
+solver_mode: GPU
+
+# The training protocol buffer definition
+train_net_param {
+  name: "LeNet"
+  layers {
+    name: "mnist"
+    type: DATA
+    top: "data"
+    top: "label"
+    data_param {
+      source: "mnist-train-leveldb"
+      scale: 0.00390625
+      batch_size: 64
+    }
+  }
+  layers {
+    name: "conv1"
+    type: CONVOLUTION
+    bottom: "data"
+    top: "conv1"
+    blobs_lr: 1
+    blobs_lr: 2
+    convolution_param {
+      num_output: 20
+      kernel_size: 5
+      stride: 1
+      weight_filler {
+        type: "xavier"
+      }
+      bias_filler {
+        type: "constant"
+      }
+    }
+  }
+  layers {
+    name: "pool1"
+    type: POOLING
+    bottom: "conv1"
+    top: "pool1"
+    pooling_param {
+      pool: MAX
+      kernel_size: 2
+      stride: 2
+    }
+  }
+  layers {
+    name: "conv2"
+    type: CONVOLUTION
+    bottom: "pool1"
+    top: "conv2"
+    blobs_lr: 1
+    blobs_lr: 2
+    convolution_param {
+      num_output: 50
+      kernel_size: 5
+      stride: 1
+      weight_filler {
+        type: "xavier"
+      }
+      bias_filler {
+        type: "constant"
+      }
+    }
+  }
+  layers {
+    name: "pool2"
+    type: POOLING
+    bottom: "conv2"
+    top: "pool2"
+    pooling_param {
+      pool: MAX
+      kernel_size: 2
+      stride: 2
+    }
+  }
+  layers {
+    name: "ip1"
+    type: INNER_PRODUCT
+    bottom: "pool2"
+    top: "ip1"
+    blobs_lr: 1
+    blobs_lr: 2
+    inner_product_param {
+      num_output: 500
+      weight_filler {
+        type: "xavier"
+      }
+      bias_filler {
+        type: "constant"
+      }
+    }
+  }
+  layers {
+    name: "relu1"
+    type: RELU
+    bottom: "ip1"
+    top: "ip1"
+  }
+  layers {
+    name: "ip2"
+    type: INNER_PRODUCT
+    bottom: "ip1"
+    top: "ip2"
+    blobs_lr: 1
+    blobs_lr: 2
+    inner_product_param {
+      num_output: 10
+      weight_filler {
+        type: "xavier"
+      }
+      bias_filler {
+        type: "constant"
+      }
+    }
+  }
+  layers {
+    name: "loss"
+    type: SOFTMAX_LOSS
+    bottom: "ip2"
+    bottom: "label"
+  }
+}
+
+# test_iter specifies how many forward passes the test should carry out.
+# In the case of MNIST, we have test batch size 100 and 100 test iterations,
+# covering the full 10,000 testing images.
+test_iter: 100
+# The testing protocol buffer definition
+test_net_param {
+  name: "LeNet-test"
+  layers {
+    name: "mnist"
+    type: DATA
+    top: "data"
+    top: "label"
+    data_param {
+      source: "mnist-test-leveldb"
+      scale: 0.00390625
+      batch_size: 100
+    }
+  }
+  layers {
+    name: "conv1"
+    type: CONVOLUTION
+    bottom: "data"
+    top: "conv1"
+    convolution_param {
+      num_output: 20
+      kernel_size: 5
+      stride: 1
+      weight_filler {
+        type: "xavier"
+      }
+      bias_filler {
+        type: "constant"
+      }
+    }
+  }
+  layers {
+    name: "pool1"
+    type: POOLING
+    bottom: "conv1"
+    top: "pool1"
+    pooling_param {
+      pool: MAX
+      kernel_size: 2
+      stride: 2
+    }
+  }
+  layers {
+    name: "conv2"
+    type: CONVOLUTION
+    bottom: "pool1"
+    top: "conv2"
+    convolution_param {
+      num_output: 50
+      kernel_size: 5
+      stride: 1
+      weight_filler {
+        type: "xavier"
+      }
+      bias_filler {
+        type: "constant"
+      }
+    }
+  }
+  layers {
+    name: "pool2"
+    type: POOLING
+    bottom: "conv2"
+    top: "pool2"
+    pooling_param {
+      pool: MAX
+      kernel_size: 2
+      stride: 2
+    }
+  }
+  layers {
+    name: "ip1"
+    type: INNER_PRODUCT
+    bottom: "pool2"
+    top: "ip1"
+    inner_product_param {
+      num_output: 500
+      weight_filler {
+        type: "xavier"
+      }
+      bias_filler {
+        type: "constant"
+      }
+    }
+  }
+  layers {
+    name: "relu1"
+    type: RELU
+    bottom: "ip1"
+    top: "ip1"
+  }
+  layers {
+    name: "ip2"
+    type: INNER_PRODUCT
+    bottom: "ip1"
+    top: "ip2"
+    inner_product_param {
+      num_output: 10
+      weight_filler {
+        type: "xavier"
+      }
+      bias_filler {
+        type: "constant"
+      }
+    }
+  }
+  layers {
+    name: "prob"
+    type: SOFTMAX
+    bottom: "ip2"
+    top: "prob"
+  }
+  layers {
+    name: "accuracy"
+    type: ACCURACY
+    bottom: "prob"
+    bottom: "label"
+    top: "accuracy"
+  }
+}
+
+# The train set has 60K images, so we run 600 test iters (600 * 100 = 60K).
+test_iter: 600
+# The protocol buffer definition to test on the train set
+test_net_param {
+  name: "LeNet-test-on-train"
+  layers {
+    name: "mnist"
+    type: DATA
+    top: "data"
+    top: "label"
+    data_param {
+      source: "mnist-train-leveldb"
+      scale: 0.00390625
+      batch_size: 100
+    }
+  }
+  layers {
+    name: "conv1"
+    type: CONVOLUTION
+    bottom: "data"
+    top: "conv1"
+    convolution_param {
+      num_output: 20
+      kernel_size: 5
+      stride: 1
+      weight_filler {
+        type: "xavier"
+      }
+      bias_filler {
+        type: "constant"
+      }
+    }
+  }
+  layers {
+    name: "pool1"
+    type: POOLING
+    bottom: "conv1"
+    top: "pool1"
+    pooling_param {
+      pool: MAX
+      kernel_size: 2
+      stride: 2
+    }
+  }
+  layers {
+    name: "conv2"
+    type: CONVOLUTION
+    bottom: "pool1"
+    top: "conv2"
+    convolution_param {
+      num_output: 50
+      kernel_size: 5
+      stride: 1
+      weight_filler {
+        type: "xavier"
+      }
+      bias_filler {
+        type: "constant"
+      }
+    }
+  }
+  layers {
+    name: "pool2"
+    type: POOLING
+    bottom: "conv2"
+    top: "pool2"
+    pooling_param {
+      pool: MAX
+      kernel_size: 2
+      stride: 2
+    }
+  }
+  layers {
+    name: "ip1"
+    type: INNER_PRODUCT
+    bottom: "pool2"
+    top: "ip1"
+    inner_product_param {
+      num_output: 500
+      weight_filler {
+        type: "xavier"
+      }
+      bias_filler {
+        type: "constant"
+      }
+    }
+  }
+  layers {
+    name: "relu1"
+    type: RELU
+    bottom: "ip1"
+    top: "ip1"
+  }
+  layers {
+    name: "ip2"
+    type: INNER_PRODUCT
+    bottom: "ip1"
+    top: "ip2"
+    inner_product_param {
+      num_output: 10
+      weight_filler {
+        type: "xavier"
+      }
+      bias_filler {
+        type: "constant"
+      }
+    }
+  }
+  layers {
+    name: "prob"
+    type: SOFTMAX
+    bottom: "ip2"
+    top: "prob"
+  }
+  layers {
+    name: "accuracy"
+    type: ACCURACY
+    bottom: "prob"
+    bottom: "label"
+    top: "accuracy"
+  }
+}
+
+# Expected results for first and last 500 iterations:
+# (with portions of log omitted for brevity)
+#
+# Iteration 0, Testing net (#0)
+# Test score #0: 0.067
+# Test score #1: 2.30256
+# Iteration 0, Testing net (#1)
+# Test score #0: 0.0670334
+# Test score #1: 2.30258
+# Iteration 100, lr = 0.00992565
+# Iteration 100, loss = 0.280585
+# Iteration 200, lr = 0.00985258
+# Iteration 200, loss = 0.345601
+# Iteration 300, lr = 0.00978075
+# Iteration 300, loss = 0.172217
+# Iteration 400, lr = 0.00971013
+# Iteration 400, loss = 0.261836
+# Iteration 500, lr = 0.00964069
+# Iteration 500, loss = 0.157803
+# Iteration 500, Testing net (#0)
+# Test score #0: 0.968
+# Test score #1: 0.0993772
+# Iteration 500, Testing net (#1)
+# Test score #0: 0.965883
+# Test score #1: 0.109374
+#
+# [...]
+#
+# Iteration 9500, Testing net (#0)
+# Test score #0: 0.9899
+# Test score #1: 0.0308299
+# Iteration 9500, Testing net (#1)
+# Test score #0: 0.996816
+# Test score #1: 0.0118238
+# Iteration 9600, lr = 0.00603682
+# Iteration 9600, loss = 0.0126215
+# Iteration 9700, lr = 0.00601382
+# Iteration 9700, loss = 0.00579304
+# Iteration 9800, lr = 0.00599102
+# Iteration 9800, loss = 0.00500633
+# Iteration 9900, lr = 0.00596843
+# Iteration 9900, loss = 0.00796607
+# Iteration 10000, lr = 0.00594604
+# Iteration 10000, loss = 0.00271736
+# Iteration 10000, Testing net (#0)
+# Test score #0: 0.9914
+# Test score #1: 0.0276671
+# Iteration 10000, Testing net (#1)
+# Test score #0: 0.997782
+# Test score #1: 0.00908085
diff --git a/examples/mnist/train_lenet_consolidated.sh b/examples/mnist/train_lenet_consolidated.sh
new file mode 100755
index 00000000000..ff6897e263d
--- /dev/null
+++ b/examples/mnist/train_lenet_consolidated.sh
@@ -0,0 +1,5 @@
+#!/usr/bin/env sh
+
+TOOLS=../../build/tools
+
+GLOG_logtostderr=1 $TOOLS/train_net.bin lenet_consolidated_solver.prototxt
diff --git a/include/caffe/solver.hpp b/include/caffe/solver.hpp
index aef9b22c44d..3112c59e0fc 100644
--- a/include/caffe/solver.hpp
+++ b/include/caffe/solver.hpp
@@ -33,7 +33,8 @@ class Solver {
   // written to disk together with the learned net.
   void Snapshot();
   // The test routine
-  void Test();
+  void TestAll();
+  void Test(const int test_net_id = 0);
   virtual void SnapshotSolverState(SolverState* state) = 0;
   // The Restore function implements how one should restore the solver to a
   // previously snapshotted state. You should implement the RestoreSolverState()
@@ -44,7 +45,7 @@
   SolverParameter param_;
   int iter_;
   shared_ptr<Net<Dtype> > net_;
-  shared_ptr<Net<Dtype> > test_net_;
+  vector<shared_ptr<Net<Dtype> > > test_nets_;
 
   DISABLE_COPY_AND_ASSIGN(Solver);
 };
diff --git a/src/caffe/net.cpp b/src/caffe/net.cpp
index 6f57d8d09ad..8eeff57efc0 100644
--- a/src/caffe/net.cpp
+++ b/src/caffe/net.cpp
@@ -33,6 +33,8 @@ Net<Dtype>::Net(const string& param_file) {
 
 template <typename Dtype>
 void Net<Dtype>::Init(const NetParameter& in_param) {
+  LOG(INFO) << "Initializing net from parameters: " << std::endl
+            << in_param.DebugString();
   // Create a copy of in_param with splits added where necessary.
   NetParameter param;
   InsertSplits(in_param, &param);
diff --git a/src/caffe/proto/caffe.proto b/src/caffe/proto/caffe.proto
index ab3c2fecc5c..ce2f25b8db5 100644
--- a/src/caffe/proto/caffe.proto
+++ b/src/caffe/proto/caffe.proto
@@ -57,10 +57,21 @@ message NetParameter {
 }
 
 message SolverParameter {
-  optional string train_net = 1; // The proto file for the training net.
-  optional string test_net = 2; // The proto file for the testing net.
+  // {train,test}_net specify a path to a file containing the {train,test} net
+  // parameters; {train,test}_net_param specify the net parameters directly
+  // inside the SolverParameter.
+  //
+  // Only either train_net or train_net_param (not both) should be specified.
+  // You may specify 0 or more test_net and/or test_net_param. All
+  // nets specified using test_net_param will be tested first, followed by all
+  // nets specified using test_net (each processed in the order specified in
+  // the prototxt).
+  optional string train_net = 1; // The proto filename for the train net.
+  repeated string test_net = 2; // The proto filenames for the test nets.
+  optional NetParameter train_net_param = 21; // Full params for the train net.
+  repeated NetParameter test_net_param = 22; // Full params for the test nets.
   // The number of iterations for each testing phase.
-  optional int32 test_iter = 3 [default = 0];
+  repeated int32 test_iter = 3;
   // The number of iterations between two testing phases.
   optional int32 test_interval = 4 [default = 0];
   optional bool test_compute_loss = 19 [default = false];
diff --git a/src/caffe/solver.cpp b/src/caffe/solver.cpp
index 4932968d0b6..6a8f18f1331 100644
--- a/src/caffe/solver.cpp
+++ b/src/caffe/solver.cpp
@@ -19,13 +19,13 @@ namespace caffe {
 
 template <typename Dtype>
 Solver<Dtype>::Solver(const SolverParameter& param)
-    : net_(), test_net_() {
+    : net_() {
   Init(param);
 }
 
 template <typename Dtype>
 Solver<Dtype>::Solver(const string& param_file)
-    : net_(), test_net_() {
+    : net_() {
   SolverParameter param;
   ReadProtoFromTextFile(param_file, &param);
   Init(param);
@@ -33,19 +33,44 @@ Solver<Dtype>::Solver(const string& param_file)
 
 template <typename Dtype>
 void Solver<Dtype>::Init(const SolverParameter& param) {
+  LOG(INFO) << "Initializing solver from parameters: " << std::endl
+            << param.DebugString();
   param_ = param;
   if (param_.random_seed() >= 0) {
     Caffe::set_random_seed(param_.random_seed());
   }
   // Scaffolding code
-  LOG(INFO) << "Creating training net.";
-  net_.reset(new Net<Dtype>(param_.train_net()));
-  if (param_.has_test_net()) {
-    LOG(INFO) << "Creating testing net.";
-    test_net_.reset(new Net<Dtype>(param_.test_net()));
-    CHECK_GT(param_.test_iter(), 0);
+  if (param_.has_train_net_param()) {
+    CHECK(!param_.has_train_net()) << "Either train_net_param or train_net may "
+        << "be specified, but not both.";
+    LOG(INFO) << "Creating training net specified in SolverParameter.";
+    net_.reset(new Net<Dtype>(param_.train_net_param()));
+  } else {
+    CHECK(param_.has_train_net())
+        << "Neither train_net nor train_net_param were specified.";
+    LOG(INFO) << "Creating training net from file: " << param_.train_net();
+    net_.reset(new Net<Dtype>(param_.train_net()));
+  }
+  const int num_test_net_params = param_.test_net_param_size();
+  const int num_test_net_files = param_.test_net_size();
+  const int num_test_nets = num_test_net_params + num_test_net_files;
+  if (num_test_nets) {
+    CHECK_EQ(param_.test_iter_size(), num_test_nets)
+        << "test_iter must be specified for each test network.";
     CHECK_GT(param_.test_interval(), 0);
   }
+  test_nets_.resize(num_test_nets);
+  for (int i = 0; i < num_test_net_params; ++i) {
+    LOG(INFO) << "Creating testing net (#" << i
+              << ") specified in SolverParameter.";
+    test_nets_[i].reset(new Net<Dtype>(param_.test_net_param(i)));
+  }
+  for (int i = 0, test_net_id = num_test_net_params;
+       i < num_test_net_files; ++i, ++test_net_id) {
+    LOG(INFO) << "Creating testing net (#" << test_net_id
+              << ") from file: " << param.test_net(i);
+    test_nets_[test_net_id].reset(new Net<Dtype>(param_.test_net(i)));
+  }
   LOG(INFO) << "Solver scaffolding done.";
 }
 
@@ -72,7 +97,7 @@ void Solver<Dtype>::Solve(const char* resume_file) {
   // there's not enough memory to run the test net and crash, etc.; and to gauge
   // the effect of the first training iterations.
   if (param_.test_interval()) {
-    Test();
+    TestAll();
   }
 
   // For a network that is trained by the solver, no bottom or top vecs
@@ -87,7 +112,7 @@ void Solver<Dtype>::Solve(const char* resume_file) {
       LOG(INFO) << "Iteration " << iter_ << ", loss = " << loss;
     }
     if (param_.test_interval() && iter_ % param_.test_interval() == 0) {
-      Test();
+      TestAll();
     }
     // Check if we need to do snapshot
    if (param_.snapshot() && iter_ % param_.snapshot() == 0) {
@@ -102,18 +127,28 @@ void Solver<Dtype>::Solve(const char* resume_file) {
 
 
 template <typename Dtype>
-void Solver<Dtype>::Test() {
-  LOG(INFO) << "Iteration " << iter_ << ", Testing net";
+void Solver<Dtype>::TestAll() {
+  for (int test_net_id = 0; test_net_id < test_nets_.size(); ++test_net_id) {
+    Test(test_net_id);
+  }
+}
+
+
+template <typename Dtype>
+void Solver<Dtype>::Test(const int test_net_id) {
+  LOG(INFO) << "Iteration " << iter_
+            << ", Testing net (#" << test_net_id << ")";
   // We need to set phase to test before running.
   Caffe::set_phase(Caffe::TEST);
-  CHECK_NOTNULL(test_net_.get())->ShareTrainedLayersWith(net_.get());
+  CHECK_NOTNULL(test_nets_[test_net_id].get())->
+      ShareTrainedLayersWith(net_.get());
   vector<Dtype> test_score;
   vector<Blob<Dtype>*> bottom_vec;
   Dtype loss = 0;
-  for (int i = 0; i < param_.test_iter(); ++i) {
+  for (int i = 0; i < param_.test_iter(test_net_id); ++i) {
     Dtype iter_loss;
     const vector<Blob<Dtype>*>& result =
-        test_net_->Forward(bottom_vec, &iter_loss);
+        test_nets_[test_net_id]->Forward(bottom_vec, &iter_loss);
     if (param_.test_compute_loss()) {
       loss += iter_loss;
     }
@@ -135,12 +170,12 @@ void Solver<Dtype>::Test(const int test_net_id) {
     }
   }
   if (param_.test_compute_loss()) {
-    loss /= param_.test_iter();
+    loss /= param_.test_iter(test_net_id);
     LOG(INFO) << "Test loss: " << loss;
   }
   for (int i = 0; i < test_score.size(); ++i) {
     LOG(INFO) << "Test score #" << i << ": "
-              << test_score[i] / param_.test_iter();
+              << test_score[i] / param_.test_iter(test_net_id);
   }
   Caffe::set_phase(Caffe::TRAIN);
 }
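
Note on the new SolverParameter fields: besides embedding nets with train_net_param/test_net_param as in the consolidated example above, test nets may also be given by file via the (now repeated) test_net field, with one test_iter entry per test net; nets given via test_net_param are tested first, then nets given via test_net, in the order written. A minimal sketch of the file-based form follows -- the file names are placeholders for illustration and are not files added by this patch:

    # Hypothetical solver prototxt using file-based nets (paths are placeholders).
    train_net: "lenet_train.prototxt"
    # Two test nets given by file; they become test nets #0 and #1.
    test_net: "lenet_test.prototxt"
    test_net: "lenet_test_on_train.prototxt"
    # One test_iter per test net, paired in the same order.
    test_iter: 100
    test_iter: 600
    test_interval: 500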