From 65ef9ff7deadc63a873328b7ca4b827053f5f6d6 Mon Sep 17 00:00:00 2001
From: Jeff Donahue
Date: Fri, 9 May 2014 19:51:26 -0700
Subject: [PATCH 1/9] specify NetParameters directly in the SolverParameter

---
 src/caffe/proto/caffe.proto | 12 ++++++++++--
 src/caffe/solver.cpp        | 26 ++++++++++++++++++++++----
 2 files changed, 32 insertions(+), 6 deletions(-)

diff --git a/src/caffe/proto/caffe.proto b/src/caffe/proto/caffe.proto
index ab3c2fecc5c..eb086a15443 100644
--- a/src/caffe/proto/caffe.proto
+++ b/src/caffe/proto/caffe.proto
@@ -57,8 +57,16 @@ message NetParameter {
 }
 
 message SolverParameter {
-  optional string train_net = 1; // The proto file for the training net.
-  optional string test_net = 2; // The proto file for the testing net.
+  // {train,test}_net specify a path to a file containing the {train,test} net
+  // parameters; {train,test}_net_param specify the net parameters directly
+  // inside the SolverParameter.
+  //
+  // If {train,test}_net is specified, {train,test}_net_param should not be,
+  // and vice versa.
+  optional string train_net = 1; // The proto filename for the train net.
+  optional string test_net = 2; // The proto filename for the test net.
+  optional NetParameter train_net_param = 21; // Full params for the train net.
+  optional NetParameter test_net_param = 22; // Full params for the test net.
   // The number of iterations for each testing phase.
   optional int32 test_iter = 3 [default = 0];
   // The number of iterations between two testing phases.
diff --git a/src/caffe/solver.cpp b/src/caffe/solver.cpp
index 4932968d0b6..77782ac69fb 100644
--- a/src/caffe/solver.cpp
+++ b/src/caffe/solver.cpp
@@ -38,11 +38,29 @@ void Solver<Dtype>::Init(const SolverParameter& param) {
     Caffe::set_random_seed(param_.random_seed());
   }
   // Scaffolding code
-  LOG(INFO) << "Creating training net.";
-  net_.reset(new Net<Dtype>(param_.train_net()));
-  if (param_.has_test_net()) {
-    LOG(INFO) << "Creating testing net.";
+  if (param_.has_train_net_param()) {
+    CHECK(!param_.has_train_net()) << "Either train_net_param or train_net may "
+        << "be specified, but not both.";
+    LOG(INFO) << "Creating training net specified in SolverParameter.";
+    net_.reset(new Net<Dtype>(param_.train_net_param()));
+  } else {
+    LOG(INFO) << "Creating training net from file: " << param_.train_net();
+    net_.reset(new Net<Dtype>(param_.train_net()));
+  }
+  bool has_test_net = false;
+  NetParameter test_net_param;
+  if (param_.has_test_net_param()) {
+    CHECK(!param_.has_test_net()) << "Either test_net_param or test_net may be "
+        << "specified, but not both.";
+    LOG(INFO) << "Creating testing net specified in SolverParameter.";
+    test_net_.reset(new Net<Dtype>(param_.test_net_param()));
+    has_test_net = true;
+  } else if (param_.has_test_net()) {
+    LOG(INFO) << "Creating testing net from file: " << param_.test_net();
     test_net_.reset(new Net<Dtype>(param_.test_net()));
+    has_test_net = true;
+  }
+  if (has_test_net) {
     CHECK_GT(param_.test_iter(), 0);
     CHECK_GT(param_.test_interval(), 0);
   }
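With this patch applied, a solver prototxt can embed its net definition inline
rather than referencing a separate file. A minimal sketch of the two (mutually
exclusive) styles -- the filename and layers below are illustrative
placeholders, not taken from the patch:

    # Style 1: reference an external net definition file.
    #   train_net: "lenet_train.prototxt"
    # Style 2 (new in this patch): embed the NetParameter directly.
    train_net_param {
      name: "TinyNet"
      layers {
        name: "data"
        type: DATA
        top: "data"
        top: "label"
        data_param {
          source: "train-leveldb"
          batch_size: 64
        }
      }
    }

The new CHECKs in Solver<Dtype>::Init reject a SolverParameter that sets both
the file and the inline variant for the same net.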
From bf66ac27539c63377d432006589bc43ea6203933 Mon Sep 17 00:00:00 2001
From: Jeff Donahue
Date: Fri, 9 May 2014 19:51:59 -0700
Subject: [PATCH 2/9] log {Net,Solver}Parameters on Init

---
 src/caffe/net.cpp    | 2 ++
 src/caffe/solver.cpp | 2 ++
 2 files changed, 4 insertions(+)

diff --git a/src/caffe/net.cpp b/src/caffe/net.cpp
index 6f57d8d09ad..8eeff57efc0 100644
--- a/src/caffe/net.cpp
+++ b/src/caffe/net.cpp
@@ -33,6 +33,8 @@ Net<Dtype>::Net(const string& param_file) {
 
 template <typename Dtype>
 void Net<Dtype>::Init(const NetParameter& in_param) {
+  LOG(INFO) << "Initializing net from parameters: " << std::endl
+            << in_param.DebugString();
   // Create a copy of in_param with splits added where necessary.
   NetParameter param;
   InsertSplits(in_param, &param);
diff --git a/src/caffe/solver.cpp b/src/caffe/solver.cpp
index 77782ac69fb..9420ca3c9e1 100644
--- a/src/caffe/solver.cpp
+++ b/src/caffe/solver.cpp
@@ -33,6 +33,8 @@ Solver<Dtype>::Solver(const string& param_file)
 
 template <typename Dtype>
 void Solver<Dtype>::Init(const SolverParameter& param) {
+  LOG(INFO) << "Initializing solver from parameters: " << std::endl
+            << param.DebugString();
   param_ = param;
   if (param_.random_seed() >= 0) {
     Caffe::set_random_seed(param_.random_seed());
From c97fff670299c8600ddd8b0ba54bbd7c325ee2f5 Mon Sep 17 00:00:00 2001
From: Jeff Donahue
Date: Fri, 9 May 2014 20:28:55 -0700
Subject: [PATCH 3/9] allow multiple test nets

---
 include/caffe/solver.hpp    |  5 ++--
 src/caffe/proto/caffe.proto |  4 +--
 src/caffe/solver.cpp        | 55 ++++++++++++++++++++++---------------
 3 files changed, 38 insertions(+), 26 deletions(-)

diff --git a/include/caffe/solver.hpp b/include/caffe/solver.hpp
index aef9b22c44d..3112c59e0fc 100644
--- a/include/caffe/solver.hpp
+++ b/include/caffe/solver.hpp
@@ -33,7 +33,8 @@ class Solver {
   // written to disk together with the learned net.
   void Snapshot();
   // The test routine
-  void Test();
+  void TestAll();
+  void Test(const int test_net_id = 0);
   virtual void SnapshotSolverState(SolverState* state) = 0;
   // The Restore function implements how one should restore the solver to a
   // previously snapshotted state. You should implement the RestoreSolverState()
@@ -44,7 +45,7 @@ class Solver {
   SolverParameter param_;
   int iter_;
   shared_ptr<Net<Dtype> > net_;
-  shared_ptr<Net<Dtype> > test_net_;
+  vector<shared_ptr<Net<Dtype> > > test_nets_;
 
   DISABLE_COPY_AND_ASSIGN(Solver);
 };
diff --git a/src/caffe/proto/caffe.proto b/src/caffe/proto/caffe.proto
index eb086a15443..cf3a9b76c54 100644
--- a/src/caffe/proto/caffe.proto
+++ b/src/caffe/proto/caffe.proto
@@ -64,9 +64,9 @@ message SolverParameter {
   // If {train,test}_net is specified, {train,test}_net_param should not be,
   // and vice versa.
   optional string train_net = 1; // The proto filename for the train net.
-  optional string test_net = 2; // The proto filename for the test net.
+  repeated string test_net = 2; // The proto filenames for the test nets.
   optional NetParameter train_net_param = 21; // Full params for the train net.
-  optional NetParameter test_net_param = 22; // Full params for the test net.
+  repeated NetParameter test_net_param = 22; // Full params for the test nets.
   // The number of iterations for each testing phase.
   optional int32 test_iter = 3 [default = 0];
   // The number of iterations between two testing phases.
diff --git a/src/caffe/solver.cpp b/src/caffe/solver.cpp
index 9420ca3c9e1..e3dc70546a0 100644
--- a/src/caffe/solver.cpp
+++ b/src/caffe/solver.cpp
@@ -19,13 +19,13 @@ namespace caffe {
 
 template <typename Dtype>
 Solver<Dtype>::Solver(const SolverParameter& param)
-    : net_(), test_net_() {
+    : net_() {
   Init(param);
 }
 
 template <typename Dtype>
 Solver<Dtype>::Solver(const string& param_file)
-    : net_(), test_net_() {
+    : net_() {
   SolverParameter param;
   ReadProtoFromTextFile(param_file, &param);
   Init(param);
@@ -49,23 +49,25 @@ void Solver<Dtype>::Init(const SolverParameter& param) {
     LOG(INFO) << "Creating training net from file: " << param_.train_net();
     net_.reset(new Net<Dtype>(param_.train_net()));
   }
-  bool has_test_net = false;
-  NetParameter test_net_param;
-  if (param_.has_test_net_param()) {
-    CHECK(!param_.has_test_net()) << "Either test_net_param or test_net may be "
-        << "specified, but not both.";
-    LOG(INFO) << "Creating testing net specified in SolverParameter.";
-    test_net_.reset(new Net<Dtype>(param_.test_net_param()));
-    has_test_net = true;
-  } else if (param_.has_test_net()) {
-    LOG(INFO) << "Creating testing net from file: " << param_.test_net();
-    test_net_.reset(new Net<Dtype>(param_.test_net()));
-    has_test_net = true;
-  }
-  if (has_test_net) {
+  const int num_test_net_params = param_.test_net_param_size();
+  const int num_test_net_files = param_.test_net_size();
+  const int num_test_nets = num_test_net_params + num_test_net_files;
+  if (num_test_nets) {
     CHECK_GT(param_.test_iter(), 0);
     CHECK_GT(param_.test_interval(), 0);
   }
+  test_nets_.resize(num_test_nets);
+  for (int i = 0; i < num_test_net_params; ++i) {
+    LOG(INFO) << "Creating testing net (#" << i
+              << ") specified in SolverParameter.";
+    test_nets_[i].reset(new Net<Dtype>(param_.test_net_param(i)));
+  }
+  for (int i = 0, test_net_id = num_test_net_params;
+       i < num_test_net_files; ++i, ++test_net_id) {
+    LOG(INFO) << "Creating testing net (#" << test_net_id
+              << ") from file: " << param.test_net(i);
+    test_nets_[test_net_id].reset(new Net<Dtype>(param_.test_net(i)));
+  }
   LOG(INFO) << "Solver scaffolding done.";
 }
@@ -92,7 +94,7 @@ void Solver<Dtype>::Solve(const char* resume_file) {
   // there's not enough memory to run the test net and crash, etc.; and to gauge
   // the effect of the first training iterations.
   if (param_.test_interval()) {
-    Test();
+    TestAll();
   }
 
   // For a network that is trained by the solver, no bottom or top vecs
@@ -107,7 +109,7 @@ void Solver<Dtype>::Solve(const char* resume_file) {
       LOG(INFO) << "Iteration " << iter_ << ", loss = " << loss;
     }
     if (param_.test_interval() && iter_ % param_.test_interval() == 0) {
-      Test();
+      TestAll();
     }
     // Check if we need to do snapshot
     if (param_.snapshot() && iter_ % param_.snapshot() == 0) {
@@ -122,18 +124,27 @@
 
 
 template <typename Dtype>
-void Solver<Dtype>::Test() {
-  LOG(INFO) << "Iteration " << iter_ << ", Testing net";
+void Solver<Dtype>::TestAll() {
+  for (int test_net_id = 0; test_net_id < test_nets_.size(); ++test_net_id) {
+    Test(test_net_id);
+  }
+}
+
+
+template <typename Dtype>
+void Solver<Dtype>::Test(const int test_net_id) {
+  LOG(INFO) << "Iteration " << iter_ << ", Testing net (#" << test_net_id << ")";
   // We need to set phase to test before running.
   Caffe::set_phase(Caffe::TEST);
-  CHECK_NOTNULL(test_net_.get())->ShareTrainedLayersWith(net_.get());
+  CHECK_NOTNULL(test_nets_[test_net_id].get())->
+      ShareTrainedLayersWith(net_.get());
   vector<Dtype> test_score;
   vector<Blob<Dtype>*> bottom_vec;
   Dtype loss = 0;
   for (int i = 0; i < param_.test_iter(); ++i) {
     Dtype iter_loss;
     const vector<Blob<Dtype>*>& result =
-        test_net_->Forward(bottom_vec, &iter_loss);
+        test_nets_[test_net_id]->Forward(bottom_vec, &iter_loss);
     if (param_.test_compute_loss()) {
       loss += iter_loss;
     }
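After this patch, one solver can drive any number of test nets: TestAll() runs
every configured net at each test interval. A minimal sketch of the new usage
(filenames illustrative); note that at this point in the series test_iter is
still a single value shared by all test nets -- it becomes per-net in a later
patch:

    train_net: "lenet_train.prototxt"
    test_net: "lenet_test.prototxt"            # becomes test net #0
    test_net: "lenet_test_on_train.prototxt"   # becomes test net #1
    test_iter: 100
    test_interval: 500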
"random_seed: -1") +random_seed: 1701 +# solver mode: CPU or GPU +solver_mode: GPU +# The training protocol buffer definition +train_net_param { + name: "LeNet" + layers { + name: "mnist" + type: DATA + top: "data" + top: "label" + data_param { + source: "mnist-train-leveldb" + scale: 0.00390625 + batch_size: 64 + } + } + layers { + name: "conv1" + type: CONVOLUTION + bottom: "data" + top: "conv1" + blobs_lr: 1 + blobs_lr: 2 + convolution_param { + num_output: 20 + kernel_size: 5 + stride: 1 + weight_filler { + type: "xavier" + } + bias_filler { + type: "constant" + } + } + } + layers { + name: "pool1" + type: POOLING + bottom: "conv1" + top: "pool1" + pooling_param { + pool: MAX + kernel_size: 2 + stride: 2 + } + } + layers { + name: "conv2" + type: CONVOLUTION + bottom: "pool1" + top: "conv2" + blobs_lr: 1 + blobs_lr: 2 + convolution_param { + num_output: 50 + kernel_size: 5 + stride: 1 + weight_filler { + type: "xavier" + } + bias_filler { + type: "constant" + } + } + } + layers { + name: "pool2" + type: POOLING + bottom: "conv2" + top: "pool2" + pooling_param { + pool: MAX + kernel_size: 2 + stride: 2 + } + } + layers { + name: "ip1" + type: INNER_PRODUCT + bottom: "pool2" + top: "ip1" + blobs_lr: 1 + blobs_lr: 2 + inner_product_param { + num_output: 500 + weight_filler { + type: "xavier" + } + bias_filler { + type: "constant" + } + } + } + layers { + name: "relu1" + type: RELU + bottom: "ip1" + top: "ip1" + } + layers { + name: "ip2" + type: INNER_PRODUCT + bottom: "ip1" + top: "ip2" + blobs_lr: 1 + blobs_lr: 2 + inner_product_param { + num_output: 10 + weight_filler { + type: "xavier" + } + bias_filler { + type: "constant" + } + } + } + layers { + name: "loss" + type: SOFTMAX_LOSS + bottom: "ip2" + bottom: "label" + } +} +# The testing protocol buffer definition +test_net_param { + name: "LeNet-test" + layers { + name: "mnist" + type: DATA + top: "data" + top: "label" + data_param { + source: "mnist-test-leveldb" + scale: 0.00390625 + batch_size: 100 + } + } + layers { + name: "conv1" + type: CONVOLUTION + bottom: "data" + top: "conv1" + convolution_param { + num_output: 20 + kernel_size: 5 + stride: 1 + weight_filler { + type: "xavier" + } + bias_filler { + type: "constant" + } + } + } + layers { + name: "pool1" + type: POOLING + bottom: "conv1" + top: "pool1" + pooling_param { + pool: MAX + kernel_size: 2 + stride: 2 + } + } + layers { + name: "conv2" + type: CONVOLUTION + bottom: "pool1" + top: "conv2" + convolution_param { + num_output: 50 + kernel_size: 5 + stride: 1 + weight_filler { + type: "xavier" + } + bias_filler { + type: "constant" + } + } + } + layers { + name: "pool2" + type: POOLING + bottom: "conv2" + top: "pool2" + pooling_param { + pool: MAX + kernel_size: 2 + stride: 2 + } + } + layers { + name: "ip1" + type: INNER_PRODUCT + bottom: "pool2" + top: "ip1" + inner_product_param { + num_output: 500 + weight_filler { + type: "xavier" + } + bias_filler { + type: "constant" + } + } + } + layers { + name: "relu1" + type: RELU + bottom: "ip1" + top: "ip1" + } + layers { + name: "ip2" + type: INNER_PRODUCT + bottom: "ip1" + top: "ip2" + inner_product_param { + num_output: 10 + weight_filler { + type: "xavier" + } + bias_filler { + type: "constant" + } + } + } + layers { + name: "prob" + type: SOFTMAX + bottom: "ip2" + top: "prob" + } + layers { + name: "accuracy" + type: ACCURACY + bottom: "prob" + bottom: "label" + top: "accuracy" + } +} +# The protocol buffer definition to test on the train set +test_net_param { + name: "LeNet-test-on-train" + layers { + name: "mnist" + 
type: DATA + top: "data" + top: "label" + data_param { + source: "mnist-train-leveldb" + scale: 0.00390625 + batch_size: 100 + } + } + layers { + name: "conv1" + type: CONVOLUTION + bottom: "data" + top: "conv1" + convolution_param { + num_output: 20 + kernel_size: 5 + stride: 1 + weight_filler { + type: "xavier" + } + bias_filler { + type: "constant" + } + } + } + layers { + name: "pool1" + type: POOLING + bottom: "conv1" + top: "pool1" + pooling_param { + pool: MAX + kernel_size: 2 + stride: 2 + } + } + layers { + name: "conv2" + type: CONVOLUTION + bottom: "pool1" + top: "conv2" + convolution_param { + num_output: 50 + kernel_size: 5 + stride: 1 + weight_filler { + type: "xavier" + } + bias_filler { + type: "constant" + } + } + } + layers { + name: "pool2" + type: POOLING + bottom: "conv2" + top: "pool2" + pooling_param { + pool: MAX + kernel_size: 2 + stride: 2 + } + } + layers { + name: "ip1" + type: INNER_PRODUCT + bottom: "pool2" + top: "ip1" + inner_product_param { + num_output: 500 + weight_filler { + type: "xavier" + } + bias_filler { + type: "constant" + } + } + } + layers { + name: "relu1" + type: RELU + bottom: "ip1" + top: "ip1" + } + layers { + name: "ip2" + type: INNER_PRODUCT + bottom: "ip1" + top: "ip2" + inner_product_param { + num_output: 10 + weight_filler { + type: "xavier" + } + bias_filler { + type: "constant" + } + } + } + layers { + name: "prob" + type: SOFTMAX + bottom: "ip2" + top: "prob" + } + layers { + name: "accuracy" + type: ACCURACY + bottom: "prob" + bottom: "label" + top: "accuracy" + } +} From 2cd46db70d7625968cbf232d70723825d74d5040 Mon Sep 17 00:00:00 2001 From: Tobias Domhan Date: Sat, 10 May 2014 18:41:05 +0200 Subject: [PATCH 5/9] multiple test_iter --- src/caffe/proto/caffe.proto | 2 +- src/caffe/solver.cpp | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/src/caffe/proto/caffe.proto b/src/caffe/proto/caffe.proto index cf3a9b76c54..12c4dc6a8fb 100644 --- a/src/caffe/proto/caffe.proto +++ b/src/caffe/proto/caffe.proto @@ -68,7 +68,7 @@ message SolverParameter { optional NetParameter train_net_param = 21; // Full params for the train net. repeated NetParameter test_net_param = 22; // Full params for the test nets. // The number of iterations for each testing phase. - optional int32 test_iter = 3 [default = 0]; + repeated int32 test_iter = 3; // The number of iterations between two testing phases. 
From 2cd46db70d7625968cbf232d70723825d74d5040 Mon Sep 17 00:00:00 2001
From: Tobias Domhan
Date: Sat, 10 May 2014 18:41:05 +0200
Subject: [PATCH 5/9] multiple test_iter

---
 src/caffe/proto/caffe.proto | 2 +-
 src/caffe/solver.cpp        | 8 ++++----
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/src/caffe/proto/caffe.proto b/src/caffe/proto/caffe.proto
index cf3a9b76c54..12c4dc6a8fb 100644
--- a/src/caffe/proto/caffe.proto
+++ b/src/caffe/proto/caffe.proto
@@ -68,7 +68,7 @@ message SolverParameter {
   optional NetParameter train_net_param = 21; // Full params for the train net.
   repeated NetParameter test_net_param = 22; // Full params for the test nets.
   // The number of iterations for each testing phase.
-  optional int32 test_iter = 3 [default = 0];
+  repeated int32 test_iter = 3;
   // The number of iterations between two testing phases.
   optional int32 test_interval = 4 [default = 0];
   optional bool test_compute_loss = 19 [default = false];
diff --git a/src/caffe/solver.cpp b/src/caffe/solver.cpp
index e3dc70546a0..e68f7194a90 100644
--- a/src/caffe/solver.cpp
+++ b/src/caffe/solver.cpp
@@ -53,7 +53,7 @@ void Solver<Dtype>::Init(const SolverParameter& param) {
   const int num_test_net_files = param_.test_net_size();
   const int num_test_nets = num_test_net_params + num_test_net_files;
   if (num_test_nets) {
-    CHECK_GT(param_.test_iter(), 0);
+    CHECK_EQ(param_.test_iter().size(), num_test_nets) << "you need to specify test_iter for each test network.";
     CHECK_GT(param_.test_interval(), 0);
   }
   test_nets_.resize(num_test_nets);
@@ -141,7 +141,7 @@ void Solver<Dtype>::Test(const int test_net_id) {
   vector<Dtype> test_score;
   vector<Blob<Dtype>*> bottom_vec;
   Dtype loss = 0;
-  for (int i = 0; i < param_.test_iter(); ++i) {
+  for (int i = 0; i < param_.test_iter().Get(test_net_id); ++i) {
     Dtype iter_loss;
     const vector<Blob<Dtype>*>& result =
         test_nets_[test_net_id]->Forward(bottom_vec, &iter_loss);
@@ -166,12 +166,12 @@ void Solver<Dtype>::Test(const int test_net_id) {
     }
   }
   if (param_.test_compute_loss()) {
-    loss /= param_.test_iter();
+    loss /= param_.test_iter().Get(test_net_id);
     LOG(INFO) << "Test loss: " << loss;
   }
   for (int i = 0; i < test_score.size(); ++i) {
     LOG(INFO) << "Test score #" << i << ": "
-              << test_score[i] / param_.test_iter();
+              << test_score[i] / param_.test_iter().Get(test_net_id);
   }
   Caffe::set_phase(Caffe::TRAIN);
 }
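With test_iter now a repeated field, each test net gets its own iteration
count, matched by position (all test_net_param nets first, then all test_net
files). A sketch (filenames illustrative), assuming a test batch size of 100
in both nets:

    test_net: "lenet_test.prototxt"            # test net #0: 100 iters x 100 = 10K images
    test_net: "lenet_test_on_train.prototxt"   # test net #1: 600 iters x 100 = 60K images
    test_iter: 100   # for test net #0
    test_iter: 600   # for test net #1
    test_interval: 500

The CHECK_EQ added here makes a missing or extra test_iter a hard error.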
From 0ec86f22d7689a7db98f799a910a7d1b03dab890 Mon Sep 17 00:00:00 2001
From: Jeff Donahue
Date: Sat, 10 May 2014 11:46:19 -0700
Subject: [PATCH 6/9] lint and two test_iters in lenet_consolidated_solver

---
 examples/mnist/lenet_consolidated_solver.prototxt | 13 +++++++++----
 src/caffe/solver.cpp                              | 12 +++++++-----
 2 files changed, 16 insertions(+), 9 deletions(-)

diff --git a/examples/mnist/lenet_consolidated_solver.prototxt b/examples/mnist/lenet_consolidated_solver.prototxt
index 14a048a3664..76ccbfcccb9 100644
--- a/examples/mnist/lenet_consolidated_solver.prototxt
+++ b/examples/mnist/lenet_consolidated_solver.prototxt
@@ -6,10 +6,6 @@
 # (specify NetParameters directly in the solver, specify multiple test nets)
 # if desired.
 #
-# test_iter specifies how many forward passes the test should carry out.
-# In the case of MNIST, we have test batch size 100 and 100 test iterations,
-# covering the full 10,000 testing images.
-test_iter: 100
 # Carry out testing every 500 training iterations.
 test_interval: 500
 # The base learning rate, momentum and the weight decay of the network.
@@ -33,6 +29,7 @@ snapshot_prefix: "lenet"
 random_seed: 1701
 # solver mode: CPU or GPU
 solver_mode: GPU
+
 # The training protocol buffer definition
 train_net_param {
   name: "LeNet"
@@ -154,6 +151,11 @@ train_net_param {
     bottom: "label"
   }
 }
+
+# test_iter specifies how many forward passes the test should carry out.
+# In the case of MNIST, we have test batch size 100 and 100 test iterations,
+# covering the full 10,000 testing images.
+test_iter: 100
 # The testing protocol buffer definition
 test_net_param {
   name: "LeNet-test"
@@ -274,6 +276,9 @@ test_net_param {
     top: "accuracy"
   }
 }
+
+# The train set has 60K images, so we run 600 test iters (600 * 100 = 60K).
+test_iter: 600
 # The protocol buffer definition to test on the train set
 test_net_param {
   name: "LeNet-test-on-train"
diff --git a/src/caffe/solver.cpp b/src/caffe/solver.cpp
index e68f7194a90..48434f4bc32 100644
--- a/src/caffe/solver.cpp
+++ b/src/caffe/solver.cpp
@@ -53,7 +53,8 @@ void Solver<Dtype>::Init(const SolverParameter& param) {
   const int num_test_net_files = param_.test_net_size();
   const int num_test_nets = num_test_net_params + num_test_net_files;
   if (num_test_nets) {
-    CHECK_EQ(param_.test_iter().size(), num_test_nets) << "you need to specify test_iter for each test network.";
+    CHECK_EQ(param_.test_iter_size(), num_test_nets)
+        << "test_iter must be specified for each test network.";
     CHECK_GT(param_.test_interval(), 0);
   }
   test_nets_.resize(num_test_nets);
@@ -133,7 +134,8 @@ void Solver<Dtype>::TestAll() {
 
 template <typename Dtype>
 void Solver<Dtype>::Test(const int test_net_id) {
-  LOG(INFO) << "Iteration " << iter_ << ", Testing net (#" << test_net_id << ")";
+  LOG(INFO) << "Iteration " << iter_
+            << ", Testing net (#" << test_net_id << ")";
   // We need to set phase to test before running.
   Caffe::set_phase(Caffe::TEST);
   CHECK_NOTNULL(test_nets_[test_net_id].get())->
@@ -141,7 +143,7 @@ void Solver<Dtype>::Test(const int test_net_id) {
   vector<Dtype> test_score;
   vector<Blob<Dtype>*> bottom_vec;
   Dtype loss = 0;
-  for (int i = 0; i < param_.test_iter().Get(test_net_id); ++i) {
+  for (int i = 0; i < param_.test_iter(test_net_id); ++i) {
     Dtype iter_loss;
     const vector<Blob<Dtype>*>& result =
         test_nets_[test_net_id]->Forward(bottom_vec, &iter_loss);
@@ -166,12 +168,12 @@ void Solver<Dtype>::Test(const int test_net_id) {
     }
   }
   if (param_.test_compute_loss()) {
-    loss /= param_.test_iter().Get(test_net_id);
+    loss /= param_.test_iter(test_net_id);
     LOG(INFO) << "Test loss: " << loss;
   }
   for (int i = 0; i < test_score.size(); ++i) {
     LOG(INFO) << "Test score #" << i << ": "
-              << test_score[i] / param_.test_iter().Get(test_net_id);
+              << test_score[i] / param_.test_iter(test_net_id);
   }
   Caffe::set_phase(Caffe::TRAIN);
 }
From 511665931f9b31995987a67a1503a2fff81d5750 Mon Sep 17 00:00:00 2001
From: Jeff Donahue
Date: Sat, 10 May 2014 12:12:28 -0700
Subject: [PATCH 7/9] add script to run lenet_consolidated_solver and add
 comment with results for first/last 500 iterations

---
 .../mnist/lenet_consolidated_solver.prototxt  | 55 ++++++++++++++++++-
 examples/mnist/train_lenet_consolidated.sh    |  5 ++
 2 files changed, 58 insertions(+), 2 deletions(-)
 create mode 100755 examples/mnist/train_lenet_consolidated.sh

diff --git a/examples/mnist/lenet_consolidated_solver.prototxt b/examples/mnist/lenet_consolidated_solver.prototxt
index 76ccbfcccb9..07cbc211414 100644
--- a/examples/mnist/lenet_consolidated_solver.prototxt
+++ b/examples/mnist/lenet_consolidated_solver.prototxt
@@ -2,10 +2,10 @@
 # and lenet_test prototxts into a single file. It also adds an additional test
 # net which runs on the training set, e.g., for the purpose of comparing
 # train/test accuracy (accuracy is computed only on the test set in the included
-# LeNet example. This is mainly included as an example of using these features
+# LeNet example). This is mainly included as an example of using these features
 # (specify NetParameters directly in the solver, specify multiple test nets)
 # if desired.
-# 
+#
 # Carry out testing every 500 training iterations.
 test_interval: 500
 # The base learning rate, momentum and the weight decay of the network.
@@ -399,3 +399,54 @@ test_net_param {
     top: "accuracy"
   }
 }
+
+# Expected results for first and last 500 iterations:
+# (with portions of log omitted for brevity)
+#
+# Iteration 0, Testing net (#0)
+# Test score #0: 0.067
+# Test score #1: 2.30256
+# Iteration 0, Testing net (#1)
+# Test score #0: 0.0670334
+# Test score #1: 2.30258
+# Iteration 100, lr = 0.00992565
+# Iteration 100, loss = 0.280585
+# Iteration 200, lr = 0.00985258
+# Iteration 200, loss = 0.345601
+# Iteration 300, lr = 0.00978075
+# Iteration 300, loss = 0.172217
+# Iteration 400, lr = 0.00971013
+# Iteration 400, loss = 0.261836
+# Iteration 500, lr = 0.00964069
+# Iteration 500, loss = 0.157803
+# Iteration 500, Testing net (#0)
+# Test score #0: 0.968
+# Test score #1: 0.0993772
+# Iteration 500, Testing net (#1)
+# Test score #0: 0.965883
+# Test score #1: 0.109374
+#
+# [...]
+#
+# Iteration 9500, Testing net (#0)
+# Test score #0: 0.9899
+# Test score #1: 0.0308299
+# Iteration 9500, Testing net (#1)
+# Test score #0: 0.996816
+# Test score #1: 0.0118238
+# Iteration 9600, lr = 0.00603682
+# Iteration 9600, loss = 0.0126215
+# Iteration 9700, lr = 0.00601382
+# Iteration 9700, loss = 0.00579304
+# Iteration 9800, lr = 0.00599102
+# Iteration 9800, loss = 0.00500633
+# Iteration 9900, lr = 0.00596843
+# Iteration 9900, loss = 0.00796607
+# Iteration 10000, lr = 0.00594604
+# Iteration 10000, loss = 0.00271736
+# Iteration 10000, Testing net (#0)
+# Test score #0: 0.9914
+# Test score #1: 0.0276671
+# Iteration 10000, Testing net (#1)
+# Test score #0: 0.997782
+# Test score #1: 0.00908085
diff --git a/examples/mnist/train_lenet_consolidated.sh b/examples/mnist/train_lenet_consolidated.sh
new file mode 100755
index 00000000000..ff6897e263d
--- /dev/null
+++ b/examples/mnist/train_lenet_consolidated.sh
@@ -0,0 +1,5 @@
+#!/usr/bin/env sh
+
+TOOLS=../../build/tools
+
+GLOG_logtostderr=1 $TOOLS/train_net.bin lenet_consolidated_solver.prototxt

From 41da421e613f161174cd046205583f148998dfa5 Mon Sep 17 00:00:00 2001
From: Jeff Donahue
Date: Sat, 10 May 2014 13:57:52 -0700
Subject: [PATCH 8/9] fix proto comment for multiple test nets

---
 src/caffe/proto/caffe.proto | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/src/caffe/proto/caffe.proto b/src/caffe/proto/caffe.proto
index 12c4dc6a8fb..ce2f25b8db5 100644
--- a/src/caffe/proto/caffe.proto
+++ b/src/caffe/proto/caffe.proto
@@ -61,8 +61,11 @@ message SolverParameter {
   // parameters; {train,test}_net_param specify the net parameters directly
   // inside the SolverParameter.
   //
-  // If {train,test}_net is specified, {train,test}_net_param should not be,
-  // and vice versa.
+  // Only either train_net or train_net_param (not both) should be specified.
+  // You may specify 0 or more test_net and/or test_net_param. All
+  // nets specified using test_net_param will be tested first, followed by all
+  // nets specified using test_net (each processed in the order specified in
+  // the prototxt).
   optional string train_net = 1; // The proto filename for the train net.
   repeated string test_net = 2; // The proto filenames for the test nets.
   optional NetParameter train_net_param = 21; // Full params for the train net.
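A sketch of the evaluation order the revised comment pins down (the net names
and filename are illustrative; the inline nets are left empty here just to
show the numbering):

    test_net_param { name: "inline_a" }  # tested first -> test net #0
    test_net_param { name: "inline_b" }  # -> test net #1
    test_net: "from_file_c.prototxt"     # tested after all inline nets -> test net #2
    test_iter: 100
    test_iter: 100
    test_iter: 100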
From ba2875ba9416a5789cc8e6d18758ce87177495a8 Mon Sep 17 00:00:00 2001
From: Jeff Donahue
Date: Sat, 10 May 2014 14:09:31 -0700
Subject: [PATCH 9/9] require either train_net or train_net_param to be
 specified

---
 src/caffe/solver.cpp | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/src/caffe/solver.cpp b/src/caffe/solver.cpp
index 48434f4bc32..6a8f18f1331 100644
--- a/src/caffe/solver.cpp
+++ b/src/caffe/solver.cpp
@@ -46,6 +46,8 @@ void Solver<Dtype>::Init(const SolverParameter& param) {
     LOG(INFO) << "Creating training net specified in SolverParameter.";
     net_.reset(new Net<Dtype>(param_.train_net_param()));
   } else {
+    CHECK(param_.has_train_net())
+        << "Neither train_net nor train_net_param were specified.";
     LOG(INFO) << "Creating training net from file: " << param_.train_net();
     net_.reset(new Net<Dtype>(param_.train_net()));
   }
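With this final patch, a SolverParameter that names no training net at all
fails fast in Init rather than attempting to read a net from an empty
filename. Taken together, the series leaves the solver accepting
configurations of this shape (a sketch; filenames illustrative):

    # Exactly one of train_net / train_net_param:
    train_net: "lenet_train.prototxt"
    # Zero or more test nets, with one test_iter per net:
    test_net: "lenet_test.prototxt"
    test_iter: 100
    test_interval: 500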