diff --git a/examples/mnist/lenet_consolidated_solver.prototxt b/examples/mnist/lenet_consolidated_solver.prototxt
new file mode 100644
index 00000000000..07cbc211414
--- /dev/null
+++ b/examples/mnist/lenet_consolidated_solver.prototxt
@@ -0,0 +1,452 @@
+# lenet_consolidated_solver.prototxt consolidates the lenet_solver, lenet_train,
+# and lenet_test prototxts into a single file. It also adds an additional test
+# net which runs on the training set, e.g., for the purpose of comparing
+# train/test accuracy (accuracy is computed only on the test set in the included
+# LeNet example). This is mainly included as an example of using these features
+# (specify NetParameters directly in the solver, specify multiple test nets)
+# if desired.
+#
+# Carry out testing every 500 training iterations.
+test_interval: 500
+# The base learning rate, momentum and the weight decay of the network.
+base_lr: 0.01
+momentum: 0.9
+weight_decay: 0.0005
+# The learning rate policy
+lr_policy: "inv"
+gamma: 0.0001
+power: 0.75
+# Display every 100 iterations
+display: 100
+# The maximum number of iterations
+max_iter: 10000
+# snapshot intermediate results
+snapshot: 5000
+snapshot_prefix: "lenet"
+# Set a random_seed for repeatable results.
+# (For results that vary due to random initialization, comment out the below
+# line, or set to a negative integer -- e.g. "random_seed: -1")
+random_seed: 1701
+# solver mode: CPU or GPU
+solver_mode: GPU
+
+# The training protocol buffer definition
+train_net_param {
+  name: "LeNet"
+  layers {
+    name: "mnist"
+    type: DATA
+    top: "data"
+    top: "label"
+    data_param {
+      source: "mnist-train-leveldb"
+      scale: 0.00390625
+      batch_size: 64
+    }
+  }
+  layers {
+    name: "conv1"
+    type: CONVOLUTION
+    bottom: "data"
+    top: "conv1"
+    blobs_lr: 1
+    blobs_lr: 2
+    convolution_param {
+      num_output: 20
+      kernel_size: 5
+      stride: 1
+      weight_filler {
+        type: "xavier"
+      }
+      bias_filler {
+        type: "constant"
+      }
+    }
+  }
+  layers {
+    name: "pool1"
+    type: POOLING
+    bottom: "conv1"
+    top: "pool1"
+    pooling_param {
+      pool: MAX
+      kernel_size: 2
+      stride: 2
+    }
+  }
+  layers {
+    name: "conv2"
+    type: CONVOLUTION
+    bottom: "pool1"
+    top: "conv2"
+    blobs_lr: 1
+    blobs_lr: 2
+    convolution_param {
+      num_output: 50
+      kernel_size: 5
+      stride: 1
+      weight_filler {
+        type: "xavier"
+      }
+      bias_filler {
+        type: "constant"
+      }
+    }
+  }
+  layers {
+    name: "pool2"
+    type: POOLING
+    bottom: "conv2"
+    top: "pool2"
+    pooling_param {
+      pool: MAX
+      kernel_size: 2
+      stride: 2
+    }
+  }
+  layers {
+    name: "ip1"
+    type: INNER_PRODUCT
+    bottom: "pool2"
+    top: "ip1"
+    blobs_lr: 1
+    blobs_lr: 2
+    inner_product_param {
+      num_output: 500
+      weight_filler {
+        type: "xavier"
+      }
+      bias_filler {
+        type: "constant"
+      }
+    }
+  }
+  layers {
+    name: "relu1"
+    type: RELU
+    bottom: "ip1"
+    top: "ip1"
+  }
+  layers {
+    name: "ip2"
+    type: INNER_PRODUCT
+    bottom: "ip1"
+    top: "ip2"
+    blobs_lr: 1
+    blobs_lr: 2
+    inner_product_param {
+      num_output: 10
+      weight_filler {
+        type: "xavier"
+      }
+      bias_filler {
+        type: "constant"
+      }
+    }
+  }
+  layers {
+    name: "loss"
+    type: SOFTMAX_LOSS
+    bottom: "ip2"
+    bottom: "label"
+  }
+}
+
+# test_iter specifies how many forward passes the test should carry out.
+# In the case of MNIST, we have test batch size 100 and 100 test iterations,
+# covering the full 10,000 testing images.
+test_iter: 100
+# The testing protocol buffer definition
+test_net_param {
+  name: "LeNet-test"
+  layers {
+    name: "mnist"
+    type: DATA
+    top: "data"
+    top: "label"
+    data_param {
+      source: "mnist-test-leveldb"
+      scale: 0.00390625
+      batch_size: 100
+    }
+  }
+  layers {
+    name: "conv1"
+    type: CONVOLUTION
+    bottom: "data"
+    top: "conv1"
+    convolution_param {
+      num_output: 20
+      kernel_size: 5
+      stride: 1
+      weight_filler {
+        type: "xavier"
+      }
+      bias_filler {
+        type: "constant"
+      }
+    }
+  }
+  layers {
+    name: "pool1"
+    type: POOLING
+    bottom: "conv1"
+    top: "pool1"
+    pooling_param {
+      pool: MAX
+      kernel_size: 2
+      stride: 2
+    }
+  }
+  layers {
+    name: "conv2"
+    type: CONVOLUTION
+    bottom: "pool1"
+    top: "conv2"
+    convolution_param {
+      num_output: 50
+      kernel_size: 5
+      stride: 1
+      weight_filler {
+        type: "xavier"
+      }
+      bias_filler {
+        type: "constant"
+      }
+    }
+  }
+  layers {
+    name: "pool2"
+    type: POOLING
+    bottom: "conv2"
+    top: "pool2"
+    pooling_param {
+      pool: MAX
+      kernel_size: 2
+      stride: 2
+    }
+  }
+  layers {
+    name: "ip1"
+    type: INNER_PRODUCT
+    bottom: "pool2"
+    top: "ip1"
+    inner_product_param {
+      num_output: 500
+      weight_filler {
+        type: "xavier"
+      }
+      bias_filler {
+        type: "constant"
+      }
+    }
+  }
+  layers {
+    name: "relu1"
+    type: RELU
+    bottom: "ip1"
+    top: "ip1"
+  }
+  layers {
+    name: "ip2"
+    type: INNER_PRODUCT
+    bottom: "ip1"
+    top: "ip2"
+    inner_product_param {
+      num_output: 10
+      weight_filler {
+        type: "xavier"
+      }
+      bias_filler {
+        type: "constant"
+      }
+    }
+  }
+  layers {
+    name: "prob"
+    type: SOFTMAX
+    bottom: "ip2"
+    top: "prob"
+  }
+  layers {
+    name: "accuracy"
+    type: ACCURACY
+    bottom: "prob"
+    bottom: "label"
+    top: "accuracy"
+  }
+}
+
+# The train set has 60K images, so we run 600 test iters (600 * 100 = 60K).
+test_iter: 600
+# The protocol buffer definition to test on the train set
+test_net_param {
+  name: "LeNet-test-on-train"
+  layers {
+    name: "mnist"
+    type: DATA
+    top: "data"
+    top: "label"
+    data_param {
+      source: "mnist-train-leveldb"
+      scale: 0.00390625
+      batch_size: 100
+    }
+  }
+  layers {
+    name: "conv1"
+    type: CONVOLUTION
+    bottom: "data"
+    top: "conv1"
+    convolution_param {
+      num_output: 20
+      kernel_size: 5
+      stride: 1
+      weight_filler {
+        type: "xavier"
+      }
+      bias_filler {
+        type: "constant"
+      }
+    }
+  }
+  layers {
+    name: "pool1"
+    type: POOLING
+    bottom: "conv1"
+    top: "pool1"
+    pooling_param {
+      pool: MAX
+      kernel_size: 2
+      stride: 2
+    }
+  }
+  layers {
+    name: "conv2"
+    type: CONVOLUTION
+    bottom: "pool1"
+    top: "conv2"
+    convolution_param {
+      num_output: 50
+      kernel_size: 5
+      stride: 1
+      weight_filler {
+        type: "xavier"
+      }
+      bias_filler {
+        type: "constant"
+      }
+    }
+  }
+  layers {
+    name: "pool2"
+    type: POOLING
+    bottom: "conv2"
+    top: "pool2"
+    pooling_param {
+      pool: MAX
+      kernel_size: 2
+      stride: 2
+    }
+  }
+  layers {
+    name: "ip1"
+    type: INNER_PRODUCT
+    bottom: "pool2"
+    top: "ip1"
+    inner_product_param {
+      num_output: 500
+      weight_filler {
+        type: "xavier"
+      }
+      bias_filler {
+        type: "constant"
+      }
+    }
+  }
+  layers {
+    name: "relu1"
+    type: RELU
+    bottom: "ip1"
+    top: "ip1"
+  }
+  layers {
+    name: "ip2"
+    type: INNER_PRODUCT
+    bottom: "ip1"
+    top: "ip2"
+    inner_product_param {
+      num_output: 10
+      weight_filler {
+        type: "xavier"
+      }
+      bias_filler {
+        type: "constant"
+      }
+    }
+  }
+  layers {
+    name: "prob"
+    type: SOFTMAX
+    bottom: "ip2"
+    top: "prob"
+  }
+  layers {
+    name: "accuracy"
+    type: ACCURACY
+    bottom: "prob"
+    bottom: "label"
+    top: "accuracy"
+  }
+}
+
+# Expected results for first and last 500 iterations:
+# (with portions of log omitted for brevity)
+#
+# Iteration 0, Testing net (#0)
+# Test score #0: 0.067
+# Test score #1: 2.30256
+# Iteration 0, Testing net (#1)
+# Test score #0: 0.0670334
+# Test score #1: 2.30258
+# Iteration 100, lr = 0.00992565
+# Iteration 100, loss = 0.280585
+# Iteration 200, lr = 0.00985258
+# Iteration 200, loss = 0.345601
+# Iteration 300, lr = 0.00978075
+# Iteration 300, loss = 0.172217
+# Iteration 400, lr = 0.00971013
+# Iteration 400, loss = 0.261836
+# Iteration 500, lr = 0.00964069
+# Iteration 500, loss = 0.157803
+# Iteration 500, Testing net (#0)
+# Test score #0: 0.968
+# Test score #1: 0.0993772
+# Iteration 500, Testing net (#1)
+# Test score #0: 0.965883
+# Test score #1: 0.109374
+#
+# [...]
+#
+# Iteration 9500, Testing net (#0)
+# Test score #0: 0.9899
+# Test score #1: 0.0308299
+# Iteration 9500, Testing net (#1)
+# Test score #0: 0.996816
+# Test score #1: 0.0118238
+# Iteration 9600, lr = 0.00603682
+# Iteration 9600, loss = 0.0126215
+# Iteration 9700, lr = 0.00601382
+# Iteration 9700, loss = 0.00579304
+# Iteration 9800, lr = 0.00599102
+# Iteration 9800, loss = 0.00500633
+# Iteration 9900, lr = 0.00596843
+# Iteration 9900, loss = 0.00796607
+# Iteration 10000, lr = 0.00594604
+# Iteration 10000, loss = 0.00271736
+# Iteration 10000, Testing net (#0)
+# Test score #0: 0.9914
+# Test score #1: 0.0276671
+# Iteration 10000, Testing net (#1)
+# Test score #0: 0.997782
+# Test score #1: 0.00908085
diff --git a/examples/mnist/train_lenet_consolidated.sh b/examples/mnist/train_lenet_consolidated.sh
new file mode 100755
index 00000000000..ff6897e263d
--- /dev/null
+++ b/examples/mnist/train_lenet_consolidated.sh
@@ -0,0 +1,5 @@
+#!/usr/bin/env sh
+
+TOOLS=../../build/tools
+
+GLOG_logtostderr=1 $TOOLS/train_net.bin lenet_consolidated_solver.prototxt
diff --git a/include/caffe/solver.hpp b/include/caffe/solver.hpp
index aef9b22c44d..3112c59e0fc 100644
--- a/include/caffe/solver.hpp
+++ b/include/caffe/solver.hpp
@@ -33,7 +33,8 @@ class Solver {
   // written to disk together with the learned net.
   void Snapshot();
   // The test routine
-  void Test();
+  void TestAll();
+  void Test(const int test_net_id = 0);
   virtual void SnapshotSolverState(SolverState* state) = 0;
   // The Restore function implements how one should restore the solver to a
   // previously snapshotted state. You should implement the RestoreSolverState()
@@ -44,7 +45,7 @@
   SolverParameter param_;
   int iter_;
   shared_ptr<Net<Dtype> > net_;
-  shared_ptr<Net<Dtype> > test_net_;
+  vector<shared_ptr<Net<Dtype> > > test_nets_;
 
   DISABLE_COPY_AND_ASSIGN(Solver);
 };
diff --git a/src/caffe/net.cpp b/src/caffe/net.cpp
index 6f57d8d09ad..8eeff57efc0 100644
--- a/src/caffe/net.cpp
+++ b/src/caffe/net.cpp
@@ -33,6 +33,8 @@ Net<Dtype>::Net(const string& param_file) {
 
 template <typename Dtype>
 void Net<Dtype>::Init(const NetParameter& in_param) {
+  LOG(INFO) << "Initializing net from parameters: " << std::endl
+            << in_param.DebugString();
   // Create a copy of in_param with splits added where necessary.
   NetParameter param;
   InsertSplits(in_param, &param);
diff --git a/src/caffe/proto/caffe.proto b/src/caffe/proto/caffe.proto
index ab3c2fecc5c..ce2f25b8db5 100644
--- a/src/caffe/proto/caffe.proto
+++ b/src/caffe/proto/caffe.proto
@@ -57,10 +57,21 @@ message NetParameter {
 }
 
 message SolverParameter {
-  optional string train_net = 1; // The proto file for the training net.
-  optional string test_net = 2; // The proto file for the testing net.
+  // {train,test}_net specify a path to a file containing the {train,test} net
+  // parameters; {train,test}_net_param specify the net parameters directly
+  // inside the SolverParameter.
+  //
+  // Only either train_net or train_net_param (not both) should be specified.
+  // You may specify 0 or more test_net and/or test_net_param. All
+  // nets specified using test_net_param will be tested first, followed by all
+  // nets specified using test_net (each processed in the order specified in
+  // the prototxt).
+  optional string train_net = 1; // The proto filename for the train net.
+  repeated string test_net = 2; // The proto filenames for the test nets.
+  optional NetParameter train_net_param = 21; // Full params for the train net.
+  repeated NetParameter test_net_param = 22; // Full params for the test nets.
   // The number of iterations for each testing phase.
-  optional int32 test_iter = 3 [default = 0];
+  repeated int32 test_iter = 3;
   // The number of iterations between two testing phases.
   optional int32 test_interval = 4 [default = 0];
   optional bool test_compute_loss = 19 [default = false];
diff --git a/src/caffe/solver.cpp b/src/caffe/solver.cpp
index 4932968d0b6..6a8f18f1331 100644
--- a/src/caffe/solver.cpp
+++ b/src/caffe/solver.cpp
@@ -19,13 +19,13 @@ namespace caffe {
 
 template <typename Dtype>
 Solver<Dtype>::Solver(const SolverParameter& param)
-    : net_(), test_net_() {
+    : net_() {
   Init(param);
 }
 
 template <typename Dtype>
 Solver<Dtype>::Solver(const string& param_file)
-    : net_(), test_net_() {
+    : net_() {
   SolverParameter param;
   ReadProtoFromTextFile(param_file, &param);
   Init(param);
@@ -33,19 +33,44 @@ Solver<Dtype>::Solver(const string& param_file)
 
 template <typename Dtype>
 void Solver<Dtype>::Init(const SolverParameter& param) {
+  LOG(INFO) << "Initializing solver from parameters: " << std::endl
+            << param.DebugString();
   param_ = param;
   if (param_.random_seed() >= 0) {
     Caffe::set_random_seed(param_.random_seed());
   }
   // Scaffolding code
-  LOG(INFO) << "Creating training net.";
-  net_.reset(new Net<Dtype>(param_.train_net()));
-  if (param_.has_test_net()) {
-    LOG(INFO) << "Creating testing net.";
-    test_net_.reset(new Net<Dtype>(param_.test_net()));
-    CHECK_GT(param_.test_iter(), 0);
+  if (param_.has_train_net_param()) {
+    CHECK(!param_.has_train_net()) << "Either train_net_param or train_net may "
+        << "be specified, but not both.";
+    LOG(INFO) << "Creating training net specified in SolverParameter.";
+    net_.reset(new Net<Dtype>(param_.train_net_param()));
+  } else {
+    CHECK(param_.has_train_net())
+        << "Neither train_net nor train_net_param were specified.";
+    LOG(INFO) << "Creating training net from file: " << param_.train_net();
+    net_.reset(new Net<Dtype>(param_.train_net()));
+  }
+  const int num_test_net_params = param_.test_net_param_size();
+  const int num_test_net_files = param_.test_net_size();
+  const int num_test_nets = num_test_net_params + num_test_net_files;
+  if (num_test_nets) {
+    CHECK_EQ(param_.test_iter_size(), num_test_nets)
+        << "test_iter must be specified for each test network.";
     CHECK_GT(param_.test_interval(), 0);
   }
+  test_nets_.resize(num_test_nets);
+  for (int i = 0; i < num_test_net_params; ++i) {
+    LOG(INFO) << "Creating testing net (#" << i
+              << ") specified in SolverParameter.";
+    test_nets_[i].reset(new Net<Dtype>(param_.test_net_param(i)));
+  }
+  for (int i = 0, test_net_id = num_test_net_params;
+       i < num_test_net_files; ++i, ++test_net_id) {
+    LOG(INFO) << "Creating testing net (#" << test_net_id
+              << ") from file: " << param.test_net(i);
+    test_nets_[test_net_id].reset(new Net<Dtype>(param_.test_net(i)));
+  }
   LOG(INFO) << "Solver scaffolding done.";
 }
 
@@ -72,7 +97,7 @@ void Solver<Dtype>::Solve(const char* resume_file) {
   // there's not enough memory to run the test net and crash, etc.; and to gauge
   // the effect of the first training iterations.
   if (param_.test_interval()) {
-    Test();
+    TestAll();
   }
 
   // For a network that is trained by the solver, no bottom or top vecs
@@ -87,7 +112,7 @@ void Solver<Dtype>::Solve(const char* resume_file) {
       LOG(INFO) << "Iteration " << iter_ << ", loss = " << loss;
     }
     if (param_.test_interval() && iter_ % param_.test_interval() == 0) {
-      Test();
+      TestAll();
     }
     // Check if we need to do snapshot
    if (param_.snapshot() && iter_ % param_.snapshot() == 0) {
@@ -102,18 +127,28 @@ void Solver<Dtype>::Solve(const char* resume_file) {
 
 
 template <typename Dtype>
-void Solver<Dtype>::Test() {
-  LOG(INFO) << "Iteration " << iter_ << ", Testing net";
+void Solver<Dtype>::TestAll() {
+  for (int test_net_id = 0; test_net_id < test_nets_.size(); ++test_net_id) {
+    Test(test_net_id);
+  }
+}
+
+
+template <typename Dtype>
+void Solver<Dtype>::Test(const int test_net_id) {
+  LOG(INFO) << "Iteration " << iter_
+            << ", Testing net (#" << test_net_id << ")";
   // We need to set phase to test before running.
   Caffe::set_phase(Caffe::TEST);
-  CHECK_NOTNULL(test_net_.get())->ShareTrainedLayersWith(net_.get());
+  CHECK_NOTNULL(test_nets_[test_net_id].get())->
+      ShareTrainedLayersWith(net_.get());
   vector<Dtype> test_score;
   vector<Blob<Dtype>*> bottom_vec;
   Dtype loss = 0;
-  for (int i = 0; i < param_.test_iter(); ++i) {
+  for (int i = 0; i < param_.test_iter(test_net_id); ++i) {
     Dtype iter_loss;
     const vector<Blob<Dtype>*>& result =
-        test_net_->Forward(bottom_vec, &iter_loss);
+        test_nets_[test_net_id]->Forward(bottom_vec, &iter_loss);
     if (param_.test_compute_loss()) {
       loss += iter_loss;
     }
@@ -135,12 +170,12 @@ void Solver<Dtype>::Test(const int test_net_id) {
     }
   }
   if (param_.test_compute_loss()) {
-    loss /= param_.test_iter();
+    loss /= param_.test_iter(test_net_id);
     LOG(INFO) << "Test loss: " << loss;
   }
   for (int i = 0; i < test_score.size(); ++i) {
     LOG(INFO) << "Test score #" << i << ": "
-              << test_score[i] / param_.test_iter();
+              << test_score[i] / param_.test_iter(test_net_id);
   }
   Caffe::set_phase(Caffe::TRAIN);
 }
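
Note on the new SolverParameter fields: besides embedding nets with train_net_param/test_net_param as in the consolidated example above, test nets may also be given by file via the (now repeated) test_net field, with one test_iter entry per test net; nets given via test_net_param are tested first, then nets given via test_net, in the order written. A minimal sketch of the file-based form follows -- the file names are placeholders for illustration and are not files added by this patch:

    # Hypothetical solver prototxt using file-based nets (paths are placeholders).
    train_net: "lenet_train.prototxt"
    # Two test nets given by file; they become test nets #0 and #1.
    test_net: "lenet_test.prototxt"
    test_net: "lenet_test_on_train.prototxt"
    # One test_iter per test net, paired in the same order.
    test_iter: 100
    test_iter: 600
    test_interval: 500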