diff --git a/docs/feature_extraction.md b/docs/feature_extraction.md index 13639fbf8fd..fa23e9c8708 100644 --- a/docs/feature_extraction.md +++ b/docs/feature_extraction.md @@ -22,7 +22,7 @@ We're going to use the images that ship with caffe. find `pwd`/examples/images -type f -exec echo {} \; > examples/_temp/temp.txt -The `ImagesLayer` we'll use expects labels after each filenames, so let's add a 0 to the end of each line +The `ImageDataLayer` we'll use expects labels after each filenames, so let's add a 0 to the end of each line sed "s/$/ 0/" examples/_temp/temp.txt > examples/_temp/file_list.txt @@ -37,7 +37,7 @@ Download the mean image of the ILSVRC dataset. We will use `data/ilsvrc212/imagenet_mean.binaryproto` in the network definition prototxt. Let's copy and modify the network definition. -We'll be using the `ImagesLayer`, which will load and resize images for us. +We'll be using the `ImageDataLayer`, which will load and resize images for us. cp examples/feature_extraction/imagenet_val.prototxt examples/_temp diff --git a/docs/imagenet_training.md b/docs/imagenet_training.md index fbf1e1359ab..9e0076cf65f 100644 --- a/docs/imagenet_training.md +++ b/docs/imagenet_training.md @@ -55,7 +55,7 @@ Network Definition The network definition follows strictly the one in Krizhevsky et al. You can find the detailed definition at `examples/imagenet/imagenet_train.prototxt`. Note the paths in the data layer - if you have not followed the exact paths in this guide you will need to change the following lines: source: "ilvsrc12_train_leveldb" - meanfile: "../../data/ilsvrc12/imagenet_mean.binaryproto" + mean_file: "../../data/ilsvrc12/imagenet_mean.binaryproto" to point to your own leveldb and image mean. Likewise, do the same for `examples/imagenet/imagenet_val.prototxt`. diff --git a/docs/mnist_prototxt.md b/docs/mnist_prototxt.md index 5ed2f23b2d6..aaff2b00953 100644 --- a/docs/mnist_prototxt.md +++ b/docs/mnist_prototxt.md @@ -17,11 +17,11 @@ Writing the Data Layer Currently, we will read the MNIST data from the leveldb we created earlier in the demo. This is defined by a data layer: layers { - layer { - name: "mnist" - type: "data" + name: "mnist" + type: DATA + data_param { source: "mnist-train-leveldb" - batchsize: 64 + batch_size: 64 scale: 0.00390625 } top: "data" @@ -35,9 +35,11 @@ Writing the Convolution Layer Let's define the first convolution layer: layers { - layer { - name: "conv1" - type: "conv" + name: "conv1" + type: CONVOLUTION + blobs_lr: 1. + blobs_lr: 2. + convolution_param { num_output: 20 kernelsize: 5 stride: 1 @@ -47,8 +49,6 @@ Let's define the first convolution layer: bias_filler { type: "constant" } - blobs_lr: 1. - blobs_lr: 2. } bottom: "data" top: "conv1" @@ -65,10 +65,10 @@ Writing the Pooling Layer Phew. Pooling layers are actually much easier to define: layers { - layer { - name: "pool1" - type: "pool" - kernelsize: 2 + name: "pool1" + type: POOLING + pooling_param { + kernel_size: 2 stride: 2 pool: MAX } @@ -82,12 +82,14 @@ Similarly, you can write up the second convolution and pooling layers. Check `da Writing the Fully Connected Layer ---------------------------------- -Writing a fully connected layers is also simple: +Writing a fully connected layer is also simple: layers { - layer { - name: "ip1" - type: "innerproduct" + name: "ip1" + type: INNER_PRODUCT + blobs_lr: 1. + blobs_lr: 2. 
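+    # blobs_lr gives per-parameter learning-rate multipliers: the weights train at 1x and the biases at 2x the solver's base learning rate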
+ inner_product_param { num_output: 500 weight_filler { type: "xavier" @@ -95,8 +97,6 @@ Writing a fully connected layers is also simple: bias_filler { type: "constant" } - blobs_lr: 1. - blobs_lr: 2. } bottom: "pool2" top: "ip1" @@ -109,10 +109,8 @@ Writing the ReLU Layer A ReLU Layer is also simple: layers { - layer { - name: "relu1" - type: "relu" - } + name: "relu1" + type: RELU bottom: "ip1" top: "ip1" } @@ -122,9 +120,11 @@ Since ReLU is an element-wise operation, we can do *in-place* operations to save After the ReLU layer, we will write another innerproduct layer: layers { - layer { - name: "ip2" - type: "innerproduct" + name: "ip2" + type: INNER_PRODUCT + blobs_lr: 1. + blobs_lr: 2. + inner_product_param { num_output: 10 weight_filler { type: "xavier" @@ -132,8 +132,6 @@ After the ReLU layer, we will write another innerproduct layer: bias_filler { type: "constant" } - blobs_lr: 1. - blobs_lr: 2. } bottom: "ip1" top: "ip2" @@ -144,10 +142,8 @@ Writing the Loss Layer Finally, we will write the loss! layers { - layer { - name: "loss" - type: "softmax_loss" - } + name: "loss" + type: SOFTMAX_LOSS bottom: "ip2" bottom: "label" } diff --git a/examples/cifar10/cifar10_full.prototxt b/examples/cifar10/cifar10_full.prototxt index 64fb2a8de85..237a7a0a0ed 100644 --- a/examples/cifar10/cifar10_full.prototxt +++ b/examples/cifar10/cifar10_full.prototxt @@ -6,148 +6,135 @@ input_dim: 1 input_dim: 3 input_dim: 32 input_dim: 32 -# ------------------------ layer 1 ----------------------------- layers { - layer { - name: "conv1" - type: "conv" - num_output: 32 - kernelsize: 5 - pad: 2 - stride: 1 - blobs_lr: 1.0 - blobs_lr: 2.0 - } - bottom: "data" - top: "conv1" + name: "conv1" + type: CONVOLUTION + bottom: "data" + top: "conv1" + blobs_lr: 1 + blobs_lr: 2 + convolution_param { + num_output: 32 + pad: 2 + kernel_size: 5 + stride: 1 + } } layers { - layer { - name: "pool1" - type: "pool" - kernelsize: 3 - stride: 2 - pool: MAX - } - bottom: "conv1" - top: "pool1" + name: "pool1" + type: POOLING + bottom: "conv1" + top: "pool1" + pooling_param { + pool: MAX + kernel_size: 3 + stride: 2 + } } layers { - layer { - name: "relu1" - type: "relu" - } - bottom: "pool1" - top: "pool1" + name: "relu1" + type: RELU + bottom: "pool1" + top: "pool1" } layers { - layer { - name: "norm1" - type: "lrn" + name: "norm1" + type: LRN + bottom: "pool1" + top: "norm1" + lrn_param { local_size: 3 - alpha: 0.00005 + alpha: 5e-05 beta: 0.75 } - bottom: "pool1" - top: "norm1" } -# --------------------------- layer 2 ------------------------ layers { - layer { - name: "conv2" - type: "conv" - num_output: 32 - kernelsize: 5 - pad: 2 - stride: 1 - blobs_lr: 1. - blobs_lr: 2. 
- } - bottom: "norm1" - top: "conv2" + name: "conv2" + type: CONVOLUTION + bottom: "norm1" + top: "conv2" + blobs_lr: 1 + blobs_lr: 2 + convolution_param { + num_output: 32 + pad: 2 + kernel_size: 5 + stride: 1 + } } layers { - layer { - name: "relu2" - type: "relu" - } - bottom: "conv2" - top: "conv2" + name: "relu2" + type: RELU + bottom: "conv2" + top: "conv2" } layers { - layer { - name: "pool2" - type: "pool" - kernelsize: 3 - stride: 2 - pool: AVE - } - bottom: "conv2" - top: "pool2" + name: "pool2" + type: POOLING + bottom: "conv2" + top: "pool2" + pooling_param { + pool: AVE + kernel_size: 3 + stride: 2 + } } layers { - layer { - name: "norm2" - type: "lrn" + name: "norm2" + type: LRN + bottom: "pool2" + top: "norm2" + lrn_param { local_size: 3 - alpha: 0.00005 + alpha: 5e-05 beta: 0.75 } - bottom: "pool2" - top: "norm2" } -#-----------------------layer 3------------------------- layers { - layer { - name: "conv3" - type: "conv" - num_output: 64 - kernelsize: 5 - pad: 2 - stride: 1 - } - bottom: "norm2" - top: "conv3" + name: "conv3" + type: CONVOLUTION + bottom: "norm2" + top: "conv3" + convolution_param { + num_output: 64 + pad: 2 + kernel_size: 5 + stride: 1 + } } layers { - layer { - name: "relu3" - type: "relu" - } - bottom: "conv3" - top: "conv3" + name: "relu3" + type: RELU + bottom: "conv3" + top: "conv3" } layers { - layer { - name: "pool3" - type: "pool" - kernelsize: 3 - stride: 2 - pool: AVE - } - bottom: "conv3" - top: "pool3" + name: "pool3" + type: POOLING + bottom: "conv3" + top: "pool3" + pooling_param { + pool: AVE + kernel_size: 3 + stride: 2 + } } -#--------------------------layer 4------------------------ layers { - layer { - name: "ip1" - type: "innerproduct" - num_output: 10 - blobs_lr: 1. - blobs_lr: 2. - weight_decay: 250. - weight_decay: 0. - } - bottom: "pool3" - top: "ip1" + name: "ip1" + type: INNER_PRODUCT + bottom: "pool3" + top: "ip1" + blobs_lr: 1 + blobs_lr: 2 + weight_decay: 250 + weight_decay: 0 + inner_product_param { + num_output: 10 + } } -#-----------------------output------------------------ layers { - layer { - name: "prob" - type: "softmax" - } - bottom: "ip1" - top: "prob" + name: "prob" + type: SOFTMAX + bottom: "ip1" + top: "prob" } diff --git a/examples/cifar10/cifar10_full_test.prototxt b/examples/cifar10/cifar10_full_test.prototxt index a77c7d268da..ada373a55cb 100644 --- a/examples/cifar10/cifar10_full_test.prototxt +++ b/examples/cifar10/cifar10_full_test.prototxt @@ -1,193 +1,178 @@ name: "CIFAR10_full_test" layers { - layer { - name: "cifar" - type: "data" - source: "cifar10-leveldb/cifar-test-leveldb" - meanfile: "mean.binaryproto" - batchsize: 100 - } - top: "data" - top: "label" + name: "cifar" + type: DATA + top: "data" + top: "label" + data_param { + source: "cifar10-leveldb/cifar-test-leveldb" + mean_file: "mean.binaryproto" + batch_size: 100 + } } -# ------------------------ layer 1 ----------------------------- layers { - layer { - name: "conv1" - type: "conv" - num_output: 32 - kernelsize: 5 - pad: 2 - stride: 1 - weight_filler { - type: "gaussian" - std: 0.0001 - } - bias_filler { - type: "constant" - } - blobs_lr: 1. - blobs_lr: 2. 
- } - bottom: "data" - top: "conv1" + name: "conv1" + type: CONVOLUTION + bottom: "data" + top: "conv1" + blobs_lr: 1 + blobs_lr: 2 + convolution_param { + num_output: 32 + pad: 2 + kernel_size: 5 + stride: 1 + weight_filler { + type: "gaussian" + std: 0.0001 + } + bias_filler { + type: "constant" + } + } } layers { - layer { - name: "pool1" - type: "pool" - kernelsize: 3 - stride: 2 - pool: MAX - } - bottom: "conv1" - top: "pool1" + name: "pool1" + type: POOLING + bottom: "conv1" + top: "pool1" + pooling_param { + pool: MAX + kernel_size: 3 + stride: 2 + } } layers { - layer { - name: "relu1" - type: "relu" - } - bottom: "pool1" - top: "pool1" + name: "relu1" + type: RELU + bottom: "pool1" + top: "pool1" } layers { - layer { - name: "norm1" - type: "lrn" + name: "norm1" + type: LRN + bottom: "pool1" + top: "norm1" + lrn_param { local_size: 3 - alpha: 0.00005 + alpha: 5e-05 beta: 0.75 } - bottom: "pool1" - top: "norm1" } -# --------------------------- layer 2 ------------------------ layers { - layer { - name: "conv2" - type: "conv" - num_output: 32 - kernelsize: 5 - pad: 2 - stride: 1 - weight_filler { - type: "gaussian" - std: 0.01 - } - bias_filler { - type: "constant" - } - blobs_lr: 1. - blobs_lr: 2. - } - bottom: "norm1" - top: "conv2" + name: "conv2" + type: CONVOLUTION + bottom: "norm1" + top: "conv2" + blobs_lr: 1 + blobs_lr: 2 + convolution_param { + num_output: 32 + pad: 2 + kernel_size: 5 + stride: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + } + } } layers { - layer { - name: "relu2" - type: "relu" - } - bottom: "conv2" - top: "conv2" + name: "relu2" + type: RELU + bottom: "conv2" + top: "conv2" } layers { - layer { - name: "pool2" - type: "pool" - kernelsize: 3 - stride: 2 - pool: AVE - } - bottom: "conv2" - top: "pool2" + name: "pool2" + type: POOLING + bottom: "conv2" + top: "pool2" + pooling_param { + pool: AVE + kernel_size: 3 + stride: 2 + } } layers { - layer { - name: "norm2" - type: "lrn" + name: "norm2" + type: LRN + bottom: "pool2" + top: "norm2" + lrn_param { local_size: 3 - alpha: 0.00005 + alpha: 5e-05 beta: 0.75 } - bottom: "pool2" - top: "norm2" } -#-----------------------layer 3------------------------- layers { - layer { - name: "conv3" - type: "conv" - num_output: 64 - kernelsize: 5 - pad: 2 - stride: 1 - weight_filler { - type: "gaussian" - std: 0.01 - } - bias_filler { - type: "constant" - } - } - bottom: "norm2" - top: "conv3" + name: "conv3" + type: CONVOLUTION + bottom: "norm2" + top: "conv3" + convolution_param { + num_output: 64 + pad: 2 + kernel_size: 5 + stride: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + } + } } layers { - layer { - name: "relu3" - type: "relu" - } - bottom: "conv3" - top: "conv3" + name: "relu3" + type: RELU + bottom: "conv3" + top: "conv3" } layers { - layer { - name: "pool3" - type: "pool" - kernelsize: 3 - stride: 2 - pool: AVE - } - bottom: "conv3" - top: "pool3" + name: "pool3" + type: POOLING + bottom: "conv3" + top: "pool3" + pooling_param { + pool: AVE + kernel_size: 3 + stride: 2 + } } -#--------------------------layer 4------------------------ layers { - layer { - name: "ip1" - type: "innerproduct" - num_output: 10 - weight_filler { - type: "gaussian" - std: 0.01 - } - bias_filler { - type: "constant" - } - blobs_lr: 1. - blobs_lr: 2. - weight_decay: 250. - weight_decay: 0. 
- } - bottom: "pool3" - top: "ip1" + name: "ip1" + type: INNER_PRODUCT + bottom: "pool3" + top: "ip1" + blobs_lr: 1 + blobs_lr: 2 + weight_decay: 250 + weight_decay: 0 + inner_product_param { + num_output: 10 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + } + } } -#-----------------------output------------------------ layers { - layer { - name: "prob" - type: "softmax" - } - bottom: "ip1" - top: "prob" + name: "prob" + type: SOFTMAX + bottom: "ip1" + top: "prob" } layers { - layer { - name: "accuracy" - type: "accuracy" - } + name: "accuracy" + type: ACCURACY bottom: "prob" bottom: "label" top: "accuracy" diff --git a/examples/cifar10/cifar10_full_train.prototxt b/examples/cifar10/cifar10_full_train.prototxt index 28e4612c04e..56c9306e2dc 100644 --- a/examples/cifar10/cifar10_full_train.prototxt +++ b/examples/cifar10/cifar10_full_train.prototxt @@ -1,185 +1,172 @@ name: "CIFAR10_full_train" layers { - layer { - name: "cifar" - type: "data" - source: "cifar10-leveldb/cifar-train-leveldb" - meanfile: "mean.binaryproto" - batchsize: 100 - } - top: "data" - top: "label" + name: "cifar" + type: DATA + top: "data" + top: "label" + data_param { + source: "cifar10-leveldb/cifar-train-leveldb" + mean_file: "mean.binaryproto" + batch_size: 100 + } } -# ------------------------ layer 1 ----------------------------- layers { - layer { - name: "conv1" - type: "conv" - num_output: 32 - kernelsize: 5 - pad: 2 - stride: 1 - weight_filler { - type: "gaussian" - std: 0.0001 - } - bias_filler { - type: "constant" - } - blobs_lr: 1. - blobs_lr: 2. - } - bottom: "data" - top: "conv1" + name: "conv1" + type: CONVOLUTION + bottom: "data" + top: "conv1" + blobs_lr: 1 + blobs_lr: 2 + convolution_param { + num_output: 32 + pad: 2 + kernel_size: 5 + stride: 1 + weight_filler { + type: "gaussian" + std: 0.0001 + } + bias_filler { + type: "constant" + } + } } layers { - layer { - name: "pool1" - type: "pool" - kernelsize: 3 - stride: 2 - pool: MAX - } - bottom: "conv1" - top: "pool1" + name: "pool1" + type: POOLING + bottom: "conv1" + top: "pool1" + pooling_param { + pool: MAX + kernel_size: 3 + stride: 2 + } } layers { - layer { - name: "relu1" - type: "relu" - } - bottom: "pool1" - top: "pool1" + name: "relu1" + type: RELU + bottom: "pool1" + top: "pool1" } layers { - layer { - name: "norm1" - type: "lrn" + name: "norm1" + type: LRN + bottom: "pool1" + top: "norm1" + lrn_param { local_size: 3 - alpha: 0.00005 + alpha: 5e-05 beta: 0.75 } - bottom: "pool1" - top: "norm1" } -# --------------------------- layer 2 ------------------------ layers { - layer { - name: "conv2" - type: "conv" - num_output: 32 - kernelsize: 5 - pad: 2 - stride: 1 - weight_filler { - type: "gaussian" - std: 0.01 - } - bias_filler { - type: "constant" - } - blobs_lr: 1. - blobs_lr: 2. 
- } - bottom: "norm1" - top: "conv2" + name: "conv2" + type: CONVOLUTION + bottom: "norm1" + top: "conv2" + blobs_lr: 1 + blobs_lr: 2 + convolution_param { + num_output: 32 + pad: 2 + kernel_size: 5 + stride: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + } + } } layers { - layer { - name: "relu2" - type: "relu" - } - bottom: "conv2" - top: "conv2" + name: "relu2" + type: RELU + bottom: "conv2" + top: "conv2" } layers { - layer { - name: "pool2" - type: "pool" - kernelsize: 3 - stride: 2 - pool: AVE - } - bottom: "conv2" - top: "pool2" + name: "pool2" + type: POOLING + bottom: "conv2" + top: "pool2" + pooling_param { + pool: AVE + kernel_size: 3 + stride: 2 + } } layers { - layer { - name: "norm2" - type: "lrn" + name: "norm2" + type: LRN + bottom: "pool2" + top: "norm2" + lrn_param { local_size: 3 - alpha: 0.00005 + alpha: 5e-05 beta: 0.75 } - bottom: "pool2" - top: "norm2" } -#-----------------------layer 3------------------------- layers { - layer { - name: "conv3" - type: "conv" - num_output: 64 - kernelsize: 5 - pad: 2 - stride: 1 - weight_filler { - type: "gaussian" - std: 0.01 - } - bias_filler { - type: "constant" - } - } - bottom: "norm2" - top: "conv3" + name: "conv3" + type: CONVOLUTION + bottom: "norm2" + top: "conv3" + convolution_param { + num_output: 64 + pad: 2 + kernel_size: 5 + stride: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + } + } } layers { - layer { - name: "relu3" - type: "relu" - } - bottom: "conv3" - top: "conv3" + name: "relu3" + type: RELU + bottom: "conv3" + top: "conv3" } layers { - layer { - name: "pool3" - type: "pool" - kernelsize: 3 - stride: 2 - pool: AVE - } - bottom: "conv3" - top: "pool3" + name: "pool3" + type: POOLING + bottom: "conv3" + top: "pool3" + pooling_param { + pool: AVE + kernel_size: 3 + stride: 2 + } } -#--------------------------layer 4------------------------ layers { - layer { - name: "ip1" - type: "innerproduct" - num_output: 10 - weight_filler { - type: "gaussian" - std: 0.01 - } - bias_filler { - type: "constant" - } - blobs_lr: 1. - blobs_lr: 2. - weight_decay: 250. - weight_decay: 0. - } - bottom: "pool3" - top: "ip1" + name: "ip1" + type: INNER_PRODUCT + bottom: "pool3" + top: "ip1" + blobs_lr: 1 + blobs_lr: 2 + weight_decay: 250 + weight_decay: 0 + inner_product_param { + num_output: 10 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + } + } } -#-----------------------output------------------------ layers { - layer { - name: "loss" - type: "softmax_loss" - } - bottom: "ip1" - bottom: "label" + name: "loss" + type: SOFTMAX_LOSS + bottom: "ip1" + bottom: "label" } diff --git a/examples/cifar10/cifar10_quick.prototxt b/examples/cifar10/cifar10_quick.prototxt index 6161caa10e8..505158f7a34 100644 --- a/examples/cifar10/cifar10_quick.prototxt +++ b/examples/cifar10/cifar10_quick.prototxt @@ -1,143 +1,127 @@ name: "CIFAR10_quick_test" -# N.B. 
input image must be in CIFAR-10 format -# as described at http://www.cs.toronto.edu/~kriz/cifar.html input: "data" input_dim: 1 input_dim: 3 input_dim: 32 input_dim: 32 -# ------------------------ layer 1 ----------------------------- layers { - layer { - name: "conv1" - type: "conv" - num_output: 32 - kernelsize: 5 - pad: 2 - stride: 1 - blobs_lr: 1.0 - blobs_lr: 2.0 - } - bottom: "data" - top: "conv1" + name: "conv1" + type: CONVOLUTION + bottom: "data" + top: "conv1" + blobs_lr: 1 + blobs_lr: 2 + convolution_param { + num_output: 32 + pad: 2 + kernel_size: 5 + stride: 1 + } } layers { - layer { - name: "pool1" - type: "pool" - kernelsize: 3 - stride: 2 - pool: MAX - } - bottom: "conv1" - top: "pool1" + name: "pool1" + type: POOLING + bottom: "conv1" + top: "pool1" + pooling_param { + pool: MAX + kernel_size: 3 + stride: 2 + } } layers { - layer { - name: "relu1" - type: "relu" - } - bottom: "pool1" - top: "pool1" + name: "relu1" + type: RELU + bottom: "pool1" + top: "pool1" } -# --------------------------- layer 2 ------------------------ layers { - layer { - name: "conv2" - type: "conv" - num_output: 32 - kernelsize: 5 - pad: 2 - stride: 1 - blobs_lr: 1.0 - blobs_lr: 2.0 - } - bottom: "pool1" - top: "conv2" + name: "conv2" + type: CONVOLUTION + bottom: "pool1" + top: "conv2" + blobs_lr: 1 + blobs_lr: 2 + convolution_param { + num_output: 32 + pad: 2 + kernel_size: 5 + stride: 1 + } } layers { - layer { - name: "relu2" - type: "relu" - } - bottom: "conv2" - top: "conv2" + name: "relu2" + type: RELU + bottom: "conv2" + top: "conv2" } layers { - layer { - name: "pool2" - type: "pool" - kernelsize: 3 - stride: 2 - pool: AVE - } - bottom: "conv2" - top: "pool2" + name: "pool2" + type: POOLING + bottom: "conv2" + top: "pool2" + pooling_param { + pool: AVE + kernel_size: 3 + stride: 2 + } } -#-----------------------layer 3------------------------- layers { - layer { - name: "conv3" - type: "conv" - num_output: 64 - kernelsize: 5 - pad: 2 - stride: 1 - blobs_lr: 1.0 - blobs_lr: 2.0 - } - bottom: "pool2" - top: "conv3" + name: "conv3" + type: CONVOLUTION + bottom: "pool2" + top: "conv3" + blobs_lr: 1 + blobs_lr: 2 + convolution_param { + num_output: 64 + pad: 2 + kernel_size: 5 + stride: 1 + } } layers { - layer { - name: "relu3" - type: "relu" - } - bottom: "conv3" - top: "conv3" + name: "relu3" + type: RELU + bottom: "conv3" + top: "conv3" } layers { - layer { - name: "pool3" - type: "pool" - kernelsize: 3 - stride: 2 - pool: AVE - } - bottom: "conv3" - top: "pool3" + name: "pool3" + type: POOLING + bottom: "conv3" + top: "pool3" + pooling_param { + pool: AVE + kernel_size: 3 + stride: 2 + } } -#--------------------------layer 4------------------------ layers { - layer { - name: "ip1" - type: "innerproduct" - num_output: 64 - blobs_lr: 1.0 - blobs_lr: 2.0 - } - bottom: "pool3" - top: "ip1" + name: "ip1" + type: INNER_PRODUCT + bottom: "pool3" + top: "ip1" + blobs_lr: 1 + blobs_lr: 2 + inner_product_param { + num_output: 64 + } } -#--------------------------layer 5------------------------ layers { - layer { - name: "ip2" - type: "innerproduct" - num_output: 10 - blobs_lr: 1.0 - blobs_lr: 2.0 - } - bottom: "ip1" - top: "ip2" + name: "ip2" + type: INNER_PRODUCT + bottom: "ip1" + top: "ip2" + blobs_lr: 1 + blobs_lr: 2 + inner_product_param { + num_output: 10 + } } -#-----------------------output------------------------ layers { - layer { - name: "prob" - type: "softmax" - } - bottom: "ip2" - top: "prob" + name: "prob" + type: SOFTMAX + bottom: "ip2" + top: "prob" } diff --git 
a/examples/cifar10/cifar10_quick_test.prototxt b/examples/cifar10/cifar10_quick_test.prototxt index a937df57d00..a154b9a0ea7 100644 --- a/examples/cifar10/cifar10_quick_test.prototxt +++ b/examples/cifar10/cifar10_quick_test.prototxt @@ -1,191 +1,174 @@ -# quick config name: "CIFAR10_quick_test" layers { - layer { - name: "cifar" - type: "data" - source: "cifar10-leveldb/cifar-test-leveldb" - meanfile: "mean.binaryproto" - batchsize: 100 - } - top: "data" - top: "label" + name: "cifar" + type: DATA + top: "data" + top: "label" + data_param { + source: "cifar10-leveldb/cifar-test-leveldb" + mean_file: "mean.binaryproto" + batch_size: 100 + } } -# ------------------------ layer 1 ----------------------------- layers { - layer { - name: "conv1" - type: "conv" - num_output: 32 - kernelsize: 5 - pad: 2 - stride: 1 - weight_filler { - type: "gaussian" - std: 0.0001 - } - bias_filler { - type: "constant" - } - blobs_lr: 1.0 - blobs_lr: 2.0 - } - bottom: "data" - top: "conv1" + name: "conv1" + type: CONVOLUTION + bottom: "data" + top: "conv1" + blobs_lr: 1 + blobs_lr: 2 + convolution_param { + num_output: 32 + pad: 2 + kernel_size: 5 + stride: 1 + weight_filler { + type: "gaussian" + std: 0.0001 + } + bias_filler { + type: "constant" + } + } } layers { - layer { - name: "pool1" - type: "pool" - kernelsize: 3 - stride: 2 - pool: MAX - } - bottom: "conv1" - top: "pool1" + name: "pool1" + type: POOLING + bottom: "conv1" + top: "pool1" + pooling_param { + pool: MAX + kernel_size: 3 + stride: 2 + } } layers { - layer { - name: "relu1" - type: "relu" - } - bottom: "pool1" - top: "pool1" + name: "relu1" + type: RELU + bottom: "pool1" + top: "pool1" } -# --------------------------- layer 2 ------------------------ layers { - layer { - name: "conv2" - type: "conv" - num_output: 32 - kernelsize: 5 - pad: 2 - stride: 1 - weight_filler { - type: "gaussian" - std: 0.01 - } - bias_filler { - type: "constant" - } - blobs_lr: 1.0 - blobs_lr: 2.0 - } - bottom: "pool1" - top: "conv2" + name: "conv2" + type: CONVOLUTION + bottom: "pool1" + top: "conv2" + blobs_lr: 1 + blobs_lr: 2 + convolution_param { + num_output: 32 + pad: 2 + kernel_size: 5 + stride: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + } + } } layers { - layer { - name: "relu2" - type: "relu" - } - bottom: "conv2" - top: "conv2" + name: "relu2" + type: RELU + bottom: "conv2" + top: "conv2" } layers { - layer { - name: "pool2" - type: "pool" - kernelsize: 3 - stride: 2 - pool: AVE - } - bottom: "conv2" - top: "pool2" + name: "pool2" + type: POOLING + bottom: "conv2" + top: "pool2" + pooling_param { + pool: AVE + kernel_size: 3 + stride: 2 + } } -#-----------------------layer 3------------------------- layers { - layer { - name: "conv3" - type: "conv" - num_output: 64 - kernelsize: 5 - pad: 2 - stride: 1 - weight_filler { - type: "gaussian" - std: 0.01 - } - bias_filler { - type: "constant" - } - blobs_lr: 1.0 - blobs_lr: 2.0 - } - bottom: "pool2" - top: "conv3" + name: "conv3" + type: CONVOLUTION + bottom: "pool2" + top: "conv3" + blobs_lr: 1 + blobs_lr: 2 + convolution_param { + num_output: 64 + pad: 2 + kernel_size: 5 + stride: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + } + } } layers { - layer { - name: "relu3" - type: "relu" - } - bottom: "conv3" - top: "conv3" + name: "relu3" + type: RELU + bottom: "conv3" + top: "conv3" } layers { - layer { - name: "pool3" - type: "pool" - kernelsize: 3 - stride: 2 - pool: AVE - } - bottom: "conv3" - top: "pool3" + name: 
"pool3" + type: POOLING + bottom: "conv3" + top: "pool3" + pooling_param { + pool: AVE + kernel_size: 3 + stride: 2 + } } -#--------------------------layer 4------------------------ layers { - layer { - name: "ip1" - type: "innerproduct" - num_output: 64 - weight_filler { - type: "gaussian" - std: 0.1 - } - bias_filler { - type: "constant" - } - blobs_lr: 1.0 - blobs_lr: 2.0 - } - bottom: "pool3" - top: "ip1" + name: "ip1" + type: INNER_PRODUCT + bottom: "pool3" + top: "ip1" + blobs_lr: 1 + blobs_lr: 2 + inner_product_param { + num_output: 64 + weight_filler { + type: "gaussian" + std: 0.1 + } + bias_filler { + type: "constant" + } + } } -#--------------------------layer 5------------------------ layers { - layer { - name: "ip2" - type: "innerproduct" - num_output: 10 - weight_filler { - type: "gaussian" - std: 0.1 - } - bias_filler { - type: "constant" - } - blobs_lr: 1.0 - blobs_lr: 2.0 - } - bottom: "ip1" - top: "ip2" + name: "ip2" + type: INNER_PRODUCT + bottom: "ip1" + top: "ip2" + blobs_lr: 1 + blobs_lr: 2 + inner_product_param { + num_output: 10 + weight_filler { + type: "gaussian" + std: 0.1 + } + bias_filler { + type: "constant" + } + } } -#-----------------------output------------------------ layers { - layer { - name: "prob" - type: "softmax" - } - bottom: "ip2" - top: "prob" + name: "prob" + type: SOFTMAX + bottom: "ip2" + top: "prob" } layers { - layer { - name: "accuracy" - type: "accuracy" - } + name: "accuracy" + type: ACCURACY bottom: "prob" bottom: "label" top: "accuracy" diff --git a/examples/cifar10/cifar10_quick_train.prototxt b/examples/cifar10/cifar10_quick_train.prototxt index 2d3a10a6c7f..de5b6c32c5d 100644 --- a/examples/cifar10/cifar10_quick_train.prototxt +++ b/examples/cifar10/cifar10_quick_train.prototxt @@ -1,183 +1,168 @@ -# quick config name: "CIFAR10_quick_train" layers { - layer { - name: "cifar" - type: "data" - source: "cifar10-leveldb/cifar-train-leveldb" - meanfile: "mean.binaryproto" - batchsize: 100 - } - top: "data" - top: "label" + name: "cifar" + type: DATA + top: "data" + top: "label" + data_param { + source: "cifar10-leveldb/cifar-train-leveldb" + mean_file: "mean.binaryproto" + batch_size: 100 + } } -# ------------------------ layer 1 ----------------------------- layers { - layer { - name: "conv1" - type: "conv" - num_output: 32 - kernelsize: 5 - pad: 2 - stride: 1 - weight_filler { - type: "gaussian" - std: 0.0001 - } - bias_filler { - type: "constant" - } - blobs_lr: 1.0 - blobs_lr: 2.0 - } - bottom: "data" - top: "conv1" + name: "conv1" + type: CONVOLUTION + bottom: "data" + top: "conv1" + blobs_lr: 1 + blobs_lr: 2 + convolution_param { + num_output: 32 + pad: 2 + kernel_size: 5 + stride: 1 + weight_filler { + type: "gaussian" + std: 0.0001 + } + bias_filler { + type: "constant" + } + } } layers { - layer { - name: "pool1" - type: "pool" - kernelsize: 3 - stride: 2 - pool: MAX - } - bottom: "conv1" - top: "pool1" + name: "pool1" + type: POOLING + bottom: "conv1" + top: "pool1" + pooling_param { + pool: MAX + kernel_size: 3 + stride: 2 + } } layers { - layer { - name: "relu1" - type: "relu" - } - bottom: "pool1" - top: "pool1" + name: "relu1" + type: RELU + bottom: "pool1" + top: "pool1" } -# --------------------------- layer 2 ------------------------ layers { - layer { - name: "conv2" - type: "conv" - num_output: 32 - kernelsize: 5 - pad: 2 - stride: 1 - weight_filler { - type: "gaussian" - std: 0.01 - } - bias_filler { - type: "constant" - } - blobs_lr: 1.0 - blobs_lr: 2.0 - } - bottom: "pool1" - top: "conv2" + name: "conv2" + type: 
CONVOLUTION + bottom: "pool1" + top: "conv2" + blobs_lr: 1 + blobs_lr: 2 + convolution_param { + num_output: 32 + pad: 2 + kernel_size: 5 + stride: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + } + } } layers { - layer { - name: "relu2" - type: "relu" - } - bottom: "conv2" - top: "conv2" + name: "relu2" + type: RELU + bottom: "conv2" + top: "conv2" } layers { - layer { - name: "pool2" - type: "pool" - kernelsize: 3 - stride: 2 - pool: AVE - } - bottom: "conv2" - top: "pool2" + name: "pool2" + type: POOLING + bottom: "conv2" + top: "pool2" + pooling_param { + pool: AVE + kernel_size: 3 + stride: 2 + } } -#-----------------------layer 3------------------------- layers { - layer { - name: "conv3" - type: "conv" - num_output: 64 - kernelsize: 5 - pad: 2 - stride: 1 - weight_filler { - type: "gaussian" - std: 0.01 - } - bias_filler { - type: "constant" - } - blobs_lr: 1.0 - blobs_lr: 2.0 - } - bottom: "pool2" - top: "conv3" + name: "conv3" + type: CONVOLUTION + bottom: "pool2" + top: "conv3" + blobs_lr: 1 + blobs_lr: 2 + convolution_param { + num_output: 64 + pad: 2 + kernel_size: 5 + stride: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + } + } } layers { - layer { - name: "relu3" - type: "relu" - } - bottom: "conv3" - top: "conv3" + name: "relu3" + type: RELU + bottom: "conv3" + top: "conv3" } layers { - layer { - name: "pool3" - type: "pool" - kernelsize: 3 - stride: 2 - pool: AVE - } - bottom: "conv3" - top: "pool3" + name: "pool3" + type: POOLING + bottom: "conv3" + top: "pool3" + pooling_param { + pool: AVE + kernel_size: 3 + stride: 2 + } } -#--------------------------layer 4------------------------ layers { - layer { - name: "ip1" - type: "innerproduct" - num_output: 64 - weight_filler { - type: "gaussian" - std: 0.1 - } - bias_filler { - type: "constant" - } - blobs_lr: 1.0 - blobs_lr: 2.0 - } - bottom: "pool3" - top: "ip1" + name: "ip1" + type: INNER_PRODUCT + bottom: "pool3" + top: "ip1" + blobs_lr: 1 + blobs_lr: 2 + inner_product_param { + num_output: 64 + weight_filler { + type: "gaussian" + std: 0.1 + } + bias_filler { + type: "constant" + } + } } -#--------------------------layer 5------------------------ layers { - layer { - name: "ip2" - type: "innerproduct" - num_output: 10 - weight_filler { - type: "gaussian" - std: 0.1 - } - bias_filler { - type: "constant" - } - blobs_lr: 1.0 - blobs_lr: 2.0 - } - bottom: "ip1" - top: "ip2" + name: "ip2" + type: INNER_PRODUCT + bottom: "ip1" + top: "ip2" + blobs_lr: 1 + blobs_lr: 2 + inner_product_param { + num_output: 10 + weight_filler { + type: "gaussian" + std: 0.1 + } + bias_filler { + type: "constant" + } + } } -#-----------------------output------------------------ layers { - layer { - name: "loss" - type: "softmax_loss" - } - bottom: "ip2" - bottom: "label" + name: "loss" + type: SOFTMAX_LOSS + bottom: "ip2" + bottom: "label" } diff --git a/examples/feature_extraction/imagenet_val.prototxt b/examples/feature_extraction/imagenet_val.prototxt index c7b26509125..14bfe770ef8 100644 --- a/examples/feature_extraction/imagenet_val.prototxt +++ b/examples/feature_extraction/imagenet_val.prototxt @@ -1,246 +1,228 @@ name: "CaffeNet" layers { - layer { - name: "data" - type: "images" + name: "data" + type: IMAGE_DATA + top: "data" + top: "label" + image_data_param { source: "$CAFFE_DIR/examples/_temp/file_list.txt" - meanfile: "$CAFFE_DIR/data/ilsvrc12/imagenet_mean.binaryproto" - batchsize: 50 + mean_file: 
"$CAFFE_DIR/data/ilsvrc12/imagenet_mean.binaryproto" + batch_size: 50 + crop_size: 227 + mirror: false new_height: 256 new_width: 256 - mirror: false - cropsize: 227 } - top: "data" - top: "label" } layers { - layer { - name: "conv1" - type: "conv" + name: "conv1" + type: CONVOLUTION + bottom: "data" + top: "conv1" + convolution_param { num_output: 96 - kernelsize: 11 + kernel_size: 11 stride: 4 } - bottom: "data" - top: "conv1" } layers { - layer { - name: "relu1" - type: "relu" - } + name: "relu1" + type: RELU bottom: "conv1" top: "conv1" } layers { - layer { - name: "pool1" - type: "pool" + name: "pool1" + type: POOLING + bottom: "conv1" + top: "pool1" + pooling_param { pool: MAX - kernelsize: 3 + kernel_size: 3 stride: 2 } - bottom: "conv1" - top: "pool1" } layers { - layer { - name: "norm1" - type: "lrn" + name: "norm1" + type: LRN + bottom: "pool1" + top: "norm1" + lrn_param { local_size: 5 alpha: 0.0001 beta: 0.75 } - bottom: "pool1" - top: "norm1" } layers { - layer { - name: "conv2" - type: "conv" + name: "conv2" + type: CONVOLUTION + bottom: "norm1" + top: "conv2" + convolution_param { num_output: 256 - group: 2 - kernelsize: 5 pad: 2 + kernel_size: 5 + group: 2 } - bottom: "norm1" - top: "conv2" } layers { - layer { - name: "relu2" - type: "relu" - } + name: "relu2" + type: RELU bottom: "conv2" top: "conv2" } layers { - layer { - name: "pool2" - type: "pool" + name: "pool2" + type: POOLING + bottom: "conv2" + top: "pool2" + pooling_param { pool: MAX - kernelsize: 3 + kernel_size: 3 stride: 2 } - bottom: "conv2" - top: "pool2" } layers { - layer { - name: "norm2" - type: "lrn" + name: "norm2" + type: LRN + bottom: "pool2" + top: "norm2" + lrn_param { local_size: 5 alpha: 0.0001 beta: 0.75 } - bottom: "pool2" - top: "norm2" } layers { - layer { - name: "conv3" - type: "conv" + name: "conv3" + type: CONVOLUTION + bottom: "norm2" + top: "conv3" + convolution_param { num_output: 384 - kernelsize: 3 pad: 1 + kernel_size: 3 } - bottom: "norm2" - top: "conv3" } layers { - layer { - name: "relu3" - type: "relu" - } + name: "relu3" + type: RELU bottom: "conv3" top: "conv3" } layers { - layer { - name: "conv4" - type: "conv" + name: "conv4" + type: CONVOLUTION + bottom: "conv3" + top: "conv4" + convolution_param { num_output: 384 - group: 2 - kernelsize: 3 pad: 1 + kernel_size: 3 + group: 2 } - bottom: "conv3" - top: "conv4" } layers { - layer { - name: "relu4" - type: "relu" - } + name: "relu4" + type: RELU bottom: "conv4" top: "conv4" } layers { - layer { - name: "conv5" - type: "conv" + name: "conv5" + type: CONVOLUTION + bottom: "conv4" + top: "conv5" + convolution_param { num_output: 256 - group: 2 - kernelsize: 3 pad: 1 + kernel_size: 3 + group: 2 } - bottom: "conv4" - top: "conv5" } layers { - layer { - name: "relu5" - type: "relu" - } + name: "relu5" + type: RELU bottom: "conv5" top: "conv5" } layers { - layer { - name: "pool5" - type: "pool" - kernelsize: 3 + name: "pool5" + type: POOLING + bottom: "conv5" + top: "pool5" + pooling_param { pool: MAX + kernel_size: 3 stride: 2 } - bottom: "conv5" - top: "pool5" } layers { - layer { - name: "fc6" - type: "innerproduct" - num_output: 4096 - } + name: "fc6" + type: INNER_PRODUCT bottom: "pool5" top: "fc6" + inner_product_param { + num_output: 4096 + } } layers { - layer { - name: "relu6" - type: "relu" - } + name: "relu6" + type: RELU bottom: "fc6" top: "fc6" } layers { - layer { - name: "drop6" - type: "dropout" - dropout_ratio: 0.5 - } + name: "drop6" + type: DROPOUT bottom: "fc6" top: "fc6" + dropout_param { + dropout_ratio: 0.5 + } 
} layers { - layer { - name: "fc7" - type: "innerproduct" - num_output: 4096 - } + name: "fc7" + type: INNER_PRODUCT bottom: "fc6" top: "fc7" + inner_product_param { + num_output: 4096 + } } layers { - layer { - name: "relu7" - type: "relu" - } + name: "relu7" + type: RELU bottom: "fc7" top: "fc7" } layers { - layer { - name: "drop7" - type: "dropout" - dropout_ratio: 0.5 - } + name: "drop7" + type: DROPOUT bottom: "fc7" top: "fc7" + dropout_param { + dropout_ratio: 0.5 + } } layers { - layer { - name: "fc8" - type: "innerproduct" - num_output: 1000 - } + name: "fc8" + type: INNER_PRODUCT bottom: "fc7" top: "fc8" + inner_product_param { + num_output: 1000 + } } layers { - layer { - name: "prob" - type: "softmax" - } + name: "prob" + type: SOFTMAX bottom: "fc8" top: "prob" } layers { - layer { - name: "accuracy" - type: "accuracy" - } + name: "accuracy" + type: ACCURACY bottom: "prob" bottom: "label" top: "accuracy" diff --git a/examples/imagenet/imagenet_deploy.prototxt b/examples/imagenet/imagenet_deploy.prototxt index 0b1f41ab914..37ab9221da3 100644 --- a/examples/imagenet/imagenet_deploy.prototxt +++ b/examples/imagenet/imagenet_deploy.prototxt @@ -5,11 +5,17 @@ input_dim: 3 input_dim: 227 input_dim: 227 layers { - layer { - name: "conv1" - type: "conv" + name: "conv1" + type: CONVOLUTION + bottom: "data" + top: "conv1" + blobs_lr: 1 + blobs_lr: 2 + weight_decay: 1 + weight_decay: 0 + convolution_param { num_output: 96 - kernelsize: 11 + kernel_size: 11 stride: 4 weight_filler { type: "gaussian" @@ -17,210 +23,200 @@ layers { } bias_filler { type: "constant" - value: 0. + value: 0 } - blobs_lr: 1. - blobs_lr: 2. - weight_decay: 1. - weight_decay: 0. } - bottom: "data" - top: "conv1" } layers { - layer { - name: "relu1" - type: "relu" - } + name: "relu1" + type: RELU bottom: "conv1" top: "conv1" } layers { - layer { - name: "pool1" - type: "pool" + name: "pool1" + type: POOLING + bottom: "conv1" + top: "pool1" + pooling_param { pool: MAX - kernelsize: 3 + kernel_size: 3 stride: 2 } - bottom: "conv1" - top: "pool1" } layers { - layer { - name: "norm1" - type: "lrn" + name: "norm1" + type: LRN + bottom: "pool1" + top: "norm1" + lrn_param { local_size: 5 alpha: 0.0001 beta: 0.75 } - bottom: "pool1" - top: "norm1" } layers { - layer { - name: "conv2" - type: "conv" + name: "conv2" + type: CONVOLUTION + bottom: "norm1" + top: "conv2" + blobs_lr: 1 + blobs_lr: 2 + weight_decay: 1 + weight_decay: 0 + convolution_param { num_output: 256 - group: 2 - kernelsize: 5 pad: 2 + kernel_size: 5 + group: 2 weight_filler { type: "gaussian" std: 0.01 } bias_filler { type: "constant" - value: 1. + value: 1 } - blobs_lr: 1. - blobs_lr: 2. - weight_decay: 1. - weight_decay: 0. 
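+    # group: 2 splits the filters into two halves that each see only half of the input channels, as in the two-GPU layout of Krizhevsky et al.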
} - bottom: "norm1" - top: "conv2" } layers { - layer { - name: "relu2" - type: "relu" - } + name: "relu2" + type: RELU bottom: "conv2" top: "conv2" } layers { - layer { - name: "pool2" - type: "pool" + name: "pool2" + type: POOLING + bottom: "conv2" + top: "pool2" + pooling_param { pool: MAX - kernelsize: 3 + kernel_size: 3 stride: 2 } - bottom: "conv2" - top: "pool2" } layers { - layer { - name: "norm2" - type: "lrn" + name: "norm2" + type: LRN + bottom: "pool2" + top: "norm2" + lrn_param { local_size: 5 alpha: 0.0001 beta: 0.75 } - bottom: "pool2" - top: "norm2" } layers { - layer { - name: "conv3" - type: "conv" + name: "conv3" + type: CONVOLUTION + bottom: "norm2" + top: "conv3" + blobs_lr: 1 + blobs_lr: 2 + weight_decay: 1 + weight_decay: 0 + convolution_param { num_output: 384 - kernelsize: 3 pad: 1 + kernel_size: 3 weight_filler { type: "gaussian" std: 0.01 } bias_filler { type: "constant" - value: 0. + value: 0 } - blobs_lr: 1. - blobs_lr: 2. - weight_decay: 1. - weight_decay: 0. } - bottom: "norm2" - top: "conv3" } layers { - layer { - name: "relu3" - type: "relu" - } + name: "relu3" + type: RELU bottom: "conv3" top: "conv3" } layers { - layer { - name: "conv4" - type: "conv" + name: "conv4" + type: CONVOLUTION + bottom: "conv3" + top: "conv4" + blobs_lr: 1 + blobs_lr: 2 + weight_decay: 1 + weight_decay: 0 + convolution_param { num_output: 384 - group: 2 - kernelsize: 3 pad: 1 + kernel_size: 3 + group: 2 weight_filler { type: "gaussian" std: 0.01 } bias_filler { type: "constant" - value: 1. + value: 1 } - blobs_lr: 1. - blobs_lr: 2. - weight_decay: 1. - weight_decay: 0. } - bottom: "conv3" - top: "conv4" } layers { - layer { - name: "relu4" - type: "relu" - } + name: "relu4" + type: RELU bottom: "conv4" top: "conv4" } layers { - layer { - name: "conv5" - type: "conv" + name: "conv5" + type: CONVOLUTION + bottom: "conv4" + top: "conv5" + blobs_lr: 1 + blobs_lr: 2 + weight_decay: 1 + weight_decay: 0 + convolution_param { num_output: 256 - group: 2 - kernelsize: 3 pad: 1 + kernel_size: 3 + group: 2 weight_filler { type: "gaussian" std: 0.01 } bias_filler { type: "constant" - value: 1. + value: 1 } - blobs_lr: 1. - blobs_lr: 2. - weight_decay: 1. - weight_decay: 0. } - bottom: "conv4" - top: "conv5" } layers { - layer { - name: "relu5" - type: "relu" - } + name: "relu5" + type: RELU bottom: "conv5" top: "conv5" } layers { - layer { - name: "pool5" - type: "pool" - kernelsize: 3 + name: "pool5" + type: POOLING + bottom: "conv5" + top: "pool5" + pooling_param { pool: MAX + kernel_size: 3 stride: 2 } - bottom: "conv5" - top: "pool5" } layers { - layer { - name: "fc6" - type: "innerproduct" + name: "fc6" + type: INNER_PRODUCT + bottom: "pool5" + top: "fc6" + blobs_lr: 1 + blobs_lr: 2 + weight_decay: 1 + weight_decay: 0 + inner_product_param { num_output: 4096 weight_filler { type: "gaussian" @@ -228,37 +224,35 @@ layers { } bias_filler { type: "constant" - value: 1. + value: 1 } - blobs_lr: 1. - blobs_lr: 2. - weight_decay: 1. - weight_decay: 0. 
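+    # fc6 flattens the pool5 feature maps and projects them to a 4096-dimensional vector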
} - bottom: "pool5" - top: "fc6" } layers { - layer { - name: "relu6" - type: "relu" - } + name: "relu6" + type: RELU bottom: "fc6" top: "fc6" } layers { - layer { - name: "drop6" - type: "dropout" - dropout_ratio: 0.5 - } + name: "drop6" + type: DROPOUT bottom: "fc6" top: "fc6" + dropout_param { + dropout_ratio: 0.5 + } } layers { - layer { - name: "fc7" - type: "innerproduct" + name: "fc7" + type: INNER_PRODUCT + bottom: "fc6" + top: "fc7" + blobs_lr: 1 + blobs_lr: 2 + weight_decay: 1 + weight_decay: 0 + inner_product_param { num_output: 4096 weight_filler { type: "gaussian" @@ -266,37 +260,35 @@ layers { } bias_filler { type: "constant" - value: 1. + value: 1 } - blobs_lr: 1. - blobs_lr: 2. - weight_decay: 1. - weight_decay: 0. } - bottom: "fc6" - top: "fc7" } layers { - layer { - name: "relu7" - type: "relu" - } + name: "relu7" + type: RELU bottom: "fc7" top: "fc7" } layers { - layer { - name: "drop7" - type: "dropout" - dropout_ratio: 0.5 - } + name: "drop7" + type: DROPOUT bottom: "fc7" top: "fc7" + dropout_param { + dropout_ratio: 0.5 + } } layers { - layer { - name: "fc8" - type: "innerproduct" + name: "fc8" + type: INNER_PRODUCT + bottom: "fc7" + top: "fc8" + blobs_lr: 1 + blobs_lr: 2 + weight_decay: 1 + weight_decay: 0 + inner_product_param { num_output: 1000 weight_filler { type: "gaussian" @@ -306,19 +298,11 @@ layers { type: "constant" value: 0 } - blobs_lr: 1. - blobs_lr: 2. - weight_decay: 1. - weight_decay: 0. } - bottom: "fc7" - top: "fc8" } layers { - layer { - name: "prob" - type: "softmax" - } + name: "prob" + type: SOFTMAX bottom: "fc8" top: "prob" } diff --git a/examples/imagenet/imagenet_train.prototxt b/examples/imagenet/imagenet_train.prototxt index 9764687c35f..b34a9b49b07 100644 --- a/examples/imagenet/imagenet_train.prototxt +++ b/examples/imagenet/imagenet_train.prototxt @@ -1,23 +1,29 @@ name: "CaffeNet" layers { - layer { - name: "data" - type: "data" + name: "data" + type: DATA + top: "data" + top: "label" + data_param { source: "ilvsrc12_train_leveldb" - meanfile: "../../data/ilsvrc12/imagenet_mean.binaryproto" - batchsize: 256 - cropsize: 227 + mean_file: "../../data/ilsvrc12/imagenet_mean.binaryproto" + batch_size: 256 + crop_size: 227 mirror: true } - top: "data" - top: "label" } layers { - layer { - name: "conv1" - type: "conv" + name: "conv1" + type: CONVOLUTION + bottom: "data" + top: "conv1" + blobs_lr: 1 + blobs_lr: 2 + weight_decay: 1 + weight_decay: 0 + convolution_param { num_output: 96 - kernelsize: 11 + kernel_size: 11 stride: 4 weight_filler { type: "gaussian" @@ -25,210 +31,200 @@ layers { } bias_filler { type: "constant" - value: 0. + value: 0 } - blobs_lr: 1. - blobs_lr: 2. - weight_decay: 1. - weight_decay: 0. 
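+    # conv1 applies 96 filters of size 11x11 at stride 4 to the 227x227 input crops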
} - bottom: "data" - top: "conv1" } layers { - layer { - name: "relu1" - type: "relu" - } + name: "relu1" + type: RELU bottom: "conv1" top: "conv1" } layers { - layer { - name: "pool1" - type: "pool" + name: "pool1" + type: POOLING + bottom: "conv1" + top: "pool1" + pooling_param { pool: MAX - kernelsize: 3 + kernel_size: 3 stride: 2 } - bottom: "conv1" - top: "pool1" } layers { - layer { - name: "norm1" - type: "lrn" + name: "norm1" + type: LRN + bottom: "pool1" + top: "norm1" + lrn_param { local_size: 5 alpha: 0.0001 beta: 0.75 } - bottom: "pool1" - top: "norm1" } layers { - layer { - name: "conv2" - type: "conv" + name: "conv2" + type: CONVOLUTION + bottom: "norm1" + top: "conv2" + blobs_lr: 1 + blobs_lr: 2 + weight_decay: 1 + weight_decay: 0 + convolution_param { num_output: 256 - group: 2 - kernelsize: 5 pad: 2 + kernel_size: 5 + group: 2 weight_filler { type: "gaussian" std: 0.01 } bias_filler { type: "constant" - value: 1. + value: 1 } - blobs_lr: 1. - blobs_lr: 2. - weight_decay: 1. - weight_decay: 0. } - bottom: "norm1" - top: "conv2" } layers { - layer { - name: "relu2" - type: "relu" - } + name: "relu2" + type: RELU bottom: "conv2" top: "conv2" } layers { - layer { - name: "pool2" - type: "pool" + name: "pool2" + type: POOLING + bottom: "conv2" + top: "pool2" + pooling_param { pool: MAX - kernelsize: 3 + kernel_size: 3 stride: 2 } - bottom: "conv2" - top: "pool2" } layers { - layer { - name: "norm2" - type: "lrn" + name: "norm2" + type: LRN + bottom: "pool2" + top: "norm2" + lrn_param { local_size: 5 alpha: 0.0001 beta: 0.75 } - bottom: "pool2" - top: "norm2" } layers { - layer { - name: "conv3" - type: "conv" + name: "conv3" + type: CONVOLUTION + bottom: "norm2" + top: "conv3" + blobs_lr: 1 + blobs_lr: 2 + weight_decay: 1 + weight_decay: 0 + convolution_param { num_output: 384 - kernelsize: 3 pad: 1 + kernel_size: 3 weight_filler { type: "gaussian" std: 0.01 } bias_filler { type: "constant" - value: 0. + value: 0 } - blobs_lr: 1. - blobs_lr: 2. - weight_decay: 1. - weight_decay: 0. } - bottom: "norm2" - top: "conv3" } layers { - layer { - name: "relu3" - type: "relu" - } + name: "relu3" + type: RELU bottom: "conv3" top: "conv3" } layers { - layer { - name: "conv4" - type: "conv" + name: "conv4" + type: CONVOLUTION + bottom: "conv3" + top: "conv4" + blobs_lr: 1 + blobs_lr: 2 + weight_decay: 1 + weight_decay: 0 + convolution_param { num_output: 384 - group: 2 - kernelsize: 3 pad: 1 + kernel_size: 3 + group: 2 weight_filler { type: "gaussian" std: 0.01 } bias_filler { type: "constant" - value: 1. + value: 1 } - blobs_lr: 1. - blobs_lr: 2. - weight_decay: 1. - weight_decay: 0. } - bottom: "conv3" - top: "conv4" } layers { - layer { - name: "relu4" - type: "relu" - } + name: "relu4" + type: RELU bottom: "conv4" top: "conv4" } layers { - layer { - name: "conv5" - type: "conv" + name: "conv5" + type: CONVOLUTION + bottom: "conv4" + top: "conv5" + blobs_lr: 1 + blobs_lr: 2 + weight_decay: 1 + weight_decay: 0 + convolution_param { num_output: 256 - group: 2 - kernelsize: 3 pad: 1 + kernel_size: 3 + group: 2 weight_filler { type: "gaussian" std: 0.01 } bias_filler { type: "constant" - value: 1. + value: 1 } - blobs_lr: 1. - blobs_lr: 2. - weight_decay: 1. - weight_decay: 0. 
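+    # the conv5 biases start at 1 so the following ReLU sees positive inputs early in training, as in Krizhevsky et al.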
} - bottom: "conv4" - top: "conv5" } layers { - layer { - name: "relu5" - type: "relu" - } + name: "relu5" + type: RELU bottom: "conv5" top: "conv5" } layers { - layer { - name: "pool5" - type: "pool" - kernelsize: 3 + name: "pool5" + type: POOLING + bottom: "conv5" + top: "pool5" + pooling_param { pool: MAX + kernel_size: 3 stride: 2 } - bottom: "conv5" - top: "pool5" } layers { - layer { - name: "fc6" - type: "innerproduct" + name: "fc6" + type: INNER_PRODUCT + bottom: "pool5" + top: "fc6" + blobs_lr: 1 + blobs_lr: 2 + weight_decay: 1 + weight_decay: 0 + inner_product_param { num_output: 4096 weight_filler { type: "gaussian" @@ -236,37 +232,35 @@ layers { } bias_filler { type: "constant" - value: 1. + value: 1 } - blobs_lr: 1. - blobs_lr: 2. - weight_decay: 1. - weight_decay: 0. } - bottom: "pool5" - top: "fc6" } layers { - layer { - name: "relu6" - type: "relu" - } + name: "relu6" + type: RELU bottom: "fc6" top: "fc6" } layers { - layer { - name: "drop6" - type: "dropout" - dropout_ratio: 0.5 - } + name: "drop6" + type: DROPOUT bottom: "fc6" top: "fc6" + dropout_param { + dropout_ratio: 0.5 + } } layers { - layer { - name: "fc7" - type: "innerproduct" + name: "fc7" + type: INNER_PRODUCT + bottom: "fc6" + top: "fc7" + blobs_lr: 1 + blobs_lr: 2 + weight_decay: 1 + weight_decay: 0 + inner_product_param { num_output: 4096 weight_filler { type: "gaussian" @@ -274,37 +268,35 @@ layers { } bias_filler { type: "constant" - value: 1. + value: 1 } - blobs_lr: 1. - blobs_lr: 2. - weight_decay: 1. - weight_decay: 0. } - bottom: "fc6" - top: "fc7" } layers { - layer { - name: "relu7" - type: "relu" - } + name: "relu7" + type: RELU bottom: "fc7" top: "fc7" } layers { - layer { - name: "drop7" - type: "dropout" - dropout_ratio: 0.5 - } + name: "drop7" + type: DROPOUT bottom: "fc7" top: "fc7" + dropout_param { + dropout_ratio: 0.5 + } } layers { - layer { - name: "fc8" - type: "innerproduct" + name: "fc8" + type: INNER_PRODUCT + bottom: "fc7" + top: "fc8" + blobs_lr: 1 + blobs_lr: 2 + weight_decay: 1 + weight_decay: 0 + inner_product_param { num_output: 1000 weight_filler { type: "gaussian" @@ -314,19 +306,11 @@ layers { type: "constant" value: 0 } - blobs_lr: 1. - blobs_lr: 2. - weight_decay: 1. - weight_decay: 0. 
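+    # fc8 produces one score per ILSVRC-2012 class (num_output: 1000); the SOFTMAX_LOSS layer below compares these scores with the labels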
} - bottom: "fc7" - top: "fc8" } layers { - layer { - name: "loss" - type: "softmax_loss" - } + name: "loss" + type: SOFTMAX_LOSS bottom: "fc8" bottom: "label" } diff --git a/examples/imagenet/imagenet_val.prototxt b/examples/imagenet/imagenet_val.prototxt index a004b74f626..2f1ead7c14a 100644 --- a/examples/imagenet/imagenet_val.prototxt +++ b/examples/imagenet/imagenet_val.prototxt @@ -1,244 +1,226 @@ name: "CaffeNet" layers { - layer { - name: "data" - type: "data" + name: "data" + type: DATA + top: "data" + top: "label" + data_param { source: "ilvsrc12_val_leveldb" - meanfile: "../../data/ilsvrc12/imagenet_mean.binaryproto" - batchsize: 50 - cropsize: 227 + mean_file: "../../data/ilsvrc12/imagenet_mean.binaryproto" + batch_size: 50 + crop_size: 227 mirror: false } - top: "data" - top: "label" } layers { - layer { - name: "conv1" - type: "conv" + name: "conv1" + type: CONVOLUTION + bottom: "data" + top: "conv1" + convolution_param { num_output: 96 - kernelsize: 11 + kernel_size: 11 stride: 4 } - bottom: "data" - top: "conv1" } layers { - layer { - name: "relu1" - type: "relu" - } + name: "relu1" + type: RELU bottom: "conv1" top: "conv1" } layers { - layer { - name: "pool1" - type: "pool" + name: "pool1" + type: POOLING + bottom: "conv1" + top: "pool1" + pooling_param { pool: MAX - kernelsize: 3 + kernel_size: 3 stride: 2 } - bottom: "conv1" - top: "pool1" } layers { - layer { - name: "norm1" - type: "lrn" + name: "norm1" + type: LRN + bottom: "pool1" + top: "norm1" + lrn_param { local_size: 5 alpha: 0.0001 beta: 0.75 } - bottom: "pool1" - top: "norm1" } layers { - layer { - name: "conv2" - type: "conv" + name: "conv2" + type: CONVOLUTION + bottom: "norm1" + top: "conv2" + convolution_param { num_output: 256 - group: 2 - kernelsize: 5 pad: 2 + kernel_size: 5 + group: 2 } - bottom: "norm1" - top: "conv2" } layers { - layer { - name: "relu2" - type: "relu" - } + name: "relu2" + type: RELU bottom: "conv2" top: "conv2" } layers { - layer { - name: "pool2" - type: "pool" + name: "pool2" + type: POOLING + bottom: "conv2" + top: "pool2" + pooling_param { pool: MAX - kernelsize: 3 + kernel_size: 3 stride: 2 } - bottom: "conv2" - top: "pool2" } layers { - layer { - name: "norm2" - type: "lrn" + name: "norm2" + type: LRN + bottom: "pool2" + top: "norm2" + lrn_param { local_size: 5 alpha: 0.0001 beta: 0.75 } - bottom: "pool2" - top: "norm2" } layers { - layer { - name: "conv3" - type: "conv" + name: "conv3" + type: CONVOLUTION + bottom: "norm2" + top: "conv3" + convolution_param { num_output: 384 - kernelsize: 3 pad: 1 + kernel_size: 3 } - bottom: "norm2" - top: "conv3" } layers { - layer { - name: "relu3" - type: "relu" - } + name: "relu3" + type: RELU bottom: "conv3" top: "conv3" } layers { - layer { - name: "conv4" - type: "conv" + name: "conv4" + type: CONVOLUTION + bottom: "conv3" + top: "conv4" + convolution_param { num_output: 384 - group: 2 - kernelsize: 3 pad: 1 + kernel_size: 3 + group: 2 } - bottom: "conv3" - top: "conv4" } layers { - layer { - name: "relu4" - type: "relu" - } + name: "relu4" + type: RELU bottom: "conv4" top: "conv4" } layers { - layer { - name: "conv5" - type: "conv" + name: "conv5" + type: CONVOLUTION + bottom: "conv4" + top: "conv5" + convolution_param { num_output: 256 - group: 2 - kernelsize: 3 pad: 1 + kernel_size: 3 + group: 2 } - bottom: "conv4" - top: "conv5" } layers { - layer { - name: "relu5" - type: "relu" - } + name: "relu5" + type: RELU bottom: "conv5" top: "conv5" } layers { - layer { - name: "pool5" - type: "pool" - kernelsize: 3 + name: "pool5" + type: 
POOLING + bottom: "conv5" + top: "pool5" + pooling_param { pool: MAX + kernel_size: 3 stride: 2 } - bottom: "conv5" - top: "pool5" } layers { - layer { - name: "fc6" - type: "innerproduct" - num_output: 4096 - } + name: "fc6" + type: INNER_PRODUCT bottom: "pool5" top: "fc6" + inner_product_param { + num_output: 4096 + } } layers { - layer { - name: "relu6" - type: "relu" - } + name: "relu6" + type: RELU bottom: "fc6" top: "fc6" } layers { - layer { - name: "drop6" - type: "dropout" - dropout_ratio: 0.5 - } + name: "drop6" + type: DROPOUT bottom: "fc6" top: "fc6" + dropout_param { + dropout_ratio: 0.5 + } } layers { - layer { - name: "fc7" - type: "innerproduct" - num_output: 4096 - } + name: "fc7" + type: INNER_PRODUCT bottom: "fc6" top: "fc7" + inner_product_param { + num_output: 4096 + } } layers { - layer { - name: "relu7" - type: "relu" - } + name: "relu7" + type: RELU bottom: "fc7" top: "fc7" } layers { - layer { - name: "drop7" - type: "dropout" - dropout_ratio: 0.5 - } + name: "drop7" + type: DROPOUT bottom: "fc7" top: "fc7" + dropout_param { + dropout_ratio: 0.5 + } } layers { - layer { - name: "fc8" - type: "innerproduct" - num_output: 1000 - } + name: "fc8" + type: INNER_PRODUCT bottom: "fc7" top: "fc8" + inner_product_param { + num_output: 1000 + } } layers { - layer { - name: "prob" - type: "softmax" - } + name: "prob" + type: SOFTMAX bottom: "fc8" top: "prob" } layers { - layer { - name: "accuracy" - type: "accuracy" - } + name: "accuracy" + type: ACCURACY bottom: "prob" bottom: "label" top: "accuracy" diff --git a/examples/lenet/lenet.prototxt b/examples/lenet/lenet.prototxt index 4c49745e809..491fad1b1c0 100644 --- a/examples/lenet/lenet.prototxt +++ b/examples/lenet/lenet.prototxt @@ -4,13 +4,16 @@ input_dim: 64 input_dim: 1 input_dim: 28 input_dim: 28 -# N.B. input should be 0/1 = mnist raw data scaled by 0.00390625 layers { - layer { - name: "conv1" - type: "conv" + name: "conv1" + type: CONVOLUTION + bottom: "data" + top: "conv1" + blobs_lr: 1 + blobs_lr: 2 + convolution_param { num_output: 20 - kernelsize: 5 + kernel_size: 5 stride: 1 weight_filler { type: "xavier" @@ -18,29 +21,29 @@ layers { bias_filler { type: "constant" } - blobs_lr: 1. - blobs_lr: 2. } - bottom: "data" - top: "conv1" } layers { - layer { - name: "pool1" - type: "pool" - kernelsize: 2 - stride: 2 - pool: MAX - } + name: "pool1" + type: POOLING bottom: "conv1" top: "pool1" + pooling_param { + pool: MAX + kernel_size: 2 + stride: 2 + } } layers { - layer { - name: "conv2" - type: "conv" + name: "conv2" + type: CONVOLUTION + bottom: "pool1" + top: "conv2" + blobs_lr: 1 + blobs_lr: 2 + convolution_param { num_output: 50 - kernelsize: 5 + kernel_size: 5 stride: 1 weight_filler { type: "xavier" @@ -48,27 +51,27 @@ layers { bias_filler { type: "constant" } - blobs_lr: 1. - blobs_lr: 2. } - bottom: "pool1" - top: "conv2" } layers { - layer { - name: "pool2" - type: "pool" - kernelsize: 2 - stride: 2 - pool: MAX - } + name: "pool2" + type: POOLING bottom: "conv2" top: "pool2" + pooling_param { + pool: MAX + kernel_size: 2 + stride: 2 + } } layers { - layer { - name: "ip1" - type: "innerproduct" + name: "ip1" + type: INNER_PRODUCT + bottom: "pool2" + top: "ip1" + blobs_lr: 1 + blobs_lr: 2 + inner_product_param { num_output: 500 weight_filler { type: "xavier" @@ -76,24 +79,22 @@ layers { bias_filler { type: "constant" } - blobs_lr: 1. - blobs_lr: 2. 
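+    # the "xavier" filler sets the initial weight scale from the layer's fan-in; the constant filler leaves the biases at 0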
} - bottom: "pool2" - top: "ip1" } layers { - layer { - name: "relu1" - type: "relu" - } + name: "relu1" + type: RELU bottom: "ip1" top: "ip1" } layers { - layer { - name: "ip2" - type: "innerproduct" + name: "ip2" + type: INNER_PRODUCT + bottom: "ip1" + top: "ip2" + blobs_lr: 1 + blobs_lr: 2 + inner_product_param { num_output: 10 weight_filler { type: "xavier" @@ -101,17 +102,11 @@ layers { bias_filler { type: "constant" } - blobs_lr: 1. - blobs_lr: 2. } - bottom: "ip1" - top: "ip2" } layers { - layer { - name: "prob" - type: "softmax" - } + name: "prob" + type: SOFTMAX bottom: "ip2" top: "prob" } diff --git a/examples/lenet/lenet_test.prototxt b/examples/lenet/lenet_test.prototxt index 676a2a6ab7d..3b59b75513d 100644 --- a/examples/lenet/lenet_test.prototxt +++ b/examples/lenet/lenet_test.prototxt @@ -1,21 +1,23 @@ name: "LeNet-test" layers { - layer { - name: "mnist" - type: "data" + name: "mnist" + type: DATA + top: "data" + top: "label" + data_param { source: "mnist-test-leveldb" - batchsize: 100 scale: 0.00390625 + batch_size: 100 } - top: "data" - top: "label" } layers { - layer { - name: "conv1" - type: "conv" + name: "conv1" + type: CONVOLUTION + bottom: "data" + top: "conv1" + convolution_param { num_output: 20 - kernelsize: 5 + kernel_size: 5 stride: 1 weight_filler { type: "xavier" @@ -24,26 +26,26 @@ layers { type: "constant" } } - bottom: "data" - top: "conv1" } layers { - layer { - name: "pool1" - type: "pool" - kernelsize: 2 - stride: 2 - pool: MAX - } + name: "pool1" + type: POOLING bottom: "conv1" top: "pool1" + pooling_param { + pool: MAX + kernel_size: 2 + stride: 2 + } } layers { - layer { - name: "conv2" - type: "conv" + name: "conv2" + type: CONVOLUTION + bottom: "pool1" + top: "conv2" + convolution_param { num_output: 50 - kernelsize: 5 + kernel_size: 5 stride: 1 weight_filler { type: "xavier" @@ -52,24 +54,24 @@ layers { type: "constant" } } - bottom: "pool1" - top: "conv2" } layers { - layer { - name: "pool2" - type: "pool" - kernelsize: 2 - stride: 2 - pool: MAX - } + name: "pool2" + type: POOLING bottom: "conv2" top: "pool2" + pooling_param { + pool: MAX + kernel_size: 2 + stride: 2 + } } layers { - layer { - name: "ip1" - type: "innerproduct" + name: "ip1" + type: INNER_PRODUCT + bottom: "pool2" + top: "ip1" + inner_product_param { num_output: 500 weight_filler { type: "xavier" @@ -78,21 +80,19 @@ layers { type: "constant" } } - bottom: "pool2" - top: "ip1" } layers { - layer { - name: "relu1" - type: "relu" - } + name: "relu1" + type: RELU bottom: "ip1" top: "ip1" } layers { - layer { - name: "ip2" - type: "innerproduct" + name: "ip2" + type: INNER_PRODUCT + bottom: "ip1" + top: "ip2" + inner_product_param { num_output: 10 weight_filler { type: "xavier" @@ -101,22 +101,16 @@ layers { type: "constant" } } - bottom: "ip1" - top: "ip2" } layers { - layer { - name: "prob" - type: "softmax" - } + name: "prob" + type: SOFTMAX bottom: "ip2" top: "prob" } layers { - layer { - name: "accuracy" - type: "accuracy" - } + name: "accuracy" + type: ACCURACY bottom: "prob" bottom: "label" top: "accuracy" diff --git a/examples/lenet/lenet_train.prototxt b/examples/lenet/lenet_train.prototxt index f5877ae4804..e8a1e74e40b 100644 --- a/examples/lenet/lenet_train.prototxt +++ b/examples/lenet/lenet_train.prototxt @@ -1,21 +1,25 @@ name: "LeNet" layers { - layer { - name: "mnist" - type: "data" + name: "mnist" + type: DATA + top: "data" + top: "label" + data_param { source: "mnist-train-leveldb" - batchsize: 64 scale: 0.00390625 + batch_size: 64 } - top: "data" - top: "label" } 
layers { - layer { - name: "conv1" - type: "conv" + name: "conv1" + type: CONVOLUTION + bottom: "data" + top: "conv1" + blobs_lr: 1 + blobs_lr: 2 + convolution_param { num_output: 20 - kernelsize: 5 + kernel_size: 5 stride: 1 weight_filler { type: "xavier" @@ -23,29 +27,29 @@ layers { bias_filler { type: "constant" } - blobs_lr: 1. - blobs_lr: 2. } - bottom: "data" - top: "conv1" } layers { - layer { - name: "pool1" - type: "pool" - kernelsize: 2 - stride: 2 - pool: MAX - } + name: "pool1" + type: POOLING bottom: "conv1" top: "pool1" + pooling_param { + pool: MAX + kernel_size: 2 + stride: 2 + } } layers { - layer { - name: "conv2" - type: "conv" + name: "conv2" + type: CONVOLUTION + bottom: "pool1" + top: "conv2" + blobs_lr: 1 + blobs_lr: 2 + convolution_param { num_output: 50 - kernelsize: 5 + kernel_size: 5 stride: 1 weight_filler { type: "xavier" @@ -53,27 +57,27 @@ layers { bias_filler { type: "constant" } - blobs_lr: 1. - blobs_lr: 2. } - bottom: "pool1" - top: "conv2" } layers { - layer { - name: "pool2" - type: "pool" - kernelsize: 2 - stride: 2 - pool: MAX - } + name: "pool2" + type: POOLING bottom: "conv2" top: "pool2" + pooling_param { + pool: MAX + kernel_size: 2 + stride: 2 + } } layers { - layer { - name: "ip1" - type: "innerproduct" + name: "ip1" + type: INNER_PRODUCT + bottom: "pool2" + top: "ip1" + blobs_lr: 1 + blobs_lr: 2 + inner_product_param { num_output: 500 weight_filler { type: "xavier" @@ -81,24 +85,22 @@ layers { bias_filler { type: "constant" } - blobs_lr: 1. - blobs_lr: 2. } - bottom: "pool2" - top: "ip1" } layers { - layer { - name: "relu1" - type: "relu" - } + name: "relu1" + type: RELU bottom: "ip1" top: "ip1" } layers { - layer { - name: "ip2" - type: "innerproduct" + name: "ip2" + type: INNER_PRODUCT + bottom: "ip1" + top: "ip2" + blobs_lr: 1 + blobs_lr: 2 + inner_product_param { num_output: 10 weight_filler { type: "xavier" @@ -106,17 +108,11 @@ layers { bias_filler { type: "constant" } - blobs_lr: 1. - blobs_lr: 2. } - bottom: "ip1" - top: "ip2" } layers { - layer { - name: "loss" - type: "softmax_loss" - } + name: "loss" + type: SOFTMAX_LOSS bottom: "ip2" bottom: "label" } diff --git a/examples/pascal-finetuning/pascal_finetune_train.prototxt b/examples/pascal-finetuning/pascal_finetune_train.prototxt index ac847813454..dfc60fe4b8a 100644 --- a/examples/pascal-finetuning/pascal_finetune_train.prototxt +++ b/examples/pascal-finetuning/pascal_finetune_train.prototxt @@ -1,28 +1,34 @@ name: "CaffeNet" layers { - layer { - name: "data" - type: "window_data" + name: "data" + type: WINDOW_DATA + top: "data" + top: "label" + window_data_param { source: "window_file_2007_trainval.txt" - meanfile: "../../data/ilsvrc12/imagenet_mean.binaryproto" - batchsize: 128 - cropsize: 227 + mean_file: "../../data/ilsvrc12/imagenet_mean.binaryproto" + batch_size: 128 + crop_size: 227 mirror: true - det_context_pad: 16 - det_crop_mode: "warp" - det_fg_threshold: 0.5 - det_bg_threshold: 0.5 - det_fg_fraction: 0.25 + fg_threshold: 0.5 + bg_threshold: 0.5 + fg_fraction: 0.25 + context_pad: 16 + crop_mode: "warp" } - top: "data" - top: "label" } layers { - layer { - name: "conv1" - type: "conv" + name: "conv1" + type: CONVOLUTION + bottom: "data" + top: "conv1" + blobs_lr: 1 + blobs_lr: 2 + weight_decay: 1 + weight_decay: 0 + convolution_param { num_output: 96 - kernelsize: 11 + kernel_size: 11 stride: 4 weight_filler { type: "gaussian" @@ -30,242 +36,200 @@ layers { } bias_filler { type: "constant" - value: 0. + value: 0 } - blobs_lr: 1. - blobs_lr: 2. - weight_decay: 1. 
- weight_decay: 0. } - bottom: "data" - top: "conv1" } layers { - layer { - name: "relu1" - type: "relu" - } + name: "relu1" + type: RELU bottom: "conv1" top: "conv1" } layers { - layer { - name: "pool1" - type: "pool" + name: "pool1" + type: POOLING + bottom: "conv1" + top: "pool1" + pooling_param { pool: MAX - kernelsize: 3 + kernel_size: 3 stride: 2 } - bottom: "conv1" - top: "pool1" } layers { - layer { - name: "norm1" - type: "lrn" + name: "norm1" + type: LRN + bottom: "pool1" + top: "norm1" + lrn_param { local_size: 5 alpha: 0.0001 beta: 0.75 } - bottom: "pool1" - top: "norm1" } layers { - layer { - name: "pad2" - type: "padding" - pad: 2 - } + name: "conv2" + type: CONVOLUTION bottom: "norm1" - top: "pad2" -} -layers { - layer { - name: "conv2" - type: "conv" + top: "conv2" + blobs_lr: 1 + blobs_lr: 2 + weight_decay: 1 + weight_decay: 0 + convolution_param { num_output: 256 + pad: 2 + kernel_size: 5 group: 2 - kernelsize: 5 weight_filler { type: "gaussian" std: 0.01 } bias_filler { type: "constant" - value: 1. + value: 1 } - blobs_lr: 1. - blobs_lr: 2. - weight_decay: 1. - weight_decay: 0. } - bottom: "pad2" - top: "conv2" } layers { - layer { - name: "relu2" - type: "relu" - } + name: "relu2" + type: RELU bottom: "conv2" top: "conv2" } layers { - layer { - name: "pool2" - type: "pool" + name: "pool2" + type: POOLING + bottom: "conv2" + top: "pool2" + pooling_param { pool: MAX - kernelsize: 3 + kernel_size: 3 stride: 2 } - bottom: "conv2" - top: "pool2" } layers { - layer { - name: "norm2" - type: "lrn" + name: "norm2" + type: LRN + bottom: "pool2" + top: "norm2" + lrn_param { local_size: 5 alpha: 0.0001 beta: 0.75 } - bottom: "pool2" - top: "norm2" } layers { - layer { - name: "pad3" - type: "padding" - pad: 1 - } + name: "conv3" + type: CONVOLUTION bottom: "norm2" - top: "pad3" -} -layers { - layer { - name: "conv3" - type: "conv" + top: "conv3" + blobs_lr: 1 + blobs_lr: 2 + weight_decay: 1 + weight_decay: 0 + convolution_param { num_output: 384 - kernelsize: 3 + pad: 1 + kernel_size: 3 weight_filler { type: "gaussian" std: 0.01 } bias_filler { type: "constant" - value: 0. + value: 0 } - blobs_lr: 1. - blobs_lr: 2. - weight_decay: 1. - weight_decay: 0. } - bottom: "pad3" - top: "conv3" } layers { - layer { - name: "relu3" - type: "relu" - } + name: "relu3" + type: RELU bottom: "conv3" top: "conv3" } layers { - layer { - name: "pad4" - type: "padding" - pad: 1 - } + name: "conv4" + type: CONVOLUTION bottom: "conv3" - top: "pad4" -} -layers { - layer { - name: "conv4" - type: "conv" + top: "conv4" + blobs_lr: 1 + blobs_lr: 2 + weight_decay: 1 + weight_decay: 0 + convolution_param { num_output: 384 + pad: 1 + kernel_size: 3 group: 2 - kernelsize: 3 weight_filler { type: "gaussian" std: 0.01 } bias_filler { type: "constant" - value: 1. + value: 1 } - blobs_lr: 1. - blobs_lr: 2. - weight_decay: 1. - weight_decay: 0. } - bottom: "pad4" - top: "conv4" } layers { - layer { - name: "relu4" - type: "relu" - } + name: "relu4" + type: RELU bottom: "conv4" top: "conv4" } layers { - layer { - name: "pad5" - type: "padding" - pad: 1 - } + name: "conv5" + type: CONVOLUTION bottom: "conv4" - top: "pad5" -} -layers { - layer { - name: "conv5" - type: "conv" + top: "conv5" + blobs_lr: 1 + blobs_lr: 2 + weight_decay: 1 + weight_decay: 0 + convolution_param { num_output: 256 + pad: 1 + kernel_size: 3 group: 2 - kernelsize: 3 weight_filler { type: "gaussian" std: 0.01 } bias_filler { type: "constant" - value: 1. + value: 1 } - blobs_lr: 1. - blobs_lr: 2. - weight_decay: 1. - weight_decay: 0. 
} - bottom: "pad5" - top: "conv5" } layers { - layer { - name: "relu5" - type: "relu" - } + name: "relu5" + type: RELU bottom: "conv5" top: "conv5" } layers { - layer { - name: "pool5" - type: "pool" - kernelsize: 3 + name: "pool5" + type: POOLING + bottom: "conv5" + top: "pool5" + pooling_param { pool: MAX + kernel_size: 3 stride: 2 } - bottom: "conv5" - top: "pool5" } layers { - layer { - name: "fc6" - type: "innerproduct" + name: "fc6" + type: INNER_PRODUCT + bottom: "pool5" + top: "fc6" + blobs_lr: 1 + blobs_lr: 2 + weight_decay: 1 + weight_decay: 0 + inner_product_param { num_output: 4096 weight_filler { type: "gaussian" @@ -273,37 +237,35 @@ layers { } bias_filler { type: "constant" - value: 1. + value: 1 } - blobs_lr: 1. - blobs_lr: 2. - weight_decay: 1. - weight_decay: 0. } - bottom: "pool5" - top: "fc6" } layers { - layer { - name: "relu6" - type: "relu" - } + name: "relu6" + type: RELU bottom: "fc6" top: "fc6" } layers { - layer { - name: "drop6" - type: "dropout" - dropout_ratio: 0.5 - } + name: "drop6" + type: DROPOUT bottom: "fc6" top: "fc6" + dropout_param { + dropout_ratio: 0.5 + } } layers { - layer { - name: "fc7" - type: "innerproduct" + name: "fc7" + type: INNER_PRODUCT + bottom: "fc6" + top: "fc7" + blobs_lr: 1 + blobs_lr: 2 + weight_decay: 1 + weight_decay: 0 + inner_product_param { num_output: 4096 weight_filler { type: "gaussian" @@ -311,37 +273,35 @@ layers { } bias_filler { type: "constant" - value: 1. + value: 1 } - blobs_lr: 1. - blobs_lr: 2. - weight_decay: 1. - weight_decay: 0. } - bottom: "fc6" - top: "fc7" } layers { - layer { - name: "relu7" - type: "relu" - } + name: "relu7" + type: RELU bottom: "fc7" top: "fc7" } layers { - layer { - name: "drop7" - type: "dropout" - dropout_ratio: 0.5 - } + name: "drop7" + type: DROPOUT bottom: "fc7" top: "fc7" + dropout_param { + dropout_ratio: 0.5 + } } layers { - layer { - name: "fc8_pascal" - type: "innerproduct" + name: "fc8_pascal" + type: INNER_PRODUCT + bottom: "fc7" + top: "fc8_pascal" + blobs_lr: 10 + blobs_lr: 20 + weight_decay: 1 + weight_decay: 0 + inner_product_param { num_output: 21 weight_filler { type: "gaussian" @@ -351,19 +311,11 @@ layers { type: "constant" value: 0 } - blobs_lr: 10. - blobs_lr: 20. - weight_decay: 1. - weight_decay: 0. 
} - bottom: "fc7" - top: "fc8_pascal" } layers { - layer { - name: "loss" - type: "softmax_loss" - } + name: "loss" + type: SOFTMAX_LOSS bottom: "fc8_pascal" bottom: "label" } diff --git a/examples/pascal-finetuning/pascal_finetune_val.prototxt b/examples/pascal-finetuning/pascal_finetune_val.prototxt index a11033ad1e2..ff898fe7376 100644 --- a/examples/pascal-finetuning/pascal_finetune_val.prototxt +++ b/examples/pascal-finetuning/pascal_finetune_val.prototxt @@ -1,28 +1,34 @@ name: "CaffeNet" layers { - layer { - name: "data" - type: "window_data" + name: "data" + type: WINDOW_DATA + top: "data" + top: "label" + window_data_param { source: "window_file_2007_test.txt" - meanfile: "../../data/ilsvrc12/imagenet_mean.binaryproto" - batchsize: 128 - cropsize: 227 + mean_file: "../../data/ilsvrc12/imagenet_mean.binaryproto" + batch_size: 128 + crop_size: 227 mirror: true - det_context_pad: 16 - det_crop_mode: "warp" - det_fg_threshold: 0.5 - det_bg_threshold: 0.5 - det_fg_fraction: 0.25 + fg_threshold: 0.5 + bg_threshold: 0.5 + fg_fraction: 0.25 + context_pad: 16 + crop_mode: "warp" } - top: "data" - top: "label" } layers { - layer { - name: "conv1" - type: "conv" + name: "conv1" + type: CONVOLUTION + bottom: "data" + top: "conv1" + blobs_lr: 1 + blobs_lr: 2 + weight_decay: 1 + weight_decay: 0 + convolution_param { num_output: 96 - kernelsize: 11 + kernel_size: 11 stride: 4 weight_filler { type: "gaussian" @@ -30,242 +36,200 @@ layers { } bias_filler { type: "constant" - value: 0. + value: 0 } - blobs_lr: 1. - blobs_lr: 2. - weight_decay: 1. - weight_decay: 0. } - bottom: "data" - top: "conv1" } layers { - layer { - name: "relu1" - type: "relu" - } + name: "relu1" + type: RELU bottom: "conv1" top: "conv1" } layers { - layer { - name: "pool1" - type: "pool" + name: "pool1" + type: POOLING + bottom: "conv1" + top: "pool1" + pooling_param { pool: MAX - kernelsize: 3 + kernel_size: 3 stride: 2 } - bottom: "conv1" - top: "pool1" } layers { - layer { - name: "norm1" - type: "lrn" + name: "norm1" + type: LRN + bottom: "pool1" + top: "norm1" + lrn_param { local_size: 5 alpha: 0.0001 beta: 0.75 } - bottom: "pool1" - top: "norm1" } layers { - layer { - name: "pad2" - type: "padding" - pad: 2 - } + name: "conv2" + type: CONVOLUTION bottom: "norm1" - top: "pad2" -} -layers { - layer { - name: "conv2" - type: "conv" + top: "conv2" + blobs_lr: 1 + blobs_lr: 2 + weight_decay: 1 + weight_decay: 0 + convolution_param { num_output: 256 + pad: 2 + kernel_size: 5 group: 2 - kernelsize: 5 weight_filler { type: "gaussian" std: 0.01 } bias_filler { type: "constant" - value: 1. + value: 1 } - blobs_lr: 1. - blobs_lr: 2. - weight_decay: 1. - weight_decay: 0. 
} - bottom: "pad2" - top: "conv2" } layers { - layer { - name: "relu2" - type: "relu" - } + name: "relu2" + type: RELU bottom: "conv2" top: "conv2" } layers { - layer { - name: "pool2" - type: "pool" + name: "pool2" + type: POOLING + bottom: "conv2" + top: "pool2" + pooling_param { pool: MAX - kernelsize: 3 + kernel_size: 3 stride: 2 } - bottom: "conv2" - top: "pool2" } layers { - layer { - name: "norm2" - type: "lrn" + name: "norm2" + type: LRN + bottom: "pool2" + top: "norm2" + lrn_param { local_size: 5 alpha: 0.0001 beta: 0.75 } - bottom: "pool2" - top: "norm2" } layers { - layer { - name: "pad3" - type: "padding" - pad: 1 - } + name: "conv3" + type: CONVOLUTION bottom: "norm2" - top: "pad3" -} -layers { - layer { - name: "conv3" - type: "conv" + top: "conv3" + blobs_lr: 1 + blobs_lr: 2 + weight_decay: 1 + weight_decay: 0 + convolution_param { num_output: 384 - kernelsize: 3 + pad: 1 + kernel_size: 3 weight_filler { type: "gaussian" std: 0.01 } bias_filler { type: "constant" - value: 0. + value: 0 } - blobs_lr: 1. - blobs_lr: 2. - weight_decay: 1. - weight_decay: 0. } - bottom: "pad3" - top: "conv3" } layers { - layer { - name: "relu3" - type: "relu" - } + name: "relu3" + type: RELU bottom: "conv3" top: "conv3" } layers { - layer { - name: "pad4" - type: "padding" - pad: 1 - } + name: "conv4" + type: CONVOLUTION bottom: "conv3" - top: "pad4" -} -layers { - layer { - name: "conv4" - type: "conv" + top: "conv4" + blobs_lr: 1 + blobs_lr: 2 + weight_decay: 1 + weight_decay: 0 + convolution_param { num_output: 384 + pad: 1 + kernel_size: 3 group: 2 - kernelsize: 3 weight_filler { type: "gaussian" std: 0.01 } bias_filler { type: "constant" - value: 1. + value: 1 } - blobs_lr: 1. - blobs_lr: 2. - weight_decay: 1. - weight_decay: 0. } - bottom: "pad4" - top: "conv4" } layers { - layer { - name: "relu4" - type: "relu" - } + name: "relu4" + type: RELU bottom: "conv4" top: "conv4" } layers { - layer { - name: "pad5" - type: "padding" - pad: 1 - } + name: "conv5" + type: CONVOLUTION bottom: "conv4" - top: "pad5" -} -layers { - layer { - name: "conv5" - type: "conv" + top: "conv5" + blobs_lr: 1 + blobs_lr: 2 + weight_decay: 1 + weight_decay: 0 + convolution_param { num_output: 256 + pad: 1 + kernel_size: 3 group: 2 - kernelsize: 3 weight_filler { type: "gaussian" std: 0.01 } bias_filler { type: "constant" - value: 1. + value: 1 } - blobs_lr: 1. - blobs_lr: 2. - weight_decay: 1. - weight_decay: 0. } - bottom: "pad5" - top: "conv5" } layers { - layer { - name: "relu5" - type: "relu" - } + name: "relu5" + type: RELU bottom: "conv5" top: "conv5" } layers { - layer { - name: "pool5" - type: "pool" - kernelsize: 3 + name: "pool5" + type: POOLING + bottom: "conv5" + top: "pool5" + pooling_param { pool: MAX + kernel_size: 3 stride: 2 } - bottom: "conv5" - top: "pool5" } layers { - layer { - name: "fc6" - type: "innerproduct" + name: "fc6" + type: INNER_PRODUCT + bottom: "pool5" + top: "fc6" + blobs_lr: 1 + blobs_lr: 2 + weight_decay: 1 + weight_decay: 0 + inner_product_param { num_output: 4096 weight_filler { type: "gaussian" @@ -273,37 +237,35 @@ layers { } bias_filler { type: "constant" - value: 1. + value: 1 } - blobs_lr: 1. - blobs_lr: 2. - weight_decay: 1. - weight_decay: 0. 
} - bottom: "pool5" - top: "fc6" } layers { - layer { - name: "relu6" - type: "relu" - } + name: "relu6" + type: RELU bottom: "fc6" top: "fc6" } layers { - layer { - name: "drop6" - type: "dropout" - dropout_ratio: 0.5 - } + name: "drop6" + type: DROPOUT bottom: "fc6" top: "fc6" + dropout_param { + dropout_ratio: 0.5 + } } layers { - layer { - name: "fc7" - type: "innerproduct" + name: "fc7" + type: INNER_PRODUCT + bottom: "fc6" + top: "fc7" + blobs_lr: 1 + blobs_lr: 2 + weight_decay: 1 + weight_decay: 0 + inner_product_param { num_output: 4096 weight_filler { type: "gaussian" @@ -311,37 +273,35 @@ layers { } bias_filler { type: "constant" - value: 1. + value: 1 } - blobs_lr: 1. - blobs_lr: 2. - weight_decay: 1. - weight_decay: 0. } - bottom: "fc6" - top: "fc7" } layers { - layer { - name: "relu7" - type: "relu" - } + name: "relu7" + type: RELU bottom: "fc7" top: "fc7" } layers { - layer { - name: "drop7" - type: "dropout" - dropout_ratio: 0.5 - } + name: "drop7" + type: DROPOUT bottom: "fc7" top: "fc7" + dropout_param { + dropout_ratio: 0.5 + } } layers { - layer { - name: "fc8_pascal" - type: "innerproduct" + name: "fc8_pascal" + type: INNER_PRODUCT + bottom: "fc7" + top: "fc8_pascal" + blobs_lr: 10 + blobs_lr: 20 + weight_decay: 1 + weight_decay: 0 + inner_product_param { num_output: 21 weight_filler { type: "gaussian" @@ -351,27 +311,17 @@ layers { type: "constant" value: 0 } - blobs_lr: 10. - blobs_lr: 20. - weight_decay: 1. - weight_decay: 0. } - bottom: "fc7" - top: "fc8_pascal" } layers { - layer { - name: "prob" - type: "softmax" - } + name: "prob" + type: SOFTMAX bottom: "fc8_pascal" top: "prob" } layers { - layer { - name: "accuracy" - type: "accuracy" - } + name: "accuracy" + type: ACCURACY bottom: "prob" bottom: "label" top: "accuracy" diff --git a/include/caffe/util/insert_splits.hpp b/include/caffe/util/insert_splits.hpp index af824e63553..e25cdd7faf1 100644 --- a/include/caffe/util/insert_splits.hpp +++ b/include/caffe/util/insert_splits.hpp @@ -14,16 +14,16 @@ namespace caffe { // Copy NetParameters with SplitLayers added to replace any shared bottom // blobs with unique bottom blobs provided by the SplitLayer. 
-void insert_splits(const NetParameter& param, NetParameter* param_split); +void InsertSplits(const NetParameter& param, NetParameter* param_split); -void configure_split_layer(const string& layer_name, const string& blob_name, +void ConfigureSplitLayer(const string& layer_name, const string& blob_name, const int blob_idx, const int split_count, - LayerConnection* split_layer_connection); + LayerParameter* split_layer_param); -string get_split_layer_name(const string& layer_name, const string& blob_name, +string SplitLayerName(const string& layer_name, const string& blob_name, const int blob_idx); -string get_split_blob_name(const string& layer_name, const string& blob_name, +string SplitBlobName(const string& layer_name, const string& blob_name, const int blob_idx, const int split_idx); } // namespace caffe diff --git a/include/caffe/util/io.hpp b/include/caffe/util/io.hpp index 89f9c18429e..056b573db4c 100644 --- a/include/caffe/util/io.hpp +++ b/include/caffe/util/io.hpp @@ -19,11 +19,18 @@ using ::google::protobuf::Message; namespace caffe { -void ReadProtoFromTextFile(const char* filename, - Message* proto); -inline void ReadProtoFromTextFile(const string& filename, - Message* proto) { - ReadProtoFromTextFile(filename.c_str(), proto); +bool ReadProtoFromTextFile(const char* filename, Message* proto); + +inline bool ReadProtoFromTextFile(const string& filename, Message* proto) { + return ReadProtoFromTextFile(filename.c_str(), proto); +} + +inline void ReadProtoFromTextFileOrDie(const char* filename, Message* proto) { + CHECK(ReadProtoFromTextFile(filename, proto)); +} + +inline void ReadProtoFromTextFileOrDie(const string& filename, Message* proto) { + ReadProtoFromTextFileOrDie(filename.c_str(), proto); } void WriteProtoToTextFile(const Message& proto, const char* filename); @@ -31,13 +38,22 @@ inline void WriteProtoToTextFile(const Message& proto, const string& filename) { WriteProtoToTextFile(proto, filename.c_str()); } -void ReadProtoFromBinaryFile(const char* filename, - Message* proto); -inline void ReadProtoFromBinaryFile(const string& filename, - Message* proto) { - ReadProtoFromBinaryFile(filename.c_str(), proto); +bool ReadProtoFromBinaryFile(const char* filename, Message* proto); + +inline bool ReadProtoFromBinaryFile(const string& filename, Message* proto) { + return ReadProtoFromBinaryFile(filename.c_str(), proto); } +inline void ReadProtoFromBinaryFileOrDie(const char* filename, Message* proto) { + CHECK(ReadProtoFromBinaryFile(filename, proto)); +} + +inline void ReadProtoFromBinaryFileOrDie(const string& filename, + Message* proto) { + ReadProtoFromBinaryFileOrDie(filename.c_str(), proto); +} + + void WriteProtoToBinaryFile(const Message& proto, const char* filename); inline void WriteProtoToBinaryFile( const Message& proto, const string& filename) { diff --git a/include/caffe/util/upgrade_proto.hpp b/include/caffe/util/upgrade_proto.hpp new file mode 100644 index 00000000000..a1ac060970f --- /dev/null +++ b/include/caffe/util/upgrade_proto.hpp @@ -0,0 +1,49 @@ +// Copyright 2014 BVLC and contributors. + +#ifndef CAFFE_UTIL_UPGRADE_PROTO_H_ +#define CAFFE_UTIL_UPGRADE_PROTO_H_ + +#include + +#include "caffe/proto/caffe.pb.h" +#include "caffe/proto/caffe_pretty_print.pb.h" + +using std::string; + +namespace caffe { + +// Return true iff any layer contains parameters specified using +// deprecated V0LayerParameter. 
+bool NetNeedsUpgrade(const NetParameter& net_param); + +// Perform all necessary transformations to upgrade a V0NetParameter into a +// NetParameter (including upgrading padding layers and LayerParameters). +bool UpgradeV0Net(const NetParameter& v0_net_param, NetParameter* net_param); + +// Upgrade NetParameter with padding layers to pad-aware conv layers. +// For any padding layer, remove it and put its pad parameter in any layers +// taking its top blob as input. +// Error if any of these above layers are not-conv layers. +void UpgradeV0PaddingLayers(const NetParameter& param, + NetParameter* param_upgraded_pad); + +// Upgrade a single V0LayerConnection to the new LayerParameter format. +bool UpgradeLayerParameter(const LayerParameter& v0_layer_connection, + LayerParameter* layer_param); + +LayerParameter_LayerType UpgradeV0LayerType(const string& type); + +// Convert a NetParameter to NetParameterPrettyPrint used for dumping to +// proto text files. +void NetParameterToPrettyPrint(const NetParameter& param, + NetParameterPrettyPrint* pretty_param); + +// Read parameters from a file into a NetParameter proto message. +void ReadNetParamsFromTextFileOrDie(const string& param_file, + NetParameter* param); +void ReadNetParamsFromBinaryFileOrDie(const string& param_file, + NetParameter* param); + +} // namespace caffe + +#endif // CAFFE_UTIL_UPGRADE_PROTO_H_ diff --git a/include/caffe/vision_layers.hpp b/include/caffe/vision_layers.hpp index a8305abbd6e..4f6dfa70be2 100644 --- a/include/caffe/vision_layers.hpp +++ b/include/caffe/vision_layers.hpp @@ -34,11 +34,10 @@ class NeuronLayer : public Layer { vector*>* top); }; - template -class ReLULayer : public NeuronLayer { +class BNLLLayer : public NeuronLayer { public: - explicit ReLULayer(const LayerParameter& param) + explicit BNLLLayer(const LayerParameter& param) : NeuronLayer(param) {} protected: @@ -46,7 +45,6 @@ class ReLULayer : public NeuronLayer { vector*>* top); virtual Dtype Forward_gpu(const vector*>& bottom, vector*>* top); - virtual void Backward_cpu(const vector*>& top, const bool propagate_down, vector*>* bottom); virtual void Backward_gpu(const vector*>& top, @@ -54,27 +52,33 @@ class ReLULayer : public NeuronLayer { }; template -class TanHLayer : public NeuronLayer { +class DropoutLayer : public NeuronLayer { public: - explicit TanHLayer(const LayerParameter& param) + explicit DropoutLayer(const LayerParameter& param) : NeuronLayer(param) {} + virtual void SetUp(const vector*>& bottom, + vector*>* top); protected: virtual Dtype Forward_cpu(const vector*>& bottom, vector*>* top); virtual Dtype Forward_gpu(const vector*>& bottom, vector*>* top); - virtual void Backward_cpu(const vector*>& top, const bool propagate_down, vector*>* bottom); virtual void Backward_gpu(const vector*>& top, const bool propagate_down, vector*>* bottom); + + shared_ptr rand_vec_; + float threshold_; + float scale_; + unsigned int uint_thres_; }; template -class SigmoidLayer : public NeuronLayer { +class ReLULayer : public NeuronLayer { public: - explicit SigmoidLayer(const LayerParameter& param) + explicit ReLULayer(const LayerParameter& param) : NeuronLayer(param) {} protected: @@ -89,11 +93,10 @@ class SigmoidLayer : public NeuronLayer { const bool propagate_down, vector*>* bottom); }; - template -class BNLLLayer : public NeuronLayer { +class SigmoidLayer : public NeuronLayer { public: - explicit BNLLLayer(const LayerParameter& param) + explicit SigmoidLayer(const LayerParameter& param) : NeuronLayer(param) {} protected: @@ -101,43 +104,34 @@ class 
BNLLLayer : public NeuronLayer { vector*>* top); virtual Dtype Forward_gpu(const vector*>& bottom, vector*>* top); - virtual void Backward_cpu(const vector*>& top, const bool propagate_down, vector*>* bottom); virtual void Backward_gpu(const vector*>& top, const bool propagate_down, vector*>* bottom); }; - template -class DropoutLayer : public NeuronLayer { +class TanHLayer : public NeuronLayer { public: - explicit DropoutLayer(const LayerParameter& param) + explicit TanHLayer(const LayerParameter& param) : NeuronLayer(param) {} - virtual void SetUp(const vector*>& bottom, - vector*>* top); protected: virtual Dtype Forward_cpu(const vector*>& bottom, vector*>* top); virtual Dtype Forward_gpu(const vector*>& bottom, vector*>* top); - virtual void Backward_cpu(const vector*>& top, const bool propagate_down, vector*>* bottom); virtual void Backward_gpu(const vector*>& top, const bool propagate_down, vector*>* bottom); - shared_ptr rand_vec_; - float threshold_; - float scale_; - unsigned int uint_thres_; }; template -class SplitLayer : public Layer { +class AccuracyLayer : public Layer { public: - explicit SplitLayer(const LayerParameter& param) + explicit AccuracyLayer(const LayerParameter& param) : Layer(param) {} virtual void SetUp(const vector*>& bottom, vector*>* top); @@ -145,20 +139,17 @@ class SplitLayer : public Layer { protected: virtual Dtype Forward_cpu(const vector*>& bottom, vector*>* top); - virtual Dtype Forward_gpu(const vector*>& bottom, - vector*>* top); + // The accuracy layer should not be used to compute backward operations. virtual void Backward_cpu(const vector*>& top, - const bool propagate_down, vector*>* bottom); - virtual void Backward_gpu(const vector*>& top, - const bool propagate_down, vector*>* bottom); - int count_; + const bool propagate_down, vector*>* bottom) { + NOT_IMPLEMENTED; + } }; - template -class FlattenLayer : public Layer { +class ConcatLayer : public Layer { public: - explicit FlattenLayer(const LayerParameter& param) + explicit ConcatLayer(const LayerParameter& param) : Layer(param) {} virtual void SetUp(const vector*>& bottom, vector*>* top); @@ -172,14 +163,20 @@ class FlattenLayer : public Layer { const bool propagate_down, vector*>* bottom); virtual void Backward_gpu(const vector*>& top, const bool propagate_down, vector*>* bottom); + + Blob col_bob_; int count_; + int num_; + int channels_; + int height_; + int width_; + int concat_dim_; }; - template -class InnerProductLayer : public Layer { +class ConvolutionLayer : public Layer { public: - explicit InnerProductLayer(const LayerParameter& param) + explicit ConvolutionLayer(const LayerParameter& param) : Layer(param) {} virtual void SetUp(const vector*>& bottom, vector*>* top); @@ -189,24 +186,41 @@ class InnerProductLayer : public Layer { vector*>* top); virtual Dtype Forward_gpu(const vector*>& bottom, vector*>* top); - virtual void Backward_cpu(const vector*>& top, const bool propagate_down, vector*>* bottom); virtual void Backward_gpu(const vector*>& top, const bool propagate_down, vector*>* bottom); + + int kernel_size_; + int stride_; + int num_; + int channels_; + int pad_; + int height_; + int width_; + int num_output_; + int group_; + Blob col_buffer_; + shared_ptr bias_multiplier_; + bool bias_term_; int M_; int K_; int N_; - bool biasterm_; - shared_ptr bias_multiplier_; }; +// This function is used to create a pthread that prefetches the data. 
+template +void* DataLayerPrefetch(void* layer_pointer); template -class PaddingLayer : public Layer { +class DataLayer : public Layer { + // The function used to perform prefetching. + friend void* DataLayerPrefetch(void* layer_pointer); + public: - explicit PaddingLayer(const LayerParameter& param) + explicit DataLayer(const LayerParameter& param) : Layer(param) {} + virtual ~DataLayer(); virtual void SetUp(const vector*>& bottom, vector*>* top); @@ -216,53 +230,47 @@ class PaddingLayer : public Layer { virtual Dtype Forward_gpu(const vector*>& bottom, vector*>* top); virtual void Backward_cpu(const vector*>& top, - const bool propagate_down, vector*>* bottom); + const bool propagate_down, vector*>* bottom) { return; } virtual void Backward_gpu(const vector*>& top, - const bool propagate_down, vector*>* bottom); - unsigned int PAD_; - int NUM_; - int CHANNEL_; - int HEIGHT_IN_; - int WIDTH_IN_; - int HEIGHT_OUT_; - int WIDTH_OUT_; -}; + const bool propagate_down, vector*>* bottom) { return; } + shared_ptr db_; + shared_ptr iter_; + int datum_channels_; + int datum_height_; + int datum_width_; + int datum_size_; + pthread_t thread_; + shared_ptr > prefetch_data_; + shared_ptr > prefetch_label_; + Blob data_mean_; +}; template -class LRNLayer : public Layer { +class EuclideanLossLayer : public Layer { public: - explicit LRNLayer(const LayerParameter& param) - : Layer(param) {} + explicit EuclideanLossLayer(const LayerParameter& param) + : Layer(param), difference_() {} virtual void SetUp(const vector*>& bottom, vector*>* top); protected: virtual Dtype Forward_cpu(const vector*>& bottom, vector*>* top); - virtual Dtype Forward_gpu(const vector*>& bottom, - vector*>* top); + // virtual Dtype Forward_gpu(const vector*>& bottom, + // vector*>* top); virtual void Backward_cpu(const vector*>& top, const bool propagate_down, vector*>* bottom); - virtual void Backward_gpu(const vector*>& top, - const bool propagate_down, vector*>* bottom); - // scale_ stores the intermediate summing results - Blob scale_; - int size_; - int pre_pad_; - Dtype alpha_; - Dtype beta_; - int num_; - int channels_; - int height_; - int width_; -}; + // virtual void Backward_gpu(const vector*>& top, + // const bool propagate_down, vector*>* bottom); + Blob difference_; +}; template -class Im2colLayer : public Layer { +class FlattenLayer : public Layer { public: - explicit Im2colLayer(const LayerParameter& param) + explicit FlattenLayer(const LayerParameter& param) : Layer(param) {} virtual void SetUp(const vector*>& bottom, vector*>* top); @@ -276,21 +284,18 @@ class Im2colLayer : public Layer { const bool propagate_down, vector*>* bottom); virtual void Backward_gpu(const vector*>& top, const bool propagate_down, vector*>* bottom); - int KSIZE_; - int STRIDE_; - int CHANNELS_; - int HEIGHT_; - int WIDTH_; - int PAD_; + + int count_; }; template -class PoolingLayer : public Layer { +class HDF5OutputLayer : public Layer { public: - explicit PoolingLayer(const LayerParameter& param) - : Layer(param) {} + explicit HDF5OutputLayer(const LayerParameter& param); + virtual ~HDF5OutputLayer(); virtual void SetUp(const vector*>& bottom, vector*>* top); + inline std::string file_name() const { return file_name_; } protected: virtual Dtype Forward_cpu(const vector*>& bottom, @@ -301,22 +306,20 @@ class PoolingLayer : public Layer { const bool propagate_down, vector*>* bottom); virtual void Backward_gpu(const vector*>& top, const bool propagate_down, vector*>* bottom); - int KSIZE_; - int STRIDE_; - int CHANNELS_; - int HEIGHT_; - 
int WIDTH_; - int POOLED_HEIGHT_; - int POOLED_WIDTH_; - Blob rand_idx_; -}; + virtual void SaveBlobs(); + std::string file_name_; + hid_t file_id_; + Blob data_blob_; + Blob label_blob_; +}; template -class ConvolutionLayer : public Layer { +class HDF5DataLayer : public Layer { public: - explicit ConvolutionLayer(const LayerParameter& param) + explicit HDF5DataLayer(const LayerParameter& param) : Layer(param) {} + virtual ~HDF5DataLayer(); virtual void SetUp(const vector*>& bottom, vector*>* top); @@ -329,29 +332,20 @@ class ConvolutionLayer : public Layer { const bool propagate_down, vector*>* bottom); virtual void Backward_gpu(const vector*>& top, const bool propagate_down, vector*>* bottom); - Blob col_bob_; + virtual void LoadHDF5FileData(const char* filename); - int KSIZE_; - int STRIDE_; - int NUM_; - int CHANNELS_; - int PAD_; - int HEIGHT_; - int WIDTH_; - int NUM_OUTPUT_; - int GROUP_; - Blob col_buffer_; - shared_ptr bias_multiplier_; - bool biasterm_; - int M_; - int K_; - int N_; + std::vector hdf_filenames_; + unsigned int num_files_; + unsigned int current_file_; + hsize_t current_row_; + Blob data_blob_; + Blob label_blob_; }; template -class ConcatLayer : public Layer { +class Im2colLayer : public Layer { public: - explicit ConcatLayer(const LayerParameter& param) + explicit Im2colLayer(const LayerParameter& param) : Layer(param) {} virtual void SetUp(const vector*>& bottom, vector*>* top); @@ -365,29 +359,28 @@ class ConcatLayer : public Layer { const bool propagate_down, vector*>* bottom); virtual void Backward_gpu(const vector*>& top, const bool propagate_down, vector*>* bottom); - Blob col_bob_; - int COUNT_; - int NUM_; - int CHANNELS_; - int HEIGHT_; - int WIDTH_; - int concat_dim_; + int kernel_size_; + int stride_; + int channels_; + int height_; + int width_; + int pad_; }; // This function is used to create a pthread that prefetches the data. template -void* DataLayerPrefetch(void* layer_pointer); +void* ImageDataLayerPrefetch(void* layer_pointer); template -class DataLayer : public Layer { +class ImageDataLayer : public Layer { // The function used to perform prefetching. - friend void* DataLayerPrefetch(void* layer_pointer); + friend void* ImageDataLayerPrefetch(void* layer_pointer); public: - explicit DataLayer(const LayerParameter& param) + explicit ImageDataLayer(const LayerParameter& param) : Layer(param) {} - virtual ~DataLayer(); + virtual ~ImageDataLayer(); virtual void SetUp(const vector*>& bottom, vector*>* top); @@ -401,8 +394,8 @@ class DataLayer : public Layer { virtual void Backward_gpu(const vector*>& top, const bool propagate_down, vector*>* bottom) { return; } - shared_ptr db_; - shared_ptr iter_; + vector > lines_; + int lines_id_; int datum_channels_; int datum_height_; int datum_width_; @@ -413,51 +406,32 @@ class DataLayer : public Layer { Blob data_mean_; }; -// This function is used to create a pthread that prefetches the data. -template -void* ImagesLayerPrefetch(void* layer_pointer); - template -class ImagesLayer : public Layer { - // The function used to perform prefetching. 
- friend void* ImagesLayerPrefetch(void* layer_pointer); - +class InfogainLossLayer : public Layer { public: - explicit ImagesLayer(const LayerParameter& param) - : Layer(param) {} - virtual ~ImagesLayer(); + explicit InfogainLossLayer(const LayerParameter& param) + : Layer(param), infogain_() {} virtual void SetUp(const vector*>& bottom, vector*>* top); protected: virtual Dtype Forward_cpu(const vector*>& bottom, vector*>* top); - virtual Dtype Forward_gpu(const vector*>& bottom, - vector*>* top); + // virtual Dtype Forward_gpu(const vector*>& bottom, + // vector*>* top); virtual void Backward_cpu(const vector*>& top, - const bool propagate_down, vector*>* bottom) { return; } - virtual void Backward_gpu(const vector*>& top, - const bool propagate_down, vector*>* bottom) { return; } + const bool propagate_down, vector*>* bottom); + // virtual void Backward_gpu(const vector*>& top, + // const bool propagate_down, vector*>* bottom); - vector > lines_; - int lines_id_; - int datum_channels_; - int datum_height_; - int datum_width_; - int datum_size_; - pthread_t thread_; - shared_ptr > prefetch_data_; - shared_ptr > prefetch_label_; - Blob data_mean_; + Blob infogain_; }; - template -class HDF5DataLayer : public Layer { +class InnerProductLayer : public Layer { public: - explicit HDF5DataLayer(const LayerParameter& param) + explicit InnerProductLayer(const LayerParameter& param) : Layer(param) {} - virtual ~HDF5DataLayer(); virtual void SetUp(const vector*>& bottom, vector*>* top); @@ -470,26 +444,21 @@ class HDF5DataLayer : public Layer { const bool propagate_down, vector*>* bottom); virtual void Backward_gpu(const vector*>& top, const bool propagate_down, vector*>* bottom); - virtual void load_hdf5_file_data(const char* filename); - std::vector hdf_filenames_; - unsigned int num_files_; - unsigned int current_file_; - hsize_t current_row_; - - Blob data_blob_; - Blob label_blob_; + int M_; + int K_; + int N_; + bool bias_term_; + shared_ptr bias_multiplier_; }; - template -class HDF5OutputLayer : public Layer { +class LRNLayer : public Layer { public: - explicit HDF5OutputLayer(const LayerParameter& param); - virtual ~HDF5OutputLayer(); + explicit LRNLayer(const LayerParameter& param) + : Layer(param) {} virtual void SetUp(const vector*>& bottom, vector*>* top); - inline std::string file_name() const { return file_name_; } protected: virtual Dtype Forward_cpu(const vector*>& bottom, @@ -500,19 +469,23 @@ class HDF5OutputLayer : public Layer { const bool propagate_down, vector*>* bottom); virtual void Backward_gpu(const vector*>& top, const bool propagate_down, vector*>* bottom); - virtual void SaveBlobs(); - std::string file_name_; - hid_t file_id_; - Blob data_blob_; - Blob label_blob_; + // scale_ stores the intermediate summing results + Blob scale_; + int size_; + int pre_pad_; + Dtype alpha_; + Dtype beta_; + int num_; + int channels_; + int height_; + int width_; }; - template -class SoftmaxLayer : public Layer { +class MultinomialLogisticLossLayer : public Layer { public: - explicit SoftmaxLayer(const LayerParameter& param) + explicit MultinomialLogisticLossLayer(const LayerParameter& param) : Layer(param) {} virtual void SetUp(const vector*>& bottom, vector*>* top); @@ -520,65 +493,66 @@ class SoftmaxLayer : public Layer { protected: virtual Dtype Forward_cpu(const vector*>& bottom, vector*>* top); - virtual Dtype Forward_gpu(const vector*>& bottom, - vector*>* top); + // virtual Dtype Forward_gpu(const vector*>& bottom, + // vector*>* top); virtual void Backward_cpu(const vector*>& 
top, const bool propagate_down, vector*>* bottom); - virtual void Backward_gpu(const vector*>& top, - const bool propagate_down, vector*>* bottom); - - // sum_multiplier is just used to carry out sum using blas - Blob sum_multiplier_; - // scale is an intermediate blob to hold temporary results. - Blob scale_; + // virtual void Backward_gpu(const vector*>& top, + // const bool propagate_down, vector*>* bottom); }; - template -class MultinomialLogisticLossLayer : public Layer { +class PoolingLayer : public Layer { public: - explicit MultinomialLogisticLossLayer(const LayerParameter& param) + explicit PoolingLayer(const LayerParameter& param) : Layer(param) {} virtual void SetUp(const vector*>& bottom, vector*>* top); protected: - // The loss layer will do nothing during forward - all computation are - // carried out in the backward pass. virtual Dtype Forward_cpu(const vector*>& bottom, vector*>* top); - // virtual Dtype Forward_gpu(const vector*>& bottom, - // vector*>* top); + virtual Dtype Forward_gpu(const vector*>& bottom, + vector*>* top); virtual void Backward_cpu(const vector*>& top, const bool propagate_down, vector*>* bottom); - // virtual void Backward_gpu(const vector*>& top, - // const bool propagate_down, vector*>* bottom); + virtual void Backward_gpu(const vector*>& top, + const bool propagate_down, vector*>* bottom); + + int kernel_size_; + int stride_; + int channels_; + int height_; + int width_; + int pooled_height_; + int pooled_width_; + Blob rand_idx_; }; template -class InfogainLossLayer : public Layer { +class SoftmaxLayer : public Layer { public: - explicit InfogainLossLayer(const LayerParameter& param) - : Layer(param), infogain_() {} + explicit SoftmaxLayer(const LayerParameter& param) + : Layer(param) {} virtual void SetUp(const vector*>& bottom, vector*>* top); protected: - // The loss layer will do nothing during forward - all computation are - // carried out in the backward pass. virtual Dtype Forward_cpu(const vector*>& bottom, vector*>* top); - // virtual Dtype Forward_gpu(const vector*>& bottom, - // vector*>* top); + virtual Dtype Forward_gpu(const vector*>& bottom, + vector*>* top); virtual void Backward_cpu(const vector*>& top, const bool propagate_down, vector*>* bottom); - // virtual void Backward_gpu(const vector*>& top, - // const bool propagate_down, vector*>* bottom); + virtual void Backward_gpu(const vector*>& top, + const bool propagate_down, vector*>* bottom); - Blob infogain_; + // sum_multiplier is just used to carry out sum using blas + Blob sum_multiplier_; + // scale is an intermediate blob to hold temporary results. + Blob scale_; }; - // SoftmaxWithLossLayer is a layer that implements softmax and then computes // the loss - it is preferred over softmax + multinomiallogisticloss in the // sense that during training, this will produce more numerically stable @@ -610,46 +584,25 @@ class SoftmaxWithLossLayer : public Layer { vector*> softmax_top_vec_; }; - template -class EuclideanLossLayer : public Layer { +class SplitLayer : public Layer { public: - explicit EuclideanLossLayer(const LayerParameter& param) - : Layer(param), difference_() {} + explicit SplitLayer(const LayerParameter& param) + : Layer(param) {} virtual void SetUp(const vector*>& bottom, vector*>* top); protected: - // The loss layer will do nothing during forward - all computation are - // carried out in the backward pass. 
virtual Dtype Forward_cpu(const vector*>& bottom, vector*>* top); - // virtual Dtype Forward_gpu(const vector*>& bottom, - // vector*>* top); + virtual Dtype Forward_gpu(const vector*>& bottom, + vector*>* top); virtual void Backward_cpu(const vector*>& top, const bool propagate_down, vector*>* bottom); - // virtual void Backward_gpu(const vector*>& top, - // const bool propagate_down, vector*>* bottom); - Blob difference_; -}; - - -template -class AccuracyLayer : public Layer { - public: - explicit AccuracyLayer(const LayerParameter& param) - : Layer(param) {} - virtual void SetUp(const vector*>& bottom, - vector*>* top); + virtual void Backward_gpu(const vector*>& top, + const bool propagate_down, vector*>* bottom); - protected: - virtual Dtype Forward_cpu(const vector*>& bottom, - vector*>* top); - // The accuracy layer should not be used to compute backward operations. - virtual void Backward_cpu(const vector*>& top, - const bool propagate_down, vector*>* bottom) { - NOT_IMPLEMENTED; - } + int count_; }; // This function is used to create a pthread that prefetches the window data. diff --git a/src/caffe/layer_factory.cpp b/src/caffe/layer_factory.cpp index 90785af4656..f3e52a68237 100644 --- a/src/caffe/layer_factory.cpp +++ b/src/caffe/layer_factory.cpp @@ -9,6 +9,7 @@ #include "caffe/vision_layers.hpp" #include "caffe/proto/caffe.pb.h" +using std::string; namespace caffe { @@ -18,59 +19,61 @@ namespace caffe { // but we will leave it this way for now. template Layer* GetLayer(const LayerParameter& param) { - const std::string& type = param.type(); - if (type == "accuracy") { + const string& name = param.name(); + const LayerParameter_LayerType& type = param.type(); + switch (type) { + case LayerParameter_LayerType_ACCURACY: return new AccuracyLayer(param); - } else if (type == "bnll") { + case LayerParameter_LayerType_BNLL: return new BNLLLayer(param); - } else if (type == "concat") { + case LayerParameter_LayerType_CONCAT: return new ConcatLayer(param); - } else if (type == "conv") { + case LayerParameter_LayerType_CONVOLUTION: return new ConvolutionLayer(param); - } else if (type == "data") { + case LayerParameter_LayerType_DATA: return new DataLayer(param); - } else if (type == "dropout") { + case LayerParameter_LayerType_DROPOUT: return new DropoutLayer(param); - } else if (type == "euclidean_loss") { + case LayerParameter_LayerType_EUCLIDEAN_LOSS: return new EuclideanLossLayer(param); - } else if (type == "flatten") { + case LayerParameter_LayerType_FLATTEN: return new FlattenLayer(param); - } else if (type == "hdf5_data") { + case LayerParameter_LayerType_HDF5_DATA: return new HDF5DataLayer(param); - } else if (type == "hdf5_output") { + case LayerParameter_LayerType_HDF5_OUTPUT: return new HDF5OutputLayer(param); - } else if (type == "images") { - return new ImagesLayer(param); - } else if (type == "im2col") { + case LayerParameter_LayerType_IMAGE_DATA: + return new ImageDataLayer(param); + case LayerParameter_LayerType_IM2COL: return new Im2colLayer(param); - } else if (type == "infogain_loss") { + case LayerParameter_LayerType_INFOGAIN_LOSS: return new InfogainLossLayer(param); - } else if (type == "innerproduct") { + case LayerParameter_LayerType_INNER_PRODUCT: return new InnerProductLayer(param); - } else if (type == "lrn") { + case LayerParameter_LayerType_LRN: return new LRNLayer(param); - } else if (type == "multinomial_logistic_loss") { + case LayerParameter_LayerType_MULTINOMIAL_LOGISTIC_LOSS: return new MultinomialLogisticLossLayer(param); - } else if (type == 
"padding") { - return new PaddingLayer(param); - } else if (type == "pool") { + case LayerParameter_LayerType_POOLING: return new PoolingLayer(param); - } else if (type == "relu") { + case LayerParameter_LayerType_RELU: return new ReLULayer(param); - } else if (type == "sigmoid") { + case LayerParameter_LayerType_SIGMOID: return new SigmoidLayer(param); - } else if (type == "softmax") { + case LayerParameter_LayerType_SOFTMAX: return new SoftmaxLayer(param); - } else if (type == "softmax_loss") { + case LayerParameter_LayerType_SOFTMAX_LOSS: return new SoftmaxWithLossLayer(param); - } else if (type == "split") { + case LayerParameter_LayerType_SPLIT: return new SplitLayer(param); - } else if (type == "tanh") { + case LayerParameter_LayerType_TANH: return new TanHLayer(param); - } else if (type == "window_data") { + case LayerParameter_LayerType_WINDOW_DATA: return new WindowDataLayer(param); - } else { - LOG(FATAL) << "Unknown layer name: " << type; + case LayerParameter_LayerType_NONE: + LOG(FATAL) << "Layer " << name << " has unspecified type."; + default: + LOG(FATAL) << "Layer " << name << " has unknown type " << type; } // just to suppress old compiler warnings. return (Layer*)(NULL); diff --git a/src/caffe/layers/concat_layer.cpp b/src/caffe/layers/concat_layer.cpp index 2ce863b108f..4bbce133c51 100644 --- a/src/caffe/layers/concat_layer.cpp +++ b/src/caffe/layers/concat_layer.cpp @@ -15,30 +15,30 @@ void ConcatLayer::SetUp(const vector*>& bottom, "Concat Layer takes at least two blobs as input."; CHECK_EQ(top->size(), 1) << "Concat Layer takes a single blob as output."; - concat_dim_ = this->layer_param_.concat_dim(); + concat_dim_ = this->layer_param_.concat_param().concat_dim(); CHECK_GE(concat_dim_, 0) << "concat_dim should be >= 0"; CHECK_LE(concat_dim_, 1) << "For now concat_dim <=1, it can only concat num and channels"; // Intialize with the first blob - COUNT_ = bottom[0]->count(); - NUM_ = bottom[0]->num(); - CHANNELS_ = bottom[0]->channels(); - HEIGHT_ = bottom[0]->height(); - WIDTH_ = bottom[0]->width(); + count_ = bottom[0]->count(); + num_ = bottom[0]->num(); + channels_ = bottom[0]->channels(); + height_ = bottom[0]->height(); + width_ = bottom[0]->width(); for (int i = 1; i < bottom.size(); ++i) { - COUNT_ += bottom[i]->count(); + count_ += bottom[i]->count(); if (concat_dim_== 0) { - NUM_ += bottom[i]->num(); + num_ += bottom[i]->num(); } else if (concat_dim_ == 1) { - CHANNELS_ += bottom[i]->channels(); + channels_ += bottom[i]->channels(); } else if (concat_dim_ == 2) { - HEIGHT_ += bottom[i]->height(); + height_ += bottom[i]->height(); } else if (concat_dim_ == 3) { - WIDTH_ += bottom[i]->width(); + width_ += bottom[i]->width(); } } - (*top)[0]->Reshape(NUM_, CHANNELS_, HEIGHT_, WIDTH_); - CHECK_EQ(COUNT_, (*top)[0]->count()); + (*top)[0]->Reshape(num_, channels_, height_, width_); + CHECK_EQ(count_, (*top)[0]->count()); } template @@ -59,7 +59,7 @@ Dtype ConcatLayer::Forward_cpu(const vector*>& bottom, const Dtype* bottom_data = bottom[i]->cpu_data(); int num_elem = bottom[i]->channels()*bottom[i]->height()*bottom[i]->width(); - for (int n = 0; n < NUM_; ++n) { + for (int n = 0; n < num_; ++n) { caffe_copy(num_elem, bottom_data+bottom[i]->offset(n), top_data+(*top)[0]->offset(n, offset_channel)); } @@ -91,7 +91,7 @@ void ConcatLayer::Backward_cpu(const vector*>& top, Blob* blob = (*bottom)[i]; Dtype* bottom_diff = blob->mutable_cpu_diff(); int num_elem = blob->channels()*blob->height()*blob->width(); - for (int n = 0; n < NUM_; ++n) { + for (int n = 0; n < num_; 
++n) { caffe_copy(num_elem, top_diff+top[0]->offset(n, offset_channel), bottom_diff+blob->offset(n)); } diff --git a/src/caffe/layers/concat_layer.cu b/src/caffe/layers/concat_layer.cu index 9270f1cb270..2820bf0dfdf 100644 --- a/src/caffe/layers/concat_layer.cu +++ b/src/caffe/layers/concat_layer.cu @@ -17,7 +17,7 @@ Dtype ConcatLayer::Forward_gpu(const vector*>& bottom, for (int i = 0; i < bottom.size(); ++i) { const Dtype* bottom_data = bottom[i]->gpu_data(); caffe_gpu_copy(bottom[i]->count(), bottom_data, - top_data+(*top)[0]->offset(offset_num)); + top_data + (*top)[0]->offset(offset_num)); offset_num += bottom[i]->num(); } } else if (concat_dim_ == 1) { @@ -25,10 +25,10 @@ Dtype ConcatLayer::Forward_gpu(const vector*>& bottom, for (int i = 0; i < bottom.size(); ++i) { const Dtype* bottom_data = bottom[i]->gpu_data(); int num_elem = - bottom[i]->channels()*bottom[i]->height()*bottom[i]->width(); - for (int n = 0; n < NUM_; ++n) { + bottom[i]->channels() * bottom[i]->height() * bottom[i]->width(); + for (int n = 0; n < num_; ++n) { caffe_gpu_copy(num_elem, bottom_data+bottom[i]->offset(n), - top_data+(*top)[0]->offset(n, offset_channel)); + top_data + (*top)[0]->offset(n, offset_channel)); } offset_channel += bottom[i]->channels(); } @@ -49,7 +49,7 @@ void ConcatLayer::Backward_gpu(const vector*>& top, Blob* blob = (*bottom)[i]; Dtype* bottom_diff = blob->mutable_gpu_diff(); caffe_gpu_copy(blob->count(), - top_diff+top[0]->offset(offset_num), bottom_diff); + top_diff + top[0]->offset(offset_num), bottom_diff); offset_num += blob->num(); } } else if (concat_dim_ == 1) { @@ -58,9 +58,9 @@ void ConcatLayer::Backward_gpu(const vector*>& top, Blob* blob = (*bottom)[i]; Dtype* bottom_diff = blob->mutable_gpu_diff(); int num_elem = blob->channels()*blob->height()*blob->width(); - for (int n = 0; n < NUM_; ++n) { - caffe_gpu_copy(num_elem, top_diff+top[0]->offset(n, offset_channel), - bottom_diff+blob->offset(n)); + for (int n = 0; n < num_; ++n) { + caffe_gpu_copy(num_elem, top_diff + top[0]->offset(n, offset_channel), + bottom_diff + blob->offset(n)); } offset_channel += blob->channels(); } diff --git a/src/caffe/layers/conv_layer.cpp b/src/caffe/layers/conv_layer.cpp index c3cacebc03e..55966b54bde 100644 --- a/src/caffe/layers/conv_layer.cpp +++ b/src/caffe/layers/conv_layer.cpp @@ -15,57 +15,58 @@ void ConvolutionLayer::SetUp(const vector*>& bottom, vector*>* top) { CHECK_EQ(bottom.size(), 1) << "Conv Layer takes a single blob as input."; CHECK_EQ(top->size(), 1) << "Conv Layer takes a single blob as output."; - KSIZE_ = this->layer_param_.kernelsize(); - STRIDE_ = this->layer_param_.stride(); - GROUP_ = this->layer_param_.group(); - PAD_ = this->layer_param_.pad(); - NUM_ = bottom[0]->num(); - CHANNELS_ = bottom[0]->channels(); - HEIGHT_ = bottom[0]->height(); - WIDTH_ = bottom[0]->width(); - NUM_OUTPUT_ = this->layer_param_.num_output(); - CHECK_GT(NUM_OUTPUT_, 0); - CHECK_EQ(CHANNELS_ % GROUP_, 0); + kernel_size_ = this->layer_param_.convolution_param().kernel_size(); + stride_ = this->layer_param_.convolution_param().stride(); + group_ = this->layer_param_.convolution_param().group(); + pad_ = this->layer_param_.convolution_param().pad(); + num_ = bottom[0]->num(); + channels_ = bottom[0]->channels(); + height_ = bottom[0]->height(); + width_ = bottom[0]->width(); + num_output_ = this->layer_param_.convolution_param().num_output(); + CHECK_GT(num_output_, 0); + CHECK_EQ(channels_ % group_, 0); // The im2col result buffer would only hold one image at a time to avoid // overly large 
memory usage. - int height_out = (HEIGHT_ + 2 * PAD_ - KSIZE_) / STRIDE_ + 1; - int width_out = (WIDTH_ + 2 * PAD_ - KSIZE_) / STRIDE_ + 1; - col_buffer_.Reshape(1, CHANNELS_ * KSIZE_ * KSIZE_, height_out, width_out); + int height_out = (height_ + 2 * pad_ - kernel_size_) / stride_ + 1; + int width_out = (width_ + 2 * pad_ - kernel_size_) / stride_ + 1; + col_buffer_.Reshape( + 1, channels_ * kernel_size_ * kernel_size_, height_out, width_out); // Set the parameters - CHECK_EQ(NUM_OUTPUT_ % GROUP_, 0) + CHECK_EQ(num_output_ % group_, 0) << "Number of output should be multiples of group."; - biasterm_ = this->layer_param_.biasterm(); + bias_term_ = this->layer_param_.convolution_param().bias_term(); // Figure out the dimensions for individual gemms. - M_ = NUM_OUTPUT_ / GROUP_; - K_ = CHANNELS_ * KSIZE_ * KSIZE_ / GROUP_; + M_ = num_output_ / group_; + K_ = channels_ * kernel_size_ * kernel_size_ / group_; N_ = height_out * width_out; - (*top)[0]->Reshape(bottom[0]->num(), NUM_OUTPUT_, height_out, width_out); + (*top)[0]->Reshape(bottom[0]->num(), num_output_, height_out, width_out); // Check if we need to set up the weights if (this->blobs_.size() > 0) { LOG(INFO) << "Skipping parameter initialization"; } else { - if (biasterm_) { + if (bias_term_) { this->blobs_.resize(2); } else { this->blobs_.resize(1); } // Intialize the weight - this->blobs_[0].reset( - new Blob(NUM_OUTPUT_, CHANNELS_ / GROUP_, KSIZE_, KSIZE_)); + this->blobs_[0].reset(new Blob( + num_output_, channels_ / group_, kernel_size_, kernel_size_)); // fill the weights - shared_ptr > weight_filler( - GetFiller(this->layer_param_.weight_filler())); + shared_ptr > weight_filler(GetFiller( + this->layer_param_.convolution_param().weight_filler())); weight_filler->Fill(this->blobs_[0].get()); // If necessary, intiialize and fill the bias term - if (biasterm_) { - this->blobs_[1].reset(new Blob(1, 1, 1, NUM_OUTPUT_)); - shared_ptr > bias_filler( - GetFiller(this->layer_param_.bias_filler())); + if (bias_term_) { + this->blobs_[1].reset(new Blob(1, 1, 1, num_output_)); + shared_ptr > bias_filler(GetFiller( + this->layer_param_.convolution_param().bias_filler())); bias_filler->Fill(this->blobs_[1].get()); } } // Set up the bias filler - if (biasterm_) { + if (bias_term_) { bias_multiplier_.reset(new SyncedMemory(N_ * sizeof(Dtype))); Dtype* bias_multiplier_data = reinterpret_cast(bias_multiplier_->mutable_cpu_data()); @@ -86,19 +87,19 @@ Dtype ConvolutionLayer::Forward_cpu(const vector*>& bottom, int weight_offset = M_ * K_; int col_offset = K_ * N_; int top_offset = M_ * N_; - for (int n = 0; n < NUM_; ++n) { + for (int n = 0; n < num_; ++n) { // First, im2col - im2col_cpu(bottom_data + bottom[0]->offset(n), CHANNELS_, HEIGHT_, - WIDTH_, KSIZE_, PAD_, STRIDE_, col_data); + im2col_cpu(bottom_data + bottom[0]->offset(n), channels_, height_, + width_, kernel_size_, pad_, stride_, col_data); // Second, innerproduct with groups - for (int g = 0; g < GROUP_; ++g) { + for (int g = 0; g < group_; ++g) { caffe_cpu_gemm(CblasNoTrans, CblasNoTrans, M_, N_, K_, (Dtype)1., weight + weight_offset * g, col_data + col_offset * g, (Dtype)0., top_data + (*top)[0]->offset(n) + top_offset * g); } // third, add bias - if (biasterm_) { - caffe_cpu_gemm(CblasNoTrans, CblasNoTrans, NUM_OUTPUT_, + if (bias_term_) { + caffe_cpu_gemm(CblasNoTrans, CblasNoTrans, num_output_, N_, 1, (Dtype)1., this->blobs_[1]->cpu_data(), reinterpret_cast(bias_multiplier_->cpu_data()), (Dtype)1., top_data + (*top)[0]->offset(n)); @@ -120,11 +121,11 @@ void 
ConvolutionLayer::Backward_cpu(const vector*>& top, // bias gradient if necessary Dtype* bias_diff = NULL; - if (biasterm_) { + if (bias_term_) { bias_diff = this->blobs_[1]->mutable_cpu_diff(); memset(bias_diff, 0, sizeof(Dtype) * this->blobs_[1]->count()); - for (int n = 0; n < NUM_; ++n) { - caffe_cpu_gemv(CblasNoTrans, NUM_OUTPUT_, N_, + for (int n = 0; n < num_; ++n) { + caffe_cpu_gemv(CblasNoTrans, num_output_, N_, 1., top_diff + top[0]->offset(n), reinterpret_cast(bias_multiplier_->cpu_data()), 1., bias_diff); @@ -135,13 +136,13 @@ void ConvolutionLayer::Backward_cpu(const vector*>& top, int col_offset = K_ * N_; int top_offset = M_ * N_; memset(weight_diff, 0, sizeof(Dtype) * this->blobs_[0]->count()); - for (int n = 0; n < NUM_; ++n) { + for (int n = 0; n < num_; ++n) { // since we saved memory in the forward pass by not storing all col data, // we will need to recompute them. - im2col_cpu(bottom_data + (*bottom)[0]->offset(n), CHANNELS_, HEIGHT_, - WIDTH_, KSIZE_, PAD_, STRIDE_, col_data); + im2col_cpu(bottom_data + (*bottom)[0]->offset(n), channels_, height_, + width_, kernel_size_, pad_, stride_, col_data); // gradient w.r.t. weight. Note that we will accumulate diffs. - for (int g = 0; g < GROUP_; ++g) { + for (int g = 0; g < group_; ++g) { caffe_cpu_gemm(CblasNoTrans, CblasTrans, M_, K_, N_, (Dtype)1., top_diff + top[0]->offset(n) + top_offset * g, col_data + col_offset * g, (Dtype)1., @@ -149,15 +150,15 @@ void ConvolutionLayer::Backward_cpu(const vector*>& top, } // gradient w.r.t. bottom data, if necessary if (propagate_down) { - for (int g = 0; g < GROUP_; ++g) { + for (int g = 0; g < group_; ++g) { caffe_cpu_gemm(CblasTrans, CblasNoTrans, K_, N_, M_, (Dtype)1., weight + weight_offset * g, top_diff + top[0]->offset(n) + top_offset * g, (Dtype)0., col_diff + col_offset * g); } // col2im back to the data - col2im_cpu(col_diff, CHANNELS_, HEIGHT_, WIDTH_, KSIZE_, PAD_, STRIDE_, - bottom_diff + (*bottom)[0]->offset(n)); + col2im_cpu(col_diff, channels_, height_, width_, kernel_size_, pad_, + stride_, bottom_diff + (*bottom)[0]->offset(n)); } } } diff --git a/src/caffe/layers/conv_layer.cu b/src/caffe/layers/conv_layer.cu index da76b4dfb6b..51f5d159879 100644 --- a/src/caffe/layers/conv_layer.cu +++ b/src/caffe/layers/conv_layer.cu @@ -20,19 +20,19 @@ Dtype ConvolutionLayer::Forward_gpu(const vector*>& bottom, int weight_offset = M_ * K_; int col_offset = K_ * N_; int top_offset = M_ * N_; - for (int n = 0; n < NUM_; ++n) { + for (int n = 0; n < num_; ++n) { // First, im2col - im2col_gpu(bottom_data + bottom[0]->offset(n), CHANNELS_, HEIGHT_, - WIDTH_, KSIZE_, PAD_, STRIDE_, col_data); + im2col_gpu(bottom_data + bottom[0]->offset(n), channels_, height_, + width_, kernel_size_, pad_, stride_, col_data); // Second, innerproduct with groups - for (int g = 0; g < GROUP_; ++g) { + for (int g = 0; g < group_; ++g) { caffe_gpu_gemm(CblasNoTrans, CblasNoTrans, M_, N_, K_, (Dtype)1., weight + weight_offset * g, col_data + col_offset * g, (Dtype)0., top_data + (*top)[0]->offset(n) + top_offset * g); } // third, add bias - if (biasterm_) { - caffe_gpu_gemm(CblasNoTrans, CblasNoTrans, NUM_OUTPUT_, + if (bias_term_) { + caffe_gpu_gemm(CblasNoTrans, CblasNoTrans, num_output_, N_, 1, (Dtype)1., this->blobs_[1]->gpu_data(), reinterpret_cast(bias_multiplier_->gpu_data()), (Dtype)1., top_data + (*top)[0]->offset(n)); @@ -54,12 +54,12 @@ void ConvolutionLayer::Backward_gpu(const vector*>& top, // bias gradient if necessary Dtype* bias_diff = NULL; - if (biasterm_) { + if (bias_term_) { bias_diff 
= this->blobs_[1]->mutable_gpu_diff(); CUDA_CHECK(cudaMemset(bias_diff, 0, sizeof(Dtype) * this->blobs_[1]->count())); - for (int n = 0; n < NUM_; ++n) { - caffe_gpu_gemv(CblasNoTrans, NUM_OUTPUT_, N_, + for (int n = 0; n < num_; ++n) { + caffe_gpu_gemv(CblasNoTrans, num_output_, N_, 1., top_diff + top[0]->offset(n), reinterpret_cast(bias_multiplier_->gpu_data()), 1., bias_diff); @@ -71,13 +71,13 @@ void ConvolutionLayer::Backward_gpu(const vector*>& top, int top_offset = M_ * N_; CUDA_CHECK(cudaMemset(weight_diff, 0, sizeof(Dtype) * this->blobs_[0]->count())); - for (int n = 0; n < NUM_; ++n) { + for (int n = 0; n < num_; ++n) { // since we saved memory in the forward pass by not storing all col data, // we will need to recompute them. - im2col_gpu(bottom_data + (*bottom)[0]->offset(n), CHANNELS_, HEIGHT_, - WIDTH_, KSIZE_, PAD_, STRIDE_, col_data); + im2col_gpu(bottom_data + (*bottom)[0]->offset(n), channels_, height_, + width_, kernel_size_, pad_, stride_, col_data); // gradient w.r.t. weight. Note that we will accumulate diffs. - for (int g = 0; g < GROUP_; ++g) { + for (int g = 0; g < group_; ++g) { caffe_gpu_gemm(CblasNoTrans, CblasTrans, M_, K_, N_, (Dtype)1., top_diff + top[0]->offset(n) + top_offset * g, col_data + col_offset * g, (Dtype)1., @@ -85,15 +85,15 @@ void ConvolutionLayer::Backward_gpu(const vector*>& top, } // gradient w.r.t. bottom data, if necessary if (propagate_down) { - for (int g = 0; g < GROUP_; ++g) { + for (int g = 0; g < group_; ++g) { caffe_gpu_gemm(CblasTrans, CblasNoTrans, K_, N_, M_, (Dtype)1., weight + weight_offset * g, top_diff + top[0]->offset(n) + top_offset * g, (Dtype)0., col_diff + col_offset * g); } // col2im back to the data - col2im_gpu(col_diff, CHANNELS_, HEIGHT_, WIDTH_, KSIZE_, PAD_, STRIDE_, - bottom_diff + (*bottom)[0]->offset(n)); + col2im_gpu(col_diff, channels_, height_, width_, kernel_size_, pad_, + stride_, bottom_diff + (*bottom)[0]->offset(n)); } } } diff --git a/src/caffe/layers/data_layer.cpp b/src/caffe/layers/data_layer.cpp index 1ab28c6dc62..399f771fd68 100644 --- a/src/caffe/layers/data_layer.cpp +++ b/src/caffe/layers/data_layer.cpp @@ -24,13 +24,13 @@ void* DataLayerPrefetch(void* layer_pointer) { CHECK(layer->prefetch_data_); Dtype* top_data = layer->prefetch_data_->mutable_cpu_data(); Dtype* top_label = layer->prefetch_label_->mutable_cpu_data(); - const Dtype scale = layer->layer_param_.scale(); - const int batchsize = layer->layer_param_.batchsize(); - const int cropsize = layer->layer_param_.cropsize(); - const bool mirror = layer->layer_param_.mirror(); + const Dtype scale = layer->layer_param_.data_param().scale(); + const int batch_size = layer->layer_param_.data_param().batch_size(); + const int crop_size = layer->layer_param_.data_param().crop_size(); + const bool mirror = layer->layer_param_.data_param().mirror(); - if (mirror && cropsize == 0) { - LOG(FATAL) << "Current implementation requires mirror and cropsize to be " + if (mirror && crop_size == 0) { + LOG(FATAL) << "Current implementation requires mirror and crop_size to be " << "set at the same time."; } // datum scales @@ -39,33 +39,33 @@ void* DataLayerPrefetch(void* layer_pointer) { const int width = layer->datum_width_; const int size = layer->datum_size_; const Dtype* mean = layer->data_mean_.cpu_data(); - for (int itemid = 0; itemid < batchsize; ++itemid) { + for (int item_id = 0; item_id < batch_size; ++item_id) { // get a blob CHECK(layer->iter_); CHECK(layer->iter_->Valid()); datum.ParseFromString(layer->iter_->value().ToString()); const string& 
data = datum.data(); - if (cropsize) { + if (crop_size) { CHECK(data.size()) << "Image cropping only support uint8 data"; int h_off, w_off; // We only do random crop when we do training. if (Caffe::phase() == Caffe::TRAIN) { // NOLINT_NEXT_LINE(runtime/threadsafe_fn) - h_off = rand() % (height - cropsize); + h_off = rand() % (height - crop_size); // NOLINT_NEXT_LINE(runtime/threadsafe_fn) - w_off = rand() % (width - cropsize); + w_off = rand() % (width - crop_size); } else { - h_off = (height - cropsize) / 2; - w_off = (width - cropsize) / 2; + h_off = (height - crop_size) / 2; + w_off = (width - crop_size) / 2; } // NOLINT_NEXT_LINE(runtime/threadsafe_fn) if (mirror && rand() % 2) { // Copy mirrored version for (int c = 0; c < channels; ++c) { - for (int h = 0; h < cropsize; ++h) { - for (int w = 0; w < cropsize; ++w) { - top_data[((itemid * channels + c) * cropsize + h) * cropsize - + cropsize - 1 - w] = + for (int h = 0; h < crop_size; ++h) { + for (int w = 0; w < crop_size; ++w) { + top_data[((item_id * channels + c) * crop_size + h) * crop_size + + crop_size - 1 - w] = (static_cast( (uint8_t)data[(c * height + h + h_off) * width + w + w_off]) @@ -77,9 +77,10 @@ void* DataLayerPrefetch(void* layer_pointer) { } else { // Normal copy for (int c = 0; c < channels; ++c) { - for (int h = 0; h < cropsize; ++h) { - for (int w = 0; w < cropsize; ++w) { - top_data[((itemid * channels + c) * cropsize + h) * cropsize + w] + for (int h = 0; h < crop_size; ++h) { + for (int w = 0; w < crop_size; ++w) { + top_data[((item_id * channels + c) * crop_size + h) * crop_size + + w] = (static_cast( (uint8_t)data[(c * height + h + h_off) * width + w + w_off]) @@ -93,18 +94,18 @@ void* DataLayerPrefetch(void* layer_pointer) { // we will prefer to use data() first, and then try float_data() if (data.size()) { for (int j = 0; j < size; ++j) { - top_data[itemid * size + j] = + top_data[item_id * size + j] = (static_cast((uint8_t)data[j]) - mean[j]) * scale; } } else { for (int j = 0; j < size; ++j) { - top_data[itemid * size + j] = + top_data[item_id * size + j] = (datum.float_data(j) - mean[j]) * scale; } } } - top_label[itemid] = datum.label(); + top_label[item_id] = datum.label(); // go to the next iter layer->iter_->Next(); if (!layer->iter_->Valid()) { @@ -133,18 +134,19 @@ void DataLayer::SetUp(const vector*>& bottom, leveldb::Options options; options.create_if_missing = false; options.max_open_files = 100; - LOG(INFO) << "Opening leveldb " << this->layer_param_.source(); + LOG(INFO) << "Opening leveldb " << this->layer_param_.data_param().source(); leveldb::Status status = leveldb::DB::Open( - options, this->layer_param_.source(), &db_temp); + options, this->layer_param_.data_param().source(), &db_temp); CHECK(status.ok()) << "Failed to open leveldb " - << this->layer_param_.source() << std::endl << status.ToString(); + << this->layer_param_.data_param().source() << std::endl + << status.ToString(); db_.reset(db_temp); iter_.reset(db_->NewIterator(leveldb::ReadOptions())); iter_->SeekToFirst(); // Check if we would need to randomly skip a few data points - if (this->layer_param_.rand_skip()) { + if (this->layer_param_.data_param().rand_skip()) { // NOLINT_NEXT_LINE(runtime/threadsafe_fn) - unsigned int skip = rand() % this->layer_param_.rand_skip(); + unsigned int skip = rand() % this->layer_param_.data_param().rand_skip(); LOG(INFO) << "Skipping first " << skip << " data points."; while (skip-- > 0) { iter_->Next(); @@ -157,39 +159,41 @@ void DataLayer::SetUp(const vector*>& bottom, Datum datum; 
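For orientation, the net effect of the data layer changes in this file is that every setting (source, mean_file, batch_size, crop_size, mirror, scale, rand_skip) is now read from the nested `data_param` message rather than from top-level `LayerParameter` fields. A rough prototxt sketch of the new form; the paths and values below are placeholders, and only the field names are taken from the accessors in this patch:

    layers {
      name: "data"
      type: DATA
      top: "data"
      top: "label"
      data_param {
        source: "path/to/train_leveldb"          # placeholder path
        mean_file: "path/to/mean.binaryproto"    # placeholder path
        batch_size: 64
        crop_size: 227
        mirror: true
        rand_skip: 1000
      }
    }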
datum.ParseFromString(iter_->value().ToString()); // image - int cropsize = this->layer_param_.cropsize(); - if (cropsize > 0) { - (*top)[0]->Reshape( - this->layer_param_.batchsize(), datum.channels(), cropsize, cropsize); + int crop_size = this->layer_param_.data_param().crop_size(); + if (crop_size > 0) { + (*top)[0]->Reshape(this->layer_param_.data_param().batch_size(), + datum.channels(), crop_size, crop_size); prefetch_data_.reset(new Blob( - this->layer_param_.batchsize(), datum.channels(), cropsize, cropsize)); + this->layer_param_.data_param().batch_size(), datum.channels(), + crop_size, crop_size)); } else { (*top)[0]->Reshape( - this->layer_param_.batchsize(), datum.channels(), datum.height(), - datum.width()); + this->layer_param_.data_param().batch_size(), datum.channels(), + datum.height(), datum.width()); prefetch_data_.reset(new Blob( - this->layer_param_.batchsize(), datum.channels(), datum.height(), - datum.width())); + this->layer_param_.data_param().batch_size(), datum.channels(), + datum.height(), datum.width())); } LOG(INFO) << "output data size: " << (*top)[0]->num() << "," << (*top)[0]->channels() << "," << (*top)[0]->height() << "," << (*top)[0]->width(); // label - (*top)[1]->Reshape(this->layer_param_.batchsize(), 1, 1, 1); + (*top)[1]->Reshape(this->layer_param_.data_param().batch_size(), 1, 1, 1); prefetch_label_.reset( - new Blob(this->layer_param_.batchsize(), 1, 1, 1)); + new Blob(this->layer_param_.data_param().batch_size(), 1, 1, 1)); // datum size datum_channels_ = datum.channels(); datum_height_ = datum.height(); datum_width_ = datum.width(); datum_size_ = datum.channels() * datum.height() * datum.width(); - CHECK_GT(datum_height_, cropsize); - CHECK_GT(datum_width_, cropsize); + CHECK_GT(datum_height_, crop_size); + CHECK_GT(datum_width_, crop_size); // check if we want to have mean - if (this->layer_param_.has_meanfile()) { + if (this->layer_param_.data_param().has_mean_file()) { + const string& mean_file = this->layer_param_.data_param().mean_file(); + LOG(INFO) << "Loading mean file from" << mean_file; BlobProto blob_proto; - LOG(INFO) << "Loading mean file from" << this->layer_param_.meanfile(); - ReadProtoFromBinaryFile(this->layer_param_.meanfile().c_str(), &blob_proto); + ReadProtoFromBinaryFileOrDie(mean_file.c_str(), &blob_proto); data_mean_.FromProto(blob_proto); CHECK_EQ(data_mean_.num(), 1); CHECK_EQ(data_mean_.channels(), datum_channels_); diff --git a/src/caffe/layers/dropout_layer.cpp b/src/caffe/layers/dropout_layer.cpp index 78acf3a3c4e..5dbba5d550c 100644 --- a/src/caffe/layers/dropout_layer.cpp +++ b/src/caffe/layers/dropout_layer.cpp @@ -16,7 +16,7 @@ void DropoutLayer::SetUp(const vector*>& bottom, NeuronLayer::SetUp(bottom, top); // Set up the cache for random number generation rand_vec_.reset(new SyncedMemory(bottom[0]->count() * sizeof(int))); - threshold_ = this->layer_param_.dropout_ratio(); + threshold_ = this->layer_param_.dropout_param().dropout_ratio(); DCHECK(threshold_ > 0.); DCHECK(threshold_ < 1.); scale_ = 1. / (1. - threshold_); diff --git a/src/caffe/layers/hdf5_data_layer.cpp b/src/caffe/layers/hdf5_data_layer.cpp index 03baf5f5377..cff4f7c7318 100644 --- a/src/caffe/layers/hdf5_data_layer.cpp +++ b/src/caffe/layers/hdf5_data_layer.cpp @@ -26,7 +26,7 @@ HDF5DataLayer::~HDF5DataLayer() { } // Load data and label from HDF5 filename into the class property blobs. 
template -void HDF5DataLayer::load_hdf5_file_data(const char* filename) { +void HDF5DataLayer::LoadHDF5FileData(const char* filename) { LOG(INFO) << "Loading HDF5 file" << filename; hid_t file_id = H5Fopen(filename, H5F_ACC_RDONLY, H5P_DEFAULT); if (file_id < 0) { @@ -56,28 +56,30 @@ void HDF5DataLayer::SetUp(const vector*>& bottom, CHECK_EQ(top->size(), 2) << "HDF5DataLayer takes two blobs as output."; // Read the source to parse the filenames. - LOG(INFO) << "Loading filename from " << this->layer_param_.source(); + const string& source = this->layer_param_.hdf5_data_param().source(); + LOG(INFO) << "Loading filename from " << source; hdf_filenames_.clear(); - std::ifstream myfile(this->layer_param_.source().c_str()); - if (myfile.is_open()) { + std::ifstream source_file(source.c_str()); + if (source_file.is_open()) { std::string line; - while (myfile >> line) { + while (source_file >> line) { hdf_filenames_.push_back(line); } } - myfile.close(); + source_file.close(); num_files_ = hdf_filenames_.size(); current_file_ = 0; LOG(INFO) << "Number of files: " << num_files_; // Load the first HDF5 file and initialize the line counter. - load_hdf5_file_data(hdf_filenames_[current_file_].c_str()); + LoadHDF5FileData(hdf_filenames_[current_file_].c_str()); current_row_ = 0; // Reshape blobs. - (*top)[0]->Reshape(this->layer_param_.batchsize(), data_blob_.channels(), + const int batch_size = this->layer_param_.hdf5_data_param().batch_size(); + (*top)[0]->Reshape(batch_size, data_blob_.channels(), data_blob_.width(), data_blob_.height()); - (*top)[1]->Reshape(this->layer_param_.batchsize(), label_blob_.channels(), + (*top)[1]->Reshape(batch_size, label_blob_.channels(), label_blob_.width(), label_blob_.height()); LOG(INFO) << "output data size: " << (*top)[0]->num() << "," << (*top)[0]->channels() << "," << (*top)[0]->height() << "," @@ -87,29 +89,25 @@ void HDF5DataLayer::SetUp(const vector*>& bottom, template Dtype HDF5DataLayer::Forward_cpu(const vector*>& bottom, vector*>* top) { - const int batchsize = this->layer_param_.batchsize(); + const int batch_size = this->layer_param_.hdf5_data_param().batch_size(); const int data_count = (*top)[0]->count() / (*top)[0]->num(); const int label_data_count = (*top)[1]->count() / (*top)[1]->num(); - for (int i = 0; i < batchsize; ++i, ++current_row_) { + for (int i = 0; i < batch_size; ++i, ++current_row_) { if (current_row_ == data_blob_.num()) { if (num_files_ > 1) { current_file_ += 1; - if (current_file_ == num_files_) { current_file_ = 0; LOG(INFO) << "looping around to first file"; } - - load_hdf5_file_data(hdf_filenames_[current_file_].c_str()); + LoadHDF5FileData(hdf_filenames_[current_file_].c_str()); } current_row_ = 0; } - memcpy(&(*top)[0]->mutable_cpu_data()[i * data_count], &data_blob_.cpu_data()[current_row_ * data_count], sizeof(Dtype) * data_count); - memcpy(&(*top)[1]->mutable_cpu_data()[i * label_data_count], &label_blob_.cpu_data()[current_row_ * label_data_count], sizeof(Dtype) * label_data_count); diff --git a/src/caffe/layers/hdf5_data_layer.cu b/src/caffe/layers/hdf5_data_layer.cu index 2f743ef16d5..9c5bb5a818f 100644 --- a/src/caffe/layers/hdf5_data_layer.cu +++ b/src/caffe/layers/hdf5_data_layer.cu @@ -22,11 +22,11 @@ namespace caffe { template Dtype HDF5DataLayer::Forward_gpu(const vector*>& bottom, vector*>* top) { - const int batchsize = this->layer_param_.batchsize(); + const int batch_size = this->layer_param_.hdf5_data_param().batch_size(); const int data_count = (*top)[0]->count() / (*top)[0]->num(); const int 
label_data_count = (*top)[1]->count() / (*top)[1]->num(); - for (int i = 0; i < batchsize; ++i, ++current_row_) { + for (int i = 0; i < batch_size; ++i, ++current_row_) { if (current_row_ == data_blob_.num()) { if (num_files_ > 1) { current_file_ += 1; @@ -36,17 +36,15 @@ Dtype HDF5DataLayer::Forward_gpu(const vector*>& bottom, LOG(INFO) << "looping around to first file"; } - load_hdf5_file_data(hdf_filenames_[current_file_].c_str()); + LoadHDF5FileData(hdf_filenames_[current_file_].c_str()); } current_row_ = 0; } - CUDA_CHECK(cudaMemcpy( &(*top)[0]->mutable_gpu_data()[i * data_count], &data_blob_.cpu_data()[current_row_ * data_count], sizeof(Dtype) * data_count, cudaMemcpyHostToDevice)); - CUDA_CHECK(cudaMemcpy( &(*top)[1]->mutable_gpu_data()[i * label_data_count], &label_blob_.cpu_data()[current_row_ * label_data_count], diff --git a/src/caffe/layers/im2col_layer.cpp b/src/caffe/layers/im2col_layer.cpp index fc9f52e745f..749ea3c2d6a 100644 --- a/src/caffe/layers/im2col_layer.cpp +++ b/src/caffe/layers/im2col_layer.cpp @@ -14,15 +14,15 @@ void Im2colLayer::SetUp(const vector*>& bottom, vector*>* top) { CHECK_EQ(bottom.size(), 1) << "Im2col Layer takes a single blob as input."; CHECK_EQ(top->size(), 1) << "Im2col Layer takes a single blob as output."; - KSIZE_ = this->layer_param_.kernelsize(); - STRIDE_ = this->layer_param_.stride(); - PAD_ = this->layer_param_.pad(); - CHANNELS_ = bottom[0]->channels(); - HEIGHT_ = bottom[0]->height(); - WIDTH_ = bottom[0]->width(); - (*top)[0]->Reshape(bottom[0]->num(), CHANNELS_ * KSIZE_ * KSIZE_, - (HEIGHT_ + 2 * PAD_ - KSIZE_) / STRIDE_ + 1, - (WIDTH_ + 2 * PAD_ - KSIZE_) / STRIDE_ + 1); + kernel_size_ = this->layer_param_.convolution_param().kernel_size(); + stride_ = this->layer_param_.convolution_param().stride(); + pad_ = this->layer_param_.convolution_param().pad(); + channels_ = bottom[0]->channels(); + height_ = bottom[0]->height(); + width_ = bottom[0]->width(); + (*top)[0]->Reshape(bottom[0]->num(), channels_ * kernel_size_ * kernel_size_, + (height_ + 2 * pad_ - kernel_size_) / stride_ + 1, + (width_ + 2 * pad_ - kernel_size_) / stride_ + 1); } template @@ -31,8 +31,8 @@ Dtype Im2colLayer::Forward_cpu(const vector*>& bottom, const Dtype* bottom_data = bottom[0]->cpu_data(); Dtype* top_data = (*top)[0]->mutable_cpu_data(); for (int n = 0; n < bottom[0]->num(); ++n) { - im2col_cpu(bottom_data + bottom[0]->offset(n), CHANNELS_, HEIGHT_, - WIDTH_, KSIZE_, PAD_, STRIDE_, top_data + (*top)[0]->offset(n)); + im2col_cpu(bottom_data + bottom[0]->offset(n), channels_, height_, + width_, kernel_size_, pad_, stride_, top_data + (*top)[0]->offset(n)); } return Dtype(0.); } @@ -43,8 +43,8 @@ void Im2colLayer::Backward_cpu(const vector*>& top, const Dtype* top_diff = top[0]->cpu_diff(); Dtype* bottom_diff = (*bottom)[0]->mutable_cpu_diff(); for (int n = 0; n < top[0]->num(); ++n) { - col2im_cpu(top_diff + top[0]->offset(n), CHANNELS_, HEIGHT_, - WIDTH_, KSIZE_, PAD_, STRIDE_, bottom_diff + (*bottom)[0]->offset(n)); + col2im_cpu(top_diff + top[0]->offset(n), channels_, height_, width_, + kernel_size_, pad_, stride_, bottom_diff + (*bottom)[0]->offset(n)); } } diff --git a/src/caffe/layers/im2col_layer.cu b/src/caffe/layers/im2col_layer.cu index 4074d9e424d..26bc1b97959 100644 --- a/src/caffe/layers/im2col_layer.cu +++ b/src/caffe/layers/im2col_layer.cu @@ -15,8 +15,8 @@ Dtype Im2colLayer::Forward_gpu(const vector*>& bottom, const Dtype* bottom_data = bottom[0]->gpu_data(); Dtype* top_data = (*top)[0]->mutable_gpu_data(); for (int n = 0; n < 
bottom[0]->num(); ++n) { - im2col_gpu(bottom_data + bottom[0]->offset(n), CHANNELS_, HEIGHT_, - WIDTH_, KSIZE_, PAD_, STRIDE_, top_data + (*top)[0]->offset(n)); + im2col_gpu(bottom_data + bottom[0]->offset(n), channels_, height_, + width_, kernel_size_, pad_, stride_, top_data + (*top)[0]->offset(n)); } return Dtype(0.); } @@ -27,8 +27,8 @@ void Im2colLayer::Backward_gpu(const vector*>& top, const Dtype* top_diff = top[0]->gpu_diff(); Dtype* bottom_diff = (*bottom)[0]->mutable_gpu_diff(); for (int n = 0; n < top[0]->num(); ++n) { - col2im_gpu(top_diff + top[0]->offset(n), CHANNELS_, HEIGHT_, - WIDTH_, KSIZE_, PAD_, STRIDE_, bottom_diff + (*bottom)[0]->offset(n)); + col2im_gpu(top_diff + top[0]->offset(n), channels_, height_, width_, + kernel_size_, pad_, stride_, bottom_diff + (*bottom)[0]->offset(n)); } } diff --git a/src/caffe/layers/images_layer.cpp b/src/caffe/layers/image_data_layer.cpp similarity index 63% rename from src/caffe/layers/images_layer.cpp rename to src/caffe/layers/image_data_layer.cpp index 5154f9a3b61..8c23cc41053 100644 --- a/src/caffe/layers/images_layer.cpp +++ b/src/caffe/layers/image_data_layer.cpp @@ -19,24 +19,25 @@ using std::pair; namespace caffe { template -void* ImagesLayerPrefetch(void* layer_pointer) { +void* ImageDataLayerPrefetch(void* layer_pointer) { CHECK(layer_pointer); - ImagesLayer* layer = - reinterpret_cast*>(layer_pointer); + ImageDataLayer* layer = + reinterpret_cast*>(layer_pointer); CHECK(layer); Datum datum; CHECK(layer->prefetch_data_); Dtype* top_data = layer->prefetch_data_->mutable_cpu_data(); Dtype* top_label = layer->prefetch_label_->mutable_cpu_data(); - const Dtype scale = layer->layer_param_.scale(); - const int batchsize = layer->layer_param_.batchsize(); - const int cropsize = layer->layer_param_.cropsize(); - const bool mirror = layer->layer_param_.mirror(); - const int new_height = layer->layer_param_.new_height(); - const int new_width = layer->layer_param_.new_height(); - - if (mirror && cropsize == 0) { - LOG(FATAL) << "Current implementation requires mirror and cropsize to be " + ImageDataParameter image_data_param = layer->layer_param_.image_data_param(); + const Dtype scale = image_data_param.scale(); + const int batch_size = image_data_param.batch_size(); + const int crop_size = image_data_param.crop_size(); + const bool mirror = image_data_param.mirror(); + const int new_height = image_data_param.new_height(); + const int new_width = image_data_param.new_width(); + + if (mirror && crop_size == 0) { + LOG(FATAL) << "Current implementation requires mirror and crop_size to be " << "set at the same time."; } // datum scales @@ -46,7 +47,7 @@ void* ImagesLayerPrefetch(void* layer_pointer) { const int size = layer->datum_size_; const int lines_size = layer->lines_.size(); const Dtype* mean = layer->data_mean_.cpu_data(); - for (int itemid = 0; itemid < batchsize; ++itemid) { + for (int item_id = 0; item_id < batch_size; ++item_id) { // get a blob CHECK_GT(lines_size, layer->lines_id_); if (!ReadImageToDatum(layer->lines_[layer->lines_id_].first, @@ -55,27 +56,27 @@ void* ImagesLayerPrefetch(void* layer_pointer) { continue; } const string& data = datum.data(); - if (cropsize) { + if (crop_size) { CHECK(data.size()) << "Image cropping only support uint8 data"; int h_off, w_off; // We only do random crop when we do training. 
if (Caffe::phase() == Caffe::TRAIN) { // NOLINT_NEXT_LINE(runtime/threadsafe_fn) - h_off = rand() % (height - cropsize); + h_off = rand() % (height - crop_size); // NOLINT_NEXT_LINE(runtime/threadsafe_fn) - w_off = rand() % (width - cropsize); + w_off = rand() % (width - crop_size); } else { - h_off = (height - cropsize) / 2; - w_off = (width - cropsize) / 2; + h_off = (height - crop_size) / 2; + w_off = (width - crop_size) / 2; } // NOLINT_NEXT_LINE(runtime/threadsafe_fn) if (mirror && rand() % 2) { // Copy mirrored version for (int c = 0; c < channels; ++c) { - for (int h = 0; h < cropsize; ++h) { - for (int w = 0; w < cropsize; ++w) { - top_data[((itemid * channels + c) * cropsize + h) * cropsize - + cropsize - 1 - w] = + for (int h = 0; h < crop_size; ++h) { + for (int w = 0; w < crop_size; ++w) { + top_data[((item_id * channels + c) * crop_size + h) * crop_size + + crop_size - 1 - w] = (static_cast( (uint8_t)data[(c * height + h + h_off) * width + w + w_off]) @@ -87,9 +88,10 @@ void* ImagesLayerPrefetch(void* layer_pointer) { } else { // Normal copy for (int c = 0; c < channels; ++c) { - for (int h = 0; h < cropsize; ++h) { - for (int w = 0; w < cropsize; ++w) { - top_data[((itemid * channels + c) * cropsize + h) * cropsize + w] + for (int h = 0; h < crop_size; ++h) { + for (int w = 0; w < crop_size; ++w) { + top_data[((item_id * channels + c) * crop_size + h) + * crop_size + w] = (static_cast( (uint8_t)data[(c * height + h + h_off) * width + w + w_off]) @@ -103,25 +105,25 @@ void* ImagesLayerPrefetch(void* layer_pointer) { // Just copy the whole data if (data.size()) { for (int j = 0; j < size; ++j) { - top_data[itemid * size + j] = + top_data[item_id * size + j] = (static_cast((uint8_t)data[j]) - mean[j]) * scale; } } else { for (int j = 0; j < size; ++j) { - top_data[itemid * size + j] = + top_data[item_id * size + j] = (datum.float_data(j) - mean[j]) * scale; } } } - top_label[itemid] = datum.label(); + top_label[item_id] = datum.label(); // go to the next iter layer->lines_id_++; if (layer->lines_id_ >= lines_size) { // We have reached the end. Restart from the first. 
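Alongside the rename from `ImagesLayer` to `ImageDataLayer`, the layer's settings move into the nested `ImageDataParameter` message. A sketch of a matching layer definition follows; the `IMAGE_DATA` type name, the paths, and the numbers are assumptions for illustration, while the field names (`source`, `batch_size`, `new_height`, `new_width`, `crop_size`, `mirror`, `shuffle`) come from the accessors used in this file:

    layers {
      name: "images"
      type: IMAGE_DATA       # assumed enum name for the renamed layer
      top: "data"
      top: "label"
      image_data_param {
        source: "path/to/file_list.txt"   # placeholder: one "filename label" pair per line
        batch_size: 32
        new_height: 256
        new_width: 256
        crop_size: 227
        mirror: true
        shuffle: true
      }
    }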
DLOG(INFO) << "Restarting data prefetching from start."; layer->lines_id_ = 0; - if (layer->layer_param_.shuffle_images()) { + if (layer->layer_param_.image_data_param().shuffle()) { std::random_shuffle(layer->lines_.begin(), layer->lines_.end()); } } @@ -131,32 +133,32 @@ void* ImagesLayerPrefetch(void* layer_pointer) { } template -ImagesLayer::~ImagesLayer() { +ImageDataLayer::~ImageDataLayer() { // Finally, join the thread CHECK(!pthread_join(thread_, NULL)) << "Pthread joining failed."; } template -void ImagesLayer::SetUp(const vector*>& bottom, +void ImageDataLayer::SetUp(const vector*>& bottom, vector*>* top) { CHECK_EQ(bottom.size(), 0) << "Input Layer takes no input blobs."; CHECK_EQ(top->size(), 2) << "Input Layer takes two blobs as output."; - const int new_height = this->layer_param_.new_height(); - const int new_width = this->layer_param_.new_height(); + const int new_height = this->layer_param_.image_data_param().new_height(); + const int new_width = this->layer_param_.image_data_param().new_height(); CHECK((new_height == 0 && new_width == 0) || - (new_height > 0 && new_width > 0)) << - "Current implementation requires new_height and new_width to be set" - "at the same time."; + (new_height > 0 && new_width > 0)) << "Current implementation requires " + "new_height and new_width to be set at the same time."; // Read the file with filenames and labels - LOG(INFO) << "Opening file " << this->layer_param_.source(); - std::ifstream infile(this->layer_param_.source().c_str()); + const string& source = this->layer_param_.image_data_param().source(); + LOG(INFO) << "Opening file " << source; + std::ifstream infile(source.c_str()); string filename; int label; while (infile >> filename >> label) { lines_.push_back(std::make_pair(filename, label)); } - if (this->layer_param_.shuffle_images()) { + if (this->layer_param_.image_data_param().shuffle()) { // randomly shuffle data LOG(INFO) << "Shuffling data"; std::random_shuffle(lines_.begin(), lines_.end()); @@ -165,9 +167,10 @@ void ImagesLayer::SetUp(const vector*>& bottom, lines_id_ = 0; // Check if we would need to randomly skip a few data points - if (this->layer_param_.rand_skip()) { + if (this->layer_param_.image_data_param().rand_skip()) { // NOLINT_NEXT_LINE(runtime/threadsafe_fn) - unsigned int skip = rand() % this->layer_param_.rand_skip(); + unsigned int skip = rand() % + this->layer_param_.image_data_param().rand_skip(); LOG(INFO) << "Skipping first " << skip << " data points."; CHECK_GT(lines_.size(), skip) << "Not enought points to skip"; lines_id_ = skip; @@ -177,39 +180,37 @@ void ImagesLayer::SetUp(const vector*>& bottom, CHECK(ReadImageToDatum(lines_[lines_id_].first, lines_[lines_id_].second, new_height, new_width, &datum)); // image - int cropsize = this->layer_param_.cropsize(); - if (cropsize > 0) { - (*top)[0]->Reshape( - this->layer_param_.batchsize(), datum.channels(), cropsize, cropsize); - prefetch_data_.reset(new Blob( - this->layer_param_.batchsize(), datum.channels(), cropsize, cropsize)); + const int crop_size = this->layer_param_.image_data_param().crop_size(); + const int batch_size = this->layer_param_.image_data_param().batch_size(); + const string& mean_file = this->layer_param_.image_data_param().mean_file(); + if (crop_size > 0) { + (*top)[0]->Reshape(batch_size, datum.channels(), crop_size, crop_size); + prefetch_data_.reset(new Blob(batch_size, datum.channels(), + crop_size, crop_size)); } else { - (*top)[0]->Reshape( - this->layer_param_.batchsize(), datum.channels(), datum.height(), - 
datum.width()); - prefetch_data_.reset(new Blob( - this->layer_param_.batchsize(), datum.channels(), datum.height(), - datum.width())); + (*top)[0]->Reshape(batch_size, datum.channels(), datum.height(), + datum.width()); + prefetch_data_.reset(new Blob(batch_size, datum.channels(), + datum.height(), datum.width())); } LOG(INFO) << "output data size: " << (*top)[0]->num() << "," << (*top)[0]->channels() << "," << (*top)[0]->height() << "," << (*top)[0]->width(); // label - (*top)[1]->Reshape(this->layer_param_.batchsize(), 1, 1, 1); - prefetch_label_.reset( - new Blob(this->layer_param_.batchsize(), 1, 1, 1)); + (*top)[1]->Reshape(batch_size, 1, 1, 1); + prefetch_label_.reset(new Blob(batch_size, 1, 1, 1)); // datum size datum_channels_ = datum.channels(); datum_height_ = datum.height(); datum_width_ = datum.width(); datum_size_ = datum.channels() * datum.height() * datum.width(); - CHECK_GT(datum_height_, cropsize); - CHECK_GT(datum_width_, cropsize); + CHECK_GT(datum_height_, crop_size); + CHECK_GT(datum_width_, crop_size); // check if we want to have mean - if (this->layer_param_.has_meanfile()) { + if (this->layer_param_.image_data_param().has_mean_file()) { BlobProto blob_proto; - LOG(INFO) << "Loading mean file from" << this->layer_param_.meanfile(); - ReadProtoFromBinaryFile(this->layer_param_.meanfile().c_str(), &blob_proto); + LOG(INFO) << "Loading mean file from" << mean_file; + ReadProtoFromBinaryFile(mean_file.c_str(), &blob_proto); data_mean_.FromProto(blob_proto); CHECK_EQ(data_mean_.num(), 1); CHECK_EQ(data_mean_.channels(), datum_channels_); @@ -227,13 +228,13 @@ void ImagesLayer::SetUp(const vector*>& bottom, prefetch_label_->mutable_cpu_data(); data_mean_.cpu_data(); DLOG(INFO) << "Initializing prefetch"; - CHECK(!pthread_create(&thread_, NULL, ImagesLayerPrefetch, + CHECK(!pthread_create(&thread_, NULL, ImageDataLayerPrefetch, reinterpret_cast(this))) << "Pthread execution failed."; DLOG(INFO) << "Prefetch initialized."; } template -Dtype ImagesLayer::Forward_cpu(const vector*>& bottom, +Dtype ImageDataLayer::Forward_cpu(const vector*>& bottom, vector*>* top) { // First, join the thread CHECK(!pthread_join(thread_, NULL)) << "Pthread joining failed."; @@ -243,11 +244,11 @@ Dtype ImagesLayer::Forward_cpu(const vector*>& bottom, memcpy((*top)[1]->mutable_cpu_data(), prefetch_label_->cpu_data(), sizeof(Dtype) * prefetch_label_->count()); // Start a new prefetch thread - CHECK(!pthread_create(&thread_, NULL, ImagesLayerPrefetch, + CHECK(!pthread_create(&thread_, NULL, ImageDataLayerPrefetch, reinterpret_cast(this))) << "Pthread execution failed."; return Dtype(0.); } -INSTANTIATE_CLASS(ImagesLayer); +INSTANTIATE_CLASS(ImageDataLayer); } // namespace caffe diff --git a/src/caffe/layers/images_layer.cu b/src/caffe/layers/image_data_layer.cu similarity index 86% rename from src/caffe/layers/images_layer.cu rename to src/caffe/layers/image_data_layer.cu index 9a73daaed27..7b4952d7768 100644 --- a/src/caffe/layers/images_layer.cu +++ b/src/caffe/layers/image_data_layer.cu @@ -22,7 +22,7 @@ using std::pair; namespace caffe { template -Dtype ImagesLayer::Forward_gpu(const vector*>& bottom, +Dtype ImageDataLayer::Forward_gpu(const vector*>& bottom, vector*>* top) { // First, join the thread CHECK(!pthread_join(thread_, NULL)) << "Pthread joining failed."; @@ -34,11 +34,11 @@ Dtype ImagesLayer::Forward_gpu(const vector*>& bottom, prefetch_label_->cpu_data(), sizeof(Dtype) * prefetch_label_->count(), cudaMemcpyHostToDevice)); // Start a new prefetch thread - 
CHECK(!pthread_create(&thread_, NULL, ImagesLayerPrefetch, + CHECK(!pthread_create(&thread_, NULL, ImageDataLayerPrefetch, reinterpret_cast(this))) << "Pthread execution failed."; return Dtype(0.); } -INSTANTIATE_CLASS(ImagesLayer); +INSTANTIATE_CLASS(ImageDataLayer); } // namespace caffe diff --git a/src/caffe/layers/inner_product_layer.cpp b/src/caffe/layers/inner_product_layer.cpp index 9b2c7ef799a..c60261e9486 100644 --- a/src/caffe/layers/inner_product_layer.cpp +++ b/src/caffe/layers/inner_product_layer.cpp @@ -16,8 +16,8 @@ void InnerProductLayer::SetUp(const vector*>& bottom, vector*>* top) { CHECK_EQ(bottom.size(), 1) << "IP Layer takes a single blob as input."; CHECK_EQ(top->size(), 1) << "IP Layer takes a single blob as output."; - const int num_output = this->layer_param_.num_output(); - biasterm_ = this->layer_param_.biasterm(); + const int num_output = this->layer_param_.inner_product_param().num_output(); + bias_term_ = this->layer_param_.inner_product_param().bias_term(); // Figure out the dimensions M_ = bottom[0]->num(); K_ = bottom[0]->count() / bottom[0]->num(); @@ -27,7 +27,7 @@ void InnerProductLayer::SetUp(const vector*>& bottom, if (this->blobs_.size() > 0) { LOG(INFO) << "Skipping parameter initialization"; } else { - if (biasterm_) { + if (bias_term_) { this->blobs_.resize(2); } else { this->blobs_.resize(1); @@ -35,19 +35,19 @@ void InnerProductLayer::SetUp(const vector*>& bottom, // Intialize the weight this->blobs_[0].reset(new Blob(1, 1, N_, K_)); // fill the weights - shared_ptr > weight_filler( - GetFiller(this->layer_param_.weight_filler())); + shared_ptr > weight_filler(GetFiller( + this->layer_param_.inner_product_param().weight_filler())); weight_filler->Fill(this->blobs_[0].get()); // If necessary, intiialize and fill the bias term - if (biasterm_) { + if (bias_term_) { this->blobs_[1].reset(new Blob(1, 1, 1, N_)); - shared_ptr > bias_filler( - GetFiller(this->layer_param_.bias_filler())); + shared_ptr > bias_filler(GetFiller( + this->layer_param_.inner_product_param().bias_filler())); bias_filler->Fill(this->blobs_[1].get()); } } // parameter initialization // Setting up the bias multiplier - if (biasterm_) { + if (bias_term_) { bias_multiplier_.reset(new SyncedMemory(M_ * sizeof(Dtype))); Dtype* bias_multiplier_data = reinterpret_cast(bias_multiplier_->mutable_cpu_data()); @@ -65,7 +65,7 @@ Dtype InnerProductLayer::Forward_cpu(const vector*>& bottom, const Dtype* weight = this->blobs_[0]->cpu_data(); caffe_cpu_gemm(CblasNoTrans, CblasTrans, M_, N_, K_, (Dtype)1., bottom_data, weight, (Dtype)0., top_data); - if (biasterm_) { + if (bias_term_) { caffe_cpu_gemm(CblasNoTrans, CblasNoTrans, M_, N_, 1, (Dtype)1., reinterpret_cast(bias_multiplier_->cpu_data()), this->blobs_[1]->cpu_data(), (Dtype)1., top_data); @@ -82,7 +82,7 @@ void InnerProductLayer::Backward_cpu(const vector*>& top, // Gradient with respect to weight caffe_cpu_gemm(CblasTrans, CblasNoTrans, N_, K_, M_, (Dtype)1., top_diff, bottom_data, (Dtype)0., this->blobs_[0]->mutable_cpu_diff()); - if (biasterm_) { + if (bias_term_) { // Gradient with respect to bias caffe_cpu_gemv(CblasTrans, M_, N_, (Dtype)1., top_diff, reinterpret_cast(bias_multiplier_->cpu_data()), (Dtype)0., diff --git a/src/caffe/layers/inner_product_layer.cu b/src/caffe/layers/inner_product_layer.cu index ac0e0be12d6..f139c23c310 100644 --- a/src/caffe/layers/inner_product_layer.cu +++ b/src/caffe/layers/inner_product_layer.cu @@ -21,7 +21,7 @@ Dtype InnerProductLayer::Forward_gpu(const vector*>& bottom, const Dtype* weight = 
this->blobs_[0]->gpu_data(); caffe_gpu_gemm(CblasNoTrans, CblasTrans, M_, N_, K_, (Dtype)1., bottom_data, weight, (Dtype)0., top_data); - if (biasterm_) { + if (bias_term_) { caffe_gpu_gemm(CblasNoTrans, CblasNoTrans, M_, N_, 1, (Dtype)1., reinterpret_cast(bias_multiplier_->gpu_data()), this->blobs_[1]->gpu_data(), (Dtype)1., top_data); @@ -38,7 +38,7 @@ void InnerProductLayer::Backward_gpu(const vector*>& top, // Gradient with respect to weight caffe_gpu_gemm(CblasTrans, CblasNoTrans, N_, K_, M_, (Dtype)1., top_diff, bottom_data, (Dtype)0., this->blobs_[0]->mutable_gpu_diff()); - if (biasterm_) { + if (bias_term_) { // Gradient with respect to bias caffe_gpu_gemv(CblasTrans, M_, N_, (Dtype)1., top_diff, reinterpret_cast(bias_multiplier_->gpu_data()), diff --git a/src/caffe/layers/loss_layer.cpp b/src/caffe/layers/loss_layer.cpp index e232008266a..2945d268ef8 100644 --- a/src/caffe/layers/loss_layer.cpp +++ b/src/caffe/layers/loss_layer.cpp @@ -73,7 +73,8 @@ void InfogainLossLayer::SetUp( CHECK_EQ(bottom[1]->height(), 1); CHECK_EQ(bottom[1]->width(), 1); BlobProto blob_proto; - ReadProtoFromBinaryFile(this->layer_param_.source(), &blob_proto); + ReadProtoFromBinaryFile(this->layer_param_.infogain_loss_param().source(), + &blob_proto); infogain_.FromProto(blob_proto); CHECK_EQ(infogain_.num(), 1); CHECK_EQ(infogain_.channels(), 1); diff --git a/src/caffe/layers/lrn_layer.cpp b/src/caffe/layers/lrn_layer.cpp index c43aaf46517..e95bc8a61c9 100644 --- a/src/caffe/layers/lrn_layer.cpp +++ b/src/caffe/layers/lrn_layer.cpp @@ -21,10 +21,10 @@ void LRNLayer::SetUp(const vector*>& bottom, width_ = bottom[0]->width(); (*top)[0]->Reshape(num_, channels_, height_, width_); scale_.Reshape(num_, channels_, height_, width_); - size_ = this->layer_param_.local_size(); + size_ = this->layer_param_.lrn_param().local_size(); pre_pad_ = (size_ - 1) / 2; - alpha_ = this->layer_param_.alpha(); - beta_ = this->layer_param_.beta(); + alpha_ = this->layer_param_.lrn_param().alpha(); + beta_ = this->layer_param_.lrn_param().beta(); } template diff --git a/src/caffe/layers/padding_layer.cpp b/src/caffe/layers/padding_layer.cpp deleted file mode 100644 index 6b22638554b..00000000000 --- a/src/caffe/layers/padding_layer.cpp +++ /dev/null @@ -1,74 +0,0 @@ -// Copyright 2014 BVLC and contributors. - -#include // NOLINT(readability/streams) -#include - -#include "caffe/layer.hpp" -#include "caffe/vision_layers.hpp" - -namespace caffe { - -template -void PaddingLayer::SetUp(const vector*>& bottom, - vector*>* top) { - // DEPRECATION - LOG(WARNING) << "Padding layers are deprecated in favor of padding-aware " - "convolutions and WILL BE REMOVED. Please update your model " - "prototxt to replace padding layers with pad fields. 
" - "See https://github.com/BVLC/caffe/pull/128."; - PAD_ = this->layer_param_.pad(); - CHECK_EQ(bottom.size(), 1) << "Padding Layer takes a single blob as input."; - CHECK_EQ(top->size(), 1) << "Padding Layer takes a single blob as output."; - NUM_ = bottom[0]->num(); - CHANNEL_ = bottom[0]->channels(); - HEIGHT_IN_ = bottom[0]->height(); - WIDTH_IN_ = bottom[0]->width(); - HEIGHT_OUT_ = HEIGHT_IN_ + PAD_ * 2; - WIDTH_OUT_ = WIDTH_IN_ + PAD_ * 2; - (*top)[0]->Reshape(NUM_, CHANNEL_, HEIGHT_OUT_, WIDTH_OUT_); -} - -template -Dtype PaddingLayer::Forward_cpu(const vector*>& bottom, - vector*>* top) { - Dtype* top_data = (*top)[0]->mutable_cpu_data(); - const Dtype* bottom_data = bottom[0]->cpu_data(); - memset(top_data, 0, sizeof(Dtype) * (*top)[0]->count()); - // In short, top[n, c, h, w] = bottom[n, c, h-pad, w-pad] if in range - for (int n = 0; n < NUM_; ++n) { - for (int c = 0; c < CHANNEL_; ++c) { - for (int h = 0; h < HEIGHT_IN_; ++h) { - // copy the width part - memcpy( - top_data + ((n * CHANNEL_ + c) * HEIGHT_OUT_ + h + PAD_) - * WIDTH_OUT_ + PAD_, - bottom_data + ((n * CHANNEL_ + c) * HEIGHT_IN_ + h) * WIDTH_IN_, - sizeof(Dtype) * WIDTH_IN_); - } - } - } - return Dtype(0.); -} - -template -void PaddingLayer::Backward_cpu(const vector*>& top, - const bool propagate_down, vector*>* bottom) { - const Dtype* top_diff = top[0]->cpu_diff(); - Dtype* bottom_diff = (*bottom)[0]->mutable_cpu_diff(); - for (int n = 0; n < NUM_; ++n) { - for (int c = 0; c < CHANNEL_; ++c) { - for (int h = 0; h < HEIGHT_IN_; ++h) { - // copy the width part - memcpy( - bottom_diff + ((n * CHANNEL_ + c) * HEIGHT_IN_ + h) * WIDTH_IN_, - top_diff + ((n * CHANNEL_ + c) * HEIGHT_OUT_ + h + PAD_) - * WIDTH_OUT_ + PAD_, - sizeof(Dtype) * WIDTH_IN_); - } - } - } -} - -INSTANTIATE_CLASS(PaddingLayer); - -} // namespace caffe diff --git a/src/caffe/layers/padding_layer.cu b/src/caffe/layers/padding_layer.cu deleted file mode 100644 index 8023fef0347..00000000000 --- a/src/caffe/layers/padding_layer.cu +++ /dev/null @@ -1,82 +0,0 @@ -// Copyright 2014 BVLC and contributors. 
- -#include // NOLINT(readability/streams) -#include - -#include "caffe/layer.hpp" -#include "caffe/vision_layers.hpp" - -namespace caffe { - -template -__global__ void PaddingForward(const int count, const Dtype* in, Dtype* out, - const int num, const int channel, const int height_in, const int width_in, - const int pad) { - CUDA_KERNEL_LOOP(index, count) { - int height_out = height_in + pad + pad; - int width_out = width_in + pad + pad; - int w = index % width_in; - index /= width_in; - int h = index % height_in; - index /= height_in; - int c = index % channel; - index /= channel; - out[((index * channel + c) * height_out + h + pad) * width_out + pad + w] = - in[((index * channel + c) * height_in + h) * width_in + w]; - } -} - -template -Dtype PaddingLayer::Forward_gpu(const vector*>& bottom, - vector*>* top) { - const Dtype* bottom_data = bottom[0]->gpu_data(); - Dtype* top_data = (*top)[0]->mutable_gpu_data(); - const int count = bottom[0]->count(); - // First, set all data to be zero for the boundary pixels - CUDA_CHECK(cudaMemset(top_data, 0, sizeof(Dtype) * (*top)[0]->count())); - // NOLINT_NEXT_LINE(whitespace/operators) - PaddingForward<<>>( - count, bottom_data, top_data, NUM_, CHANNEL_, HEIGHT_IN_, WIDTH_IN_, - PAD_); - CUDA_POST_KERNEL_CHECK; - return Dtype(0); -} - -template -__global__ void PaddingBackward(const int count, const Dtype* in, Dtype* out, - const int num, const int channel, const int height_in, const int width_in, - const int pad) { - CUDA_KERNEL_LOOP(index, count) { - int height_out = height_in + pad + pad; - int width_out = width_in + pad + pad; - int w = index % width_in; - index /= width_in; - int h = index % height_in; - index /= height_in; - int c = index % channel; - index /= channel; - out[((index * channel + c) * height_in + h) * width_in + w] = - in[((index * channel + c) * height_out + h + pad) * - width_out + pad + w]; - } -} - -template -void PaddingLayer::Backward_gpu(const vector*>& top, - const bool propagate_down, - vector*>* bottom) { - if (propagate_down) { - const Dtype* top_diff = top[0]->gpu_diff(); - Dtype* bottom_diff = (*bottom)[0]->mutable_gpu_diff(); - const int count = (*bottom)[0]->count(); - // NOLINT_NEXT_LINE(whitespace/operators) - PaddingBackward<<>>( - count, top_diff, bottom_diff, NUM_, CHANNEL_, HEIGHT_IN_, WIDTH_IN_, - PAD_); - CUDA_POST_KERNEL_CHECK; - } -} - -INSTANTIATE_CLASS(PaddingLayer); - -} // namespace caffe diff --git a/src/caffe/layers/pooling_layer.cpp b/src/caffe/layers/pooling_layer.cpp index 355ea290604..a186741232f 100644 --- a/src/caffe/layers/pooling_layer.cpp +++ b/src/caffe/layers/pooling_layer.cpp @@ -18,21 +18,22 @@ void PoolingLayer::SetUp(const vector*>& bottom, vector*>* top) { CHECK_EQ(bottom.size(), 1) << "PoolingLayer takes a single blob as input."; CHECK_EQ(top->size(), 1) << "PoolingLayer takes a single blob as output."; - KSIZE_ = this->layer_param_.kernelsize(); - STRIDE_ = this->layer_param_.stride(); - CHANNELS_ = bottom[0]->channels(); - HEIGHT_ = bottom[0]->height(); - WIDTH_ = bottom[0]->width(); - POOLED_HEIGHT_ = static_cast( - ceil(static_cast(HEIGHT_ - KSIZE_) / STRIDE_)) + 1; - POOLED_WIDTH_ = static_cast( - ceil(static_cast(WIDTH_ - KSIZE_) / STRIDE_)) + 1; - (*top)[0]->Reshape(bottom[0]->num(), CHANNELS_, POOLED_HEIGHT_, - POOLED_WIDTH_); + kernel_size_ = this->layer_param_.pooling_param().kernel_size(); + stride_ = this->layer_param_.pooling_param().stride(); + channels_ = bottom[0]->channels(); + height_ = bottom[0]->height(); + width_ = bottom[0]->width(); + pooled_height_ = 
static_cast( + ceil(static_cast(height_ - kernel_size_) / stride_)) + 1; + pooled_width_ = static_cast( + ceil(static_cast(width_ - kernel_size_) / stride_)) + 1; + (*top)[0]->Reshape(bottom[0]->num(), channels_, pooled_height_, + pooled_width_); // If stochastic pooling, we will initialize the random index part. - if (this->layer_param_.pool() == LayerParameter_PoolMethod_STOCHASTIC) { - rand_idx_.Reshape(bottom[0]->num(), CHANNELS_, POOLED_HEIGHT_, - POOLED_WIDTH_); + if (this->layer_param_.pooling_param().pool() == + PoolingParameter_PoolMethod_STOCHASTIC) { + rand_idx_.Reshape(bottom[0]->num(), channels_, pooled_height_, + pooled_width_); } } @@ -46,26 +47,26 @@ Dtype PoolingLayer::Forward_cpu(const vector*>& bottom, // Different pooling methods. We explicitly do the switch outside the for // loop to save time, although this results in more codes. int top_count = (*top)[0]->count(); - switch (this->layer_param_.pool()) { - case LayerParameter_PoolMethod_MAX: + switch (this->layer_param_.pooling_param().pool()) { + case PoolingParameter_PoolMethod_MAX: // Initialize for (int i = 0; i < top_count; ++i) { top_data[i] = -FLT_MAX; } // The main loop for (int n = 0; n < bottom[0]->num(); ++n) { - for (int c = 0; c < CHANNELS_; ++c) { - for (int ph = 0; ph < POOLED_HEIGHT_; ++ph) { - for (int pw = 0; pw < POOLED_WIDTH_; ++pw) { - int hstart = ph * STRIDE_; - int wstart = pw * STRIDE_; - int hend = min(hstart + KSIZE_, HEIGHT_); - int wend = min(wstart + KSIZE_, WIDTH_); + for (int c = 0; c < channels_; ++c) { + for (int ph = 0; ph < pooled_height_; ++ph) { + for (int pw = 0; pw < pooled_width_; ++pw) { + int hstart = ph * stride_; + int wstart = pw * stride_; + int hend = min(hstart + kernel_size_, height_); + int wend = min(wstart + kernel_size_, width_); for (int h = hstart; h < hend; ++h) { for (int w = wstart; w < wend; ++w) { - top_data[ph * POOLED_WIDTH_ + pw] = - max(top_data[ph * POOLED_WIDTH_ + pw], - bottom_data[h * WIDTH_ + w]); + top_data[ph * pooled_width_ + pw] = + max(top_data[ph * pooled_width_ + pw], + bottom_data[h * width_ + w]); } } } @@ -76,26 +77,26 @@ Dtype PoolingLayer::Forward_cpu(const vector*>& bottom, } } break; - case LayerParameter_PoolMethod_AVE: + case PoolingParameter_PoolMethod_AVE: for (int i = 0; i < top_count; ++i) { top_data[i] = 0; } // The main loop for (int n = 0; n < bottom[0]->num(); ++n) { - for (int c = 0; c < CHANNELS_; ++c) { - for (int ph = 0; ph < POOLED_HEIGHT_; ++ph) { - for (int pw = 0; pw < POOLED_WIDTH_; ++pw) { - int hstart = ph * STRIDE_; - int wstart = pw * STRIDE_; - int hend = min(hstart + KSIZE_, HEIGHT_); - int wend = min(wstart + KSIZE_, WIDTH_); + for (int c = 0; c < channels_; ++c) { + for (int ph = 0; ph < pooled_height_; ++ph) { + for (int pw = 0; pw < pooled_width_; ++pw) { + int hstart = ph * stride_; + int wstart = pw * stride_; + int hend = min(hstart + kernel_size_, height_); + int wend = min(wstart + kernel_size_, width_); for (int h = hstart; h < hend; ++h) { for (int w = wstart; w < wend; ++w) { - top_data[ph * POOLED_WIDTH_ + pw] += - bottom_data[h * WIDTH_ + w]; + top_data[ph * pooled_width_ + pw] += + bottom_data[h * width_ + w]; } } - top_data[ph * POOLED_WIDTH_ + pw] /= + top_data[ph * pooled_width_ + pw] /= (hend - hstart) * (wend - wstart); } } @@ -105,7 +106,7 @@ Dtype PoolingLayer::Forward_cpu(const vector*>& bottom, } } break; - case LayerParameter_PoolMethod_STOCHASTIC: + case PoolingParameter_PoolMethod_STOCHASTIC: NOT_IMPLEMENTED; break; default: @@ -127,23 +128,23 @@ void PoolingLayer::Backward_cpu(const 
vector*>& top, // Different pooling methods. We explicitly do the switch outside the for // loop to save time, although this results in more codes. memset(bottom_diff, 0, (*bottom)[0]->count() * sizeof(Dtype)); - switch (this->layer_param_.pool()) { - case LayerParameter_PoolMethod_MAX: + switch (this->layer_param_.pooling_param().pool()) { + case PoolingParameter_PoolMethod_MAX: // The main loop for (int n = 0; n < top[0]->num(); ++n) { - for (int c = 0; c < CHANNELS_; ++c) { - for (int ph = 0; ph < POOLED_HEIGHT_; ++ph) { - for (int pw = 0; pw < POOLED_WIDTH_; ++pw) { - int hstart = ph * STRIDE_; - int wstart = pw * STRIDE_; - int hend = min(hstart + KSIZE_, HEIGHT_); - int wend = min(wstart + KSIZE_, WIDTH_); + for (int c = 0; c < channels_; ++c) { + for (int ph = 0; ph < pooled_height_; ++ph) { + for (int pw = 0; pw < pooled_width_; ++pw) { + int hstart = ph * stride_; + int wstart = pw * stride_; + int hend = min(hstart + kernel_size_, height_); + int wend = min(wstart + kernel_size_, width_); for (int h = hstart; h < hend; ++h) { for (int w = wstart; w < wend; ++w) { - bottom_diff[h * WIDTH_ + w] += - top_diff[ph * POOLED_WIDTH_ + pw] * - (bottom_data[h * WIDTH_ + w] == - top_data[ph * POOLED_WIDTH_ + pw]); + bottom_diff[h * width_ + w] += + top_diff[ph * pooled_width_ + pw] * + (bottom_data[h * width_ + w] == + top_data[ph * pooled_width_ + pw]); } } } @@ -156,21 +157,21 @@ void PoolingLayer::Backward_cpu(const vector*>& top, } } break; - case LayerParameter_PoolMethod_AVE: + case PoolingParameter_PoolMethod_AVE: // The main loop for (int n = 0; n < top[0]->num(); ++n) { - for (int c = 0; c < CHANNELS_; ++c) { - for (int ph = 0; ph < POOLED_HEIGHT_; ++ph) { - for (int pw = 0; pw < POOLED_WIDTH_; ++pw) { - int hstart = ph * STRIDE_; - int wstart = pw * STRIDE_; - int hend = min(hstart + KSIZE_, HEIGHT_); - int wend = min(wstart + KSIZE_, WIDTH_); + for (int c = 0; c < channels_; ++c) { + for (int ph = 0; ph < pooled_height_; ++ph) { + for (int pw = 0; pw < pooled_width_; ++pw) { + int hstart = ph * stride_; + int wstart = pw * stride_; + int hend = min(hstart + kernel_size_, height_); + int wend = min(wstart + kernel_size_, width_); int poolsize = (hend - hstart) * (wend - wstart); for (int h = hstart; h < hend; ++h) { for (int w = wstart; w < wend; ++w) { - bottom_diff[h * WIDTH_ + w] += - top_diff[ph * POOLED_WIDTH_ + pw] / poolsize; + bottom_diff[h * width_ + w] += + top_diff[ph * pooled_width_ + pw] / poolsize; } } } @@ -183,7 +184,7 @@ void PoolingLayer::Backward_cpu(const vector*>& top, } } break; - case LayerParameter_PoolMethod_STOCHASTIC: + case PoolingParameter_PoolMethod_STOCHASTIC: NOT_IMPLEMENTED; break; default: diff --git a/src/caffe/layers/pooling_layer.cu b/src/caffe/layers/pooling_layer.cu index e8dc2a28349..7adf348be34 100644 --- a/src/caffe/layers/pooling_layer.cu +++ b/src/caffe/layers/pooling_layer.cu @@ -16,17 +16,17 @@ namespace caffe { template __global__ void MaxPoolForward(const int nthreads, const Dtype* bottom_data, const int num, const int channels, const int height, - const int width, const int pooled_height, const int pooled_width, - const int ksize, const int stride, Dtype* top_data) { + const int width, const int pooled_height_, const int pooled_width, + const int kernel_size, const int stride, Dtype* top_data) { CUDA_KERNEL_LOOP(index, nthreads) { int pw = index % pooled_width; - int ph = (index / pooled_width) % pooled_height; - int c = (index / pooled_width / pooled_height) % channels; - int n = index / pooled_width / pooled_height / channels; + 
int ph = (index / pooled_width) % pooled_height_; + int c = (index / pooled_width / pooled_height_) % channels; + int n = index / pooled_width / pooled_height_ / channels; int hstart = ph * stride; - int hend = min(hstart + ksize, height); + int hend = min(hstart + kernel_size, height); int wstart = pw * stride; - int wend = min(wstart + ksize, width); + int wend = min(wstart + kernel_size, width); Dtype maxval = -FLT_MAX; bottom_data += (n * channels + c) * height * width; for (int h = hstart; h < hend; ++h) { @@ -41,17 +41,17 @@ __global__ void MaxPoolForward(const int nthreads, const Dtype* bottom_data, template __global__ void AvePoolForward(const int nthreads, const Dtype* bottom_data, const int num, const int channels, const int height, - const int width, const int pooled_height, const int pooled_width, - const int ksize, const int stride, Dtype* top_data) { + const int width, const int pooled_height_, const int pooled_width, + const int kernel_size, const int stride, Dtype* top_data) { CUDA_KERNEL_LOOP(index, nthreads) { int pw = index % pooled_width; - int ph = (index / pooled_width) % pooled_height; - int c = (index / pooled_width / pooled_height) % channels; - int n = index / pooled_width / pooled_height / channels; + int ph = (index / pooled_width) % pooled_height_; + int c = (index / pooled_width / pooled_height_) % channels; + int n = index / pooled_width / pooled_height_ / channels; int hstart = ph * stride; - int hend = min(hstart + ksize, height); + int hend = min(hstart + kernel_size, height); int wstart = pw * stride; - int wend = min(wstart + ksize, width); + int wend = min(wstart + kernel_size, width); Dtype aveval = 0; bottom_data += (n * channels + c) * height * width; for (int h = hstart; h < hend; ++h) { @@ -67,17 +67,17 @@ template __global__ void StoPoolForwardTrain(const int nthreads, const Dtype* bottom_data, const int num, const int channels, const int height, - const int width, const int pooled_height, const int pooled_width, - const int ksize, const int stride, float* rand_idx, Dtype* top_data) { + const int width, const int pooled_height_, const int pooled_width, + const int kernel_size, const int stride, float* rand_idx, Dtype* top_data) { CUDA_KERNEL_LOOP(index, nthreads) { int pw = index % pooled_width; - int ph = (index / pooled_width) % pooled_height; - int c = (index / pooled_width / pooled_height) % channels; - int n = index / pooled_width / pooled_height / channels; + int ph = (index / pooled_width) % pooled_height_; + int c = (index / pooled_width / pooled_height_) % channels; + int n = index / pooled_width / pooled_height_ / channels; int hstart = ph * stride; - int hend = min(hstart + ksize, height); + int hend = min(hstart + kernel_size, height); int wstart = pw * stride; - int wend = min(wstart + ksize, width); + int wend = min(wstart + kernel_size, width); Dtype cumsum = 0.; bottom_data += (n * channels + c) * height * width; // First pass: get sum @@ -107,17 +107,17 @@ template __global__ void StoPoolForwardTest(const int nthreads, const Dtype* bottom_data, const int num, const int channels, const int height, - const int width, const int pooled_height, const int pooled_width, - const int ksize, const int stride, Dtype* top_data) { + const int width, const int pooled_height_, const int pooled_width, + const int kernel_size, const int stride, Dtype* top_data) { CUDA_KERNEL_LOOP(index, nthreads) { int pw = index % pooled_width; - int ph = (index / pooled_width) % pooled_height; - int c = (index / pooled_width / pooled_height) % channels; - int n 
= index / pooled_width / pooled_height / channels; + int ph = (index / pooled_width) % pooled_height_; + int c = (index / pooled_width / pooled_height_) % channels; + int n = index / pooled_width / pooled_height_ / channels; int hstart = ph * stride; - int hend = min(hstart + ksize, height); + int hend = min(hstart + kernel_size, height); int wstart = pw * stride; - int wend = min(wstart + ksize, width); + int wend = min(wstart + kernel_size, width); // We set cumsum to be 0 to avoid divide-by-zero problems Dtype cumsum = FLT_MIN; Dtype cumvalues = 0.; @@ -140,22 +140,22 @@ Dtype PoolingLayer::Forward_gpu(const vector*>& bottom, const Dtype* bottom_data = bottom[0]->gpu_data(); Dtype* top_data = (*top)[0]->mutable_gpu_data(); int count = (*top)[0]->count(); - switch (this->layer_param_.pool()) { - case LayerParameter_PoolMethod_MAX: + switch (this->layer_param_.pooling_param().pool()) { + case PoolingParameter_PoolMethod_MAX: // NOLINT_NEXT_LINE(whitespace/operators) MaxPoolForward<<>>( - count, bottom_data, bottom[0]->num(), CHANNELS_, - HEIGHT_, WIDTH_, POOLED_HEIGHT_, POOLED_WIDTH_, KSIZE_, STRIDE_, + count, bottom_data, bottom[0]->num(), channels_, + height_, width_, pooled_height_, pooled_width_, kernel_size_, stride_, top_data); break; - case LayerParameter_PoolMethod_AVE: + case PoolingParameter_PoolMethod_AVE: // NOLINT_NEXT_LINE(whitespace/operators) AvePoolForward<<>>( - count, bottom_data, bottom[0]->num(), CHANNELS_, - HEIGHT_, WIDTH_, POOLED_HEIGHT_, POOLED_WIDTH_, KSIZE_, STRIDE_, + count, bottom_data, bottom[0]->num(), channels_, + height_, width_, pooled_height_, pooled_width_, kernel_size_, stride_, top_data); break; - case LayerParameter_PoolMethod_STOCHASTIC: + case PoolingParameter_PoolMethod_STOCHASTIC: if (Caffe::phase() == Caffe::TRAIN) { // We need to create the random index as well. CURAND_CHECK(curandGenerateUniform(Caffe::curand_generator(), @@ -163,15 +163,15 @@ Dtype PoolingLayer::Forward_gpu(const vector*>& bottom, // NOLINT_NEXT_LINE(whitespace/operators) StoPoolForwardTrain<<>>( - count, bottom_data, bottom[0]->num(), CHANNELS_, - HEIGHT_, WIDTH_, POOLED_HEIGHT_, POOLED_WIDTH_, KSIZE_, STRIDE_, + count, bottom_data, bottom[0]->num(), channels_, + height_, width_, pooled_height_, pooled_width_, kernel_size_, stride_, rand_idx_.mutable_gpu_data(), top_data); } else { // NOLINT_NEXT_LINE(whitespace/operators) StoPoolForwardTest<<>>( - count, bottom_data, bottom[0]->num(), CHANNELS_, - HEIGHT_, WIDTH_, POOLED_HEIGHT_, POOLED_WIDTH_, KSIZE_, STRIDE_, + count, bottom_data, bottom[0]->num(), channels_, + height_, width_, pooled_height_, pooled_width_, kernel_size_, stride_, top_data); } break; @@ -186,8 +186,8 @@ template __global__ void MaxPoolBackward(const int nthreads, const Dtype* bottom_data, const Dtype* top_data, const Dtype* top_diff, const int num, const int channels, const int height, - const int width, const int pooled_height, const int pooled_width, - const int ksize, const int stride, Dtype* bottom_diff) { + const int width, const int pooled_height_, const int pooled_width, + const int kernel_size, const int stride, Dtype* bottom_diff) { CUDA_KERNEL_LOOP(index, nthreads) { // find out the local index // find out the local offset @@ -195,15 +195,15 @@ __global__ void MaxPoolBackward(const int nthreads, const Dtype* bottom_data, int h = (index / width) % height; int c = (index / width / height) % channels; int n = index / width / height / channels; - int phstart = (h < ksize) ? 
0 : (h - ksize) / stride + 1; - int phend = min(h / stride + 1, pooled_height); - int pwstart = (w < ksize) ? 0 : (w - ksize) / stride + 1; + int phstart = (h < kernel_size) ? 0 : (h - kernel_size) / stride + 1; + int phend = min(h / stride + 1, pooled_height_); + int pwstart = (w < kernel_size) ? 0 : (w - kernel_size) / stride + 1; int pwend = min(w / stride + 1, pooled_width); Dtype gradient = 0; Dtype bottom_datum = bottom_data[((n * channels + c) * height + h) * width + w]; - top_data += (n * channels + c) * pooled_height * pooled_width; - top_diff += (n * channels + c) * pooled_height * pooled_width; + top_data += (n * channels + c) * pooled_height_ * pooled_width; + top_diff += (n * channels + c) * pooled_height_ * pooled_width; for (int ph = phstart; ph < phend; ++ph) { for (int pw = pwstart; pw < pwend; ++pw) { gradient += top_diff[ph * pooled_width + pw] * @@ -218,8 +218,8 @@ __global__ void MaxPoolBackward(const int nthreads, const Dtype* bottom_data, template __global__ void AvePoolBackward(const int nthreads, const Dtype* top_diff, const int num, const int channels, const int height, - const int width, const int pooled_height, const int pooled_width, - const int ksize, const int stride, Dtype* bottom_diff) { + const int width, const int pooled_height_, const int pooled_width, + const int kernel_size, const int stride, Dtype* bottom_diff) { CUDA_KERNEL_LOOP(index, nthreads) { // find out the local index // find out the local offset @@ -227,17 +227,17 @@ __global__ void AvePoolBackward(const int nthreads, const Dtype* top_diff, int h = (index / width) % height; int c = (index / width / height) % channels; int n = index / width / height / channels; - int phstart = (h < ksize) ? 0 : (h - ksize) / stride + 1; - int phend = min(h / stride + 1, pooled_height); - int pwstart = (w < ksize) ? 0 : (w - ksize) / stride + 1; + int phstart = (h < kernel_size) ? 0 : (h - kernel_size) / stride + 1; + int phend = min(h / stride + 1, pooled_height_); + int pwstart = (w < kernel_size) ? 0 : (w - kernel_size) / stride + 1; int pwend = min(w / stride + 1, pooled_width); Dtype gradient = 0; - top_diff += (n * channels + c) * pooled_height * pooled_width; + top_diff += (n * channels + c) * pooled_height_ * pooled_width; for (int ph = phstart; ph < phend; ++ph) { for (int pw = pwstart; pw < pwend; ++pw) { // figure out the pooling size - int poolsize = (min(ph * stride + ksize, height) - ph * stride) * - (min(pw * stride + ksize, width) - pw * stride); + int poolsize = (min(ph * stride + kernel_size, height) - ph * stride) * + (min(pw * stride + kernel_size, width) - pw * stride); gradient += top_diff[ph * pooled_width + pw] / poolsize; } } @@ -250,8 +250,8 @@ template __global__ void StoPoolBackward(const int nthreads, const float* rand_idx, const Dtype* top_diff, const int num, const int channels, const int height, - const int width, const int pooled_height, const int pooled_width, - const int ksize, const int stride, Dtype* bottom_diff) { + const int width, const int pooled_height_, const int pooled_width, + const int kernel_size, const int stride, Dtype* bottom_diff) { CUDA_KERNEL_LOOP(index, nthreads) { // find out the local index // find out the local offset @@ -259,13 +259,13 @@ __global__ void StoPoolBackward(const int nthreads, int h = (index / width) % height; int c = (index / width / height) % channels; int n = index / width / height / channels; - int phstart = (h < ksize) ? 0 : (h - ksize) / stride + 1; - int phend = min(h / stride + 1, pooled_height); - int pwstart = (w < ksize) ? 
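The backward kernels invert that mapping: for a bottom pixel (h, w) they visit only the pooled windows that could have included it. A small brute-force check (hypothetical sizes, not from the patch) that the [phstart, phend) range agrees with the forward definition:

    #include <algorithm>
    #include <cassert>
    #include <cmath>

    // Check that [phstart, phend) is exactly the set of pooled rows whose
    // window ph*stride <= h < ph*stride + kernel_size covers row h.
    int main() {
      const int height = 7, kernel_size = 3, stride = 2;  // assumed sizes
      const int pooled_height = static_cast<int>(
          std::ceil(static_cast<float>(height - kernel_size) / stride)) + 1;
      for (int h = 0; h < height; ++h) {
        int phstart = (h < kernel_size) ? 0 : (h - kernel_size) / stride + 1;
        int phend = std::min(h / stride + 1, pooled_height);
        for (int ph = 0; ph < pooled_height; ++ph) {
          bool covers = (ph * stride <= h) && (h < ph * stride + kernel_size);
          bool in_range = (ph >= phstart) && (ph < phend);
          assert(covers == in_range);
        }
      }
      return 0;
    }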
0 : (w - ksize) / stride + 1; + int phstart = (h < kernel_size) ? 0 : (h - kernel_size) / stride + 1; + int phend = min(h / stride + 1, pooled_height_); + int pwstart = (w < kernel_size) ? 0 : (w - kernel_size) / stride + 1; int pwend = min(w / stride + 1, pooled_width); Dtype gradient = 0; - rand_idx += (n * channels + c) * pooled_height * pooled_width; - top_diff += (n * channels + c) * pooled_height * pooled_width; + rand_idx += (n * channels + c) * pooled_height_ * pooled_width; + top_diff += (n * channels + c) * pooled_height_ * pooled_width; for (int ph = phstart; ph < phend; ++ph) { for (int pw = pwstart; pw < pwend; ++pw) { gradient += top_diff[ph * pooled_width + pw] * @@ -286,27 +286,27 @@ void PoolingLayer::Backward_gpu(const vector*>& top, const Dtype* top_diff = top[0]->gpu_diff(); Dtype* bottom_diff = (*bottom)[0]->mutable_gpu_diff(); int count = (*bottom)[0]->count(); - switch (this->layer_param_.pool()) { - case LayerParameter_PoolMethod_MAX: + switch (this->layer_param_.pooling_param().pool()) { + case PoolingParameter_PoolMethod_MAX: // NOLINT_NEXT_LINE(whitespace/operators) MaxPoolBackward<<>>( count, (*bottom)[0]->gpu_data(), top[0]->gpu_data(), top_diff, - top[0]->num(), CHANNELS_, HEIGHT_, WIDTH_, POOLED_HEIGHT_, - POOLED_WIDTH_, KSIZE_, STRIDE_, bottom_diff); + top[0]->num(), channels_, height_, width_, pooled_height_, + pooled_width_, kernel_size_, stride_, bottom_diff); break; - case LayerParameter_PoolMethod_AVE: + case PoolingParameter_PoolMethod_AVE: // NOLINT_NEXT_LINE(whitespace/operators) AvePoolBackward<<>>( - count, top_diff, top[0]->num(), CHANNELS_, - HEIGHT_, WIDTH_, POOLED_HEIGHT_, POOLED_WIDTH_, KSIZE_, STRIDE_, + count, top_diff, top[0]->num(), channels_, + height_, width_, pooled_height_, pooled_width_, kernel_size_, stride_, bottom_diff); break; - case LayerParameter_PoolMethod_STOCHASTIC: + case PoolingParameter_PoolMethod_STOCHASTIC: // NOLINT_NEXT_LINE(whitespace/operators) StoPoolBackward<<>>( count, rand_idx_.gpu_data(), top_diff, - top[0]->num(), CHANNELS_, HEIGHT_, WIDTH_, POOLED_HEIGHT_, - POOLED_WIDTH_, KSIZE_, STRIDE_, bottom_diff); + top[0]->num(), channels_, height_, width_, pooled_height_, + pooled_width_, kernel_size_, stride_, bottom_diff); break; default: LOG(FATAL) << "Unknown pooling method."; diff --git a/src/caffe/layers/window_data_layer.cpp b/src/caffe/layers/window_data_layer.cpp index bf62bcb49c2..838f150ee40 100644 --- a/src/caffe/layers/window_data_layer.cpp +++ b/src/caffe/layers/window_data_layer.cpp @@ -24,9 +24,9 @@ using std::string; using std::map; using std::pair; -// caffe.proto > LayerParameter +// caffe.proto > LayerParameter > WindowDataParameter // 'source' field specifies the window_file -// 'cropsize' indicates the desired warped size +// 'crop_size' indicates the desired warped size namespace caffe { @@ -40,29 +40,30 @@ void* WindowDataLayerPrefetch(void* layer_pointer) { Dtype* top_data = layer->prefetch_data_->mutable_cpu_data(); Dtype* top_label = layer->prefetch_label_->mutable_cpu_data(); - const Dtype scale = layer->layer_param_.scale(); - const int batchsize = layer->layer_param_.batchsize(); - const int cropsize = layer->layer_param_.cropsize(); - const int context_pad = layer->layer_param_.det_context_pad(); - const bool mirror = layer->layer_param_.mirror(); - const float fg_fraction = layer->layer_param_.det_fg_fraction(); + const Dtype scale = layer->layer_param_.window_data_param().scale(); + const int batch_size = layer->layer_param_.window_data_param().batch_size(); + const int crop_size = 
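In the prefetch thread, the batch is filled from the background window set first and then the foreground set; the split is driven by fg_fraction. A hypothetical numeric illustration (batch size assumed, fraction is the proto default):

    #include <cstdio>

    // Sketch of the bg/fg split used when sampling a window batch.
    int main() {
      const int batch_size = 128;       // assumed for illustration
      const float fg_fraction = 0.25f;  // WindowDataParameter default
      const int num_fg =
          static_cast<int>(static_cast<float>(batch_size) * fg_fraction);
      const int num_samples[2] = { batch_size - num_fg, num_fg };  // {bg, fg}
      std::printf("background windows: %d, foreground windows: %d\n",
                  num_samples[0], num_samples[1]);
      return 0;
    }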
layer->layer_param_.window_data_param().crop_size(); + const int context_pad = layer->layer_param_.window_data_param().context_pad(); + const bool mirror = layer->layer_param_.window_data_param().mirror(); + const float fg_fraction = + layer->layer_param_.window_data_param().fg_fraction(); const Dtype* mean = layer->data_mean_.cpu_data(); - const int mean_off = (layer->data_mean_.width() - cropsize) / 2; + const int mean_off = (layer->data_mean_.width() - crop_size) / 2; const int mean_width = layer->data_mean_.width(); const int mean_height = layer->data_mean_.height(); - cv::Size cv_crop_size(cropsize, cropsize); - const string& crop_mode = layer->layer_param_.det_crop_mode(); + cv::Size cv_crop_size(crop_size, crop_size); + const string& crop_mode = layer->layer_param_.window_data_param().crop_mode(); bool use_square = (crop_mode == "square") ? true : false; // zero out batch memset(top_data, 0, sizeof(Dtype)*layer->prefetch_data_->count()); - const int num_fg = static_cast(static_cast(batchsize) + const int num_fg = static_cast(static_cast(batch_size) * fg_fraction); - const int num_samples[2] = { batchsize - num_fg, num_fg }; + const int num_samples[2] = { batch_size - num_fg, num_fg }; - int itemid = 0; + int item_id = 0; // sample from bg set then fg set for (int is_fg = 0; is_fg < 2; ++is_fg) { for (int dummy = 0; dummy < num_samples[is_fg]; ++dummy) { @@ -100,10 +101,10 @@ void* WindowDataLayerPrefetch(void* layer_pointer) { int pad_h = 0; if (context_pad > 0 || use_square) { // scale factor by which to expand the original region - // such that after warping the expanded region to cropsize x cropsize + // such that after warping the expanded region to crop_size x crop_size // there's exactly context_pad amount of padding on each side - Dtype context_scale = static_cast(cropsize) / - static_cast(cropsize - 2*context_pad); + Dtype context_scale = static_cast(crop_size) / + static_cast(crop_size - 2*context_pad); // compute the expanded region Dtype half_height = static_cast(y2-y1+1)/2.0; @@ -147,9 +148,9 @@ void* WindowDataLayerPrefetch(void* layer_pointer) { // scale factors that would be used to warp the unclipped // expanded region Dtype scale_x = - static_cast(cropsize)/static_cast(unclipped_width); + static_cast(crop_size)/static_cast(unclipped_width); Dtype scale_y = - static_cast(cropsize)/static_cast(unclipped_height); + static_cast(crop_size)/static_cast(unclipped_height); // size to warp the clipped expanded region to cv_crop_size.width = @@ -169,13 +170,13 @@ void* WindowDataLayerPrefetch(void* layer_pointer) { pad_w = pad_x1; } - // ensure that the warped, clipped region plus the padding - // fits in the cropsize x cropsize image (it might not due to rounding) - if (pad_h + cv_crop_size.height > cropsize) { - cv_crop_size.height = cropsize - pad_h; + // ensure that the warped, clipped region plus the padding fits in the + // crop_size x crop_size image (it might not due to rounding) + if (pad_h + cv_crop_size.height > crop_size) { + cv_crop_size.height = crop_size - pad_h; } - if (pad_w + cv_crop_size.width > cropsize) { - cv_crop_size.width = cropsize - pad_w; + if (pad_w + cv_crop_size.width > crop_size) { + cv_crop_size.width = crop_size - pad_w; } } @@ -196,8 +197,8 @@ void* WindowDataLayerPrefetch(void* layer_pointer) { Dtype pixel = static_cast(cv_cropped_img.at(h, w)[c]); - top_data[((itemid * channels + c) * cropsize + h + pad_h) - * cropsize + w + pad_w] + top_data[((item_id * channels + c) * crop_size + h + pad_h) + * crop_size + w + pad_w] = (pixel - mean[(c * 
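The context_scale factor expands the window so that, after warping to crop_size x crop_size, exactly context_pad pixels of context remain on each side. A quick numeric check (the crop size and padding are assumed values, not taken from the patch):

    #include <cstdio>

    // context_scale = crop_size / (crop_size - 2 * context_pad); after the
    // warp, (1 - 1/context_scale) / 2 of each dimension is context padding.
    int main() {
      const int crop_size = 227;   // assumed warp size
      const int context_pad = 16;  // assumed padding per side
      const double context_scale =
          static_cast<double>(crop_size) / (crop_size - 2 * context_pad);
      const double pad_pixels = (1.0 - 1.0 / context_scale) / 2.0 * crop_size;
      std::printf("context_scale = %.4f, padding per side = %.1f px\n",
                  context_scale, pad_pixels);  // prints 1.1641 and 16.0
      return 0;
    }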
mean_height + h + mean_off + pad_h) * mean_width + w + mean_off + pad_w]) @@ -207,7 +208,7 @@ void* WindowDataLayerPrefetch(void* layer_pointer) { } // get window label - top_label[itemid] = window[WindowDataLayer::LABEL]; + top_label[item_id] = window[WindowDataLayer::LABEL]; #if 0 // useful debugging code for dumping transformed windows to disk @@ -224,18 +225,18 @@ void* WindowDataLayerPrefetch(void* layer_pointer) { << window[WindowDataLayer::X2]+1 << std::endl << window[WindowDataLayer::Y2]+1 << std::endl << do_mirror << std::endl - << top_label[itemid] << std::endl + << top_label[item_id] << std::endl << is_fg << std::endl; inf.close(); std::ofstream top_data_file((string("dump/") + file_id + string("_data.txt")).c_str(), std::ofstream::out | std::ofstream::binary); for (int c = 0; c < channels; ++c) { - for (int h = 0; h < cropsize; ++h) { - for (int w = 0; w < cropsize; ++w) { + for (int h = 0; h < crop_size; ++h) { + for (int w = 0; w < crop_size; ++w) { top_data_file.write(reinterpret_cast( - &top_data[((itemid * channels + c) * cropsize + h) - * cropsize + w]), + &top_data[((item_id * channels + c) * crop_size + h) + * crop_size + w]), sizeof(Dtype)); } } @@ -243,7 +244,7 @@ void* WindowDataLayerPrefetch(void* layer_pointer) { top_data_file.close(); #endif - itemid++; + item_id++; } } @@ -278,15 +279,15 @@ void WindowDataLayer::SetUp(const vector*>& bottom, LOG(INFO) << "Window data layer:" << std::endl << " foreground (object) overlap threshold: " - << this->layer_param_.det_fg_threshold() << std::endl + << this->layer_param_.window_data_param().fg_threshold() << std::endl << " background (non-object) overlap threshold: " - << this->layer_param_.det_bg_threshold() << std::endl + << this->layer_param_.window_data_param().bg_threshold() << std::endl << " foreground sampling fraction: " - << this->layer_param_.det_fg_fraction(); + << this->layer_param_.window_data_param().fg_fraction(); - std::ifstream infile(this->layer_param_.source().c_str()); + std::ifstream infile(this->layer_param_.window_data_param().source().c_str()); CHECK(infile.good()) << "Failed to open window file " - << this->layer_param_.source() << std::endl; + << this->layer_param_.window_data_param().source() << std::endl; map label_hist; label_hist.insert(std::make_pair(0, 0)); @@ -307,6 +308,10 @@ void WindowDataLayer::SetUp(const vector*>& bottom, // read each box int num_windows; infile >> num_windows; + const float fg_threshold = + this->layer_param_.window_data_param().fg_threshold(); + const float bg_threshold = + this->layer_param_.window_data_param().bg_threshold(); for (int i = 0; i < num_windows; ++i) { int label, x1, y1, x2, y2; float overlap; @@ -322,13 +327,13 @@ void WindowDataLayer::SetUp(const vector*>& bottom, window[WindowDataLayer::Y2] = y2; // add window to foreground list or background list - if (overlap >= this->layer_param_.det_fg_threshold()) { + if (overlap >= fg_threshold) { int label = window[WindowDataLayer::LABEL]; CHECK_GT(label, 0); fg_windows_.push_back(window); label_hist.insert(std::make_pair(label, 0)); label_hist[label]++; - } else if (overlap < this->layer_param_.det_bg_threshold()) { + } else if (overlap < bg_threshold) { // background window, force label and overlap to 0 window[WindowDataLayer::LABEL] = 0; window[WindowDataLayer::OVERLAP] = 0; @@ -356,38 +361,41 @@ void WindowDataLayer::SetUp(const vector*>& bottom, } LOG(INFO) << "Amount of context padding: " - << this->layer_param_.det_context_pad(); + << this->layer_param_.window_data_param().context_pad(); - LOG(INFO) 
<< "Crop mode: " << this->layer_param_.det_crop_mode(); + LOG(INFO) << "Crop mode: " + << this->layer_param_.window_data_param().crop_mode(); // image - int cropsize = this->layer_param_.cropsize(); - CHECK_GT(cropsize, 0); - (*top)[0]->Reshape( - this->layer_param_.batchsize(), channels, cropsize, cropsize); - prefetch_data_.reset(new Blob( - this->layer_param_.batchsize(), channels, cropsize, cropsize)); + int crop_size = this->layer_param_.window_data_param().crop_size(); + CHECK_GT(crop_size, 0); + const int batch_size = this->layer_param_.window_data_param().batch_size(); + (*top)[0]->Reshape(batch_size, channels, crop_size, crop_size); + prefetch_data_.reset( + new Blob(batch_size, channels, crop_size, crop_size)); LOG(INFO) << "output data size: " << (*top)[0]->num() << "," << (*top)[0]->channels() << "," << (*top)[0]->height() << "," << (*top)[0]->width(); // label - (*top)[1]->Reshape(this->layer_param_.batchsize(), 1, 1, 1); + (*top)[1]->Reshape(batch_size, 1, 1, 1); prefetch_label_.reset( - new Blob(this->layer_param_.batchsize(), 1, 1, 1)); + new Blob(batch_size, 1, 1, 1)); // check if we want to have mean - if (this->layer_param_.has_meanfile()) { + if (this->layer_param_.window_data_param().has_mean_file()) { + const string& mean_file = + this->layer_param_.window_data_param().mean_file(); + LOG(INFO) << "Loading mean file from" << mean_file; BlobProto blob_proto; - LOG(INFO) << "Loading mean file from" << this->layer_param_.meanfile(); - ReadProtoFromBinaryFile(this->layer_param_.meanfile().c_str(), &blob_proto); + ReadProtoFromBinaryFileOrDie(mean_file, &blob_proto); data_mean_.FromProto(blob_proto); CHECK_EQ(data_mean_.num(), 1); CHECK_EQ(data_mean_.width(), data_mean_.height()); CHECK_EQ(data_mean_.channels(), channels); } else { // Simply initialize an all-empty mean. - data_mean_.Reshape(1, channels, cropsize, cropsize); + data_mean_.Reshape(1, channels, crop_size, crop_size); } // Now, start the prefetch thread. Before calling prefetch, we make two // cpu_data calls so that the prefetch thread does not accidentally make diff --git a/src/caffe/layers/window_data_layer.cu b/src/caffe/layers/window_data_layer.cu index d46b5e02552..69614625f7d 100644 --- a/src/caffe/layers/window_data_layer.cu +++ b/src/caffe/layers/window_data_layer.cu @@ -16,9 +16,9 @@ using std::string; using std::map; using std::pair; -// caffe.proto > LayerParameter +// caffe.proto > LayerParameter > WindowDataParameter // 'source' field specifies the window_file -// 'cropsize' indicates the desired warped size +// 'crop_size' indicates the desired warped size namespace caffe { diff --git a/src/caffe/net.cpp b/src/caffe/net.cpp index 1ba0e835cfd..a80abc6c029 100644 --- a/src/caffe/net.cpp +++ b/src/caffe/net.cpp @@ -5,11 +5,13 @@ #include #include +#include "caffe/common.hpp" #include "caffe/proto/caffe.pb.h" #include "caffe/layer.hpp" #include "caffe/net.hpp" #include "caffe/util/io.hpp" #include "caffe/util/insert_splits.hpp" +#include "caffe/util/upgrade_proto.hpp" using std::pair; using std::map; @@ -25,7 +27,7 @@ Net::Net(const NetParameter& param) { template Net::Net(const string& param_file) { NetParameter param; - ReadProtoFromTextFile(param_file, ¶m); + ReadNetParamsFromTextFileOrDie(param_file, ¶m); Init(param); } @@ -33,7 +35,7 @@ template void Net::Init(const NetParameter& in_param) { // Create a copy of in_param with splits added where necessary. NetParameter param; - insert_splits(in_param, ¶m); + InsertSplits(in_param, ¶m); // Basically, build all the layers and set up its connections. 
name_ = param.name(); map blob_name_to_idx; @@ -67,15 +69,14 @@ void Net::Init(const NetParameter& in_param) { top_id_vecs_.resize(param.layers_size()); for (int i = 0; i < param.layers_size(); ++i) { bool in_place = false; - const LayerConnection& layer_connection = param.layers(i); - const LayerParameter& layer_param = layer_connection.layer(); + const LayerParameter& layer_param = param.layers(i); layers_.push_back(shared_ptr >(GetLayer(layer_param))); layer_names_.push_back(layer_param.name()); LOG(INFO) << "Creating Layer " << layer_param.name(); bool need_backward = param.force_backward(); // Figure out this layer's input and output - for (int j = 0; j < layer_connection.bottom_size(); ++j) { - const string& blob_name = layer_connection.bottom(j); + for (int j = 0; j < layer_param.bottom_size(); ++j) { + const string& blob_name = layer_param.bottom(j); const int blob_id = blob_name_to_idx[blob_name]; if (available_blobs.find(blob_name) == available_blobs.end()) { LOG(FATAL) << "Unknown blob input " << blob_name << @@ -89,11 +90,11 @@ void Net::Init(const NetParameter& in_param) { need_backward |= blob_need_backward_[blob_id]; available_blobs.erase(blob_name); } - for (int j = 0; j < layer_connection.top_size(); ++j) { - const string& blob_name = layer_connection.top(j); + for (int j = 0; j < layer_param.top_size(); ++j) { + const string& blob_name = layer_param.top(j); // Check if we are doing in-place computation - if (layer_connection.bottom_size() > j && - blob_name == layer_connection.bottom(j)) { + if (layer_param.bottom_size() > j && + blob_name == layer_param.bottom(j)) { // In-place computation LOG(INFO) << layer_param.name() << " -> " << blob_name << " (in-place)"; in_place = true; @@ -272,7 +273,7 @@ template void Net::CopyTrainedLayersFrom(const NetParameter& param) { int num_source_layers = param.layers_size(); for (int i = 0; i < num_source_layers; ++i) { - const LayerParameter& source_layer = param.layers(i).layer(); + const LayerParameter& source_layer = param.layers(i); const string& source_layer_name = source_layer.name(); int target_layer_id = 0; while (target_layer_id != layer_names_.size() && @@ -301,7 +302,7 @@ void Net::CopyTrainedLayersFrom(const NetParameter& param) { template void Net::CopyTrainedLayersFrom(const string trained_filename) { NetParameter param; - ReadProtoFromBinaryFile(trained_filename, ¶m); + ReadNetParamsFromBinaryFileOrDie(trained_filename, ¶m); CopyTrainedLayersFrom(param); } @@ -315,15 +316,14 @@ void Net::ToProto(NetParameter* param, bool write_diff) { } DLOG(INFO) << "Serializing " << layers_.size() << " layers"; for (int i = 0; i < layers_.size(); ++i) { - LayerConnection* layer_connection = param->add_layers(); + LayerParameter* layer_param = param->add_layers(); for (int j = 0; j < bottom_id_vecs_[i].size(); ++j) { - layer_connection->add_bottom(blob_names_[bottom_id_vecs_[i][j]]); + layer_param->add_bottom(blob_names_[bottom_id_vecs_[i][j]]); } for (int j = 0; j < top_id_vecs_[i].size(); ++j) { - layer_connection->add_top(blob_names_[top_id_vecs_[i][j]]); + layer_param->add_top(blob_names_[top_id_vecs_[i][j]]); } - LayerParameter* layer_parameter = layer_connection->mutable_layer(); - layers_[i]->ToProto(layer_parameter, write_diff); + layers_[i]->ToProto(layer_param, write_diff); } } diff --git a/src/caffe/proto/caffe.proto b/src/caffe/proto/caffe.proto index 77d6d571151..1a31109f784 100644 --- a/src/caffe/proto/caffe.proto +++ b/src/caffe/proto/caffe.proto @@ -7,8 +7,8 @@ message BlobProto { optional int32 channels = 2 [default = 
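When wiring blobs, a top is treated as an in-place computation whenever the layer lists the same blob name at the matching bottom index, as a ReLU with bottom and top both "ip1" does. A stand-alone sketch of that check, with blob names assumed:

    #include <cstdio>
    #include <string>
    #include <vector>

    // Sketch of the in-place detection rule used while building the net.
    int main() {
      const std::vector<std::string> bottoms(1, "ip1");
      const std::vector<std::string> tops(1, "ip1");
      for (size_t j = 0; j < tops.size(); ++j) {
        const bool in_place = j < bottoms.size() && tops[j] == bottoms[j];
        std::printf("%s -> %s%s\n", bottoms[j].c_str(), tops[j].c_str(),
                    in_place ? " (in-place)" : "");
      }
      return 0;
    }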
0]; optional int32 height = 3 [default = 0]; optional int32 width = 4 [default = 0]; - repeated float data = 5 [packed=true]; - repeated float diff = 6 [packed=true]; + repeated float data = 5 [packed = true]; + repeated float diff = 6 [packed = true]; } // The BlobProtoVector is simply a way to pass multiple blobproto instances @@ -38,7 +38,281 @@ message FillerParameter { optional float std = 6 [default = 1]; // the std value in gaussian filler } +message NetParameter { + optional string name = 1; // consider giving the network a name + repeated LayerParameter layers = 2; // a bunch of layers. + // The input blobs to the network. + repeated string input = 3; + // The dim of the input blobs. For each input blob there should be four + // values specifying the num, channels, height and width of the input blob. + // Thus, there should be a total of (4 * #input) numbers. + repeated int32 input_dim = 4; + // Whether the network will force every layer to carry out backward operation. + // If set False, then whether to carry out backward is determined + // automatically according to the net structure and learning rates. + optional bool force_backward = 5 [default = false]; +} + +message SolverParameter { + optional string train_net = 1; // The proto file for the training net. + optional string test_net = 2; // The proto file for the testing net. + // The number of iterations for each testing phase. + optional int32 test_iter = 3 [default = 0]; + // The number of iterations between two testing phases. + optional int32 test_interval = 4 [default = 0]; + optional float base_lr = 5; // The base learning rate + // the number of iterations between displaying info. If display = 0, no info + // will be displayed. + optional int32 display = 6; + optional int32 max_iter = 7; // the maximum number of iterations + optional string lr_policy = 8; // The learning rate decay policy. + optional float gamma = 9; // The parameter to compute the learning rate. + optional float power = 10; // The parameter to compute the learning rate. + optional float momentum = 11; // The momentum value. + optional float weight_decay = 12; // The weight decay. + optional int32 stepsize = 13; // the stepsize for learning rate policy "step" + optional int32 snapshot = 14 [default = 0]; // The snapshot interval + optional string snapshot_prefix = 15; // The prefix for the snapshot. + // whether to snapshot diff in the results or not. Snapshotting diff will help + // debugging but the final protocol buffer size will be much larger. + optional bool snapshot_diff = 16 [default = false]; + // the mode solver will use: 0 for CPU and 1 for GPU. Use GPU in default. + optional int32 solver_mode = 17 [default = 1]; + // the device_id will that be used in GPU mode. Use device_id = 0 in default. + optional int32 device_id = 18 [default = 0]; +} + +// A message that stores the solver snapshots +message SolverState { + optional int32 iter = 1; // The current iteration + optional string learned_net = 2; // The file that stores the learned net. + repeated BlobProto history = 3; // The history for sgd solvers +} + message LayerParameter { + repeated string bottom = 2; // the name of the bottom blobs + repeated string top = 3; // the name of the top blobs + optional string name = 4; // the layer name + + // Add new LayerTypes to the enum below in lexicographical order (other than + // starting with NONE), starting with the next available ID in the comment + // line above the enum. Update the next available ID when you add a new + // LayerType. 
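For completeness, a minimal C++ sketch of filling in the SolverParameter message defined above through the generated proto accessors; the concrete values are illustrative assumptions, not defaults.

    #include "caffe/proto/caffe.pb.h"

    // Sketch only: constructs a solver configuration programmatically;
    // field values are assumptions for illustration.
    caffe::SolverParameter MakeSolver() {
      caffe::SolverParameter solver;
      solver.set_train_net("lenet_train.prototxt");  // path assumed
      solver.set_base_lr(0.01f);
      solver.set_lr_policy("step");
      solver.set_gamma(0.1f);
      solver.set_stepsize(10000);
      solver.set_max_iter(45000);
      solver.set_snapshot(5000);
      solver.set_snapshot_prefix("lenet");
      solver.set_solver_mode(1);  // 1 = GPU, 0 = CPU
      return solver;
    }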
+ // + // LayerType next available ID: 25 + enum LayerType { + // "NONE" layer type is 0th enum element so that we don't cause confusion + // by defaulting to an existent LayerType (instead, should usually error if + // the type is unspecified). + NONE = 0; + ACCURACY = 1; + BNLL = 2; + CONCAT = 3; + CONVOLUTION = 4; + DATA = 5; + DROPOUT = 6; + EUCLIDEAN_LOSS = 7; + FLATTEN = 8; + HDF5_DATA = 9; + HDF5_OUTPUT = 10; + IM2COL = 11; + IMAGE_DATA = 12; + INFOGAIN_LOSS = 13; + INNER_PRODUCT = 14; + LRN = 15; + MULTINOMIAL_LOGISTIC_LOSS = 16; + POOLING = 17; + RELU = 18; + SIGMOID = 19; + SOFTMAX = 20; + SOFTMAX_LOSS = 21; + SPLIT = 22; + TANH = 23; + WINDOW_DATA = 24; + } + optional LayerType type = 5; // the layer type from the enum above + + // The blobs containing the numeric parameters of the layer + repeated BlobProto blobs = 6; + // The ratio that is multiplied on the global learning rate. If you want to + // set the learning ratio for one blob, you need to set it for all blobs. + repeated float blobs_lr = 7; + // The weight decay that is multiplied on the global weight decay. + repeated float weight_decay = 8; + + // Parameters for particular layer types. + optional ConcatParameter concat_param = 9; + optional ConvolutionParameter convolution_param = 10; + optional DataParameter data_param = 11; + optional DropoutParameter dropout_param = 12; + optional HDF5DataParameter hdf5_data_param = 13; + optional HDF5OutputParameter hdf5_output_param = 14; + optional ImageDataParameter image_data_param = 15; + optional InfogainLossParameter infogain_loss_param = 16; + optional InnerProductParameter inner_product_param = 17; + optional LRNParameter lrn_param = 18; + optional PoolingParameter pooling_param = 19; + optional WindowDataParameter window_data_param = 20; + + // DEPRECATED: The layer parameters specified as a V0LayerParameter. + // This should never be used by any code except to upgrade to the new + // LayerParameter specification. + optional V0LayerParameter layer = 1; +} + +// Message that stores parameters used by ConcatLayer +message ConcatParameter { + // Concat Layer needs to specify the dimension along the concat will happen, + // the other dimensions must be the same for all the bottom blobs + // By default it will concatenate blobs along channels dimension + optional uint32 concat_dim = 1 [default = 1]; +} + +// Message that stores parameters used by ConvolutionLayer +message ConvolutionParameter { + optional uint32 num_output = 1; // The number of outputs for the layer + optional bool bias_term = 2 [default = true]; // whether to have bias terms + optional uint32 pad = 3 [default = 0]; // The padding size + optional uint32 kernel_size = 4; // The kernel size + optional uint32 group = 5 [default = 1]; // The group size for group conv + optional uint32 stride = 6 [default = 1]; // The stride + optional FillerParameter weight_filler = 7; // The filler for the weight + optional FillerParameter bias_filler = 8; // The filler for the bias +} + +// Message that stores parameters used by DataLayer +message DataParameter { + // Specify the data source. + optional string source = 1; + // For data pre-processing, we can do simple scaling and subtracting the + // data mean, if provided. Note that the mean subtraction is always carried + // out before scaling. + optional float scale = 2 [default = 1]; + optional string mean_file = 3; + // Specify the batch size. + optional uint32 batch_size = 4; + // Specify if we would like to randomly crop an image. 
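Under the new schema, a layer's type is an enum and its type-specific settings live in a nested message; this mirrors the accessors the updated tests use (mutable_convolution_param() and friends). A minimal sketch, with the layer name and filler choices assumed:

    #include "caffe/proto/caffe.pb.h"

    // Sketch of building one convolution layer definition in code.
    caffe::LayerParameter MakeConvLayer() {
      caffe::LayerParameter layer_param;
      layer_param.set_name("conv1");  // name assumed
      layer_param.set_type(caffe::LayerParameter_LayerType_CONVOLUTION);
      layer_param.add_bottom("data");
      layer_param.add_top("conv1");
      layer_param.add_blobs_lr(1.0f);  // weight learning-rate multiplier
      layer_param.add_blobs_lr(2.0f);  // bias learning-rate multiplier
      caffe::ConvolutionParameter* conv =
          layer_param.mutable_convolution_param();
      conv->set_num_output(20);
      conv->set_kernel_size(5);
      conv->set_stride(1);
      conv->mutable_weight_filler()->set_type("xavier");
      conv->mutable_bias_filler()->set_type("constant");
      return layer_param;
    }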
+ optional uint32 crop_size = 5 [default = 0]; + // Specify if we want to randomly mirror data. + optional bool mirror = 6 [default = false]; + // The rand_skip variable is for the data layer to skip a few data points + // to avoid all asynchronous sgd clients to start at the same point. The skip + // point would be set as rand_skip * rand(0,1). Note that rand_skip should not + // be larger than the number of keys in the leveldb. + optional uint32 rand_skip = 7 [default = 0]; +} + +// Message that stores parameters used by DropoutLayer +message DropoutParameter { + optional float dropout_ratio = 1 [default = 0.5]; // dropout ratio +} + +// Message that stores parameters used by HDF5DataLayer +message HDF5DataParameter { + // Specify the data source. + optional string source = 1; + // Specify the batch size. + optional uint32 batch_size = 2; +} + +// Message that stores parameters used by HDF5OutputLayer +message HDF5OutputParameter { + optional string file_name = 1; +} + +// Message that stores parameters used by ImageDataLayer +message ImageDataParameter { + // Specify the data source. + optional string source = 1; + // For data pre-processing, we can do simple scaling and subtracting the + // data mean, if provided. Note that the mean subtraction is always carried + // out before scaling. + optional float scale = 2 [default = 1]; + optional string mean_file = 3; + // Specify the batch size. + optional uint32 batch_size = 4; + // Specify if we would like to randomly crop an image. + optional uint32 crop_size = 5 [default = 0]; + // Specify if we want to randomly mirror data. + optional bool mirror = 6 [default = false]; + // The rand_skip variable is for the data layer to skip a few data points + // to avoid all asynchronous sgd clients to start at the same point. The skip + // point would be set as rand_skip * rand(0,1). Note that rand_skip should not + // be larger than the number of keys in the leveldb. + optional uint32 rand_skip = 7 [default = 0]; + // Whether or not ImageLayer should shuffle the list of files at every epoch. + optional bool shuffle = 8 [default = false]; + // It will also resize images if new_height or new_width are not zero. + optional uint32 new_height = 9 [default = 0]; + optional uint32 new_width = 10 [default = 0]; +} + +// Message that stores parameters InfogainLossLayer +message InfogainLossParameter { + // Specify the infogain matrix source. 
+ optional string source = 1; +} + +// Message that stores parameters used by InnerProductLayer +message InnerProductParameter { + optional uint32 num_output = 1; // The number of outputs for the layer + optional bool bias_term = 2 [default = true]; // whether to have bias terms + optional FillerParameter weight_filler = 3; // The filler for the weight + optional FillerParameter bias_filler = 4; // The filler for the bias +} + +// Message that stores parameters used by LRNLayer +message LRNParameter { + optional uint32 local_size = 1 [default = 5]; // for local response norm + optional float alpha = 2 [default = 1.]; // for local response norm + optional float beta = 3 [default = 0.75]; // for local response norm +} + +// Message that stores parameters used by PoolingLayer +message PoolingParameter { + enum PoolMethod { + MAX = 0; + AVE = 1; + STOCHASTIC = 2; + } + optional PoolMethod pool = 1 [default = MAX]; // The pooling method + optional uint32 kernel_size = 2; // The kernel size + optional uint32 stride = 3 [default = 1]; // The stride +} + +// Message that stores parameters used by WindowDataLayer +message WindowDataParameter { + // Specify the data source. + optional string source = 1; + // For data pre-processing, we can do simple scaling and subtracting the + // data mean, if provided. Note that the mean subtraction is always carried + // out before scaling. + optional float scale = 2 [default = 1]; + optional string mean_file = 3; + // Specify the batch size. + optional uint32 batch_size = 4; + // Specify if we would like to randomly crop an image. + optional uint32 crop_size = 5 [default = 0]; + // Specify if we want to randomly mirror data. + optional bool mirror = 6 [default = false]; + // Foreground (object) overlap threshold + optional float fg_threshold = 7 [default = 0.5]; + // Background (non-object) overlap threshold + optional float bg_threshold = 8 [default = 0.5]; + // Fraction of batch that should be foreground objects + optional float fg_fraction = 9 [default = 0.25]; + // Amount of contextual padding to add around a window + // (used only by the window_data_layer) + optional uint32 context_pad = 10 [default = 0]; + // Mode for cropping out a detection window + // warp: cropped window is warped to a fixed size and aspect ratio + // square: the tightest square around the window is cropped + optional string crop_mode = 11 [default = "warp"]; +} + +// DEPRECATED: V0LayerParameter is the old way of specifying layer parameters +// in Caffe. We keep this message type around for legacy support. +message V0LayerParameter { optional string name = 1; // the layer name optional string type = 2; // the string to specify the layer type @@ -69,7 +343,7 @@ message LayerParameter { // For data pre-processing, we can do simple scaling and subtracting the // data mean, if provided. Note that the mean subtraction is always carried // out before scaling. - optional float scale = 17 [ default = 1 ]; + optional float scale = 17 [default = 1]; optional string meanfile = 18; // For data layers, specify the batch size. optional uint32 batchsize = 19; @@ -90,7 +364,7 @@ message LayerParameter { // to avoid all asynchronous sgd clients to start at the same point. The skip // point would be set as rand_skip * rand(0,1). Note that rand_skip should not // be larger than the number of keys in the leveldb. 
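Layer code now reads its settings from these nested messages rather than from LayerParameter directly, as the pooling layer's switch over pooling_param().pool() shows earlier in the patch. A minimal helper illustrating the same accessor pattern:

    #include "caffe/proto/caffe.pb.h"

    // Sketch: map the nested pooling method enum to a printable name.
    const char* PoolMethodName(const caffe::LayerParameter& layer_param) {
      switch (layer_param.pooling_param().pool()) {
        case caffe::PoolingParameter_PoolMethod_MAX: return "max";
        case caffe::PoolingParameter_PoolMethod_AVE: return "average";
        case caffe::PoolingParameter_PoolMethod_STOCHASTIC: return "stochastic";
        default: return "unknown";
      }
    }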
- optional uint32 rand_skip = 53 [ default = 0 ]; + optional uint32 rand_skip = 53 [default = 0]; // Fields related to detection (det_*) // foreground (object) overlap threshold @@ -100,7 +374,7 @@ message LayerParameter { // Fraction of batch that should be foreground objects optional float det_fg_fraction = 56 [default = 0.25]; - // optional bool OBSOLETE_can_clobber = 57 [ default = true ]; + // optional bool OBSOLETE_can_clobber = 57 [default = true]; // Amount of contextual padding to add around a window // (used only by the window_data_layer) @@ -128,64 +402,3 @@ message LayerParameter { optional HDF5OutputParameter hdf5_output_param = 1001; } - -message HDF5OutputParameter { - optional string file_name = 1; -} - -message LayerConnection { - optional LayerParameter layer = 1; // the layer parameter - repeated string bottom = 2; // the name of the bottom blobs - repeated string top = 3; // the name of the top blobs -} - -message NetParameter { - optional string name = 1; // consider giving the network a name - repeated LayerConnection layers = 2; // a bunch of layers. - // The input blobs to the network. - repeated string input = 3; - // The dim of the input blobs. For each input blob there should be four - // values specifying the num, channels, height and width of the input blob. - // Thus, there should be a total of (4 * #input) numbers. - repeated int32 input_dim = 4; - // Whether the network will force every layer to carry out backward operation. - // If set False, then whether to carry out backward is determined - // automatically according to the net structure and learning rates. - optional bool force_backward = 5 [ default = false ]; -} - -message SolverParameter { - optional string train_net = 1; // The proto file for the training net. - optional string test_net = 2; // The proto file for the testing net. - // The number of iterations for each testing phase. - optional int32 test_iter = 3 [ default = 0 ]; - // The number of iterations between two testing phases. - optional int32 test_interval = 4 [ default = 0 ]; - optional float base_lr = 5; // The base learning rate - // the number of iterations between displaying info. If display = 0, no info - // will be displayed. - optional int32 display = 6; - optional int32 max_iter = 7; // the maximum number of iterations - optional string lr_policy = 8; // The learning rate decay policy. - optional float gamma = 9; // The parameter to compute the learning rate. - optional float power = 10; // The parameter to compute the learning rate. - optional float momentum = 11; // The momentum value. - optional float weight_decay = 12; // The weight decay. - optional int32 stepsize = 13; // the stepsize for learning rate policy "step" - optional int32 snapshot = 14 [default = 0]; // The snapshot interval - optional string snapshot_prefix = 15; // The prefix for the snapshot. - // whether to snapshot diff in the results or not. Snapshotting diff will help - // debugging but the final protocol buffer size will be much larger. - optional bool snapshot_diff = 16 [ default = false]; - // the mode solver will use: 0 for CPU and 1 for GPU. Use GPU in default. - optional int32 solver_mode = 17 [default = 1]; - // the device_id will that be used in GPU mode. Use device_id=0 in default. - optional int32 device_id = 18 [default = 0]; -} - -// A message that stores the solver snapshots -message SolverState { - optional int32 iter = 1; // The current iteration - optional string learned_net = 2; // The file that stores the learned net. 
- repeated BlobProto history = 3; // The history for sgd solvers -} diff --git a/src/caffe/proto/caffe_pretty_print.proto b/src/caffe/proto/caffe_pretty_print.proto new file mode 100644 index 00000000000..cfdce82c79f --- /dev/null +++ b/src/caffe/proto/caffe_pretty_print.proto @@ -0,0 +1,18 @@ +// Copyright 2014 BVLC and contributors. + +package caffe; + +import "caffe/proto/caffe.proto"; + +// A near-duplicate of NetParameter with fields re-numbered to beautify +// automatic prototext dumps. The main practical purpose is to print inputs +// before layers, because having inputs at the end looks weird. +// NetParameterPrettyPrint should never be used in code except for conversion +// FROM NetParameter and subsequent dumping to proto text file. +message NetParameterPrettyPrint { + optional string name = 1; + optional bool force_backward = 2 [default = false]; + repeated string input = 3; + repeated int32 input_dim = 4; + repeated LayerParameter layers = 5; +} diff --git a/src/caffe/solver.cpp b/src/caffe/solver.cpp index 5fa150cbc07..2954bf96478 100644 --- a/src/caffe/solver.cpp +++ b/src/caffe/solver.cpp @@ -21,15 +21,11 @@ template Solver::Solver(const SolverParameter& param) : param_(param), net_(), test_net_() { // Scaffolding code - NetParameter train_net_param; - ReadProtoFromTextFile(param_.train_net(), &train_net_param); LOG(INFO) << "Creating training net."; - net_.reset(new Net(train_net_param)); + net_.reset(new Net(param_.train_net())); if (param_.has_test_net()) { LOG(INFO) << "Creating testing net."; - NetParameter test_net_param; - ReadProtoFromTextFile(param_.test_net(), &test_net_param); - test_net_.reset(new Net(test_net_param)); + test_net_.reset(new Net(param_.test_net())); CHECK_GT(param_.test_iter(), 0); CHECK_GT(param_.test_interval(), 0); } diff --git a/src/caffe/test/test_concat_layer.cpp b/src/caffe/test/test_concat_layer.cpp index 8ce7ce16fa9..72e3c902cf1 100644 --- a/src/caffe/test/test_concat_layer.cpp +++ b/src/caffe/test/test_concat_layer.cpp @@ -60,7 +60,7 @@ TYPED_TEST_CASE(ConcatLayerTest, Dtypes); TYPED_TEST(ConcatLayerTest, TestSetupNum) { LayerParameter layer_param; - layer_param.set_concat_dim(0); + layer_param.mutable_concat_param()->set_concat_dim(0); ConcatLayer layer(layer_param); layer.SetUp(this->blob_bottom_vec_1, &(this->blob_top_vec_)); EXPECT_EQ(this->blob_top_->num(), diff --git a/src/caffe/test/test_convolution_layer.cpp b/src/caffe/test/test_convolution_layer.cpp index db23680c3df..c8d79083513 100644 --- a/src/caffe/test/test_convolution_layer.cpp +++ b/src/caffe/test/test_convolution_layer.cpp @@ -46,9 +46,11 @@ TYPED_TEST_CASE(ConvolutionLayerTest, Dtypes); TYPED_TEST(ConvolutionLayerTest, TestSetup) { LayerParameter layer_param; - layer_param.set_kernelsize(3); - layer_param.set_stride(2); - layer_param.set_num_output(4); + ConvolutionParameter* convolution_param = + layer_param.mutable_convolution_param(); + convolution_param->set_kernel_size(3); + convolution_param->set_stride(2); + convolution_param->set_num_output(4); shared_ptr > layer( new ConvolutionLayer(layer_param)); layer->SetUp(this->blob_bottom_vec_, &(this->blob_top_vec_)); @@ -57,8 +59,8 @@ TYPED_TEST(ConvolutionLayerTest, TestSetup) { EXPECT_EQ(this->blob_top_->height(), 2); EXPECT_EQ(this->blob_top_->width(), 2); // setting group should not change the shape - layer_param.set_num_output(3); - layer_param.set_group(3); + convolution_param->set_num_output(3); + convolution_param->set_group(3); layer.reset(new ConvolutionLayer(layer_param)); 
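With the new Net constructor that takes a file name, the solver no longer parses the prototxt itself, and callers can do the same. A hedged usage sketch; both file paths are assumptions for illustration:

    #include "caffe/net.hpp"

    // Sketch only: build a net straight from a prototxt path and load
    // previously trained weights; the file names are assumed.
    void LoadNetSketch() {
      caffe::Net<float> net("lenet_train.prototxt");
      net.CopyTrainedLayersFrom("lenet_snapshot.binaryproto");
    }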
layer->SetUp(this->blob_bottom_vec_, &(this->blob_top_vec_)); EXPECT_EQ(this->blob_top_->num(), 2); @@ -74,13 +76,15 @@ TYPED_TEST(ConvolutionLayerTest, TestSimpleConvolution) { ConstantFiller filler(filler_param); filler.Fill(this->blob_bottom_); LayerParameter layer_param; - layer_param.set_kernelsize(3); - layer_param.set_stride(2); - layer_param.set_num_output(4); - layer_param.mutable_weight_filler()->set_type("constant"); - layer_param.mutable_weight_filler()->set_value(1); - layer_param.mutable_bias_filler()->set_type("constant"); - layer_param.mutable_bias_filler()->set_value(0.1); + ConvolutionParameter* convolution_param = + layer_param.mutable_convolution_param(); + convolution_param->set_kernel_size(3); + convolution_param->set_stride(2); + convolution_param->set_num_output(4); + convolution_param->mutable_weight_filler()->set_type("constant"); + convolution_param->mutable_weight_filler()->set_value(1); + convolution_param->mutable_bias_filler()->set_type("constant"); + convolution_param->mutable_bias_filler()->set_value(0.1); shared_ptr > layer( new ConvolutionLayer(layer_param)); layer->SetUp(this->blob_bottom_vec_, &(this->blob_top_vec_)); @@ -120,14 +124,16 @@ TYPED_TEST(ConvolutionLayerTest, TestSimpleConvolutionGroup) { } } LayerParameter layer_param; - layer_param.set_kernelsize(3); - layer_param.set_stride(2); - layer_param.set_num_output(3); - layer_param.set_group(3); - layer_param.mutable_weight_filler()->set_type("constant"); - layer_param.mutable_weight_filler()->set_value(1); - layer_param.mutable_bias_filler()->set_type("constant"); - layer_param.mutable_bias_filler()->set_value(0.1); + ConvolutionParameter* convolution_param = + layer_param.mutable_convolution_param(); + convolution_param->set_kernel_size(3); + convolution_param->set_stride(2); + convolution_param->set_num_output(3); + convolution_param->set_group(3); + convolution_param->mutable_weight_filler()->set_type("constant"); + convolution_param->mutable_weight_filler()->set_value(1); + convolution_param->mutable_bias_filler()->set_type("constant"); + convolution_param->mutable_bias_filler()->set_value(0.1); shared_ptr > layer( new ConvolutionLayer(layer_param)); layer->SetUp(this->blob_bottom_vec_, &(this->blob_top_vec_)); @@ -167,11 +173,13 @@ TYPED_TEST(ConvolutionLayerTest, TestSimpleConvolutionGroup) { TYPED_TEST(ConvolutionLayerTest, TestCPUGradient) { LayerParameter layer_param; - layer_param.set_kernelsize(3); - layer_param.set_stride(2); - layer_param.set_num_output(2); - layer_param.mutable_weight_filler()->set_type("gaussian"); - layer_param.mutable_bias_filler()->set_type("gaussian"); + ConvolutionParameter* convolution_param = + layer_param.mutable_convolution_param(); + convolution_param->set_kernel_size(3); + convolution_param->set_stride(2); + convolution_param->set_num_output(2); + convolution_param->mutable_weight_filler()->set_type("gaussian"); + convolution_param->mutable_bias_filler()->set_type("gaussian"); Caffe::set_mode(Caffe::CPU); ConvolutionLayer layer(layer_param); GradientChecker checker(1e-2, 1e-3); @@ -181,12 +189,14 @@ TYPED_TEST(ConvolutionLayerTest, TestCPUGradient) { TYPED_TEST(ConvolutionLayerTest, TestCPUGradientGroup) { LayerParameter layer_param; - layer_param.set_kernelsize(3); - layer_param.set_stride(2); - layer_param.set_num_output(3); - layer_param.set_group(3); - layer_param.mutable_weight_filler()->set_type("gaussian"); - layer_param.mutable_bias_filler()->set_type("gaussian"); + ConvolutionParameter* convolution_param = + 
layer_param.mutable_convolution_param(); + convolution_param->set_kernel_size(3); + convolution_param->set_stride(2); + convolution_param->set_num_output(3); + convolution_param->set_group(3); + convolution_param->mutable_weight_filler()->set_type("gaussian"); + convolution_param->mutable_bias_filler()->set_type("gaussian"); Caffe::set_mode(Caffe::CPU); ConvolutionLayer layer(layer_param); GradientChecker checker(1e-2, 1e-3); @@ -196,11 +206,13 @@ TYPED_TEST(ConvolutionLayerTest, TestCPUGradientGroup) { TYPED_TEST(ConvolutionLayerTest, TestGPUGradient) { LayerParameter layer_param; - layer_param.set_kernelsize(3); - layer_param.set_stride(2); - layer_param.set_num_output(2); - layer_param.mutable_weight_filler()->set_type("gaussian"); - layer_param.mutable_bias_filler()->set_type("gaussian"); + ConvolutionParameter* convolution_param = + layer_param.mutable_convolution_param(); + convolution_param->set_kernel_size(3); + convolution_param->set_stride(2); + convolution_param->set_num_output(2); + convolution_param->mutable_weight_filler()->set_type("gaussian"); + convolution_param->mutable_bias_filler()->set_type("gaussian"); Caffe::set_mode(Caffe::GPU); ConvolutionLayer layer(layer_param); GradientChecker checker(1e-2, 1e-3); @@ -210,12 +222,14 @@ TYPED_TEST(ConvolutionLayerTest, TestGPUGradient) { TYPED_TEST(ConvolutionLayerTest, TestGPUGradientGroup) { LayerParameter layer_param; - layer_param.set_kernelsize(3); - layer_param.set_stride(2); - layer_param.set_num_output(3); - layer_param.set_group(3); - layer_param.mutable_weight_filler()->set_type("gaussian"); - layer_param.mutable_bias_filler()->set_type("gaussian"); + ConvolutionParameter* convolution_param = + layer_param.mutable_convolution_param(); + convolution_param->set_kernel_size(3); + convolution_param->set_stride(2); + convolution_param->set_num_output(3); + convolution_param->set_group(3); + convolution_param->mutable_weight_filler()->set_type("gaussian"); + convolution_param->mutable_bias_filler()->set_type("gaussian"); Caffe::set_mode(Caffe::GPU); ConvolutionLayer layer(layer_param); GradientChecker checker(1e-2, 1e-3); diff --git a/src/caffe/test/test_data_layer.cpp b/src/caffe/test/test_data_layer.cpp index 032b0eb3b7d..6b0838ab975 100644 --- a/src/caffe/test/test_data_layer.cpp +++ b/src/caffe/test/test_data_layer.cpp @@ -70,8 +70,9 @@ TYPED_TEST_CASE(DataLayerTest, Dtypes); TYPED_TEST(DataLayerTest, TestRead) { LayerParameter param; - param.set_batchsize(5); - param.set_source(this->filename); + DataParameter* data_param = param.mutable_data_param(); + data_param->set_batch_size(5); + data_param->set_source(this->filename); DataLayer layer(param); layer.SetUp(this->blob_bottom_vec_, &this->blob_top_vec_); EXPECT_EQ(this->blob_top_data_->num(), 5); diff --git a/src/caffe/test/test_hdf5data_layer.cpp b/src/caffe/test/test_hdf5data_layer.cpp index b03fe72636c..a0ed113b36e 100644 --- a/src/caffe/test/test_hdf5data_layer.cpp +++ b/src/caffe/test/test_hdf5data_layer.cpp @@ -57,9 +57,10 @@ TYPED_TEST(HDF5DataLayerTest, TestRead) { // The data file we are reading has 10 rows and 8 columns, // with values from 0 to 10*8 reshaped in row-major order. 
LayerParameter param; - int batchsize = 5; - param.set_batchsize(batchsize); - param.set_source(*(this->filename)); + HDF5DataParameter* hdf5_data_param = param.mutable_hdf5_data_param(); + int batch_size = 5; + hdf5_data_param->set_batch_size(batch_size); + hdf5_data_param->set_source(*(this->filename)); int num_rows = 10; int num_cols = 8; int height = 5; @@ -68,12 +69,12 @@ TYPED_TEST(HDF5DataLayerTest, TestRead) { // Test that the layer setup got the correct parameters. HDF5DataLayer layer(param); layer.SetUp(this->blob_bottom_vec_, &this->blob_top_vec_); - EXPECT_EQ(this->blob_top_data_->num(), batchsize); + EXPECT_EQ(this->blob_top_data_->num(), batch_size); EXPECT_EQ(this->blob_top_data_->channels(), num_cols); EXPECT_EQ(this->blob_top_data_->height(), height); EXPECT_EQ(this->blob_top_data_->width(), width); - EXPECT_EQ(this->blob_top_label_->num(), batchsize); + EXPECT_EQ(this->blob_top_label_->num(), batch_size); EXPECT_EQ(this->blob_top_label_->channels(), 1); EXPECT_EQ(this->blob_top_label_->height(), 1); EXPECT_EQ(this->blob_top_label_->width(), 1); @@ -94,20 +95,20 @@ TYPED_TEST(HDF5DataLayerTest, TestRead) { // On even iterations, we're reading the first half of the data. // On odd iterations, we're reading the second half of the data. - int label_offset = (iter % 2 == 0) ? 0 : batchsize; - int data_offset = (iter % 2 == 0) ? 0 : batchsize * data_size; + int label_offset = (iter % 2 == 0) ? 0 : batch_size; + int data_offset = (iter % 2 == 0) ? 0 : batch_size * data_size; // Every two iterations we are reading the second file, // which has the same labels, but data is offset by total data size, // which is 2000 (see generate_sample_data). int file_offset = (iter % 4 < 2) ? 0 : 2000; - for (int i = 0; i < batchsize; ++i) { + for (int i = 0; i < batch_size; ++i) { EXPECT_EQ( label_offset + i, this->blob_top_label_->cpu_data()[i]); } - for (int i = 0; i < batchsize; ++i) { + for (int i = 0; i < batch_size; ++i) { for (int j = 0; j < num_cols; ++j) { for (int h = 0; h < height; ++h) { for (int w = 0; w < width; ++w) { diff --git a/src/caffe/test/test_im2col_layer.cpp b/src/caffe/test/test_im2col_layer.cpp index 31a01157748..7f677ca03d6 100644 --- a/src/caffe/test/test_im2col_layer.cpp +++ b/src/caffe/test/test_im2col_layer.cpp @@ -42,8 +42,10 @@ TYPED_TEST_CASE(Im2colLayerTest, Dtypes); TYPED_TEST(Im2colLayerTest, TestSetup) { LayerParameter layer_param; - layer_param.set_kernelsize(3); - layer_param.set_stride(2); + ConvolutionParameter* convolution_param = + layer_param.mutable_convolution_param(); + convolution_param->set_kernel_size(3); + convolution_param->set_stride(2); Im2colLayer layer(layer_param); layer.SetUp(this->blob_bottom_vec_, &(this->blob_top_vec_)); EXPECT_EQ(this->blob_top_->num(), 2); @@ -54,8 +56,10 @@ TYPED_TEST(Im2colLayerTest, TestSetup) { TYPED_TEST(Im2colLayerTest, TestCPU) { LayerParameter layer_param; - layer_param.set_kernelsize(3); - layer_param.set_stride(2); + ConvolutionParameter* convolution_param = + layer_param.mutable_convolution_param(); + convolution_param->set_kernel_size(3); + convolution_param->set_stride(2); Im2colLayer layer(layer_param); Caffe::set_mode(Caffe::CPU); layer.SetUp(this->blob_bottom_vec_, &(this->blob_top_vec_)); @@ -69,8 +73,10 @@ TYPED_TEST(Im2colLayerTest, TestCPU) { TYPED_TEST(Im2colLayerTest, TestGPU) { LayerParameter layer_param; - layer_param.set_kernelsize(3); - layer_param.set_stride(2); + ConvolutionParameter* convolution_param = + layer_param.mutable_convolution_param(); + convolution_param->set_kernel_size(3); 
+ convolution_param->set_stride(2); Im2colLayer layer(layer_param); Caffe::set_mode(Caffe::GPU); layer.SetUp(this->blob_bottom_vec_, &(this->blob_top_vec_)); @@ -84,8 +90,10 @@ TYPED_TEST(Im2colLayerTest, TestGPU) { TYPED_TEST(Im2colLayerTest, TestCPUGradient) { LayerParameter layer_param; - layer_param.set_kernelsize(3); - layer_param.set_stride(2); + ConvolutionParameter* convolution_param = + layer_param.mutable_convolution_param(); + convolution_param->set_kernel_size(3); + convolution_param->set_stride(2); Caffe::set_mode(Caffe::CPU); Im2colLayer layer(layer_param); GradientChecker checker(1e-2, 1e-2); @@ -95,8 +103,10 @@ TYPED_TEST(Im2colLayerTest, TestCPUGradient) { TYPED_TEST(Im2colLayerTest, TestGPUGradient) { LayerParameter layer_param; - layer_param.set_kernelsize(3); - layer_param.set_stride(2); + ConvolutionParameter* convolution_param = + layer_param.mutable_convolution_param(); + convolution_param->set_kernel_size(3); + convolution_param->set_stride(2); Caffe::set_mode(Caffe::GPU); Im2colLayer layer(layer_param); GradientChecker checker(1e-2, 1e-2); diff --git a/src/caffe/test/test_images_layer.cpp b/src/caffe/test/test_image_data_layer.cpp similarity index 75% rename from src/caffe/test/test_images_layer.cpp rename to src/caffe/test/test_image_data_layer.cpp index e8ed7c18fc9..9a6271cbea8 100644 --- a/src/caffe/test/test_images_layer.cpp +++ b/src/caffe/test/test_image_data_layer.cpp @@ -22,9 +22,9 @@ namespace caffe { extern cudaDeviceProp CAFFE_TEST_CUDA_PROP; template -class ImagesLayerTest : public ::testing::Test { +class ImageDataLayerTest : public ::testing::Test { protected: - ImagesLayerTest() + ImageDataLayerTest() : blob_top_data_(new Blob()), blob_top_label_(new Blob()), filename(NULL) {} @@ -41,7 +41,10 @@ class ImagesLayerTest : public ::testing::Test { outfile.close(); } - virtual ~ImagesLayerTest() { delete blob_top_data_; delete blob_top_label_; } + virtual ~ImageDataLayerTest() { + delete blob_top_data_; + delete blob_top_label_; + } char* filename; Blob* const blob_top_data_; @@ -51,14 +54,15 @@ class ImagesLayerTest : public ::testing::Test { }; typedef ::testing::Types Dtypes; -TYPED_TEST_CASE(ImagesLayerTest, Dtypes); +TYPED_TEST_CASE(ImageDataLayerTest, Dtypes); -TYPED_TEST(ImagesLayerTest, TestRead) { +TYPED_TEST(ImageDataLayerTest, TestRead) { LayerParameter param; - param.set_batchsize(5); - param.set_source(this->filename); - param.set_shuffle_images(false); - ImagesLayer layer(param); + ImageDataParameter* image_data_param = param.mutable_image_data_param(); + image_data_param->set_batch_size(5); + image_data_param->set_source(this->filename); + image_data_param->set_shuffle(false); + ImageDataLayer layer(param); layer.SetUp(this->blob_bottom_vec_, &this->blob_top_vec_); EXPECT_EQ(this->blob_top_data_->num(), 5); EXPECT_EQ(this->blob_top_data_->channels(), 3); @@ -77,14 +81,15 @@ TYPED_TEST(ImagesLayerTest, TestRead) { } } -TYPED_TEST(ImagesLayerTest, TestResize) { +TYPED_TEST(ImageDataLayerTest, TestResize) { LayerParameter param; - param.set_batchsize(5); - param.set_source(this->filename); - param.set_new_height(256); - param.set_new_width(256); - param.set_shuffle_images(false); - ImagesLayer layer(param); + ImageDataParameter* image_data_param = param.mutable_image_data_param(); + image_data_param->set_batch_size(5); + image_data_param->set_source(this->filename); + image_data_param->set_new_height(256); + image_data_param->set_new_width(256); + image_data_param->set_shuffle(false); + ImageDataLayer layer(param); 
layer.SetUp(this->blob_bottom_vec_, &this->blob_top_vec_); EXPECT_EQ(this->blob_top_data_->num(), 5); EXPECT_EQ(this->blob_top_data_->channels(), 3); @@ -103,12 +108,13 @@ TYPED_TEST(ImagesLayerTest, TestResize) { } } -TYPED_TEST(ImagesLayerTest, TestShuffle) { +TYPED_TEST(ImageDataLayerTest, TestShuffle) { LayerParameter param; - param.set_batchsize(5); - param.set_source(this->filename); - param.set_shuffle_images(true); - ImagesLayer layer(param); + ImageDataParameter* image_data_param = param.mutable_image_data_param(); + image_data_param->set_batch_size(5); + image_data_param->set_source(this->filename); + image_data_param->set_shuffle(true); + ImageDataLayer layer(param); layer.SetUp(this->blob_bottom_vec_, &this->blob_top_vec_); EXPECT_EQ(this->blob_top_data_->num(), 5); EXPECT_EQ(this->blob_top_data_->channels(), 3); diff --git a/src/caffe/test/test_innerproduct_layer.cpp b/src/caffe/test/test_inner_product_layer.cpp similarity index 68% rename from src/caffe/test/test_innerproduct_layer.cpp rename to src/caffe/test/test_inner_product_layer.cpp index 5b1831725de..91917df6cae 100644 --- a/src/caffe/test/test_innerproduct_layer.cpp +++ b/src/caffe/test/test_inner_product_layer.cpp @@ -42,7 +42,9 @@ TYPED_TEST_CASE(InnerProductLayerTest, Dtypes); TYPED_TEST(InnerProductLayerTest, TestSetUp) { LayerParameter layer_param; - layer_param.set_num_output(10); + InnerProductParameter* inner_product_param = + layer_param.mutable_inner_product_param(); + inner_product_param->set_num_output(10); shared_ptr > layer( new InnerProductLayer(layer_param)); layer->SetUp(this->blob_bottom_vec_, &(this->blob_top_vec_)); @@ -54,12 +56,14 @@ TYPED_TEST(InnerProductLayerTest, TestSetUp) { TYPED_TEST(InnerProductLayerTest, TestCPU) { LayerParameter layer_param; + InnerProductParameter* inner_product_param = + layer_param.mutable_inner_product_param(); Caffe::set_mode(Caffe::CPU); - layer_param.set_num_output(10); - layer_param.mutable_weight_filler()->set_type("uniform"); - layer_param.mutable_bias_filler()->set_type("uniform"); - layer_param.mutable_bias_filler()->set_min(1); - layer_param.mutable_bias_filler()->set_max(2); + inner_product_param->set_num_output(10); + inner_product_param->mutable_weight_filler()->set_type("uniform"); + inner_product_param->mutable_bias_filler()->set_type("uniform"); + inner_product_param->mutable_bias_filler()->set_min(1); + inner_product_param->mutable_bias_filler()->set_max(2); shared_ptr > layer( new InnerProductLayer(layer_param)); layer->SetUp(this->blob_bottom_vec_, &(this->blob_top_vec_)); @@ -74,12 +78,14 @@ TYPED_TEST(InnerProductLayerTest, TestCPU) { TYPED_TEST(InnerProductLayerTest, TestGPU) { if (sizeof(TypeParam) == 4 || CAFFE_TEST_CUDA_PROP.major >= 2) { LayerParameter layer_param; + InnerProductParameter* inner_product_param = + layer_param.mutable_inner_product_param(); Caffe::set_mode(Caffe::GPU); - layer_param.set_num_output(10); - layer_param.mutable_weight_filler()->set_type("uniform"); - layer_param.mutable_bias_filler()->set_type("uniform"); - layer_param.mutable_bias_filler()->set_min(1); - layer_param.mutable_bias_filler()->set_max(2); + inner_product_param->set_num_output(10); + inner_product_param->mutable_weight_filler()->set_type("uniform"); + inner_product_param->mutable_bias_filler()->set_type("uniform"); + inner_product_param->mutable_bias_filler()->set_min(1); + inner_product_param->mutable_bias_filler()->set_max(2); shared_ptr > layer( new InnerProductLayer(layer_param)); layer->SetUp(this->blob_bottom_vec_, &(this->blob_top_vec_)); @@ 
-96,12 +102,14 @@ TYPED_TEST(InnerProductLayerTest, TestGPU) { TYPED_TEST(InnerProductLayerTest, TestCPUGradient) { LayerParameter layer_param; + InnerProductParameter* inner_product_param = + layer_param.mutable_inner_product_param(); Caffe::set_mode(Caffe::CPU); - layer_param.set_num_output(10); - layer_param.mutable_weight_filler()->set_type("gaussian"); - layer_param.mutable_bias_filler()->set_type("gaussian"); - layer_param.mutable_bias_filler()->set_min(1); - layer_param.mutable_bias_filler()->set_max(2); + inner_product_param->set_num_output(10); + inner_product_param->mutable_weight_filler()->set_type("gaussian"); + inner_product_param->mutable_bias_filler()->set_type("gaussian"); + inner_product_param->mutable_bias_filler()->set_min(1); + inner_product_param->mutable_bias_filler()->set_max(2); InnerProductLayer layer(layer_param); GradientChecker checker(1e-2, 1e-3); checker.CheckGradientExhaustive(&layer, &(this->blob_bottom_vec_), @@ -111,10 +119,12 @@ TYPED_TEST(InnerProductLayerTest, TestCPUGradient) { TYPED_TEST(InnerProductLayerTest, TestGPUGradient) { if (sizeof(TypeParam) == 4 || CAFFE_TEST_CUDA_PROP.major >= 2) { LayerParameter layer_param; + InnerProductParameter* inner_product_param = + layer_param.mutable_inner_product_param(); Caffe::set_mode(Caffe::GPU); - layer_param.set_num_output(10); - layer_param.mutable_weight_filler()->set_type("gaussian"); - layer_param.mutable_bias_filler()->set_type("gaussian"); + inner_product_param->set_num_output(10); + inner_product_param->mutable_weight_filler()->set_type("gaussian"); + inner_product_param->mutable_bias_filler()->set_type("gaussian"); InnerProductLayer layer(layer_param); GradientChecker checker(1e-2, 1e-2); checker.CheckGradient(&layer, &(this->blob_bottom_vec_), diff --git a/src/caffe/test/test_lrn_layer.cpp b/src/caffe/test/test_lrn_layer.cpp index a96684d2160..6ad6d020c03 100644 --- a/src/caffe/test/test_lrn_layer.cpp +++ b/src/caffe/test/test_lrn_layer.cpp @@ -54,9 +54,10 @@ void LRNLayerTest::ReferenceLRNForward( blob_bottom.height(), blob_bottom.width()); const Dtype* bottom_data = blob_bottom.cpu_data(); Dtype* top_data = blob_top->mutable_cpu_data(); - Dtype alpha = layer_param.alpha(); - Dtype beta = layer_param.beta(); - int size = layer_param.local_size(); + LRNParameter lrn_param = layer_param.lrn_param(); + Dtype alpha = lrn_param.alpha(); + Dtype beta = lrn_param.beta(); + int size = lrn_param.local_size(); for (int n = 0; n < blob_bottom.num(); ++n) { for (int c = 0; c < blob_bottom.channels(); ++c) { for (int h = 0; h < blob_bottom.height(); ++h) { diff --git a/src/caffe/test/test_net.cpp b/src/caffe/test/test_net.cpp index 77b3516a980..4c7f0e7f7ac 100644 --- a/src/caffe/test/test_net.cpp +++ b/src/caffe/test/test_net.cpp @@ -18,17 +18,14 @@ namespace caffe { template class NetTest : public ::testing::Test { protected: - NetTest() : filename(NULL) { - } - virtual void SetUp() { // Create the leveldb - filename = tmpnam(NULL); // get temp name - LOG(INFO) << "Using temporary leveldb " << filename; + filename_ = tmpnam(NULL); // get temp name + LOG(INFO) << "Using temporary leveldb " << filename_; leveldb::DB* db; leveldb::Options options; options.error_if_exists = true; options.create_if_missing = true; - leveldb::Status status = leveldb::DB::Open(options, filename, &db); + leveldb::Status status = leveldb::DB::Open(options, filename_, &db); CHECK(status.ok()); for (int i = 0; i < 5; ++i) { Datum datum; @@ -49,19 +46,19 @@ class NetTest : public ::testing::Test { const string& proto_prefix = "name: 
'TestNetwork' " "layers: { " - " layer { " - " name: 'data' " - " type: 'data' "; + " name: 'data' " + " type: DATA " + " data_param { "; const string& proto_suffix = - " batchsize: 1 " + " batch_size: 1 " " } " " top: 'data' " " top: 'label' " "} " "layers: { " - " layer { " - " name: 'innerproduct' " - " type: 'innerproduct' " + " name: 'innerproduct' " + " type: INNER_PRODUCT " + " inner_product_param { " " num_output: 1000 " " weight_filler { " " type: 'gaussian' " @@ -71,31 +68,26 @@ class NetTest : public ::testing::Test { " type: 'constant' " " value: 0 " " } " - " blobs_lr: 1. " - " blobs_lr: 2. " - " weight_decay: 1. " - " weight_decay: 0. " " } " + " blobs_lr: 1. " + " blobs_lr: 2. " + " weight_decay: 1. " + " weight_decay: 0. " " bottom: 'data' " " top: 'innerproduct' " "} " "layers: { " - " layer { " - " name: 'loss' " - " type: 'softmax_loss' " - " } " + " name: 'loss' " + " type: SOFTMAX_LOSS " " bottom: 'innerproduct' " " bottom: 'label' " "} "; - proto = proto_prefix + "source: '" + string(this->filename) + + proto_ = proto_prefix + "source: '" + string(this->filename_) + "' " + proto_suffix; } - virtual ~NetTest() { - } - - char* filename; - string proto; + char* filename_; + string proto_; }; typedef ::testing::Types Dtypes; @@ -103,8 +95,7 @@ TYPED_TEST_CASE(NetTest, Dtypes); TYPED_TEST(NetTest, TestHasBlob) { NetParameter param; - CHECK(google::protobuf::TextFormat::ParseFromString(this->proto, - ¶m)); + CHECK(google::protobuf::TextFormat::ParseFromString(this->proto_, ¶m)); Net net(param); EXPECT_TRUE(net.has_blob("data")); EXPECT_TRUE(net.has_blob("label")); @@ -114,8 +105,7 @@ TYPED_TEST(NetTest, TestHasBlob) { TYPED_TEST(NetTest, TestGetBlob) { NetParameter param; - CHECK(google::protobuf::TextFormat::ParseFromString(this->proto, - ¶m)); + CHECK(google::protobuf::TextFormat::ParseFromString(this->proto_, ¶m)); Net net(param); EXPECT_EQ(net.blob_by_name("data"), net.blobs()[0]); EXPECT_EQ(net.blob_by_name("label"), net.blobs()[1]); @@ -125,8 +115,7 @@ TYPED_TEST(NetTest, TestGetBlob) { TYPED_TEST(NetTest, TestHasLayer) { NetParameter param; - CHECK(google::protobuf::TextFormat::ParseFromString(this->proto, - ¶m)); + CHECK(google::protobuf::TextFormat::ParseFromString(this->proto_, ¶m)); Net net(param); EXPECT_TRUE(net.has_layer("data")); EXPECT_TRUE(net.has_layer("innerproduct")); @@ -136,8 +125,7 @@ TYPED_TEST(NetTest, TestHasLayer) { TYPED_TEST(NetTest, TestGetLayerByName) { NetParameter param; - CHECK(google::protobuf::TextFormat::ParseFromString(this->proto, - ¶m)); + CHECK(google::protobuf::TextFormat::ParseFromString(this->proto_, ¶m)); Net net(param); EXPECT_EQ(net.layer_by_name("data"), net.layers()[0]); EXPECT_EQ(net.layer_by_name("innerproduct"), net.layers()[1]); diff --git a/src/caffe/test/test_neuron_layer.cpp b/src/caffe/test/test_neuron_layer.cpp index 105f321e7bb..cd733751a70 100644 --- a/src/caffe/test/test_neuron_layer.cpp +++ b/src/caffe/test/test_neuron_layer.cpp @@ -158,7 +158,7 @@ TYPED_TEST(NeuronLayerTest, TestDropoutCPU) { // Now, check values const TypeParam* bottom_data = this->blob_bottom_->cpu_data(); const TypeParam* top_data = this->blob_top_->cpu_data(); - float scale = 1. / (1. - layer_param.dropout_ratio()); + float scale = 1. / (1. 
- layer_param.dropout_param().dropout_ratio()); for (int i = 0; i < this->blob_bottom_->count(); ++i) { if (top_data[i] != 0) { EXPECT_EQ(top_data[i], bottom_data[i] * scale); @@ -187,7 +187,7 @@ TYPED_TEST(NeuronLayerTest, TestDropoutCPUTestPhase) { // Now, check values const TypeParam* bottom_data = this->blob_bottom_->cpu_data(); const TypeParam* top_data = this->blob_top_->cpu_data(); - float scale = 1. / (1. - layer_param.dropout_ratio()); + float scale = 1. / (1. - layer_param.dropout_param().dropout_ratio()); for (int i = 0; i < this->blob_bottom_->count(); ++i) { if (top_data[i] != 0) { EXPECT_EQ(top_data[i], bottom_data[i]); @@ -206,7 +206,7 @@ TYPED_TEST(NeuronLayerTest, TestDropoutGPU) { // Now, check values const TypeParam* bottom_data = this->blob_bottom_->cpu_data(); const TypeParam* top_data = this->blob_top_->cpu_data(); - float scale = 1. / (1. - layer_param.dropout_ratio()); + float scale = 1. / (1. - layer_param.dropout_param().dropout_ratio()); for (int i = 0; i < this->blob_bottom_->count(); ++i) { if (top_data[i] != 0) { EXPECT_EQ(top_data[i], bottom_data[i] * scale); @@ -241,7 +241,7 @@ TYPED_TEST(NeuronLayerTest, TestDropoutGPUTestPhase) { // Now, check values const TypeParam* bottom_data = this->blob_bottom_->cpu_data(); const TypeParam* top_data = this->blob_top_->cpu_data(); - float scale = 1. / (1. - layer_param.dropout_ratio()); + float scale = 1. / (1. - layer_param.dropout_param().dropout_ratio()); for (int i = 0; i < this->blob_bottom_->count(); ++i) { if (top_data[i] != 0) { EXPECT_EQ(top_data[i], bottom_data[i]); diff --git a/src/caffe/test/test_padding_layer.cpp b/src/caffe/test/test_padding_layer.cpp deleted file mode 100644 index c775f3be351..00000000000 --- a/src/caffe/test/test_padding_layer.cpp +++ /dev/null @@ -1,117 +0,0 @@ -// Copyright 2014 BVLC and contributors. 
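For the dropout hunks just above, the ratio now lives on the nested dropout_param message; a quick sketch of setting it and reading it back the way these tests do (the setter name follows standard protobuf conventions and is an assumption here):

    #include "caffe/proto/caffe.pb.h"  // assumed include path

    // Sketch: the dropout ratio moves under dropout_param.
    float DropoutScale() {
      caffe::LayerParameter layer_param;
      layer_param.mutable_dropout_param()->set_dropout_ratio(0.5);  // assumed setter name
      // The tests above recover the train-time scale exactly like this:
      return 1. / (1. - layer_param.dropout_param().dropout_ratio());  // == 2.0
    }
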
- -#include -#include -#include - -#include "gtest/gtest.h" -#include "caffe/blob.hpp" -#include "caffe/common.hpp" -#include "caffe/filler.hpp" -#include "caffe/vision_layers.hpp" -#include "caffe/test/test_gradient_check_util.hpp" - -#include "caffe/test/test_caffe_main.hpp" - -namespace caffe { - -extern cudaDeviceProp CAFFE_TEST_CUDA_PROP; - -template -class PaddingLayerTest : public ::testing::Test { - protected: - PaddingLayerTest() - : blob_bottom_(new Blob(2, 3, 4, 5)), - blob_top_(new Blob()) { - // fill the values - FillerParameter filler_param; - GaussianFiller filler(filler_param); - filler.Fill(this->blob_bottom_); - blob_bottom_vec_.push_back(blob_bottom_); - blob_top_vec_.push_back(blob_top_); - } - virtual ~PaddingLayerTest() { delete blob_bottom_; delete blob_top_; } - Blob* const blob_bottom_; - Blob* const blob_top_; - vector*> blob_bottom_vec_; - vector*> blob_top_vec_; -}; - -typedef ::testing::Types Dtypes; -TYPED_TEST_CASE(PaddingLayerTest, Dtypes); - -TYPED_TEST(PaddingLayerTest, TestCPU) { - LayerParameter layer_param; - layer_param.set_pad(1); - Caffe::set_mode(Caffe::CPU); - PaddingLayer layer(layer_param); - layer.SetUp(this->blob_bottom_vec_, &(this->blob_top_vec_)); - layer.Forward(this->blob_bottom_vec_, &(this->blob_top_vec_)); - EXPECT_EQ(this->blob_top_->num(), 2); - EXPECT_EQ(this->blob_top_->channels(), 3); - EXPECT_EQ(this->blob_top_->height(), 6); - EXPECT_EQ(this->blob_top_->width(), 7); - for (int n = 0; n < 2; ++n) { - for (int c = 0; c < 3; ++c) { - for (int h = 0; h < 4; ++h) { - for (int w = 0; w < 5; ++w) { - EXPECT_EQ(this->blob_bottom_->data_at(n, c, h, w), - this->blob_top_->data_at(n, c, h + 1, w + 1)); - } - } - } - } -} - -TYPED_TEST(PaddingLayerTest, TestCPUGrad) { - LayerParameter layer_param; - layer_param.set_pad(1); - Caffe::set_mode(Caffe::CPU); - PaddingLayer layer(layer_param); - GradientChecker checker(1e-2, 1e-3); - checker.CheckGradientExhaustive(&layer, &(this->blob_bottom_vec_), - &(this->blob_top_vec_)); -} - -TYPED_TEST(PaddingLayerTest, TestGPU) { - if (CAFFE_TEST_CUDA_PROP.major >= 2) { - LayerParameter layer_param; - layer_param.set_pad(1); - Caffe::set_mode(Caffe::GPU); - PaddingLayer layer(layer_param); - layer.SetUp(this->blob_bottom_vec_, &(this->blob_top_vec_)); - layer.Forward(this->blob_bottom_vec_, &(this->blob_top_vec_)); - EXPECT_EQ(this->blob_top_->num(), 2); - EXPECT_EQ(this->blob_top_->channels(), 3); - EXPECT_EQ(this->blob_top_->height(), 6); - EXPECT_EQ(this->blob_top_->width(), 7); - for (int n = 0; n < 2; ++n) { - for (int c = 0; c < 3; ++c) { - for (int h = 0; h < 4; ++h) { - for (int w = 0; w < 5; ++w) { - EXPECT_EQ(this->blob_bottom_->data_at(n, c, h, w), - this->blob_top_->data_at(n, c, h + 1, w + 1)); - } - } - } - } - } else { - LOG(ERROR) << "Skipping test (gpu version too low)."; - } -} - -TYPED_TEST(PaddingLayerTest, TestGPUGrad) { - if (CAFFE_TEST_CUDA_PROP.major >= 2) { - LayerParameter layer_param; - layer_param.set_pad(1); - Caffe::set_mode(Caffe::GPU); - PaddingLayer layer(layer_param); - GradientChecker checker(1e-2, 1e-3); - checker.CheckGradientExhaustive(&layer, &(this->blob_bottom_vec_), - &(this->blob_top_vec_)); - } else { - LOG(ERROR) << "Skipping test (gpu version too low)."; - } -} - -} // namespace caffe diff --git a/src/caffe/test/test_pooling_layer.cpp b/src/caffe/test/test_pooling_layer.cpp index 11b9ce23583..d1246a098c8 100644 --- a/src/caffe/test/test_pooling_layer.cpp +++ b/src/caffe/test/test_pooling_layer.cpp @@ -45,8 +45,9 @@ TYPED_TEST_CASE(PoolingLayerTest, Dtypes); 
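The deletion above reflects that the standalone padding layer is gone: padding is now a field on the convolution parameters, so the old pad-then-convolve pair collapses into a single layer (the prototxt hunks later in this diff show the same thing with "pad: 2" inside convolution_param). A sketch of the equivalent setup, using only field names that appear elsewhere in this diff:

    #include "caffe/proto/caffe.pb.h"  // assumed include path

    // Sketch: a padded convolution no longer needs a separate padding layer.
    caffe::LayerParameter MakePaddedConv() {
      caffe::LayerParameter conv;
      caffe::ConvolutionParameter* convolution_param =
          conv.mutable_convolution_param();
      convolution_param->set_num_output(256);
      convolution_param->set_kernel_size(5);
      convolution_param->set_pad(2);  // replaces the old 'padding' layer with pad: 2
      convolution_param->set_stride(1);
      return conv;
    }
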
TYPED_TEST(PoolingLayerTest, TestSetup) { LayerParameter layer_param; - layer_param.set_kernelsize(3); - layer_param.set_stride(2); + PoolingParameter* pooling_param = layer_param.mutable_pooling_param(); + pooling_param->set_kernel_size(3); + pooling_param->set_stride(2); PoolingLayer layer(layer_param); layer.SetUp(this->blob_bottom_vec_, &(this->blob_top_vec_)); EXPECT_EQ(this->blob_top_->num(), this->blob_bottom_->num()); @@ -58,9 +59,10 @@ TYPED_TEST(PoolingLayerTest, TestSetup) { /* TYPED_TEST(PoolingLayerTest, PrintGPUBackward) { LayerParameter layer_param; - layer_param.set_kernelsize(3); - layer_param.set_stride(2); - layer_param.set_pool(LayerParameter_PoolMethod_MAX); + PoolingParameter* pooling_param = layer_param.mutable_pooling_param(); + pooling_param->set_kernel_size(3); + pooling_param->set_stride(2); + pooling_param->set_pool(PoolingParameter_PoolMethod_MAX); Caffe::set_mode(Caffe::GPU); PoolingLayer layer(layer_param); layer.SetUp(this->blob_bottom_vec_, &(this->blob_top_vec_)); @@ -84,9 +86,10 @@ TYPED_TEST(PoolingLayerTest, PrintGPUBackward) { TYPED_TEST(PoolingLayerTest, TestCPUGradientMax) { LayerParameter layer_param; - layer_param.set_kernelsize(3); - layer_param.set_stride(2); - layer_param.set_pool(LayerParameter_PoolMethod_MAX); + PoolingParameter* pooling_param = layer_param.mutable_pooling_param(); + pooling_param->set_kernel_size(3); + pooling_param->set_stride(2); + pooling_param->set_pool(PoolingParameter_PoolMethod_MAX); Caffe::set_mode(Caffe::CPU); PoolingLayer layer(layer_param); GradientChecker checker(1e-4, 1e-2); @@ -96,9 +99,10 @@ TYPED_TEST(PoolingLayerTest, TestCPUGradientMax) { TYPED_TEST(PoolingLayerTest, TestGPUGradientMax) { LayerParameter layer_param; - layer_param.set_kernelsize(3); - layer_param.set_stride(2); - layer_param.set_pool(LayerParameter_PoolMethod_MAX); + PoolingParameter* pooling_param = layer_param.mutable_pooling_param(); + pooling_param->set_kernel_size(3); + pooling_param->set_stride(2); + pooling_param->set_pool(PoolingParameter_PoolMethod_MAX); Caffe::set_mode(Caffe::GPU); PoolingLayer layer(layer_param); GradientChecker checker(1e-4, 1e-2); @@ -109,9 +113,10 @@ TYPED_TEST(PoolingLayerTest, TestGPUGradientMax) { TYPED_TEST(PoolingLayerTest, TestCPUGradientAve) { LayerParameter layer_param; - layer_param.set_kernelsize(3); - layer_param.set_stride(2); - layer_param.set_pool(LayerParameter_PoolMethod_AVE); + PoolingParameter* pooling_param = layer_param.mutable_pooling_param(); + pooling_param->set_kernel_size(3); + pooling_param->set_stride(2); + pooling_param->set_pool(PoolingParameter_PoolMethod_AVE); Caffe::set_mode(Caffe::CPU); PoolingLayer layer(layer_param); GradientChecker checker(1e-2, 1e-2); @@ -122,9 +127,10 @@ TYPED_TEST(PoolingLayerTest, TestCPUGradientAve) { TYPED_TEST(PoolingLayerTest, TestGPUGradientAve) { LayerParameter layer_param; - layer_param.set_kernelsize(3); - layer_param.set_stride(2); - layer_param.set_pool(LayerParameter_PoolMethod_AVE); + PoolingParameter* pooling_param = layer_param.mutable_pooling_param(); + pooling_param->set_kernel_size(3); + pooling_param->set_stride(2); + pooling_param->set_pool(PoolingParameter_PoolMethod_AVE); Caffe::set_mode(Caffe::GPU); PoolingLayer layer(layer_param); GradientChecker checker(1e-2, 1e-2); diff --git a/src/caffe/test/test_protobuf.cpp b/src/caffe/test/test_protobuf.cpp index 0618d8c5d80..182af2e4611 100644 --- a/src/caffe/test/test_protobuf.cpp +++ b/src/caffe/test/test_protobuf.cpp @@ -16,7 +16,7 @@ class ProtoTest : public ::testing::Test {}; 
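The pooling hunks above follow the same pattern, including the pool-method enum moving from LayerParameter to PoolingParameter. A minimal sketch built from the calls shown in those hunks:

    #include "caffe/proto/caffe.pb.h"  // assumed include path

    // Sketch: pooling settings, including the pool method, now live on PoolingParameter.
    caffe::LayerParameter MakeMaxPool() {
      caffe::LayerParameter pool;
      caffe::PoolingParameter* pooling_param = pool.mutable_pooling_param();
      pooling_param->set_kernel_size(3);
      pooling_param->set_stride(2);
      pooling_param->set_pool(caffe::PoolingParameter_PoolMethod_MAX);
      return pool;
    }
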
TEST_F(ProtoTest, TestSerialization) { LayerParameter param; param.set_name("test"); - param.set_type("dummy"); + param.set_type(LayerParameter_LayerType_NONE); std::cout << "Printing in binary format." << std::endl; std::cout << param.SerializeAsString() << std::endl; std::cout << "Printing in text format." << std::endl; diff --git a/src/caffe/test/test_split_layer.cpp b/src/caffe/test/test_split_layer.cpp index 2c7c986e99b..3c17b0e2450 100644 --- a/src/caffe/test/test_split_layer.cpp +++ b/src/caffe/test/test_split_layer.cpp @@ -160,7 +160,7 @@ class SplitLayerInsertionTest : public ::testing::Test { protected: void RunInsertionTest( const string& input_param_string, const string& output_param_string) { - // Test that insert_splits called on the proto specified by + // Test that InsertSplits called on the proto specified by // input_param_string results in the proto specified by // output_param_string. NetParameter input_param; @@ -170,12 +170,12 @@ class SplitLayerInsertionTest : public ::testing::Test { CHECK(google::protobuf::TextFormat::ParseFromString( output_param_string, &expected_output_param)); NetParameter actual_output_param; - insert_splits(input_param, &actual_output_param); + InsertSplits(input_param, &actual_output_param); EXPECT_EQ(expected_output_param.DebugString(), actual_output_param.DebugString()); // Also test idempotence. NetParameter double_split_insert_param; - insert_splits(actual_output_param, &double_split_insert_param); + InsertSplits(actual_output_param, &double_split_insert_param); EXPECT_EQ(actual_output_param.DebugString(), double_split_insert_param.DebugString()); } @@ -188,26 +188,20 @@ TYPED_TEST(SplitLayerInsertionTest, TestNoInsertion1) { const string& input_proto = "name: 'TestNetwork' " "layers: { " - " layer { " - " name: 'data' " - " type: 'data' " - " } " + " name: 'data' " + " type: DATA " " top: 'data' " " top: 'label' " "} " "layers: { " - " layer { " - " name: 'innerprod' " - " type: 'inner_product' " - " } " + " name: 'innerprod' " + " type: INNER_PRODUCT " " bottom: 'data' " " top: 'innerprod' " "} " "layers: { " - " layer { " - " name: 'loss' " - " type: 'softmax_with_loss' " - " } " + " name: 'loss' " + " type: SOFTMAX_LOSS " " bottom: 'innerprod' " " bottom: 'label' " "} "; @@ -218,43 +212,33 @@ TYPED_TEST(SplitLayerInsertionTest, TestNoInsertion2) { const string& input_proto = "name: 'TestNetwork' " "layers: { " - " layer { " - " name: 'data' " - " type: 'data' " - " } " + " name: 'data' " + " type: DATA " " top: 'data' " " top: 'label' " "} " "layers: { " - " layer { " - " name: 'data_split' " - " type: 'split' " - " } " + " name: 'data_split' " + " type: SPLIT " " bottom: 'data' " " top: 'data_split_0' " " top: 'data_split_1' " "} " "layers: { " - " layer { " - " name: 'innerprod1' " - " type: 'inner_product' " - " } " + " name: 'innerprod1' " + " type: INNER_PRODUCT " " bottom: 'data_split_0' " " top: 'innerprod1' " "} " "layers: { " - " layer { " - " name: 'innerprod2' " - " type: 'inner_product' " - " } " + " name: 'innerprod2' " + " type: INNER_PRODUCT " " bottom: 'data_split_1' " " top: 'innerprod2' " "} " "layers: { " - " layer { " - " name: 'loss' " - " type: 'euclidean_loss' " - " } " + " name: 'loss' " + " type: EUCLIDEAN_LOSS " " bottom: 'innerprod1' " " bottom: 'innerprod2' " "} "; @@ -265,24 +249,24 @@ TYPED_TEST(SplitLayerInsertionTest, TestNoInsertionImageNet) { const string& input_proto = "name: 'CaffeNet' " "layers { " - " layer { " - " name: 'data' " - " type: 'data' " + " name: 'data' " + " type: DATA " + " 
data_param { " " source: '/home/jiayq/Data/ILSVRC12/train-leveldb' " - " meanfile: '/home/jiayq/Data/ILSVRC12/image_mean.binaryproto' " - " batchsize: 256 " - " cropsize: 227 " + " mean_file: '/home/jiayq/Data/ILSVRC12/image_mean.binaryproto' " + " batch_size: 256 " + " crop_size: 227 " " mirror: true " " } " " top: 'data' " " top: 'label' " "} " "layers { " - " layer { " - " name: 'conv1' " - " type: 'conv' " + " name: 'conv1' " + " type: CONVOLUTION " + " convolution_param { " " num_output: 96 " - " kernelsize: 11 " + " kernel_size: 11 " " stride: 4 " " weight_filler { " " type: 'gaussian' " @@ -292,37 +276,35 @@ TYPED_TEST(SplitLayerInsertionTest, TestNoInsertionImageNet) { " type: 'constant' " " value: 0. " " } " - " blobs_lr: 1. " - " blobs_lr: 2. " - " weight_decay: 1. " - " weight_decay: 0. " " } " + " blobs_lr: 1. " + " blobs_lr: 2. " + " weight_decay: 1. " + " weight_decay: 0. " " bottom: 'data' " " top: 'conv1' " "} " "layers { " - " layer { " - " name: 'relu1' " - " type: 'relu' " - " } " + " name: 'relu1' " + " type: RELU " " bottom: 'conv1' " " top: 'conv1' " "} " "layers { " - " layer { " - " name: 'pool1' " - " type: 'pool' " + " name: 'pool1' " + " type: POOLING " + " pooling_param { " " pool: MAX " - " kernelsize: 3 " + " kernel_size: 3 " " stride: 2 " " } " " bottom: 'conv1' " " top: 'pool1' " "} " "layers { " - " layer { " - " name: 'norm1' " - " type: 'lrn' " + " name: 'norm1' " + " type: LRN " + " lrn_param { " " local_size: 5 " " alpha: 0.0001 " " beta: 0.75 " @@ -331,21 +313,13 @@ TYPED_TEST(SplitLayerInsertionTest, TestNoInsertionImageNet) { " top: 'norm1' " "} " "layers { " - " layer { " - " name: 'pad2' " - " type: 'padding' " - " pad: 2 " - " } " - " bottom: 'norm1' " - " top: 'pad2' " - "} " - "layers { " - " layer { " - " name: 'conv2' " - " type: 'conv' " + " name: 'conv2' " + " type: CONVOLUTION " + " convolution_param { " " num_output: 256 " " group: 2 " - " kernelsize: 5 " + " kernel_size: 5 " + " pad: 2 " " weight_filler { " " type: 'gaussian' " " std: 0.01 " @@ -354,37 +328,35 @@ TYPED_TEST(SplitLayerInsertionTest, TestNoInsertionImageNet) { " type: 'constant' " " value: 1. " " } " - " blobs_lr: 1. " - " blobs_lr: 2. " - " weight_decay: 1. " - " weight_decay: 0. " " } " - " bottom: 'pad2' " + " blobs_lr: 1. " + " blobs_lr: 2. " + " weight_decay: 1. " + " weight_decay: 0. 
" + " bottom: 'norm1' " " top: 'conv2' " "} " "layers { " - " layer { " - " name: 'relu2' " - " type: 'relu' " - " } " + " name: 'relu2' " + " type: RELU " " bottom: 'conv2' " " top: 'conv2' " "} " "layers { " - " layer { " - " name: 'pool2' " - " type: 'pool' " + " name: 'pool2' " + " type: POOLING " + " pooling_param { " " pool: MAX " - " kernelsize: 3 " + " kernel_size: 3 " " stride: 2 " " } " " bottom: 'conv2' " " top: 'pool2' " "} " "layers { " - " layer { " - " name: 'norm2' " - " type: 'lrn' " + " name: 'norm2' " + " type: LRN " + " lrn_param { " " local_size: 5 " " alpha: 0.0001 " " beta: 0.75 " @@ -393,20 +365,12 @@ TYPED_TEST(SplitLayerInsertionTest, TestNoInsertionImageNet) { " top: 'norm2' " "} " "layers { " - " layer { " - " name: 'pad3' " - " type: 'padding' " - " pad: 1 " - " } " - " bottom: 'norm2' " - " top: 'pad3' " - "} " - "layers { " - " layer { " - " name: 'conv3' " - " type: 'conv' " + " name: 'conv3' " + " type: CONVOLUTION " + " convolution_param { " " num_output: 384 " - " kernelsize: 3 " + " kernel_size: 3 " + " pad: 1 " " weight_filler { " " type: 'gaussian' " " std: 0.01 " @@ -415,38 +379,28 @@ TYPED_TEST(SplitLayerInsertionTest, TestNoInsertionImageNet) { " type: 'constant' " " value: 0. " " } " - " blobs_lr: 1. " - " blobs_lr: 2. " - " weight_decay: 1. " - " weight_decay: 0. " " } " - " bottom: 'pad3' " + " blobs_lr: 1. " + " blobs_lr: 2. " + " weight_decay: 1. " + " weight_decay: 0. " + " bottom: 'norm2' " " top: 'conv3' " "} " "layers { " - " layer { " - " name: 'relu3' " - " type: 'relu' " - " } " + " name: 'relu3' " + " type: RELU " " bottom: 'conv3' " " top: 'conv3' " "} " "layers { " - " layer { " - " name: 'pad4' " - " type: 'padding' " - " pad: 1 " - " } " - " bottom: 'conv3' " - " top: 'pad4' " - "} " - "layers { " - " layer { " - " name: 'conv4' " - " type: 'conv' " + " name: 'conv4' " + " type: CONVOLUTION " + " convolution_param { " " num_output: 384 " " group: 2 " - " kernelsize: 3 " + " kernel_size: 3 " + " pad: 1 " " weight_filler { " " type: 'gaussian' " " std: 0.01 " @@ -455,38 +409,28 @@ TYPED_TEST(SplitLayerInsertionTest, TestNoInsertionImageNet) { " type: 'constant' " " value: 1. " " } " - " blobs_lr: 1. " - " blobs_lr: 2. " - " weight_decay: 1. " - " weight_decay: 0. " " } " - " bottom: 'pad4' " + " blobs_lr: 1. " + " blobs_lr: 2. " + " weight_decay: 1. " + " weight_decay: 0. " + " bottom: 'conv3' " " top: 'conv4' " "} " "layers { " - " layer { " - " name: 'relu4' " - " type: 'relu' " - " } " + " name: 'relu4' " + " type: RELU " " bottom: 'conv4' " " top: 'conv4' " "} " "layers { " - " layer { " - " name: 'pad5' " - " type: 'padding' " - " pad: 1 " - " } " - " bottom: 'conv4' " - " top: 'pad5' " - "} " - "layers { " - " layer { " - " name: 'conv5' " - " type: 'conv' " + " name: 'conv5' " + " type: CONVOLUTION " + " convolution_param { " " num_output: 256 " " group: 2 " - " kernelsize: 3 " + " kernel_size: 3 " + " pad: 1 " " weight_filler { " " type: 'gaussian' " " std: 0.01 " @@ -495,27 +439,25 @@ TYPED_TEST(SplitLayerInsertionTest, TestNoInsertionImageNet) { " type: 'constant' " " value: 1. " " } " - " blobs_lr: 1. " - " blobs_lr: 2. " - " weight_decay: 1. " - " weight_decay: 0. " " } " - " bottom: 'pad5' " + " blobs_lr: 1. " + " blobs_lr: 2. " + " weight_decay: 1. " + " weight_decay: 0. 
" + " bottom: 'conv4' " " top: 'conv5' " "} " "layers { " - " layer { " - " name: 'relu5' " - " type: 'relu' " - " } " + " name: 'relu5' " + " type: RELU " " bottom: 'conv5' " " top: 'conv5' " "} " "layers { " - " layer { " - " name: 'pool5' " - " type: 'pool' " - " kernelsize: 3 " + " name: 'pool5' " + " type: POOLING " + " pooling_param { " + " kernel_size: 3 " " pool: MAX " " stride: 2 " " } " @@ -523,9 +465,9 @@ TYPED_TEST(SplitLayerInsertionTest, TestNoInsertionImageNet) { " top: 'pool5' " "} " "layers { " - " layer { " - " name: 'fc6' " - " type: 'innerproduct' " + " name: 'fc6' " + " type: INNER_PRODUCT " + " inner_product_param { " " num_output: 4096 " " weight_filler { " " type: 'gaussian' " @@ -535,35 +477,33 @@ TYPED_TEST(SplitLayerInsertionTest, TestNoInsertionImageNet) { " type: 'constant' " " value: 1. " " } " - " blobs_lr: 1. " - " blobs_lr: 2. " - " weight_decay: 1. " - " weight_decay: 0. " " } " + " blobs_lr: 1. " + " blobs_lr: 2. " + " weight_decay: 1. " + " weight_decay: 0. " " bottom: 'pool5' " " top: 'fc6' " "} " "layers { " - " layer { " - " name: 'relu6' " - " type: 'relu' " - " } " + " name: 'relu6' " + " type: RELU " " bottom: 'fc6' " " top: 'fc6' " "} " "layers { " - " layer { " - " name: 'drop6' " - " type: 'dropout' " + " name: 'drop6' " + " type: DROPOUT " + " dropout_param { " " dropout_ratio: 0.5 " " } " " bottom: 'fc6' " " top: 'fc6' " "} " "layers { " - " layer { " - " name: 'fc7' " - " type: 'innerproduct' " + " name: 'fc7' " + " type: INNER_PRODUCT " + " inner_product_param { " " num_output: 4096 " " weight_filler { " " type: 'gaussian' " @@ -573,35 +513,33 @@ TYPED_TEST(SplitLayerInsertionTest, TestNoInsertionImageNet) { " type: 'constant' " " value: 1. " " } " - " blobs_lr: 1. " - " blobs_lr: 2. " - " weight_decay: 1. " - " weight_decay: 0. " " } " + " blobs_lr: 1. " + " blobs_lr: 2. " + " weight_decay: 1. " + " weight_decay: 0. " " bottom: 'fc6' " " top: 'fc7' " "} " "layers { " - " layer { " - " name: 'relu7' " - " type: 'relu' " - " } " + " name: 'relu7' " + " type: RELU " " bottom: 'fc7' " " top: 'fc7' " "} " "layers { " - " layer { " - " name: 'drop7' " - " type: 'dropout' " + " name: 'drop7' " + " type: DROPOUT " + " dropout_param { " " dropout_ratio: 0.5 " " } " " bottom: 'fc7' " " top: 'fc7' " "} " "layers { " - " layer { " - " name: 'fc8' " - " type: 'innerproduct' " + " name: 'fc8' " + " type: INNER_PRODUCT " + " inner_product_param { " " num_output: 1000 " " weight_filler { " " type: 'gaussian' " @@ -611,57 +549,47 @@ TYPED_TEST(SplitLayerInsertionTest, TestNoInsertionImageNet) { " type: 'constant' " " value: 0 " " } " - " blobs_lr: 1. " - " blobs_lr: 2. " - " weight_decay: 1. " - " weight_decay: 0. " " } " + " blobs_lr: 1. " + " blobs_lr: 2. " + " weight_decay: 1. " + " weight_decay: 0. 
" " bottom: 'fc7' " " top: 'fc8' " "} " "layers { " - " layer { " - " name: 'loss' " - " type: 'softmax_loss' " - " } " + " name: 'loss' " + " type: SOFTMAX_LOSS " " bottom: 'fc8' " " bottom: 'label' " "} "; this->RunInsertionTest(input_proto, input_proto); } -TYPED_TEST(SplitLayerInsertionTest, TestInsertionWithInPlace) { +TYPED_TEST(SplitLayerInsertionTest, TestNoInsertionWithInPlace) { const string& input_proto = "name: 'TestNetwork' " "layers: { " - " layer { " - " name: 'data' " - " type: 'data' " - " } " + " name: 'data' " + " type: DATA " " top: 'data' " " top: 'label' " "} " "layers: { " - " layer { " - " name: 'innerprod' " - " type: 'inner_product' " - " } " + " name: 'innerprod' " + " type: INNER_PRODUCT " " bottom: 'data' " " top: 'innerprod' " "} " "layers: { " - " layer { " - " name: 'relu' " - " type: 'relu' " - " } " + " name: 'relu' " + " type: RELU " " bottom: 'innerprod' " " top: 'innerprod' " "} " "layers: { " - " layer { " - " name: 'loss' " - " type: 'softmax_with_loss' " - " } " + " name: 'loss' " + " type: SOFTMAX_LOSS " " bottom: 'innerprod' " " bottom: 'label' " "} "; @@ -672,119 +600,91 @@ TYPED_TEST(SplitLayerInsertionTest, TestInsertion) { const string& input_proto = "name: 'TestNetwork' " "layers: { " - " layer { " - " name: 'data' " - " type: 'data' " - " } " + " name: 'data' " + " type: DATA " " top: 'data' " " top: 'label' " "} " "layers: { " - " layer { " - " name: 'innerprod1' " - " type: 'inner_product' " - " } " + " name: 'innerprod1' " + " type: INNER_PRODUCT " " bottom: 'data' " " top: 'innerprod1' " "} " "layers: { " - " layer { " - " name: 'innerprod2' " - " type: 'inner_product' " - " } " + " name: 'innerprod2' " + " type: INNER_PRODUCT " " bottom: 'data' " " top: 'innerprod2' " "} " "layers: { " - " layer { " - " name: 'innerprod3' " - " type: 'inner_product' " - " } " + " name: 'innerprod3' " + " type: INNER_PRODUCT " " bottom: 'data' " " top: 'innerprod3' " "} " "layers: { " - " layer { " - " name: 'loss1' " - " type: 'euclidean_loss' " - " } " + " name: 'loss1' " + " type: EUCLIDEAN_LOSS " " bottom: 'innerprod1' " " bottom: 'innerprod2' " "} " "layers: { " - " layer { " - " name: 'loss2' " - " type: 'euclidean_loss' " - " } " + " name: 'loss2' " + " type: EUCLIDEAN_LOSS " " bottom: 'innerprod2' " " bottom: 'innerprod3' " "} "; const string& expected_output_proto = "name: 'TestNetwork' " "layers: { " - " layer { " - " name: 'data' " - " type: 'data' " - " } " + " name: 'data' " + " type: DATA " " top: 'data' " " top: 'label' " "} " "layers: { " - " layer { " - " name: 'data_data_0_split' " - " type: 'split' " - " } " + " name: 'data_data_0_split' " + " type: SPLIT " " bottom: 'data' " " top: 'data' " " top: 'data_data_0_split_1' " " top: 'data_data_0_split_2' " "} " "layers: { " - " layer { " - " name: 'innerprod1' " - " type: 'inner_product' " - " } " + " name: 'innerprod1' " + " type: INNER_PRODUCT " " bottom: 'data' " " top: 'innerprod1' " "} " "layers: { " - " layer { " - " name: 'innerprod2' " - " type: 'inner_product' " - " } " + " name: 'innerprod2' " + " type: INNER_PRODUCT " " bottom: 'data_data_0_split_1' " " top: 'innerprod2' " "} " "layers: { " - " layer { " - " name: 'innerprod2_innerprod2_0_split' " - " type: 'split' " - " } " + " name: 'innerprod2_innerprod2_0_split' " + " type: SPLIT " " bottom: 'innerprod2' " " top: 'innerprod2' " " top: 'innerprod2_innerprod2_0_split_1' " "} " "layers: { " - " layer { " - " name: 'innerprod3' " - " type: 'inner_product' " - " } " + " name: 'innerprod3' " + " type: INNER_PRODUCT " " bottom: 
'data_data_0_split_2' " " top: 'innerprod3' " "} " "layers: { " - " layer { " - " name: 'loss1' " - " type: 'euclidean_loss' " - " } " + " name: 'loss1' " + " type: EUCLIDEAN_LOSS " " bottom: 'innerprod1' " " bottom: 'innerprod2' " "} " "layers: { " - " layer { " - " name: 'loss2' " - " type: 'euclidean_loss' " - " } " + " name: 'loss2' " + " type: EUCLIDEAN_LOSS " " bottom: 'innerprod2_innerprod2_0_split_1' " " bottom: 'innerprod3' " "} "; @@ -795,134 +695,102 @@ TYPED_TEST(SplitLayerInsertionTest, TestInsertionTwoTop) { const string& input_proto = "name: 'TestNetwork' " "layers: { " - " layer { " - " name: 'data' " - " type: 'data' " - " } " + " name: 'data' " + " type: DATA " " top: 'data' " " top: 'label' " "} " "layers: { " - " layer { " - " name: 'innerprod1' " - " type: 'inner_product' " - " } " + " name: 'innerprod1' " + " type: INNER_PRODUCT " " bottom: 'data' " " top: 'innerprod1' " "} " "layers: { " - " layer { " - " name: 'innerprod2' " - " type: 'inner_product' " - " } " + " name: 'innerprod2' " + " type: INNER_PRODUCT " " bottom: 'label' " " top: 'innerprod2' " "} " "layers: { " - " layer { " - " name: 'innerprod3' " - " type: 'inner_product' " - " } " + " name: 'innerprod3' " + " type: INNER_PRODUCT " " bottom: 'data' " " top: 'innerprod3' " "} " "layers: { " - " layer { " - " name: 'innerprod4' " - " type: 'inner_product' " - " } " + " name: 'innerprod4' " + " type: INNER_PRODUCT " " bottom: 'label' " " top: 'innerprod4' " "} " "layers: { " - " layer { " - " name: 'loss1' " - " type: 'euclidean_loss' " - " } " + " name: 'loss1' " + " type: EUCLIDEAN_LOSS " " bottom: 'innerprod1' " " bottom: 'innerprod3' " "} " "layers: { " - " layer { " - " name: 'loss2' " - " type: 'euclidean_loss' " - " } " + " name: 'loss2' " + " type: EUCLIDEAN_LOSS " " bottom: 'innerprod2' " " bottom: 'innerprod4' " "} "; const string& expected_output_proto = "name: 'TestNetwork' " "layers: { " - " layer { " - " name: 'data' " - " type: 'data' " - " } " + " name: 'data' " + " type: DATA " " top: 'data' " " top: 'label' " "} " "layers: { " - " layer { " - " name: 'data_data_0_split' " - " type: 'split' " - " } " + " name: 'data_data_0_split' " + " type: SPLIT " " bottom: 'data' " " top: 'data' " " top: 'data_data_0_split_1' " "} " "layers: { " - " layer { " - " name: 'label_data_1_split' " - " type: 'split' " - " } " + " name: 'label_data_1_split' " + " type: SPLIT " " bottom: 'label' " " top: 'label' " " top: 'label_data_1_split_1' " "} " "layers: { " - " layer { " - " name: 'innerprod1' " - " type: 'inner_product' " - " } " + " name: 'innerprod1' " + " type: INNER_PRODUCT " " bottom: 'data' " " top: 'innerprod1' " "} " "layers: { " - " layer { " - " name: 'innerprod2' " - " type: 'inner_product' " - " } " + " name: 'innerprod2' " + " type: INNER_PRODUCT " " bottom: 'label' " " top: 'innerprod2' " "} " "layers: { " - " layer { " - " name: 'innerprod3' " - " type: 'inner_product' " - " } " + " name: 'innerprod3' " + " type: INNER_PRODUCT " " bottom: 'data_data_0_split_1' " " top: 'innerprod3' " "} " "layers: { " - " layer { " - " name: 'innerprod4' " - " type: 'inner_product' " - " } " + " name: 'innerprod4' " + " type: INNER_PRODUCT " " bottom: 'label_data_1_split_1' " " top: 'innerprod4' " "} " "layers: { " - " layer { " - " name: 'loss1' " - " type: 'euclidean_loss' " - " } " + " name: 'loss1' " + " type: EUCLIDEAN_LOSS " " bottom: 'innerprod1' " " bottom: 'innerprod3' " "} " "layers: { " - " layer { " - " name: 'loss2' " - " type: 'euclidean_loss' " - " } " + " name: 'loss2' " + " type: 
EUCLIDEAN_LOSS " " bottom: 'innerprod2' " " bottom: 'innerprod4' " "} "; @@ -938,26 +806,20 @@ TYPED_TEST(SplitLayerInsertionTest, TestInputInsertion) { "input_dim: 227 " "input_dim: 227 " "layers: { " - " layer { " - " name: 'innerprod1' " - " type: 'inner_product' " - " } " + " name: 'innerprod1' " + " type: INNER_PRODUCT " " bottom: 'data' " " top: 'innerprod1' " "} " "layers: { " - " layer { " - " name: 'innerprod2' " - " type: 'inner_product' " - " } " + " name: 'innerprod2' " + " type: INNER_PRODUCT " " bottom: 'data' " " top: 'innerprod2' " "} " "layers: { " - " layer { " - " name: 'loss' " - " type: 'euclidean_loss' " - " } " + " name: 'loss' " + " type: EUCLIDEAN_LOSS " " bottom: 'innerprod1' " " bottom: 'innerprod2' " "} "; @@ -969,35 +831,27 @@ TYPED_TEST(SplitLayerInsertionTest, TestInputInsertion) { "input_dim: 227 " "input_dim: 227 " "layers: { " - " layer { " - " name: 'data_input_0_split' " - " type: 'split' " - " } " + " name: 'data_input_0_split' " + " type: SPLIT " " bottom: 'data' " " top: 'data' " " top: 'data_input_0_split_1' " "} " "layers: { " - " layer { " - " name: 'innerprod1' " - " type: 'inner_product' " - " } " + " name: 'innerprod1' " + " type: INNER_PRODUCT " " bottom: 'data' " " top: 'innerprod1' " "} " "layers: { " - " layer { " - " name: 'innerprod2' " - " type: 'inner_product' " - " } " + " name: 'innerprod2' " + " type: INNER_PRODUCT " " bottom: 'data_input_0_split_1' " " top: 'innerprod2' " "} " "layers: { " - " layer { " - " name: 'loss' " - " type: 'euclidean_loss' " - " } " + " name: 'loss' " + " type: EUCLIDEAN_LOSS " " bottom: 'innerprod1' " " bottom: 'innerprod2' " "} "; @@ -1008,118 +862,90 @@ TYPED_TEST(SplitLayerInsertionTest, TestWithInPlace) { const string& input_proto = "name: 'TestNetwork' " "layers: { " - " layer { " - " name: 'data' " - " type: 'data' " - " } " + " name: 'data' " + " type: DATA " " top: 'data' " " top: 'label' " "} " "layers: { " - " layer { " - " name: 'innerprod1' " - " type: 'inner_product' " - " } " + " name: 'innerprod1' " + " type: INNER_PRODUCT " " bottom: 'data' " " top: 'innerprod1' " "} " "layers: { " - " layer { " - " name: 'relu1' " - " type: 'relu' " - " } " + " name: 'relu1' " + " type: RELU " " bottom: 'innerprod1' " " top: 'innerprod1' " "} " "layers: { " - " layer { " - " name: 'innerprod2' " - " type: 'inner_product' " - " } " + " name: 'innerprod2' " + " type: INNER_PRODUCT " " bottom: 'innerprod1' " " top: 'innerprod2' " "} " "layers: { " - " layer { " - " name: 'loss1' " - " type: 'euclidean_loss' " - " } " + " name: 'loss1' " + " type: EUCLIDEAN_LOSS " " bottom: 'innerprod1' " " bottom: 'label' " "} " "layers: { " - " layer { " - " name: 'loss2' " - " type: 'euclidean_loss' " - " } " + " name: 'loss2' " + " type: EUCLIDEAN_LOSS " " bottom: 'innerprod2' " " bottom: 'data' " "} "; const string& expected_output_proto = "name: 'TestNetwork' " "layers: { " - " layer { " - " name: 'data' " - " type: 'data' " - " } " + " name: 'data' " + " type: DATA " " top: 'data' " " top: 'label' " "} " "layers: { " - " layer { " - " name: 'data_data_0_split' " - " type: 'split' " - " } " + " name: 'data_data_0_split' " + " type: SPLIT " " bottom: 'data' " " top: 'data' " " top: 'data_data_0_split_1' " "} " "layers: { " - " layer { " - " name: 'innerprod1' " - " type: 'inner_product' " - " } " + " name: 'innerprod1' " + " type: INNER_PRODUCT " " bottom: 'data' " " top: 'innerprod1' " "} " "layers: { " - " layer { " - " name: 'relu1' " - " type: 'relu' " - " } " + " name: 'relu1' " + " type: RELU " " bottom: 'innerprod1' 
" " top: 'innerprod1' " "} " "layers: { " - " layer { " - " name: 'innerprod1_relu1_0_split' " - " type: 'split' " - " } " + " name: 'innerprod1_relu1_0_split' " + " type: SPLIT " " bottom: 'innerprod1' " " top: 'innerprod1' " " top: 'innerprod1_relu1_0_split_1' " "} " "layers: { " - " layer { " - " name: 'innerprod2' " - " type: 'inner_product' " - " } " + " name: 'innerprod2' " + " type: INNER_PRODUCT " " bottom: 'innerprod1' " " top: 'innerprod2' " "} " "layers: { " - " layer { " - " name: 'loss1' " - " type: 'euclidean_loss' " - " } " + " name: 'loss1' " + " type: EUCLIDEAN_LOSS " " bottom: 'innerprod1_relu1_0_split_1' " " bottom: 'label' " "} " "layers: { " - " layer { " - " name: 'loss2' " - " type: 'euclidean_loss' " - " } " + " name: 'loss2' " + " type: EUCLIDEAN_LOSS " " bottom: 'innerprod2' " " bottom: 'data_data_0_split_1' " "} "; diff --git a/src/caffe/test/test_stochastic_pooling.cpp b/src/caffe/test/test_stochastic_pooling.cpp index daf2c36d305..0ad8123f881 100644 --- a/src/caffe/test/test_stochastic_pooling.cpp +++ b/src/caffe/test/test_stochastic_pooling.cpp @@ -54,8 +54,9 @@ TYPED_TEST_CASE(StochasticPoolingLayerTest, Dtypes); TYPED_TEST(StochasticPoolingLayerTest, TestSetup) { LayerParameter layer_param; - layer_param.set_kernelsize(3); - layer_param.set_stride(2); + PoolingParameter* pooling_param = layer_param.mutable_pooling_param(); + pooling_param->set_kernel_size(3); + pooling_param->set_stride(2); PoolingLayer layer(layer_param); layer.SetUp(this->blob_bottom_vec_, &(this->blob_top_vec_)); EXPECT_EQ(this->blob_top_->num(), this->blob_bottom_->num()); @@ -68,10 +69,10 @@ TYPED_TEST(StochasticPoolingLayerTest, TestStochasticGPU) { Caffe::set_mode(Caffe::GPU); Caffe::set_phase(Caffe::TRAIN); LayerParameter layer_param; - layer_param.set_kernelsize(3); - layer_param.set_stride(2); - - layer_param.set_pool(LayerParameter_PoolMethod_STOCHASTIC); + PoolingParameter* pooling_param = layer_param.mutable_pooling_param(); + pooling_param->set_kernel_size(3); + pooling_param->set_stride(2); + pooling_param->set_pool(PoolingParameter_PoolMethod_STOCHASTIC); PoolingLayer layer(layer_param); layer.SetUp(this->blob_bottom_vec_, &(this->blob_top_vec_)); layer.Forward(this->blob_bottom_vec_, &(this->blob_top_vec_)); @@ -112,10 +113,10 @@ TYPED_TEST(StochasticPoolingLayerTest, TestStochasticGPUTestPhase) { Caffe::set_mode(Caffe::GPU); Caffe::set_phase(Caffe::TEST); LayerParameter layer_param; - layer_param.set_kernelsize(3); - layer_param.set_stride(2); - - layer_param.set_pool(LayerParameter_PoolMethod_STOCHASTIC); + PoolingParameter* pooling_param = layer_param.mutable_pooling_param(); + pooling_param->set_kernel_size(3); + pooling_param->set_stride(2); + pooling_param->set_pool(PoolingParameter_PoolMethod_STOCHASTIC); PoolingLayer layer(layer_param); layer.SetUp(this->blob_bottom_vec_, &(this->blob_top_vec_)); layer.Forward(this->blob_bottom_vec_, &(this->blob_top_vec_)); @@ -150,10 +151,10 @@ TYPED_TEST(StochasticPoolingLayerTest, TestGradientGPU) { Caffe::set_mode(Caffe::GPU); Caffe::set_phase(Caffe::TRAIN); LayerParameter layer_param; - layer_param.set_kernelsize(3); - layer_param.set_stride(2); - - layer_param.set_pool(LayerParameter_PoolMethod_STOCHASTIC); + PoolingParameter* pooling_param = layer_param.mutable_pooling_param(); + pooling_param->set_kernel_size(3); + pooling_param->set_stride(2); + pooling_param->set_pool(PoolingParameter_PoolMethod_STOCHASTIC); PoolingLayer layer(layer_param); GradientChecker checker(1e-4, 1e-2); // it is too expensive to call curand multiple 
times, so we don't do an diff --git a/src/caffe/test/test_upgrade_proto.cpp b/src/caffe/test/test_upgrade_proto.cpp new file mode 100644 index 00000000000..323a8aed61c --- /dev/null +++ b/src/caffe/test/test_upgrade_proto.cpp @@ -0,0 +1,2445 @@ +// Copyright 2014 BVLC and contributors. + +#include +#include +#include + +#include "cuda_runtime.h" +#include "google/protobuf/text_format.h" +#include "gtest/gtest.h" +#include "caffe/blob.hpp" +#include "caffe/common.hpp" +#include "caffe/util/upgrade_proto.hpp" + +#include "caffe/test/test_caffe_main.hpp" + +using std::string; + +namespace caffe { + +template +class PaddingLayerUpgradeTest : public ::testing::Test { + protected: + void RunPaddingUpgradeTest( + const string& input_param_string, const string& output_param_string) { + // Test that UpgradeV0PaddingLayers called on the proto specified by + // input_param_string results in the proto specified by + // output_param_string. + NetParameter input_param; + CHECK(google::protobuf::TextFormat::ParseFromString( + input_param_string, &input_param)); + NetParameter expected_output_param; + CHECK(google::protobuf::TextFormat::ParseFromString( + output_param_string, &expected_output_param)); + NetParameter actual_output_param; + UpgradeV0PaddingLayers(input_param, &actual_output_param); + EXPECT_EQ(expected_output_param.DebugString(), + actual_output_param.DebugString()); + // Also test idempotence. + NetParameter double_pad_upgrade_param; + UpgradeV0PaddingLayers(actual_output_param, &double_pad_upgrade_param); + EXPECT_EQ(actual_output_param.DebugString(), + double_pad_upgrade_param.DebugString()); + } +}; + +typedef ::testing::Types PaddingUpgradeDtypes; +TYPED_TEST_CASE(PaddingLayerUpgradeTest, PaddingUpgradeDtypes); + +TYPED_TEST(PaddingLayerUpgradeTest, TestSimple) { + const string& input_proto = + "name: 'CaffeNet' " + "layers { " + " layer { " + " name: 'data' " + " type: 'data' " + " source: '/home/jiayq/Data/ILSVRC12/train-leveldb' " + " meanfile: '/home/jiayq/Data/ILSVRC12/image_mean.binaryproto' " + " batchsize: 256 " + " cropsize: 227 " + " mirror: true " + " } " + " top: 'data' " + " top: 'label' " + "} " + "layers { " + " layer { " + " name: 'pad1' " + " type: 'padding' " + " pad: 2 " + " } " + " bottom: 'data' " + " top: 'pad1' " + "} " + "layers { " + " layer { " + " name: 'conv1' " + " type: 'conv' " + " num_output: 96 " + " kernelsize: 11 " + " stride: 4 " + " weight_filler { " + " type: 'gaussian' " + " std: 0.01 " + " } " + " bias_filler { " + " type: 'constant' " + " value: 0. " + " } " + " blobs_lr: 1. " + " blobs_lr: 2. " + " weight_decay: 1. " + " weight_decay: 0. " + " } " + " bottom: 'pad1' " + " top: 'conv1' " + "} " + "layers { " + " layer { " + " name: 'fc8' " + " type: 'innerproduct' " + " num_output: 1000 " + " weight_filler { " + " type: 'gaussian' " + " std: 0.01 " + " } " + " bias_filler { " + " type: 'constant' " + " value: 0 " + " } " + " blobs_lr: 1. " + " blobs_lr: 2. " + " weight_decay: 1. " + " weight_decay: 0. 
" + " } " + " bottom: 'conv1' " + " top: 'fc8' " + "} " + "layers { " + " layer { " + " name: 'loss' " + " type: 'softmax_loss' " + " } " + " bottom: 'fc8' " + " bottom: 'label' " + "} "; + const string& expected_output_proto = + "name: 'CaffeNet' " + "layers { " + " layer { " + " name: 'data' " + " type: 'data' " + " source: '/home/jiayq/Data/ILSVRC12/train-leveldb' " + " meanfile: '/home/jiayq/Data/ILSVRC12/image_mean.binaryproto' " + " batchsize: 256 " + " cropsize: 227 " + " mirror: true " + " } " + " top: 'data' " + " top: 'label' " + "} " + "layers { " + " layer { " + " name: 'conv1' " + " type: 'conv' " + " num_output: 96 " + " kernelsize: 11 " + " stride: 4 " + " pad: 2 " + " weight_filler { " + " type: 'gaussian' " + " std: 0.01 " + " } " + " bias_filler { " + " type: 'constant' " + " value: 0. " + " } " + " blobs_lr: 1. " + " blobs_lr: 2. " + " weight_decay: 1. " + " weight_decay: 0. " + " } " + " bottom: 'data' " + " top: 'conv1' " + "} " + "layers { " + " layer { " + " name: 'fc8' " + " type: 'innerproduct' " + " num_output: 1000 " + " weight_filler { " + " type: 'gaussian' " + " std: 0.01 " + " } " + " bias_filler { " + " type: 'constant' " + " value: 0 " + " } " + " blobs_lr: 1. " + " blobs_lr: 2. " + " weight_decay: 1. " + " weight_decay: 0. " + " } " + " bottom: 'conv1' " + " top: 'fc8' " + "} " + "layers { " + " layer { " + " name: 'loss' " + " type: 'softmax_loss' " + " } " + " bottom: 'fc8' " + " bottom: 'label' " + "} "; + this->RunPaddingUpgradeTest(input_proto, expected_output_proto); +} + +TYPED_TEST(PaddingLayerUpgradeTest, TestTwoTops) { + const string& input_proto = + "name: 'CaffeNet' " + "layers { " + " layer { " + " name: 'data' " + " type: 'data' " + " source: '/home/jiayq/Data/ILSVRC12/train-leveldb' " + " meanfile: '/home/jiayq/Data/ILSVRC12/image_mean.binaryproto' " + " batchsize: 256 " + " cropsize: 227 " + " mirror: true " + " } " + " top: 'data' " + " top: 'label' " + "} " + "layers { " + " layer { " + " name: 'pad1' " + " type: 'padding' " + " pad: 2 " + " } " + " bottom: 'data' " + " top: 'pad1' " + "} " + "layers { " + " layer { " + " name: 'conv1' " + " type: 'conv' " + " num_output: 96 " + " kernelsize: 11 " + " stride: 4 " + " weight_filler { " + " type: 'gaussian' " + " std: 0.01 " + " } " + " bias_filler { " + " type: 'constant' " + " value: 0. " + " } " + " blobs_lr: 1. " + " blobs_lr: 2. " + " weight_decay: 1. " + " weight_decay: 0. " + " } " + " bottom: 'pad1' " + " top: 'conv1' " + "} " + "layers { " + " layer { " + " name: 'fc8' " + " type: 'innerproduct' " + " num_output: 1000 " + " weight_filler { " + " type: 'gaussian' " + " std: 0.01 " + " } " + " bias_filler { " + " type: 'constant' " + " value: 0 " + " } " + " blobs_lr: 1. " + " blobs_lr: 2. " + " weight_decay: 1. " + " weight_decay: 0. " + " } " + " bottom: 'conv1' " + " top: 'fc8' " + "} " + "layers { " + " layer { " + " name: 'conv2' " + " type: 'conv' " + " num_output: 96 " + " kernelsize: 11 " + " stride: 4 " + " weight_filler { " + " type: 'gaussian' " + " std: 0.01 " + " } " + " bias_filler { " + " type: 'constant' " + " value: 0. " + " } " + " blobs_lr: 1. " + " blobs_lr: 2. " + " weight_decay: 1. " + " weight_decay: 0. 
" + " } " + " bottom: 'pad1' " + " top: 'conv2' " + "} " + "layers { " + " layer { " + " name: 'loss' " + " type: 'softmax_loss' " + " } " + " bottom: 'fc8' " + " bottom: 'label' " + "} "; + const string& expected_output_proto = + "name: 'CaffeNet' " + "layers { " + " layer { " + " name: 'data' " + " type: 'data' " + " source: '/home/jiayq/Data/ILSVRC12/train-leveldb' " + " meanfile: '/home/jiayq/Data/ILSVRC12/image_mean.binaryproto' " + " batchsize: 256 " + " cropsize: 227 " + " mirror: true " + " } " + " top: 'data' " + " top: 'label' " + "} " + "layers { " + " layer { " + " name: 'conv1' " + " type: 'conv' " + " num_output: 96 " + " kernelsize: 11 " + " stride: 4 " + " pad: 2 " + " weight_filler { " + " type: 'gaussian' " + " std: 0.01 " + " } " + " bias_filler { " + " type: 'constant' " + " value: 0. " + " } " + " blobs_lr: 1. " + " blobs_lr: 2. " + " weight_decay: 1. " + " weight_decay: 0. " + " } " + " bottom: 'data' " + " top: 'conv1' " + "} " + "layers { " + " layer { " + " name: 'fc8' " + " type: 'innerproduct' " + " num_output: 1000 " + " weight_filler { " + " type: 'gaussian' " + " std: 0.01 " + " } " + " bias_filler { " + " type: 'constant' " + " value: 0 " + " } " + " blobs_lr: 1. " + " blobs_lr: 2. " + " weight_decay: 1. " + " weight_decay: 0. " + " } " + " bottom: 'conv1' " + " top: 'fc8' " + "} " + "layers { " + " layer { " + " name: 'conv2' " + " type: 'conv' " + " num_output: 96 " + " kernelsize: 11 " + " stride: 4 " + " pad: 2 " + " weight_filler { " + " type: 'gaussian' " + " std: 0.01 " + " } " + " bias_filler { " + " type: 'constant' " + " value: 0. " + " } " + " blobs_lr: 1. " + " blobs_lr: 2. " + " weight_decay: 1. " + " weight_decay: 0. " + " } " + " bottom: 'data' " + " top: 'conv2' " + "} " + "layers { " + " layer { " + " name: 'loss' " + " type: 'softmax_loss' " + " } " + " bottom: 'fc8' " + " bottom: 'label' " + "} "; + this->RunPaddingUpgradeTest(input_proto, expected_output_proto); +} + +TYPED_TEST(PaddingLayerUpgradeTest, TestImageNet) { + const string& input_proto = + "name: 'CaffeNet' " + "layers { " + " layer { " + " name: 'data' " + " type: 'data' " + " source: '/home/jiayq/Data/ILSVRC12/train-leveldb' " + " meanfile: '/home/jiayq/Data/ILSVRC12/image_mean.binaryproto' " + " batchsize: 256 " + " cropsize: 227 " + " mirror: true " + " } " + " top: 'data' " + " top: 'label' " + "} " + "layers { " + " layer { " + " name: 'conv1' " + " type: 'conv' " + " num_output: 96 " + " kernelsize: 11 " + " stride: 4 " + " weight_filler { " + " type: 'gaussian' " + " std: 0.01 " + " } " + " bias_filler { " + " type: 'constant' " + " value: 0. " + " } " + " blobs_lr: 1. " + " blobs_lr: 2. " + " weight_decay: 1. " + " weight_decay: 0. 
" + " } " + " bottom: 'data' " + " top: 'conv1' " + "} " + "layers { " + " layer { " + " name: 'relu1' " + " type: 'relu' " + " } " + " bottom: 'conv1' " + " top: 'conv1' " + "} " + "layers { " + " layer { " + " name: 'pool1' " + " type: 'pool' " + " pool: MAX " + " kernelsize: 3 " + " stride: 2 " + " } " + " bottom: 'conv1' " + " top: 'pool1' " + "} " + "layers { " + " layer { " + " name: 'norm1' " + " type: 'lrn' " + " local_size: 5 " + " alpha: 0.0001 " + " beta: 0.75 " + " } " + " bottom: 'pool1' " + " top: 'norm1' " + "} " + "layers { " + " layer { " + " name: 'pad2' " + " type: 'padding' " + " pad: 2 " + " } " + " bottom: 'norm1' " + " top: 'pad2' " + "} " + "layers { " + " layer { " + " name: 'conv2' " + " type: 'conv' " + " num_output: 256 " + " group: 2 " + " kernelsize: 5 " + " weight_filler { " + " type: 'gaussian' " + " std: 0.01 " + " } " + " bias_filler { " + " type: 'constant' " + " value: 1. " + " } " + " blobs_lr: 1. " + " blobs_lr: 2. " + " weight_decay: 1. " + " weight_decay: 0. " + " } " + " bottom: 'pad2' " + " top: 'conv2' " + "} " + "layers { " + " layer { " + " name: 'relu2' " + " type: 'relu' " + " } " + " bottom: 'conv2' " + " top: 'conv2' " + "} " + "layers { " + " layer { " + " name: 'pool2' " + " type: 'pool' " + " pool: MAX " + " kernelsize: 3 " + " stride: 2 " + " } " + " bottom: 'conv2' " + " top: 'pool2' " + "} " + "layers { " + " layer { " + " name: 'norm2' " + " type: 'lrn' " + " local_size: 5 " + " alpha: 0.0001 " + " beta: 0.75 " + " } " + " bottom: 'pool2' " + " top: 'norm2' " + "} " + "layers { " + " layer { " + " name: 'pad3' " + " type: 'padding' " + " pad: 1 " + " } " + " bottom: 'norm2' " + " top: 'pad3' " + "} " + "layers { " + " layer { " + " name: 'conv3' " + " type: 'conv' " + " num_output: 384 " + " kernelsize: 3 " + " weight_filler { " + " type: 'gaussian' " + " std: 0.01 " + " } " + " bias_filler { " + " type: 'constant' " + " value: 0. " + " } " + " blobs_lr: 1. " + " blobs_lr: 2. " + " weight_decay: 1. " + " weight_decay: 0. " + " } " + " bottom: 'pad3' " + " top: 'conv3' " + "} " + "layers { " + " layer { " + " name: 'relu3' " + " type: 'relu' " + " } " + " bottom: 'conv3' " + " top: 'conv3' " + "} " + "layers { " + " layer { " + " name: 'pad4' " + " type: 'padding' " + " pad: 1 " + " } " + " bottom: 'conv3' " + " top: 'pad4' " + "} " + "layers { " + " layer { " + " name: 'conv4' " + " type: 'conv' " + " num_output: 384 " + " group: 2 " + " kernelsize: 3 " + " weight_filler { " + " type: 'gaussian' " + " std: 0.01 " + " } " + " bias_filler { " + " type: 'constant' " + " value: 1. " + " } " + " blobs_lr: 1. " + " blobs_lr: 2. " + " weight_decay: 1. " + " weight_decay: 0. " + " } " + " bottom: 'pad4' " + " top: 'conv4' " + "} " + "layers { " + " layer { " + " name: 'relu4' " + " type: 'relu' " + " } " + " bottom: 'conv4' " + " top: 'conv4' " + "} " + "layers { " + " layer { " + " name: 'pad5' " + " type: 'padding' " + " pad: 1 " + " } " + " bottom: 'conv4' " + " top: 'pad5' " + "} " + "layers { " + " layer { " + " name: 'conv5' " + " type: 'conv' " + " num_output: 256 " + " group: 2 " + " kernelsize: 3 " + " weight_filler { " + " type: 'gaussian' " + " std: 0.01 " + " } " + " bias_filler { " + " type: 'constant' " + " value: 1. " + " } " + " blobs_lr: 1. " + " blobs_lr: 2. " + " weight_decay: 1. " + " weight_decay: 0. 
" + " } " + " bottom: 'pad5' " + " top: 'conv5' " + "} " + "layers { " + " layer { " + " name: 'relu5' " + " type: 'relu' " + " } " + " bottom: 'conv5' " + " top: 'conv5' " + "} " + "layers { " + " layer { " + " name: 'pool5' " + " type: 'pool' " + " kernelsize: 3 " + " pool: MAX " + " stride: 2 " + " } " + " bottom: 'conv5' " + " top: 'pool5' " + "} " + "layers { " + " layer { " + " name: 'fc6' " + " type: 'innerproduct' " + " num_output: 4096 " + " weight_filler { " + " type: 'gaussian' " + " std: 0.005 " + " } " + " bias_filler { " + " type: 'constant' " + " value: 1. " + " } " + " blobs_lr: 1. " + " blobs_lr: 2. " + " weight_decay: 1. " + " weight_decay: 0. " + " } " + " bottom: 'pool5' " + " top: 'fc6' " + "} " + "layers { " + " layer { " + " name: 'relu6' " + " type: 'relu' " + " } " + " bottom: 'fc6' " + " top: 'fc6' " + "} " + "layers { " + " layer { " + " name: 'drop6' " + " type: 'dropout' " + " dropout_ratio: 0.5 " + " } " + " bottom: 'fc6' " + " top: 'fc6' " + "} " + "layers { " + " layer { " + " name: 'fc7' " + " type: 'innerproduct' " + " num_output: 4096 " + " weight_filler { " + " type: 'gaussian' " + " std: 0.005 " + " } " + " bias_filler { " + " type: 'constant' " + " value: 1. " + " } " + " blobs_lr: 1. " + " blobs_lr: 2. " + " weight_decay: 1. " + " weight_decay: 0. " + " } " + " bottom: 'fc6' " + " top: 'fc7' " + "} " + "layers { " + " layer { " + " name: 'relu7' " + " type: 'relu' " + " } " + " bottom: 'fc7' " + " top: 'fc7' " + "} " + "layers { " + " layer { " + " name: 'drop7' " + " type: 'dropout' " + " dropout_ratio: 0.5 " + " } " + " bottom: 'fc7' " + " top: 'fc7' " + "} " + "layers { " + " layer { " + " name: 'fc8' " + " type: 'innerproduct' " + " num_output: 1000 " + " weight_filler { " + " type: 'gaussian' " + " std: 0.01 " + " } " + " bias_filler { " + " type: 'constant' " + " value: 0 " + " } " + " blobs_lr: 1. " + " blobs_lr: 2. " + " weight_decay: 1. " + " weight_decay: 0. " + " } " + " bottom: 'fc7' " + " top: 'fc8' " + "} " + "layers { " + " layer { " + " name: 'loss' " + " type: 'softmax_loss' " + " } " + " bottom: 'fc8' " + " bottom: 'label' " + "} "; + const string& expected_output_proto = + "name: 'CaffeNet' " + "layers { " + " layer { " + " name: 'data' " + " type: 'data' " + " source: '/home/jiayq/Data/ILSVRC12/train-leveldb' " + " meanfile: '/home/jiayq/Data/ILSVRC12/image_mean.binaryproto' " + " batchsize: 256 " + " cropsize: 227 " + " mirror: true " + " } " + " top: 'data' " + " top: 'label' " + "} " + "layers { " + " layer { " + " name: 'conv1' " + " type: 'conv' " + " num_output: 96 " + " kernelsize: 11 " + " stride: 4 " + " weight_filler { " + " type: 'gaussian' " + " std: 0.01 " + " } " + " bias_filler { " + " type: 'constant' " + " value: 0. " + " } " + " blobs_lr: 1. " + " blobs_lr: 2. " + " weight_decay: 1. " + " weight_decay: 0. 
" + " } " + " bottom: 'data' " + " top: 'conv1' " + "} " + "layers { " + " layer { " + " name: 'relu1' " + " type: 'relu' " + " } " + " bottom: 'conv1' " + " top: 'conv1' " + "} " + "layers { " + " layer { " + " name: 'pool1' " + " type: 'pool' " + " pool: MAX " + " kernelsize: 3 " + " stride: 2 " + " } " + " bottom: 'conv1' " + " top: 'pool1' " + "} " + "layers { " + " layer { " + " name: 'norm1' " + " type: 'lrn' " + " local_size: 5 " + " alpha: 0.0001 " + " beta: 0.75 " + " } " + " bottom: 'pool1' " + " top: 'norm1' " + "} " + "layers { " + " layer { " + " name: 'conv2' " + " type: 'conv' " + " num_output: 256 " + " group: 2 " + " kernelsize: 5 " + " pad: 2 " + " weight_filler { " + " type: 'gaussian' " + " std: 0.01 " + " } " + " bias_filler { " + " type: 'constant' " + " value: 1. " + " } " + " blobs_lr: 1. " + " blobs_lr: 2. " + " weight_decay: 1. " + " weight_decay: 0. " + " } " + " bottom: 'norm1' " + " top: 'conv2' " + "} " + "layers { " + " layer { " + " name: 'relu2' " + " type: 'relu' " + " } " + " bottom: 'conv2' " + " top: 'conv2' " + "} " + "layers { " + " layer { " + " name: 'pool2' " + " type: 'pool' " + " pool: MAX " + " kernelsize: 3 " + " stride: 2 " + " } " + " bottom: 'conv2' " + " top: 'pool2' " + "} " + "layers { " + " layer { " + " name: 'norm2' " + " type: 'lrn' " + " local_size: 5 " + " alpha: 0.0001 " + " beta: 0.75 " + " } " + " bottom: 'pool2' " + " top: 'norm2' " + "} " + "layers { " + " layer { " + " name: 'conv3' " + " type: 'conv' " + " num_output: 384 " + " kernelsize: 3 " + " pad: 1 " + " weight_filler { " + " type: 'gaussian' " + " std: 0.01 " + " } " + " bias_filler { " + " type: 'constant' " + " value: 0. " + " } " + " blobs_lr: 1. " + " blobs_lr: 2. " + " weight_decay: 1. " + " weight_decay: 0. " + " } " + " bottom: 'norm2' " + " top: 'conv3' " + "} " + "layers { " + " layer { " + " name: 'relu3' " + " type: 'relu' " + " } " + " bottom: 'conv3' " + " top: 'conv3' " + "} " + "layers { " + " layer { " + " name: 'conv4' " + " type: 'conv' " + " num_output: 384 " + " group: 2 " + " kernelsize: 3 " + " pad: 1 " + " weight_filler { " + " type: 'gaussian' " + " std: 0.01 " + " } " + " bias_filler { " + " type: 'constant' " + " value: 1. " + " } " + " blobs_lr: 1. " + " blobs_lr: 2. " + " weight_decay: 1. " + " weight_decay: 0. " + " } " + " bottom: 'conv3' " + " top: 'conv4' " + "} " + "layers { " + " layer { " + " name: 'relu4' " + " type: 'relu' " + " } " + " bottom: 'conv4' " + " top: 'conv4' " + "} " + "layers { " + " layer { " + " name: 'conv5' " + " type: 'conv' " + " num_output: 256 " + " group: 2 " + " kernelsize: 3 " + " pad: 1 " + " weight_filler { " + " type: 'gaussian' " + " std: 0.01 " + " } " + " bias_filler { " + " type: 'constant' " + " value: 1. " + " } " + " blobs_lr: 1. " + " blobs_lr: 2. " + " weight_decay: 1. " + " weight_decay: 0. " + " } " + " bottom: 'conv4' " + " top: 'conv5' " + "} " + "layers { " + " layer { " + " name: 'relu5' " + " type: 'relu' " + " } " + " bottom: 'conv5' " + " top: 'conv5' " + "} " + "layers { " + " layer { " + " name: 'pool5' " + " type: 'pool' " + " kernelsize: 3 " + " pool: MAX " + " stride: 2 " + " } " + " bottom: 'conv5' " + " top: 'pool5' " + "} " + "layers { " + " layer { " + " name: 'fc6' " + " type: 'innerproduct' " + " num_output: 4096 " + " weight_filler { " + " type: 'gaussian' " + " std: 0.005 " + " } " + " bias_filler { " + " type: 'constant' " + " value: 1. " + " } " + " blobs_lr: 1. " + " blobs_lr: 2. " + " weight_decay: 1. " + " weight_decay: 0. 
" + " } " + " bottom: 'pool5' " + " top: 'fc6' " + "} " + "layers { " + " layer { " + " name: 'relu6' " + " type: 'relu' " + " } " + " bottom: 'fc6' " + " top: 'fc6' " + "} " + "layers { " + " layer { " + " name: 'drop6' " + " type: 'dropout' " + " dropout_ratio: 0.5 " + " } " + " bottom: 'fc6' " + " top: 'fc6' " + "} " + "layers { " + " layer { " + " name: 'fc7' " + " type: 'innerproduct' " + " num_output: 4096 " + " weight_filler { " + " type: 'gaussian' " + " std: 0.005 " + " } " + " bias_filler { " + " type: 'constant' " + " value: 1. " + " } " + " blobs_lr: 1. " + " blobs_lr: 2. " + " weight_decay: 1. " + " weight_decay: 0. " + " } " + " bottom: 'fc6' " + " top: 'fc7' " + "} " + "layers { " + " layer { " + " name: 'relu7' " + " type: 'relu' " + " } " + " bottom: 'fc7' " + " top: 'fc7' " + "} " + "layers { " + " layer { " + " name: 'drop7' " + " type: 'dropout' " + " dropout_ratio: 0.5 " + " } " + " bottom: 'fc7' " + " top: 'fc7' " + "} " + "layers { " + " layer { " + " name: 'fc8' " + " type: 'innerproduct' " + " num_output: 1000 " + " weight_filler { " + " type: 'gaussian' " + " std: 0.01 " + " } " + " bias_filler { " + " type: 'constant' " + " value: 0 " + " } " + " blobs_lr: 1. " + " blobs_lr: 2. " + " weight_decay: 1. " + " weight_decay: 0. " + " } " + " bottom: 'fc7' " + " top: 'fc8' " + "} " + "layers { " + " layer { " + " name: 'loss' " + " type: 'softmax_loss' " + " } " + " bottom: 'fc8' " + " bottom: 'label' " + "} "; + this->RunPaddingUpgradeTest(input_proto, expected_output_proto); +} + +template +class V0UpgradeTest : public ::testing::Test { + protected: + void RunV0UpgradeTest( + const string& input_param_string, const string& output_param_string) { + // Test that UpgradeV0Net called on the NetParameter proto specified by + // input_param_string results in the NetParameter proto specified by + // output_param_string. + NetParameter input_param; + CHECK(google::protobuf::TextFormat::ParseFromString( + input_param_string, &input_param)); + NetParameter expected_output_param; + CHECK(google::protobuf::TextFormat::ParseFromString( + output_param_string, &expected_output_param)); + NetParameter actual_output_param; + UpgradeV0Net(input_param, &actual_output_param); + EXPECT_EQ(expected_output_param.DebugString(), + actual_output_param.DebugString()); + } +}; + +typedef ::testing::Types V0UpgradeDtypes; +TYPED_TEST_CASE(V0UpgradeTest, V0UpgradeDtypes); + +TYPED_TEST(V0UpgradeTest, TestSimple) { + const string& input_proto = + "name: 'CaffeNet' " + "layers { " + " layer { " + " name: 'data' " + " type: 'data' " + " source: '/home/jiayq/Data/ILSVRC12/train-leveldb' " + " meanfile: '/home/jiayq/Data/ILSVRC12/image_mean.binaryproto' " + " batchsize: 256 " + " cropsize: 227 " + " mirror: true " + " } " + " top: 'data' " + " top: 'label' " + "} " + "layers { " + " layer { " + " name: 'pad1' " + " type: 'padding' " + " pad: 2 " + " } " + " bottom: 'data' " + " top: 'pad1' " + "} " + "layers { " + " layer { " + " name: 'conv1' " + " type: 'conv' " + " num_output: 96 " + " kernelsize: 11 " + " stride: 4 " + " weight_filler { " + " type: 'gaussian' " + " std: 0.01 " + " } " + " bias_filler { " + " type: 'constant' " + " value: 0. " + " } " + " blobs_lr: 1. " + " blobs_lr: 2. " + " weight_decay: 1. " + " weight_decay: 0. 
" + " } " + " bottom: 'pad1' " + " top: 'conv1' " + "} " + "layers { " + " layer { " + " name: 'fc8' " + " type: 'innerproduct' " + " num_output: 1000 " + " weight_filler { " + " type: 'gaussian' " + " std: 0.01 " + " } " + " bias_filler { " + " type: 'constant' " + " value: 0 " + " } " + " blobs_lr: 1. " + " blobs_lr: 2. " + " weight_decay: 1. " + " weight_decay: 0. " + " } " + " bottom: 'conv1' " + " top: 'fc8' " + "} " + "layers { " + " layer { " + " name: 'loss' " + " type: 'softmax_loss' " + " } " + " bottom: 'fc8' " + " bottom: 'label' " + "} "; + const string& expected_output_proto = + "name: 'CaffeNet' " + "layers { " + " name: 'data' " + " type: DATA " + " data_param { " + " source: '/home/jiayq/Data/ILSVRC12/train-leveldb' " + " mean_file: '/home/jiayq/Data/ILSVRC12/image_mean.binaryproto' " + " batch_size: 256 " + " crop_size: 227 " + " mirror: true " + " } " + " top: 'data' " + " top: 'label' " + "} " + "layers { " + " name: 'conv1' " + " type: CONVOLUTION " + " convolution_param { " + " num_output: 96 " + " kernel_size: 11 " + " stride: 4 " + " pad: 2 " + " weight_filler { " + " type: 'gaussian' " + " std: 0.01 " + " } " + " bias_filler { " + " type: 'constant' " + " value: 0. " + " } " + " } " + " blobs_lr: 1. " + " blobs_lr: 2. " + " weight_decay: 1. " + " weight_decay: 0. " + " bottom: 'data' " + " top: 'conv1' " + "} " + "layers { " + " name: 'fc8' " + " type: INNER_PRODUCT " + " inner_product_param { " + " num_output: 1000 " + " weight_filler { " + " type: 'gaussian' " + " std: 0.01 " + " } " + " bias_filler { " + " type: 'constant' " + " value: 0 " + " } " + " } " + " blobs_lr: 1. " + " blobs_lr: 2. " + " weight_decay: 1. " + " weight_decay: 0. " + " bottom: 'conv1' " + " top: 'fc8' " + "} " + "layers { " + " name: 'loss' " + " type: SOFTMAX_LOSS " + " bottom: 'fc8' " + " bottom: 'label' " + "} "; + this->RunV0UpgradeTest(input_proto, expected_output_proto); +} + +// Test any layer or parameter upgrades not covered by other tests. 
+TYPED_TEST(V0UpgradeTest, TestAllParams) { + const string& input_proto = + "name: 'CaffeNet' " + "input: 'input_data' " + "input_dim: 64 " + "input_dim: 3 " + "input_dim: 32 " + "input_dim: 32 " + "layers { " + " layer { " + " name: 'data' " + " type: 'data' " + " source: '/home/jiayq/Data/ILSVRC12/train-leveldb' " + " meanfile: '/home/jiayq/Data/ILSVRC12/image_mean.binaryproto' " + " batchsize: 256 " + " cropsize: 227 " + " mirror: true " + " scale: 0.25 " + " rand_skip: 73 " + " } " + " top: 'data' " + " top: 'label' " + "} " + "layers { " + " layer { " + " name: 'images' " + " type: 'images' " + " source: '/home/jiayq/Data/ILSVRC12/train-images' " + " meanfile: '/home/jiayq/Data/ILSVRC12/image_mean.binaryproto' " + " batchsize: 256 " + " cropsize: 227 " + " mirror: true " + " scale: 0.25 " + " rand_skip: 73 " + " shuffle_images: true " + " new_height: 40 " + " new_width: 30 " + " } " + " top: 'images_data' " + " top: 'images_label' " + "} " + "layers { " + " layer { " + " name: 'window_data' " + " type: 'window_data' " + " source: '/home/jiayq/Data/ILSVRC12/train-leveldb' " + " meanfile: '/home/jiayq/Data/ILSVRC12/image_mean.binaryproto' " + " batchsize: 256 " + " cropsize: 227 " + " mirror: true " + " det_fg_threshold: 0.25 " + " det_bg_threshold: 0.75 " + " det_fg_fraction: 0.5 " + " det_context_pad: 16 " + " det_crop_mode: 'square' " + " } " + " top: 'window_data' " + " top: 'window_label' " + "} " + "layers { " + " layer { " + " name: 'hdf5data' " + " type: 'hdf5_data' " + " source: '/my/hdf5/data' " + " batchsize: 256 " + " } " + " top: 'hdf5data' " + "} " + "layers { " + " layer { " + " name: 'conv1' " + " type: 'conv' " + " num_output: 96 " + " biasterm: false " + " pad: 4 " + " kernelsize: 11 " + " stride: 4 " + " weight_filler { " + " type: 'gaussian' " + " std: 0.01 " + " } " + " bias_filler { " + " type: 'constant' " + " value: 3. " + " } " + " blobs_lr: 1. " + " blobs_lr: 2. " + " weight_decay: 1. " + " weight_decay: 0. " + " } " + " bottom: 'data' " + " top: 'conv1' " + "} " + "layers { " + " layer { " + " name: 'pool1ave' " + " type: 'pool' " + " pool: AVE " + " kernelsize: 3 " + " stride: 2 " + " } " + " bottom: 'conv1' " + " top: 'pool1ave' " + "} " + "layers { " + " layer { " + " name: 'pool1stoch' " + " type: 'pool' " + " pool: STOCHASTIC " + " kernelsize: 4 " + " stride: 5 " + " } " + " bottom: 'conv1' " + " top: 'pool1stoch' " + "} " + "layers { " + " layer { " + " name: 'concat' " + " type: 'concat' " + " concat_dim: 2 " + " } " + " bottom: 'pool1ave' " + " bottom: 'pool1stoch' " + " top: 'pool1concat' " + "} " + "layers { " + " layer { " + " name: 'norm1' " + " type: 'lrn' " + " local_size: 5 " + " alpha: 0.0001 " + " beta: 0.75 " + " } " + " bottom: 'pool1concat' " + " top: 'norm1' " + "} " + "layers { " + " layer { " + " name: 'fc6' " + " type: 'innerproduct' " + " num_output: 4096 " + " biasterm: false " + " weight_filler { " + " type: 'gaussian' " + " std: 0.005 " + " } " + " bias_filler { " + " type: 'constant' " + " value: 1. " + " } " + " blobs_lr: 1. " + " blobs_lr: 2. " + " weight_decay: 1. " + " weight_decay: 0. 
" + " } " + " bottom: 'norm1' " + " top: 'fc6' " + "} " + "layers { " + " layer { " + " name: 'relu6' " + " type: 'relu' " + " } " + " bottom: 'fc6' " + " top: 'fc6' " + "} " + "layers { " + " layer { " + " name: 'drop6' " + " type: 'dropout' " + " dropout_ratio: 0.2 " + " } " + " bottom: 'fc6' " + " top: 'fc6' " + "} " + "layers { " + " layer { " + " name: 'loss' " + " type: 'infogain_loss' " + " source: '/my/infogain/matrix' " + " } " + " bottom: 'fc6' " + " bottom: 'label' " + "} " + "layers { " + " layer { " + " name: 'accuracy' " + " type: 'accuracy' " + " } " + "} " + "layers { " + " layer { " + " name: 'bnll' " + " type: 'bnll' " + " } " + "} " + "layers { " + " layer { " + " name: 'euclidean_loss' " + " type: 'euclidean_loss' " + " } " + "} " + "layers { " + " layer { " + " name: 'flatten' " + " type: 'flatten' " + " } " + "} " + "layers { " + " layer { " + " name: 'hdf5_output' " + " type: 'hdf5_output' " + " hdf5_output_param { " + " file_name: '/my/hdf5/output/file' " + " } " + " } " + "} " + "layers { " + " layer { " + " name: 'im2col' " + " type: 'im2col' " + " } " + "} " + "layers { " + " layer { " + " name: 'images' " + " type: 'images' " + " } " + "} " + "layers { " + " layer { " + " name: 'multinomial_logistic_loss' " + " type: 'multinomial_logistic_loss' " + " } " + "} " + "layers { " + " layer { " + " name: 'sigmoid' " + " type: 'sigmoid' " + " } " + "} " + "layers { " + " layer { " + " name: 'softmax' " + " type: 'softmax' " + " } " + "} " + "layers { " + " layer { " + " name: 'split' " + " type: 'split' " + " } " + "} " + "layers { " + " layer { " + " name: 'tanh' " + " type: 'tanh' " + " } " + "} "; + const string& expected_output_proto = + "name: 'CaffeNet' " + "input: 'input_data' " + "input_dim: 64 " + "input_dim: 3 " + "input_dim: 32 " + "input_dim: 32 " + "layers { " + " name: 'data' " + " type: DATA " + " data_param { " + " source: '/home/jiayq/Data/ILSVRC12/train-leveldb' " + " mean_file: '/home/jiayq/Data/ILSVRC12/image_mean.binaryproto' " + " batch_size: 256 " + " crop_size: 227 " + " mirror: true " + " scale: 0.25 " + " rand_skip: 73 " + " } " + " top: 'data' " + " top: 'label' " + "} " + "layers { " + " name: 'images' " + " type: IMAGE_DATA " + " image_data_param { " + " source: '/home/jiayq/Data/ILSVRC12/train-images' " + " mean_file: '/home/jiayq/Data/ILSVRC12/image_mean.binaryproto' " + " batch_size: 256 " + " crop_size: 227 " + " mirror: true " + " scale: 0.25 " + " rand_skip: 73 " + " shuffle: true " + " new_height: 40 " + " new_width: 30 " + " } " + " top: 'images_data' " + " top: 'images_label' " + "} " + "layers { " + " name: 'window_data' " + " type: WINDOW_DATA " + " window_data_param { " + " source: '/home/jiayq/Data/ILSVRC12/train-leveldb' " + " mean_file: '/home/jiayq/Data/ILSVRC12/image_mean.binaryproto' " + " batch_size: 256 " + " crop_size: 227 " + " mirror: true " + " fg_threshold: 0.25 " + " bg_threshold: 0.75 " + " fg_fraction: 0.5 " + " context_pad: 16 " + " crop_mode: 'square' " + " } " + " top: 'window_data' " + " top: 'window_label' " + "} " + "layers { " + " name: 'hdf5data' " + " type: HDF5_DATA " + " hdf5_data_param { " + " source: '/my/hdf5/data' " + " batch_size: 256 " + " } " + " top: 'hdf5data' " + "} " + "layers { " + " name: 'conv1' " + " type: CONVOLUTION " + " convolution_param { " + " num_output: 96 " + " bias_term: false " + " pad: 4 " + " kernel_size: 11 " + " stride: 4 " + " weight_filler { " + " type: 'gaussian' " + " std: 0.01 " + " } " + " bias_filler { " + " type: 'constant' " + " value: 3. 
" + " } " + " } " + " blobs_lr: 1. " + " blobs_lr: 2. " + " weight_decay: 1. " + " weight_decay: 0. " + " bottom: 'data' " + " top: 'conv1' " + "} " + "layers { " + " name: 'pool1ave' " + " type: POOLING " + " pooling_param { " + " pool: AVE " + " kernel_size: 3 " + " stride: 2 " + " } " + " bottom: 'conv1' " + " top: 'pool1ave' " + "} " + "layers { " + " name: 'pool1stoch' " + " type: POOLING " + " pooling_param { " + " pool: STOCHASTIC " + " kernel_size: 4 " + " stride: 5 " + " } " + " bottom: 'conv1' " + " top: 'pool1stoch' " + "} " + "layers { " + " name: 'concat' " + " type: CONCAT " + " concat_param { " + " concat_dim: 2 " + " } " + " bottom: 'pool1ave' " + " bottom: 'pool1stoch' " + " top: 'pool1concat' " + "} " + "layers { " + " name: 'norm1' " + " type: LRN " + " lrn_param { " + " local_size: 5 " + " alpha: 0.0001 " + " beta: 0.75 " + " } " + " bottom: 'pool1concat' " + " top: 'norm1' " + "} " + "layers { " + " name: 'fc6' " + " type: INNER_PRODUCT " + " inner_product_param { " + " num_output: 4096 " + " bias_term: false " + " weight_filler { " + " type: 'gaussian' " + " std: 0.005 " + " } " + " bias_filler { " + " type: 'constant' " + " value: 1. " + " } " + " } " + " blobs_lr: 1. " + " blobs_lr: 2. " + " weight_decay: 1. " + " weight_decay: 0. " + " bottom: 'norm1' " + " top: 'fc6' " + "} " + "layers { " + " name: 'relu6' " + " type: RELU " + " bottom: 'fc6' " + " top: 'fc6' " + "} " + "layers { " + " name: 'drop6' " + " type: DROPOUT " + " dropout_param { " + " dropout_ratio: 0.2 " + " } " + " bottom: 'fc6' " + " top: 'fc6' " + "} " + "layers { " + " name: 'loss' " + " type: INFOGAIN_LOSS " + " infogain_loss_param { " + " source: '/my/infogain/matrix' " + " } " + " bottom: 'fc6' " + " bottom: 'label' " + "} " + "layers { " + " name: 'accuracy' " + " type: ACCURACY " + "} " + "layers { " + " name: 'bnll' " + " type: BNLL " + "} " + "layers { " + " name: 'euclidean_loss' " + " type: EUCLIDEAN_LOSS " + "} " + "layers { " + " name: 'flatten' " + " type: FLATTEN " + "} " + "layers { " + " name: 'hdf5_output' " + " type: HDF5_OUTPUT " + " hdf5_output_param { " + " file_name: '/my/hdf5/output/file' " + " } " + "} " + "layers { " + " name: 'im2col' " + " type: IM2COL " + "} " + "layers { " + " name: 'images' " + " type: IMAGE_DATA " + "} " + "layers { " + " name: 'multinomial_logistic_loss' " + " type: MULTINOMIAL_LOGISTIC_LOSS " + "} " + "layers { " + " name: 'sigmoid' " + " type: SIGMOID " + "} " + "layers { " + " name: 'softmax' " + " type: SOFTMAX " + "} " + "layers { " + " name: 'split' " + " type: SPLIT " + "} " + "layers { " + " name: 'tanh' " + " type: TANH " + "} "; + this->RunV0UpgradeTest(input_proto, expected_output_proto); +} + +TYPED_TEST(V0UpgradeTest, TestImageNet) { + const string& input_proto = + "name: 'CaffeNet' " + "layers { " + " layer { " + " name: 'data' " + " type: 'data' " + " source: '/home/jiayq/Data/ILSVRC12/train-leveldb' " + " meanfile: '/home/jiayq/Data/ILSVRC12/image_mean.binaryproto' " + " batchsize: 256 " + " cropsize: 227 " + " mirror: true " + " } " + " top: 'data' " + " top: 'label' " + "} " + "layers { " + " layer { " + " name: 'conv1' " + " type: 'conv' " + " num_output: 96 " + " kernelsize: 11 " + " stride: 4 " + " weight_filler { " + " type: 'gaussian' " + " std: 0.01 " + " } " + " bias_filler { " + " type: 'constant' " + " value: 0. " + " } " + " blobs_lr: 1. " + " blobs_lr: 2. " + " weight_decay: 1. " + " weight_decay: 0. 
" + " } " + " bottom: 'data' " + " top: 'conv1' " + "} " + "layers { " + " layer { " + " name: 'relu1' " + " type: 'relu' " + " } " + " bottom: 'conv1' " + " top: 'conv1' " + "} " + "layers { " + " layer { " + " name: 'pool1' " + " type: 'pool' " + " pool: MAX " + " kernelsize: 3 " + " stride: 2 " + " } " + " bottom: 'conv1' " + " top: 'pool1' " + "} " + "layers { " + " layer { " + " name: 'norm1' " + " type: 'lrn' " + " local_size: 5 " + " alpha: 0.0001 " + " beta: 0.75 " + " } " + " bottom: 'pool1' " + " top: 'norm1' " + "} " + "layers { " + " layer { " + " name: 'pad2' " + " type: 'padding' " + " pad: 2 " + " } " + " bottom: 'norm1' " + " top: 'pad2' " + "} " + "layers { " + " layer { " + " name: 'conv2' " + " type: 'conv' " + " num_output: 256 " + " group: 2 " + " kernelsize: 5 " + " weight_filler { " + " type: 'gaussian' " + " std: 0.01 " + " } " + " bias_filler { " + " type: 'constant' " + " value: 1. " + " } " + " blobs_lr: 1. " + " blobs_lr: 2. " + " weight_decay: 1. " + " weight_decay: 0. " + " } " + " bottom: 'pad2' " + " top: 'conv2' " + "} " + "layers { " + " layer { " + " name: 'relu2' " + " type: 'relu' " + " } " + " bottom: 'conv2' " + " top: 'conv2' " + "} " + "layers { " + " layer { " + " name: 'pool2' " + " type: 'pool' " + " pool: MAX " + " kernelsize: 3 " + " stride: 2 " + " } " + " bottom: 'conv2' " + " top: 'pool2' " + "} " + "layers { " + " layer { " + " name: 'norm2' " + " type: 'lrn' " + " local_size: 5 " + " alpha: 0.0001 " + " beta: 0.75 " + " } " + " bottom: 'pool2' " + " top: 'norm2' " + "} " + "layers { " + " layer { " + " name: 'pad3' " + " type: 'padding' " + " pad: 1 " + " } " + " bottom: 'norm2' " + " top: 'pad3' " + "} " + "layers { " + " layer { " + " name: 'conv3' " + " type: 'conv' " + " num_output: 384 " + " kernelsize: 3 " + " weight_filler { " + " type: 'gaussian' " + " std: 0.01 " + " } " + " bias_filler { " + " type: 'constant' " + " value: 0. " + " } " + " blobs_lr: 1. " + " blobs_lr: 2. " + " weight_decay: 1. " + " weight_decay: 0. " + " } " + " bottom: 'pad3' " + " top: 'conv3' " + "} " + "layers { " + " layer { " + " name: 'relu3' " + " type: 'relu' " + " } " + " bottom: 'conv3' " + " top: 'conv3' " + "} " + "layers { " + " layer { " + " name: 'pad4' " + " type: 'padding' " + " pad: 1 " + " } " + " bottom: 'conv3' " + " top: 'pad4' " + "} " + "layers { " + " layer { " + " name: 'conv4' " + " type: 'conv' " + " num_output: 384 " + " group: 2 " + " kernelsize: 3 " + " weight_filler { " + " type: 'gaussian' " + " std: 0.01 " + " } " + " bias_filler { " + " type: 'constant' " + " value: 1. " + " } " + " blobs_lr: 1. " + " blobs_lr: 2. " + " weight_decay: 1. " + " weight_decay: 0. " + " } " + " bottom: 'pad4' " + " top: 'conv4' " + "} " + "layers { " + " layer { " + " name: 'relu4' " + " type: 'relu' " + " } " + " bottom: 'conv4' " + " top: 'conv4' " + "} " + "layers { " + " layer { " + " name: 'pad5' " + " type: 'padding' " + " pad: 1 " + " } " + " bottom: 'conv4' " + " top: 'pad5' " + "} " + "layers { " + " layer { " + " name: 'conv5' " + " type: 'conv' " + " num_output: 256 " + " group: 2 " + " kernelsize: 3 " + " weight_filler { " + " type: 'gaussian' " + " std: 0.01 " + " } " + " bias_filler { " + " type: 'constant' " + " value: 1. " + " } " + " blobs_lr: 1. " + " blobs_lr: 2. " + " weight_decay: 1. " + " weight_decay: 0. 
" + " } " + " bottom: 'pad5' " + " top: 'conv5' " + "} " + "layers { " + " layer { " + " name: 'relu5' " + " type: 'relu' " + " } " + " bottom: 'conv5' " + " top: 'conv5' " + "} " + "layers { " + " layer { " + " name: 'pool5' " + " type: 'pool' " + " kernelsize: 3 " + " pool: MAX " + " stride: 2 " + " } " + " bottom: 'conv5' " + " top: 'pool5' " + "} " + "layers { " + " layer { " + " name: 'fc6' " + " type: 'innerproduct' " + " num_output: 4096 " + " weight_filler { " + " type: 'gaussian' " + " std: 0.005 " + " } " + " bias_filler { " + " type: 'constant' " + " value: 1. " + " } " + " blobs_lr: 1. " + " blobs_lr: 2. " + " weight_decay: 1. " + " weight_decay: 0. " + " } " + " bottom: 'pool5' " + " top: 'fc6' " + "} " + "layers { " + " layer { " + " name: 'relu6' " + " type: 'relu' " + " } " + " bottom: 'fc6' " + " top: 'fc6' " + "} " + "layers { " + " layer { " + " name: 'drop6' " + " type: 'dropout' " + " dropout_ratio: 0.5 " + " } " + " bottom: 'fc6' " + " top: 'fc6' " + "} " + "layers { " + " layer { " + " name: 'fc7' " + " type: 'innerproduct' " + " num_output: 4096 " + " weight_filler { " + " type: 'gaussian' " + " std: 0.005 " + " } " + " bias_filler { " + " type: 'constant' " + " value: 1. " + " } " + " blobs_lr: 1. " + " blobs_lr: 2. " + " weight_decay: 1. " + " weight_decay: 0. " + " } " + " bottom: 'fc6' " + " top: 'fc7' " + "} " + "layers { " + " layer { " + " name: 'relu7' " + " type: 'relu' " + " } " + " bottom: 'fc7' " + " top: 'fc7' " + "} " + "layers { " + " layer { " + " name: 'drop7' " + " type: 'dropout' " + " dropout_ratio: 0.5 " + " } " + " bottom: 'fc7' " + " top: 'fc7' " + "} " + "layers { " + " layer { " + " name: 'fc8' " + " type: 'innerproduct' " + " num_output: 1000 " + " weight_filler { " + " type: 'gaussian' " + " std: 0.01 " + " } " + " bias_filler { " + " type: 'constant' " + " value: 0 " + " } " + " blobs_lr: 1. " + " blobs_lr: 2. " + " weight_decay: 1. " + " weight_decay: 0. " + " } " + " bottom: 'fc7' " + " top: 'fc8' " + "} " + "layers { " + " layer { " + " name: 'loss' " + " type: 'softmax_loss' " + " } " + " bottom: 'fc8' " + " bottom: 'label' " + "} "; + const string& expected_output_proto = + "name: 'CaffeNet' " + "layers { " + " name: 'data' " + " type: DATA " + " data_param { " + " source: '/home/jiayq/Data/ILSVRC12/train-leveldb' " + " mean_file: '/home/jiayq/Data/ILSVRC12/image_mean.binaryproto' " + " batch_size: 256 " + " crop_size: 227 " + " mirror: true " + " } " + " top: 'data' " + " top: 'label' " + "} " + "layers { " + " name: 'conv1' " + " type: CONVOLUTION " + " convolution_param { " + " num_output: 96 " + " kernel_size: 11 " + " stride: 4 " + " weight_filler { " + " type: 'gaussian' " + " std: 0.01 " + " } " + " bias_filler { " + " type: 'constant' " + " value: 0. " + " } " + " } " + " blobs_lr: 1. " + " blobs_lr: 2. " + " weight_decay: 1. " + " weight_decay: 0. 
" + " bottom: 'data' " + " top: 'conv1' " + "} " + "layers { " + " name: 'relu1' " + " type: RELU " + " bottom: 'conv1' " + " top: 'conv1' " + "} " + "layers { " + " name: 'pool1' " + " type: POOLING " + " pooling_param { " + " pool: MAX " + " kernel_size: 3 " + " stride: 2 " + " } " + " bottom: 'conv1' " + " top: 'pool1' " + "} " + "layers { " + " name: 'norm1' " + " type: LRN " + " lrn_param { " + " local_size: 5 " + " alpha: 0.0001 " + " beta: 0.75 " + " } " + " bottom: 'pool1' " + " top: 'norm1' " + "} " + "layers { " + " name: 'conv2' " + " type: CONVOLUTION " + " convolution_param { " + " num_output: 256 " + " group: 2 " + " kernel_size: 5 " + " pad: 2 " + " weight_filler { " + " type: 'gaussian' " + " std: 0.01 " + " } " + " bias_filler { " + " type: 'constant' " + " value: 1. " + " } " + " } " + " blobs_lr: 1. " + " blobs_lr: 2. " + " weight_decay: 1. " + " weight_decay: 0. " + " bottom: 'norm1' " + " top: 'conv2' " + "} " + "layers { " + " name: 'relu2' " + " type: RELU " + " bottom: 'conv2' " + " top: 'conv2' " + "} " + "layers { " + " name: 'pool2' " + " type: POOLING " + " pooling_param { " + " pool: MAX " + " kernel_size: 3 " + " stride: 2 " + " } " + " bottom: 'conv2' " + " top: 'pool2' " + "} " + "layers { " + " name: 'norm2' " + " type: LRN " + " lrn_param { " + " local_size: 5 " + " alpha: 0.0001 " + " beta: 0.75 " + " } " + " bottom: 'pool2' " + " top: 'norm2' " + "} " + "layers { " + " name: 'conv3' " + " type: CONVOLUTION " + " convolution_param { " + " num_output: 384 " + " kernel_size: 3 " + " pad: 1 " + " weight_filler { " + " type: 'gaussian' " + " std: 0.01 " + " } " + " bias_filler { " + " type: 'constant' " + " value: 0. " + " } " + " } " + " blobs_lr: 1. " + " blobs_lr: 2. " + " weight_decay: 1. " + " weight_decay: 0. " + " bottom: 'norm2' " + " top: 'conv3' " + "} " + "layers { " + " name: 'relu3' " + " type: RELU " + " bottom: 'conv3' " + " top: 'conv3' " + "} " + "layers { " + " name: 'conv4' " + " type: CONVOLUTION " + " convolution_param { " + " num_output: 384 " + " group: 2 " + " kernel_size: 3 " + " pad: 1 " + " weight_filler { " + " type: 'gaussian' " + " std: 0.01 " + " } " + " bias_filler { " + " type: 'constant' " + " value: 1. " + " } " + " } " + " blobs_lr: 1. " + " blobs_lr: 2. " + " weight_decay: 1. " + " weight_decay: 0. " + " bottom: 'conv3' " + " top: 'conv4' " + "} " + "layers { " + " name: 'relu4' " + " type: RELU " + " bottom: 'conv4' " + " top: 'conv4' " + "} " + "layers { " + " name: 'conv5' " + " type: CONVOLUTION " + " convolution_param { " + " num_output: 256 " + " group: 2 " + " kernel_size: 3 " + " pad: 1 " + " weight_filler { " + " type: 'gaussian' " + " std: 0.01 " + " } " + " bias_filler { " + " type: 'constant' " + " value: 1. " + " } " + " } " + " blobs_lr: 1. " + " blobs_lr: 2. " + " weight_decay: 1. " + " weight_decay: 0. " + " bottom: 'conv4' " + " top: 'conv5' " + "} " + "layers { " + " name: 'relu5' " + " type: RELU " + " bottom: 'conv5' " + " top: 'conv5' " + "} " + "layers { " + " name: 'pool5' " + " type: POOLING " + " pooling_param { " + " kernel_size: 3 " + " pool: MAX " + " stride: 2 " + " } " + " bottom: 'conv5' " + " top: 'pool5' " + "} " + "layers { " + " name: 'fc6' " + " type: INNER_PRODUCT " + " inner_product_param { " + " num_output: 4096 " + " weight_filler { " + " type: 'gaussian' " + " std: 0.005 " + " } " + " bias_filler { " + " type: 'constant' " + " value: 1. " + " } " + " } " + " blobs_lr: 1. " + " blobs_lr: 2. " + " weight_decay: 1. " + " weight_decay: 0. 
" + " bottom: 'pool5' " + " top: 'fc6' " + "} " + "layers { " + " name: 'relu6' " + " type: RELU " + " bottom: 'fc6' " + " top: 'fc6' " + "} " + "layers { " + " name: 'drop6' " + " type: DROPOUT " + " dropout_param { " + " dropout_ratio: 0.5 " + " } " + " bottom: 'fc6' " + " top: 'fc6' " + "} " + "layers { " + " name: 'fc7' " + " type: INNER_PRODUCT " + " inner_product_param { " + " num_output: 4096 " + " weight_filler { " + " type: 'gaussian' " + " std: 0.005 " + " } " + " bias_filler { " + " type: 'constant' " + " value: 1. " + " } " + " } " + " blobs_lr: 1. " + " blobs_lr: 2. " + " weight_decay: 1. " + " weight_decay: 0. " + " bottom: 'fc6' " + " top: 'fc7' " + "} " + "layers { " + " name: 'relu7' " + " type: RELU " + " bottom: 'fc7' " + " top: 'fc7' " + "} " + "layers { " + " name: 'drop7' " + " type: DROPOUT " + " dropout_param { " + " dropout_ratio: 0.5 " + " } " + " bottom: 'fc7' " + " top: 'fc7' " + "} " + "layers { " + " name: 'fc8' " + " type: INNER_PRODUCT " + " inner_product_param { " + " num_output: 1000 " + " weight_filler { " + " type: 'gaussian' " + " std: 0.01 " + " } " + " bias_filler { " + " type: 'constant' " + " value: 0 " + " } " + " } " + " blobs_lr: 1. " + " blobs_lr: 2. " + " weight_decay: 1. " + " weight_decay: 0. " + " bottom: 'fc7' " + " top: 'fc8' " + "} " + "layers { " + " name: 'loss' " + " type: SOFTMAX_LOSS " + " bottom: 'fc8' " + " bottom: 'label' " + "} "; + this->RunV0UpgradeTest(input_proto, expected_output_proto); +} + +} // namespace caffe diff --git a/src/caffe/util/insert_splits.cpp b/src/caffe/util/insert_splits.cpp index f72f20c0e25..b9aeb37c71b 100644 --- a/src/caffe/util/insert_splits.cpp +++ b/src/caffe/util/insert_splits.cpp @@ -15,7 +15,7 @@ using std::make_pair; namespace caffe { -void insert_splits(const NetParameter& param, NetParameter* param_split) { +void InsertSplits(const NetParameter& param, NetParameter* param_split) { // Initialize by copying from the input NetParameter. 
param_split->CopyFrom(param); param_split->clear_layers(); @@ -31,10 +31,10 @@ void insert_splits(const NetParameter& param, NetParameter* param_split) { blob_name_to_last_top_idx[blob_name] = make_pair(-1, i); } for (int i = 0; i < param.layers_size(); ++i) { - const LayerConnection& layer_connection = param.layers(i); - layer_idx_to_layer_name[i] = layer_connection.layer().name(); - for (int j = 0; j < layer_connection.bottom_size(); ++j) { - const string& blob_name = layer_connection.bottom(j); + const LayerParameter& layer_param = param.layers(i); + layer_idx_to_layer_name[i] = layer_param.name(); + for (int j = 0; j < layer_param.bottom_size(); ++j) { + const string& blob_name = layer_param.bottom(j); if (blob_name_to_last_top_idx.find(blob_name) == blob_name_to_last_top_idx.end()) { LOG(FATAL) << "Unknown blob input " << blob_name << " to layer " << j; @@ -44,8 +44,8 @@ void insert_splits(const NetParameter& param, NetParameter* param_split) { bottom_idx_to_source_top_idx[bottom_idx] = top_idx; ++top_idx_to_bottom_count[top_idx]; } - for (int j = 0; j < layer_connection.top_size(); ++j) { - const string& blob_name = layer_connection.top(j); + for (int j = 0; j < layer_param.top_size(); ++j) { + const string& blob_name = layer_param.top(j); blob_name_to_last_top_idx[blob_name] = make_pair(i, j); } } @@ -56,57 +56,55 @@ void insert_splits(const NetParameter& param, NetParameter* param_split) { if (split_count > 1) { const string& layer_name = layer_idx_to_layer_name[-1]; const string& blob_name = param.input(i); - LayerConnection* split_layer_connection = param_split->add_layers(); - configure_split_layer(layer_name, blob_name, i, split_count, - split_layer_connection); + LayerParameter* split_layer_param = param_split->add_layers(); + ConfigureSplitLayer(layer_name, blob_name, i, split_count, + split_layer_param); } } for (int i = 0; i < param.layers_size(); ++i) { - LayerConnection* layer_connection = param_split->add_layers(); - layer_connection->CopyFrom(param.layers(i)); + LayerParameter* layer_param = param_split->add_layers(); + layer_param->CopyFrom(param.layers(i)); // Replace any shared bottom blobs with split layer outputs. - for (int j = 0; j < layer_connection->bottom_size(); ++j) { + for (int j = 0; j < layer_param->bottom_size(); ++j) { const pair& top_idx = bottom_idx_to_source_top_idx[make_pair(i, j)]; const int split_count = top_idx_to_bottom_count[top_idx]; if (split_count > 1) { const string& layer_name = layer_idx_to_layer_name[top_idx.first]; - const string& blob_name = layer_connection->bottom(j); - layer_connection->set_bottom(j, get_split_blob_name(layer_name, + const string& blob_name = layer_param->bottom(j); + layer_param->set_bottom(j, SplitBlobName(layer_name, blob_name, top_idx.second, top_idx_to_bottom_split_idx[top_idx]++)); } } // Create split layer for any top blobs used by other layers as bottom // blobs more than once. 
- for (int j = 0; j < layer_connection->top_size(); ++j) { + for (int j = 0; j < layer_param->top_size(); ++j) { const int split_count = top_idx_to_bottom_count[make_pair(i, j)]; if (split_count > 1) { const string& layer_name = layer_idx_to_layer_name[i]; - const string& blob_name = layer_connection->top(j); - LayerConnection* split_layer_connection = param_split->add_layers(); - configure_split_layer(layer_name, blob_name, j, split_count, - split_layer_connection); + const string& blob_name = layer_param->top(j); + LayerParameter* split_layer_param = param_split->add_layers(); + ConfigureSplitLayer(layer_name, blob_name, j, split_count, + split_layer_param); } } } } -void configure_split_layer(const string& layer_name, const string& blob_name, +void ConfigureSplitLayer(const string& layer_name, const string& blob_name, const int blob_idx, const int split_count, - LayerConnection* split_layer_connection) { - split_layer_connection->Clear(); - split_layer_connection->add_bottom(blob_name); - LayerParameter* split_layer_param = split_layer_connection->mutable_layer(); - split_layer_param->set_name( - get_split_layer_name(layer_name, blob_name, blob_idx)); - split_layer_param->set_type("split"); + LayerParameter* split_layer_param) { + split_layer_param->Clear(); + split_layer_param->add_bottom(blob_name); + split_layer_param->set_name(SplitLayerName(layer_name, blob_name, blob_idx)); + split_layer_param->set_type(LayerParameter_LayerType_SPLIT); for (int k = 0; k < split_count; ++k) { - split_layer_connection->add_top( - get_split_blob_name(layer_name, blob_name, blob_idx, k)); + split_layer_param->add_top( + SplitBlobName(layer_name, blob_name, blob_idx, k)); } } -string get_split_layer_name(const string& layer_name, const string& blob_name, +string SplitLayerName(const string& layer_name, const string& blob_name, const int blob_idx) { ostringstream split_layer_name; split_layer_name << blob_name << "_" << layer_name << "_" << blob_idx @@ -114,7 +112,7 @@ string get_split_layer_name(const string& layer_name, const string& blob_name, return split_layer_name.str(); } -string get_split_blob_name(const string& layer_name, const string& blob_name, +string SplitBlobName(const string& layer_name, const string& blob_name, const int blob_idx, const int split_idx) { // 0th split top blob is given the same name as the bottom blob so that // computation is done 'in-place', saving a bit of time and memory. 
diff --git a/src/caffe/util/io.cpp b/src/caffe/util/io.cpp index fdad21d6cac..e1e3c3a3fe5 100644 --- a/src/caffe/util/io.cpp +++ b/src/caffe/util/io.cpp @@ -29,17 +29,18 @@ using google::protobuf::io::ZeroCopyInputStream; using google::protobuf::io::CodedInputStream; using google::protobuf::io::ZeroCopyOutputStream; using google::protobuf::io::CodedOutputStream; +using google::protobuf::Message; namespace caffe { -void ReadProtoFromTextFile(const char* filename, - ::google::protobuf::Message* proto) { +bool ReadProtoFromTextFile(const char* filename, Message* proto) { int fd = open(filename, O_RDONLY); CHECK_NE(fd, -1) << "File not found: " << filename; FileInputStream* input = new FileInputStream(fd); - CHECK(google::protobuf::TextFormat::Parse(input, proto)); + bool success = google::protobuf::TextFormat::Parse(input, proto); delete input; close(fd); + return success; } void WriteProtoToTextFile(const Message& proto, const char* filename) { @@ -50,18 +51,19 @@ void WriteProtoToTextFile(const Message& proto, const char* filename) { close(fd); } -void ReadProtoFromBinaryFile(const char* filename, Message* proto) { +bool ReadProtoFromBinaryFile(const char* filename, Message* proto) { int fd = open(filename, O_RDONLY); CHECK_NE(fd, -1) << "File not found: " << filename; ZeroCopyInputStream* raw_input = new FileInputStream(fd); CodedInputStream* coded_input = new CodedInputStream(raw_input); coded_input->SetTotalBytesLimit(536870912, 268435456); - CHECK(proto->ParseFromCodedStream(coded_input)); + bool success = proto->ParseFromCodedStream(coded_input); delete coded_input; delete raw_input; close(fd); + return success; } void WriteProtoToBinaryFile(const Message& proto, const char* filename) { diff --git a/src/caffe/util/upgrade_proto.cpp b/src/caffe/util/upgrade_proto.cpp new file mode 100644 index 00000000000..e079b422dfb --- /dev/null +++ b/src/caffe/util/upgrade_proto.cpp @@ -0,0 +1,615 @@ +// Copyright 2014 BVLC and contributors. + +#include +#include +#include + +#include +#include + +#include "caffe/common.hpp" +#include "caffe/util/io.hpp" +#include "caffe/util/upgrade_proto.hpp" +#include "caffe/proto/caffe.pb.h" + +using std::map; +using std::string; + +namespace caffe { + +bool NetNeedsUpgrade(const NetParameter& net_param) { + for (int i = 0; i < net_param.layers_size(); ++i) { + if (net_param.layers(i).has_layer()) { + return true; + } + } + return false; +} + +bool UpgradeV0Net(const NetParameter& v0_net_param_padding_layers, + NetParameter* net_param) { + // First upgrade padding layers to padded conv layers. + NetParameter v0_net_param; + UpgradeV0PaddingLayers(v0_net_param_padding_layers, &v0_net_param); + // Now upgrade layer parameters. + bool is_fully_compatible = true; + net_param->Clear(); + if (v0_net_param.has_name()) { + net_param->set_name(v0_net_param.name()); + } + for (int i = 0; i < v0_net_param.layers_size(); ++i) { + is_fully_compatible &= UpgradeLayerParameter(v0_net_param.layers(i), + net_param->add_layers()); + } + for (int i = 0; i < v0_net_param.input_size(); ++i) { + net_param->add_input(v0_net_param.input(i)); + } + for (int i = 0; i < v0_net_param.input_dim_size(); ++i) { + net_param->add_input_dim(v0_net_param.input_dim(i)); + } + if (v0_net_param.has_force_backward()) { + net_param->set_force_backward(v0_net_param.force_backward()); + } + return is_fully_compatible; +} + +void UpgradeV0PaddingLayers(const NetParameter& param, + NetParameter* param_upgraded_pad) { + // Copy everything other than the layers from the original param. 
+ param_upgraded_pad->Clear(); + param_upgraded_pad->CopyFrom(param); + param_upgraded_pad->clear_layers(); + // Figure out which layer each bottom blob comes from. + map blob_name_to_last_top_idx; + for (int i = 0; i < param.input_size(); ++i) { + const string& blob_name = param.input(i); + blob_name_to_last_top_idx[blob_name] = -1; + } + for (int i = 0; i < param.layers_size(); ++i) { + const LayerParameter& layer_connection = param.layers(i); + const V0LayerParameter& layer_param = layer_connection.layer(); + // Add the layer to the new net, unless it's a padding layer. + if (layer_param.type() != "padding") { + param_upgraded_pad->add_layers()->CopyFrom(layer_connection); + } + for (int j = 0; j < layer_connection.bottom_size(); ++j) { + const string& blob_name = layer_connection.bottom(j); + if (blob_name_to_last_top_idx.find(blob_name) == + blob_name_to_last_top_idx.end()) { + LOG(FATAL) << "Unknown blob input " << blob_name << " to layer " << j; + } + const int top_idx = blob_name_to_last_top_idx[blob_name]; + if (top_idx == -1) { + continue; + } + LayerParameter source_layer = param.layers(top_idx); + if (source_layer.layer().type() == "padding") { + // This layer has a padding layer as input -- check that it is a conv + // layer and takes only one input. Also check that the padding layer + // input has only one input and one output. Other cases have undefined + // behavior in Caffe. + CHECK_EQ(layer_param.type(), "conv") << "Padding layer input to " + "non-convolutional layer type " << layer_param.type(); + CHECK_EQ(layer_connection.bottom_size(), 1) + << "Conv Layer takes a single blob as input."; + CHECK_EQ(source_layer.bottom_size(), 1) + << "Padding Layer takes a single blob as input."; + CHECK_EQ(source_layer.top_size(), 1) + << "Padding Layer produces a single blob as output."; + int layer_index = param_upgraded_pad->layers_size() - 1; + param_upgraded_pad->mutable_layers(layer_index)->mutable_layer() + ->set_pad(source_layer.layer().pad()); + param_upgraded_pad->mutable_layers(layer_index) + ->set_bottom(j, source_layer.bottom(0)); + } + } + for (int j = 0; j < layer_connection.top_size(); ++j) { + const string& blob_name = layer_connection.top(j); + blob_name_to_last_top_idx[blob_name] = i; + } + } +} + +bool UpgradeLayerParameter(const LayerParameter& v0_layer_connection, + LayerParameter* layer_param) { + bool is_fully_compatible = true; + layer_param->Clear(); + for (int i = 0; i < v0_layer_connection.bottom_size(); ++i) { + layer_param->add_bottom(v0_layer_connection.bottom(i)); + } + for (int i = 0; i < v0_layer_connection.top_size(); ++i) { + layer_param->add_top(v0_layer_connection.top(i)); + } + if (v0_layer_connection.has_layer()) { + const V0LayerParameter& v0_layer_param = v0_layer_connection.layer(); + if (v0_layer_param.has_name()) { + layer_param->set_name(v0_layer_param.name()); + } + const string& type = v0_layer_param.type(); + if (v0_layer_param.has_type()) { + layer_param->set_type(UpgradeV0LayerType(type)); + } + for (int i = 0; i < v0_layer_param.blobs_size(); ++i) { + layer_param->add_blobs()->CopyFrom(v0_layer_param.blobs(i)); + } + for (int i = 0; i < v0_layer_param.blobs_lr_size(); ++i) { + layer_param->add_blobs_lr(v0_layer_param.blobs_lr(i)); + } + for (int i = 0; i < v0_layer_param.weight_decay_size(); ++i) { + layer_param->add_weight_decay(v0_layer_param.weight_decay(i)); + } + if (v0_layer_param.has_num_output()) { + if (type == "conv") { + layer_param->mutable_convolution_param()->set_num_output( + v0_layer_param.num_output()); + } else if 
(type == "innerproduct") { + layer_param->mutable_inner_product_param()->set_num_output( + v0_layer_param.num_output()); + } else { + LOG(ERROR) << "Unknown parameter num_output for layer type " << type; + is_fully_compatible = false; + } + } + if (v0_layer_param.has_biasterm()) { + if (type == "conv") { + layer_param->mutable_convolution_param()->set_bias_term( + v0_layer_param.biasterm()); + } else if (type == "innerproduct") { + layer_param->mutable_inner_product_param()->set_bias_term( + v0_layer_param.biasterm()); + } else { + LOG(ERROR) << "Unknown parameter biasterm for layer type " << type; + is_fully_compatible = false; + } + } + if (v0_layer_param.has_weight_filler()) { + if (type == "conv") { + layer_param->mutable_convolution_param()-> + mutable_weight_filler()->CopyFrom(v0_layer_param.weight_filler()); + } else if (type == "innerproduct") { + layer_param->mutable_inner_product_param()-> + mutable_weight_filler()->CopyFrom(v0_layer_param.weight_filler()); + } else { + LOG(ERROR) << "Unknown parameter weight_filler for layer type " << type; + is_fully_compatible = false; + } + } + if (v0_layer_param.has_bias_filler()) { + if (type == "conv") { + layer_param->mutable_convolution_param()-> + mutable_bias_filler()->CopyFrom(v0_layer_param.bias_filler()); + } else if (type == "innerproduct") { + layer_param->mutable_inner_product_param()-> + mutable_bias_filler()->CopyFrom(v0_layer_param.bias_filler()); + } else { + LOG(ERROR) << "Unknown parameter bias_filler for layer type " << type; + is_fully_compatible = false; + } + } + if (v0_layer_param.has_pad()) { + if (type == "conv") { + layer_param->mutable_convolution_param()->set_pad(v0_layer_param.pad()); + } else { + LOG(ERROR) << "Unknown parameter pad for layer type " << type; + is_fully_compatible = false; + } + } + if (v0_layer_param.has_kernelsize()) { + if (type == "conv") { + layer_param->mutable_convolution_param()->set_kernel_size( + v0_layer_param.kernelsize()); + } else if (type == "pool") { + layer_param->mutable_pooling_param()->set_kernel_size( + v0_layer_param.kernelsize()); + } else { + LOG(ERROR) << "Unknown parameter kernelsize for layer type " << type; + is_fully_compatible = false; + } + } + if (v0_layer_param.has_group()) { + if (type == "conv") { + layer_param->mutable_convolution_param()->set_group( + v0_layer_param.group()); + } else { + LOG(ERROR) << "Unknown parameter group for layer type " << type; + is_fully_compatible = false; + } + } + if (v0_layer_param.has_stride()) { + if (type == "conv") { + layer_param->mutable_convolution_param()->set_stride( + v0_layer_param.stride()); + } else if (type == "pool") { + layer_param->mutable_pooling_param()->set_stride( + v0_layer_param.stride()); + } else { + LOG(ERROR) << "Unknown parameter stride for layer type " << type; + is_fully_compatible = false; + } + } + if (v0_layer_param.has_pool()) { + if (type == "pool") { + V0LayerParameter_PoolMethod pool = v0_layer_param.pool(); + switch (pool) { + case V0LayerParameter_PoolMethod_MAX: + layer_param->mutable_pooling_param()->set_pool( + PoolingParameter_PoolMethod_MAX); + break; + case V0LayerParameter_PoolMethod_AVE: + layer_param->mutable_pooling_param()->set_pool( + PoolingParameter_PoolMethod_AVE); + break; + case V0LayerParameter_PoolMethod_STOCHASTIC: + layer_param->mutable_pooling_param()->set_pool( + PoolingParameter_PoolMethod_STOCHASTIC); + break; + default: + LOG(ERROR) << "Unknown pool method " << pool; + is_fully_compatible = false; + } + } else { + LOG(ERROR) << "Unknown parameter pool for layer type 
" << type; + is_fully_compatible = false; + } + } + if (v0_layer_param.has_dropout_ratio()) { + if (type == "dropout") { + layer_param->mutable_dropout_param()->set_dropout_ratio( + v0_layer_param.dropout_ratio()); + } else { + LOG(ERROR) << "Unknown parameter dropout_ratio for layer type " << type; + is_fully_compatible = false; + } + } + if (v0_layer_param.has_local_size()) { + if (type == "lrn") { + layer_param->mutable_lrn_param()->set_local_size( + v0_layer_param.local_size()); + } else { + LOG(ERROR) << "Unknown parameter local_size for layer type " << type; + is_fully_compatible = false; + } + } + if (v0_layer_param.has_alpha()) { + if (type == "lrn") { + layer_param->mutable_lrn_param()->set_alpha(v0_layer_param.alpha()); + } else { + LOG(ERROR) << "Unknown parameter alpha for layer type " << type; + is_fully_compatible = false; + } + } + if (v0_layer_param.has_beta()) { + if (type == "lrn") { + layer_param->mutable_lrn_param()->set_beta(v0_layer_param.beta()); + } else { + LOG(ERROR) << "Unknown parameter beta for layer type " << type; + is_fully_compatible = false; + } + } + if (v0_layer_param.has_source()) { + if (type == "data") { + layer_param->mutable_data_param()->set_source(v0_layer_param.source()); + } else if (type == "hdf5_data") { + layer_param->mutable_hdf5_data_param()->set_source( + v0_layer_param.source()); + } else if (type == "images") { + layer_param->mutable_image_data_param()->set_source( + v0_layer_param.source()); + } else if (type == "window_data") { + layer_param->mutable_window_data_param()->set_source( + v0_layer_param.source()); + } else if (type == "infogain_loss") { + layer_param->mutable_infogain_loss_param()->set_source( + v0_layer_param.source()); + } else { + LOG(ERROR) << "Unknown parameter source for layer type " << type; + is_fully_compatible = false; + } + } + if (v0_layer_param.has_scale()) { + if (type == "data") { + layer_param->mutable_data_param()->set_scale(v0_layer_param.scale()); + } else if (type == "images") { + layer_param->mutable_image_data_param()->set_scale( + v0_layer_param.scale()); + } else { + LOG(ERROR) << "Unknown parameter scale for layer type " << type; + is_fully_compatible = false; + } + } + if (v0_layer_param.has_meanfile()) { + if (type == "data") { + layer_param->mutable_data_param()->set_mean_file( + v0_layer_param.meanfile()); + } else if (type == "images") { + layer_param->mutable_image_data_param()->set_mean_file( + v0_layer_param.meanfile()); + } else if (type == "window_data") { + layer_param->mutable_window_data_param()->set_mean_file( + v0_layer_param.meanfile()); + } else { + LOG(ERROR) << "Unknown parameter meanfile for layer type " << type; + is_fully_compatible = false; + } + } + if (v0_layer_param.has_batchsize()) { + if (type == "data") { + layer_param->mutable_data_param()->set_batch_size( + v0_layer_param.batchsize()); + } else if (type == "hdf5_data") { + layer_param->mutable_hdf5_data_param()->set_batch_size( + v0_layer_param.batchsize()); + } else if (type == "images") { + layer_param->mutable_image_data_param()->set_batch_size( + v0_layer_param.batchsize()); + } else if (type == "window_data") { + layer_param->mutable_window_data_param()->set_batch_size( + v0_layer_param.batchsize()); + } else { + LOG(ERROR) << "Unknown parameter batchsize for layer type " << type; + is_fully_compatible = false; + } + } + if (v0_layer_param.has_cropsize()) { + if (type == "data") { + layer_param->mutable_data_param()->set_crop_size( + v0_layer_param.cropsize()); + } else if (type == "images") { + 
layer_param->mutable_image_data_param()->set_crop_size( + v0_layer_param.cropsize()); + } else if (type == "window_data") { + layer_param->mutable_window_data_param()->set_crop_size( + v0_layer_param.cropsize()); + } else { + LOG(ERROR) << "Unknown parameter cropsize for layer type " << type; + is_fully_compatible = false; + } + } + if (v0_layer_param.has_mirror()) { + if (type == "data") { + layer_param->mutable_data_param()->set_mirror(v0_layer_param.mirror()); + } else if (type == "images") { + layer_param->mutable_image_data_param()->set_mirror( + v0_layer_param.mirror()); + } else if (type == "window_data") { + layer_param->mutable_window_data_param()->set_mirror( + v0_layer_param.mirror()); + } else { + LOG(ERROR) << "Unknown parameter mirror for layer type " << type; + is_fully_compatible = false; + } + } + if (v0_layer_param.has_rand_skip()) { + if (type == "data") { + layer_param->mutable_data_param()->set_rand_skip( + v0_layer_param.rand_skip()); + } else if (type == "images") { + layer_param->mutable_image_data_param()->set_rand_skip( + v0_layer_param.rand_skip()); + } else { + LOG(ERROR) << "Unknown parameter rand_skip for layer type " << type; + is_fully_compatible = false; + } + } + if (v0_layer_param.has_shuffle_images()) { + if (type == "images") { + layer_param->mutable_image_data_param()->set_shuffle( + v0_layer_param.shuffle_images()); + } else { + LOG(ERROR) << "Unknown parameter shuffle for layer type " << type; + is_fully_compatible = false; + } + } + if (v0_layer_param.has_new_height()) { + if (type == "images") { + layer_param->mutable_image_data_param()->set_new_height( + v0_layer_param.new_height()); + } else { + LOG(ERROR) << "Unknown parameter new_height for layer type " << type; + is_fully_compatible = false; + } + } + if (v0_layer_param.has_new_width()) { + if (type == "images") { + layer_param->mutable_image_data_param()->set_new_width( + v0_layer_param.new_width()); + } else { + LOG(ERROR) << "Unknown parameter new_width for layer type " << type; + is_fully_compatible = false; + } + } + if (v0_layer_param.has_concat_dim()) { + if (type == "concat") { + layer_param->mutable_concat_param()->set_concat_dim( + v0_layer_param.concat_dim()); + } else { + LOG(ERROR) << "Unknown parameter concat_dim for layer type " << type; + is_fully_compatible = false; + } + } + if (v0_layer_param.has_det_fg_threshold()) { + if (type == "window_data") { + layer_param->mutable_window_data_param()->set_fg_threshold( + v0_layer_param.det_fg_threshold()); + } else { + LOG(ERROR) << "Unknown parameter det_fg_threshold for layer type " + << type; + is_fully_compatible = false; + } + } + if (v0_layer_param.has_det_bg_threshold()) { + if (type == "window_data") { + layer_param->mutable_window_data_param()->set_bg_threshold( + v0_layer_param.det_bg_threshold()); + } else { + LOG(ERROR) << "Unknown parameter det_bg_threshold for layer type " + << type; + is_fully_compatible = false; + } + } + if (v0_layer_param.has_det_fg_fraction()) { + if (type == "window_data") { + layer_param->mutable_window_data_param()->set_fg_fraction( + v0_layer_param.det_fg_fraction()); + } else { + LOG(ERROR) << "Unknown parameter det_fg_fraction for layer type " + << type; + is_fully_compatible = false; + } + } + if (v0_layer_param.has_det_context_pad()) { + if (type == "window_data") { + layer_param->mutable_window_data_param()->set_context_pad( + v0_layer_param.det_context_pad()); + } else { + LOG(ERROR) << "Unknown parameter det_context_pad for layer type " + << type; + is_fully_compatible = false; + } + } + 
if (v0_layer_param.has_det_crop_mode()) { + if (type == "window_data") { + layer_param->mutable_window_data_param()->set_crop_mode( + v0_layer_param.det_crop_mode()); + } else { + LOG(ERROR) << "Unknown parameter det_crop_mode for layer type " + << type; + is_fully_compatible = false; + } + } + if (v0_layer_param.has_hdf5_output_param()) { + if (type == "hdf5_output") { + layer_param->mutable_hdf5_output_param()->CopyFrom( + v0_layer_param.hdf5_output_param()); + } else { + LOG(ERROR) << "Unknown parameter hdf5_output_param for layer type " + << type; + is_fully_compatible = false; + } + } + } + return is_fully_compatible; +} + +LayerParameter_LayerType UpgradeV0LayerType(const string& type) { + if (type == "accuracy") { + return LayerParameter_LayerType_ACCURACY; + } else if (type == "bnll") { + return LayerParameter_LayerType_BNLL; + } else if (type == "concat") { + return LayerParameter_LayerType_CONCAT; + } else if (type == "conv") { + return LayerParameter_LayerType_CONVOLUTION; + } else if (type == "data") { + return LayerParameter_LayerType_DATA; + } else if (type == "dropout") { + return LayerParameter_LayerType_DROPOUT; + } else if (type == "euclidean_loss") { + return LayerParameter_LayerType_EUCLIDEAN_LOSS; + } else if (type == "flatten") { + return LayerParameter_LayerType_FLATTEN; + } else if (type == "hdf5_data") { + return LayerParameter_LayerType_HDF5_DATA; + } else if (type == "hdf5_output") { + return LayerParameter_LayerType_HDF5_OUTPUT; + } else if (type == "im2col") { + return LayerParameter_LayerType_IM2COL; + } else if (type == "images") { + return LayerParameter_LayerType_IMAGE_DATA; + } else if (type == "infogain_loss") { + return LayerParameter_LayerType_INFOGAIN_LOSS; + } else if (type == "innerproduct") { + return LayerParameter_LayerType_INNER_PRODUCT; + } else if (type == "lrn") { + return LayerParameter_LayerType_LRN; + } else if (type == "multinomial_logistic_loss") { + return LayerParameter_LayerType_MULTINOMIAL_LOGISTIC_LOSS; + } else if (type == "pool") { + return LayerParameter_LayerType_POOLING; + } else if (type == "relu") { + return LayerParameter_LayerType_RELU; + } else if (type == "sigmoid") { + return LayerParameter_LayerType_SIGMOID; + } else if (type == "softmax") { + return LayerParameter_LayerType_SOFTMAX; + } else if (type == "softmax_loss") { + return LayerParameter_LayerType_SOFTMAX_LOSS; + } else if (type == "split") { + return LayerParameter_LayerType_SPLIT; + } else if (type == "tanh") { + return LayerParameter_LayerType_TANH; + } else if (type == "window_data") { + return LayerParameter_LayerType_WINDOW_DATA; + } else { + LOG(FATAL) << "Unknown layer name: " << type; + return LayerParameter_LayerType_NONE; + } +} + +void NetParameterToPrettyPrint(const NetParameter& param, + NetParameterPrettyPrint* pretty_param) { + pretty_param->Clear(); + if (param.has_name()) { + pretty_param->set_name(param.name()); + } + if (param.has_force_backward()) { + pretty_param->set_force_backward(param.force_backward()); + } + for (int i = 0; i < param.input_size(); ++i) { + pretty_param->add_input(param.input(i)); + } + for (int i = 0; i < param.input_dim_size(); ++i) { + pretty_param->add_input_dim(param.input_dim(i)); + } + for (int i = 0; i < param.layers_size(); ++i) { + pretty_param->add_layers()->CopyFrom(param.layers(i)); + } +} + +void ReadNetParamsFromTextFileOrDie(const string& param_file, + NetParameter* param) { + CHECK(ReadProtoFromTextFile(param_file, param)) + << "Failed to parse NetParameter file: " << param_file; + if 
diff --git a/tools/dump_network.cpp b/tools/dump_network.cpp
index f5c36820083..f29e150b048 100644
--- a/tools/dump_network.cpp
+++ b/tools/dump_network.cpp
@@ -31,16 +31,14 @@ int main(int argc, char** argv) {
   Caffe::set_mode(Caffe::GPU);
   Caffe::set_phase(Caffe::TEST);
 
-  NetParameter net_param;
-  NetParameter trained_net_param;
-
+  shared_ptr<Net<float> > caffe_net;
   if (strcmp(argv[1], "none") == 0) {
     // We directly load the net param from trained file
-    ReadProtoFromBinaryFile(argv[2], &net_param);
+    caffe_net.reset(new Net<float>(argv[2]));
   } else {
-    ReadProtoFromTextFile(argv[1], &net_param);
+    caffe_net.reset(new Net<float>(argv[1]));
   }
-  ReadProtoFromBinaryFile(argv[2], &trained_net_param);
+  caffe_net->CopyTrainedLayersFrom(argv[2]);
 
   vector<Blob<float>* > input_vec;
   shared_ptr<Blob<float> > input_blob(new Blob<float>());
@@ -51,9 +49,6 @@ int main(int argc, char** argv) {
     input_vec.push_back(input_blob.get());
   }
 
-  shared_ptr<Net<float> > caffe_net(new Net<float>(net_param));
-  caffe_net->CopyTrainedLayersFrom(trained_net_param);
-
   string output_prefix(argv[4]);
   // Run the network without training.
   LOG(ERROR) << "Performing Forward";
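
With those helpers in place, the tools stop shuttling raw NetParameter messages around: dump_network.cpp above now builds the net straight from a prototxt path and loads weights from a binaryproto path via CopyTrainedLayersFrom. A minimal sketch of that pattern, with placeholder file names:

    #include "caffe/caffe.hpp"

    using namespace caffe;  // NOLINT(build/namespaces)

    void LoadTrainedNet() {
      // Constructing from a file path parses (and, if necessary, upgrades)
      // the prototxt; both file names below are placeholders.
      Net<float> net("deploy.prototxt");
      net.CopyTrainedLayersFrom("weights.binaryproto");
      LOG(INFO) << "Loaded " << net.name();
    }
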
LOG(ERROR) << "Performing Forward"; diff --git a/tools/extract_features.cpp b/tools/extract_features.cpp index 4274db44619..cdad6676d7f 100644 --- a/tools/extract_features.cpp +++ b/tools/extract_features.cpp @@ -56,47 +56,40 @@ int feature_extraction_pipeline(int argc, char** argv) { } Caffe::set_phase(Caffe::TEST); - NetParameter pretrained_net_param; - arg_pos = 0; // the name of the executable string pretrained_binary_proto(argv[++arg_pos]); - ReadProtoFromBinaryFile(pretrained_binary_proto.c_str(), - &pretrained_net_param); // Expected prototxt contains at least one data layer such as // the layer data_layer_name and one feature blob such as the // fc7 top blob to extract features. /* layers { - layer { - name: "data_layer_name" - type: "data" - source: "/path/to/your/images/to/extract/feature/images_leveldb" - meanfile: "/path/to/your/image_mean.binaryproto" - batchsize: 128 - cropsize: 227 - mirror: false - } - top: "data_blob_name" - top: "label_blob_name" + name: "data_layer_name" + type: DATA + data_param { + source: "/path/to/your/images/to/extract/feature/images_leveldb" + mean_file: "/path/to/your/image_mean.binaryproto" + batch_size: 128 + crop_size: 227 + mirror: false + } + top: "data_blob_name" + top: "label_blob_name" } layers { - layer { - name: "drop7" - type: "dropout" - dropout_ratio: 0.5 - } - bottom: "fc7" - top: "fc7" + name: "drop7" + type: DROPOUT + dropout_param { + dropout_ratio: 0.5 + } + bottom: "fc7" + top: "fc7" } */ - NetParameter feature_extraction_net_param; string feature_extraction_proto(argv[++arg_pos]); - ReadProtoFromTextFile(feature_extraction_proto, - &feature_extraction_net_param); shared_ptr > feature_extraction_net( - new Net(feature_extraction_net_param)); - feature_extraction_net->CopyTrainedLayersFrom(pretrained_net_param); + new Net(feature_extraction_proto)); + feature_extraction_net->CopyTrainedLayersFrom(pretrained_binary_proto); string extract_feature_blob_name(argv[++arg_pos]); CHECK(feature_extraction_net->has_blob(extract_feature_blob_name)) diff --git a/tools/finetune_net.cpp b/tools/finetune_net.cpp index db96b02ba87..c1cd788a1fb 100644 --- a/tools/finetune_net.cpp +++ b/tools/finetune_net.cpp @@ -20,7 +20,7 @@ int main(int argc, char** argv) { } SolverParameter solver_param; - ReadProtoFromTextFile(argv[1], &solver_param); + ReadProtoFromTextFileOrDie(argv[1], &solver_param); LOG(INFO) << "Starting Optimization"; SGDSolver solver(solver_param); diff --git a/tools/net_speed_benchmark.cpp b/tools/net_speed_benchmark.cpp index f52aac9cba7..36a00779f60 100644 --- a/tools/net_speed_benchmark.cpp +++ b/tools/net_speed_benchmark.cpp @@ -49,10 +49,7 @@ int main(int argc, char** argv) { } Caffe::set_phase(Caffe::TRAIN); - NetParameter net_param; - ReadProtoFromTextFile(argv[1], - &net_param); - Net caffe_net(net_param); + Net caffe_net(argv[1]); // Run the network without training. 
LOG(ERROR) << "Performing Forward"; diff --git a/tools/test_net.cpp b/tools/test_net.cpp index 0abfbf68b27..559fa730667 100644 --- a/tools/test_net.cpp +++ b/tools/test_net.cpp @@ -34,9 +34,7 @@ int main(int argc, char** argv) { Caffe::set_mode(Caffe::CPU); } - NetParameter test_net_param; - ReadProtoFromTextFile(argv[1], &test_net_param); - Net caffe_test_net(test_net_param); + Net caffe_test_net(argv[1]); NetParameter trained_net_param; ReadProtoFromBinaryFile(argv[2], &trained_net_param); caffe_test_net.CopyTrainedLayersFrom(trained_net_param); diff --git a/tools/train_net.cpp b/tools/train_net.cpp index 751a70445e1..7c6f23e6240 100644 --- a/tools/train_net.cpp +++ b/tools/train_net.cpp @@ -21,7 +21,7 @@ int main(int argc, char** argv) { } SolverParameter solver_param; - ReadProtoFromTextFile(argv[1], &solver_param); + ReadProtoFromTextFileOrDie(argv[1], &solver_param); LOG(INFO) << "Starting Optimization"; SGDSolver solver(solver_param); diff --git a/tools/upgrade_net_proto_binary.cpp b/tools/upgrade_net_proto_binary.cpp new file mode 100644 index 00000000000..928fc52dc27 --- /dev/null +++ b/tools/upgrade_net_proto_binary.cpp @@ -0,0 +1,46 @@ +// Copyright 2014 BVLC and contributors. +// +// This is a script to upgrade "V0" network prototxts to the new format. +// Usage: +// upgrade_net_proto_binary v0_net_proto_file_in net_proto_file_out + +#include +#include // NOLINT(readability/streams) +#include // NOLINT(readability/streams) + +#include "caffe/caffe.hpp" +#include "caffe/util/io.hpp" +#include "caffe/util/upgrade_proto.hpp" + +using std::ofstream; + +using namespace caffe; // NOLINT(build/namespaces) + +int main(int argc, char** argv) { + ::google::InitGoogleLogging(argv[0]); + if (argc != 3) { + LOG(ERROR) << "Usage: " + << "upgrade_net_proto_binary v0_net_proto_file_in net_proto_file_out"; + return 1; + } + + NetParameter net_param; + if (!ReadProtoFromBinaryFile(argv[1], &net_param)) { + LOG(ERROR) << "Failed to parse input binary file as NetParameter: " + << argv[1]; + return 2; + } + bool need_upgrade = NetNeedsUpgrade(net_param); + bool success = true; + if (need_upgrade) { + NetParameter v0_net_param(net_param); + success = UpgradeV0Net(v0_net_param, &net_param); + } else { + LOG(ERROR) << "File already in V1 proto format: " << argv[1]; + } + + WriteProtoToBinaryFile(net_param, argv[2]); + + LOG(ERROR) << "Wrote upgraded NetParameter binary proto to " << argv[2]; + return !success; +} diff --git a/tools/upgrade_net_proto_text.cpp b/tools/upgrade_net_proto_text.cpp new file mode 100644 index 00000000000..aefdc7e2961 --- /dev/null +++ b/tools/upgrade_net_proto_text.cpp @@ -0,0 +1,57 @@ +// Copyright 2014 BVLC and contributors. +// +// This is a script to upgrade "V0" network prototxts to the new format. 
diff --git a/tools/upgrade_net_proto_text.cpp b/tools/upgrade_net_proto_text.cpp
new file mode 100644
index 00000000000..aefdc7e2961
--- /dev/null
+++ b/tools/upgrade_net_proto_text.cpp
@@ -0,0 +1,57 @@
+// Copyright 2014 BVLC and contributors.
+//
+// This is a script to upgrade "V0" network prototxts to the new format.
+// Usage:
+//    upgrade_net_proto_text v0_net_proto_file_in net_proto_file_out
+
+#include <cstring>
+#include <fstream>  // NOLINT(readability/streams)
+#include <iostream>  // NOLINT(readability/streams)
+
+#include "caffe/caffe.hpp"
+#include "caffe/util/io.hpp"
+#include "caffe/util/upgrade_proto.hpp"
+
+using std::ofstream;
+
+using namespace caffe;  // NOLINT(build/namespaces)
+
+int main(int argc, char** argv) {
+  ::google::InitGoogleLogging(argv[0]);
+  if (argc != 3) {
+    LOG(ERROR) << "Usage: "
+        << "upgrade_net_proto_text v0_net_proto_file_in net_proto_file_out";
+    return 1;
+  }
+
+  NetParameter net_param;
+  if (!ReadProtoFromTextFile(argv[1], &net_param)) {
+    LOG(ERROR) << "Failed to parse input text file as NetParameter: "
+        << argv[1];
+    return 2;
+  }
+  bool need_upgrade = NetNeedsUpgrade(net_param);
+  bool success = true;
+  if (need_upgrade) {
+    NetParameter v0_net_param(net_param);
+    success = UpgradeV0Net(v0_net_param, &net_param);
+  } else {
+    LOG(ERROR) << "File already in V1 proto format: " << argv[1];
+  }
+
+  // Convert to a NetParameterPrettyPrint to print fields in desired
+  // order.
+  NetParameterPrettyPrint net_param_pretty;
+  NetParameterToPrettyPrint(net_param, &net_param_pretty);
+
+  // TODO(jdonahue): figure out why WriteProtoToTextFile doesn't work
+  // (no file is created).
+  // WriteProtoToTextFile(net_param_pretty, argv[2]);
+  ofstream output_proto;
+  output_proto.open(argv[2]);
+  output_proto << net_param_pretty.DebugString();
+  output_proto.close();
+
+  LOG(ERROR) << "Wrote upgraded NetParameter text proto to " << argv[2];
+  return !success;
+}
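
Both new tools wrap the same calls from caffe/util/upgrade_proto.hpp; stripped of argument handling, the text-mode flow above boils down to roughly the following (paths are placeholders, and the DebugString write-out mirrors the workaround noted in the TODO):

    #include <fstream>  // NOLINT(readability/streams)

    #include "caffe/caffe.hpp"
    #include "caffe/util/io.hpp"
    #include "caffe/util/upgrade_proto.hpp"

    using namespace caffe;  // NOLINT(build/namespaces)

    bool UpgradeTextNetFile() {
      NetParameter net_param;
      if (!ReadProtoFromTextFile("old_v0_net.prototxt", &net_param)) {
        return false;  // not parseable at all
      }
      bool success = true;
      if (NetNeedsUpgrade(net_param)) {
        NetParameter v0_net_param(net_param);  // keep the V0 message as input
        success = UpgradeV0Net(v0_net_param, &net_param);  // false if partly incompatible
      }
      // Pretty-print so the fields come out in a readable order.
      NetParameterPrettyPrint net_param_pretty;
      NetParameterToPrettyPrint(net_param, &net_param_pretty);
      std::ofstream output_proto("upgraded_net.prototxt");
      output_proto << net_param_pretty.DebugString();
      return success;
    }
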