diff --git a/include/caffe/vision_layers.hpp b/include/caffe/vision_layers.hpp
index 4765398aa7b..091fc0f65c5 100644
--- a/include/caffe/vision_layers.hpp
+++ b/include/caffe/vision_layers.hpp
@@ -198,6 +198,25 @@ class AccuracyLayer : public Layer<Dtype> {
   }
 };
 
+template <typename Dtype>
+class ArgMaxLayer : public Layer<Dtype> {
+ public:
+  explicit ArgMaxLayer(const LayerParameter& param)
+      : Layer<Dtype>(param) {}
+  virtual void SetUp(const vector<Blob<Dtype>*>& bottom,
+      vector<Blob<Dtype>*>* top);
+
+ protected:
+  virtual Dtype Forward_cpu(const vector<Blob<Dtype>*>& bottom,
+      vector<Blob<Dtype>*>* top);
+  // For now, the ArgMax layer should not be used to compute backward
+  // operations.
+  virtual void Backward_cpu(const vector<Blob<Dtype>*>& top,
+      const bool propagate_down, vector<Blob<Dtype>*>* bottom) {
+    NOT_IMPLEMENTED;
+  }
+  bool out_max_val_;
+};
+
 template <typename Dtype>
 class ConcatLayer : public Layer<Dtype> {
  public:
diff --git a/src/caffe/layer_factory.cpp b/src/caffe/layer_factory.cpp
index 2991c81f559..ae15ba5bb44 100644
--- a/src/caffe/layer_factory.cpp
+++ b/src/caffe/layer_factory.cpp
@@ -24,6 +24,8 @@ Layer<Dtype>* GetLayer(const LayerParameter& param) {
   switch (type) {
   case LayerParameter_LayerType_ACCURACY:
     return new AccuracyLayer<Dtype>(param);
+  case LayerParameter_LayerType_ARGMAX:
+    return new ArgMaxLayer<Dtype>(param);
   case LayerParameter_LayerType_BNLL:
     return new BNLLLayer<Dtype>(param);
   case LayerParameter_LayerType_CONCAT:
diff --git a/src/caffe/layers/argmax_layer.cpp b/src/caffe/layers/argmax_layer.cpp
new file mode 100644
index 00000000000..33ec1d34089
--- /dev/null
+++ b/src/caffe/layers/argmax_layer.cpp
@@ -0,0 +1,57 @@
+// Copyright 2014 BVLC and contributors.
+
+#include <cfloat>
+#include <vector>
+
+#include "caffe/layer.hpp"
+#include "caffe/vision_layers.hpp"
+
+
+namespace caffe {
+
+template <typename Dtype>
+void ArgMaxLayer<Dtype>::SetUp(const vector<Blob<Dtype>*>& bottom,
+      vector<Blob<Dtype>*>* top) {
+  CHECK_EQ(bottom.size(), 1) << "ArgMaxLayer takes 1 input.";
+  CHECK_EQ(top->size(), 1) << "ArgMaxLayer takes 1 output.";
+  out_max_val_ = this->layer_param_.argmax_param().out_max_val();
+  if (out_max_val_) {
+    // Produces pairs (max_ind, max_val).
+    (*top)[0]->Reshape(bottom[0]->num(), 2, 1, 1);
+  } else {
+    // Produces only max_ind.
+    (*top)[0]->Reshape(bottom[0]->num(), 1, 1, 1);
+  }
+}
+
+template <typename Dtype>
+Dtype ArgMaxLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
+    vector<Blob<Dtype>*>* top) {
+  const Dtype* bottom_data = bottom[0]->cpu_data();
+  Dtype* top_data = (*top)[0]->mutable_cpu_data();
+  int num = bottom[0]->num();
+  int dim = bottom[0]->count() / bottom[0]->num();
+  for (int i = 0; i < num; ++i) {
+    // Find the index of the maximum value within each example.
+    Dtype max_val = -FLT_MAX;
+    int max_ind = 0;
+    for (int j = 0; j < dim; ++j) {
+      if (bottom_data[i * dim + j] > max_val) {
+        max_val = bottom_data[i * dim + j];
+        max_ind = j;
+      }
+    }
+    if (out_max_val_) {
+      top_data[i * 2] = max_ind;
+      top_data[i * 2 + 1] = max_val;
+    } else {
+      top_data[i] = max_ind;
+    }
+  }
+  return Dtype(0);
+}
+
+INSTANTIATE_CLASS(ArgMaxLayer);
+
+
+}  // namespace caffe
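Note (not part of the patch): when out_max_val is set, Forward_cpu above interleaves its output, so entry i * 2 holds the argmax index of example i and entry i * 2 + 1 holds the corresponding maximum. A minimal standalone C++ sketch of that layout, mirroring the loop in Forward_cpu; the array sizes and values here are made up for illustration:

    #include <cfloat>
    #include <cstdio>

    int main() {
      const int num = 2;  // number of examples (blob num)
      const int dim = 3;  // entries per example (count / num)
      const float bottom[num * dim] = {0.1f, 0.9f, 0.3f,   // example 0
                                       0.7f, 0.2f, 0.5f};  // example 1
      float top[num * 2];  // interleaved: [ind_0, val_0, ind_1, val_1]
      for (int i = 0; i < num; ++i) {
        float max_val = -FLT_MAX;
        int max_ind = 0;
        for (int j = 0; j < dim; ++j) {
          if (bottom[i * dim + j] > max_val) {
            max_val = bottom[i * dim + j];
            max_ind = j;
          }
        }
        top[i * 2] = static_cast<float>(max_ind);
        top[i * 2 + 1] = max_val;
      }
      // Prints: example 0 -> (argmax 1, maxval 0.9)
      //         example 1 -> (argmax 0, maxval 0.7)
      for (int i = 0; i < num; ++i) {
        std::printf("example %d -> (argmax %d, maxval %g)\n",
                    i, static_cast<int>(top[i * 2]), top[i * 2 + 1]);
      }
      return 0;
    }

Packing both outputs into one 2-channel top blob avoids a second top blob, at the cost of storing integer indices as Dtype.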
diff --git a/src/caffe/proto/caffe.proto b/src/caffe/proto/caffe.proto
index ab3c2fecc5c..bc1d1478c87 100644
--- a/src/caffe/proto/caffe.proto
+++ b/src/caffe/proto/caffe.proto
@@ -149,6 +149,7 @@ message LayerParameter {
     SPLIT = 22;
     TANH = 23;
     WINDOW_DATA = 24;
+    ARGMAX = 30;
   }
   optional LayerType type = 5; // the layer type from the enum above
@@ -175,6 +176,7 @@ message LayerParameter {
   optional PoolingParameter pooling_param = 19;
   optional PowerParameter power_param = 21;
   optional WindowDataParameter window_data_param = 20;
+  optional ArgMaxParameter argmax_param = 23;
 
   // DEPRECATED: The layer parameters specified as a V0LayerParameter.
   // This should never be used by any code except to upgrade to the new
@@ -182,6 +184,13 @@ message LayerParameter {
   optional V0LayerParameter layer = 1;
 }
 
+// Message that stores parameters used by ArgMaxLayer
+message ArgMaxParameter {
+  // If true, produce pairs (argmax, maxval); otherwise produce only argmax.
+  optional bool out_max_val = 1 [default = false];
+}
+
 // Message that stores parameters used by ConcatLayer
 message ConcatParameter {
   // Concat Layer needs to specify the dimension along the concat will happen,
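Note (not part of the patch): with the proto additions above, a net definition can request the layer through the ARGMAX enum value and the argmax_param field. A hypothetical prototxt snippet; the blob names "prob" and "argmax" are made up for illustration:

    layers {
      name: "argmax"
      type: ARGMAX
      bottom: "prob"
      top: "argmax"
      argmax_param {
        out_max_val: true
      }
    }

Since Backward_cpu is NOT_IMPLEMENTED, the layer should only be placed where no gradient has to flow through it, e.g. in a deploy-time net.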
diff --git a/src/caffe/test/test_argmax_layer.cpp b/src/caffe/test/test_argmax_layer.cpp
new file mode 100644
index 00000000000..627dd575904
--- /dev/null
+++ b/src/caffe/test/test_argmax_layer.cpp
@@ -0,0 +1,114 @@
+// Copyright 2014 BVLC and contributors.
+
+#include <vector>
+
+#include "cuda_runtime.h"
+#include "gtest/gtest.h"
+#include "caffe/blob.hpp"
+#include "caffe/common.hpp"
+#include "caffe/filler.hpp"
+#include "caffe/vision_layers.hpp"
+#include "caffe/test/test_gradient_check_util.hpp"
+
+#include "caffe/test/test_caffe_main.hpp"
+
+namespace caffe {
+
+extern cudaDeviceProp CAFFE_TEST_CUDA_PROP;
+
+template <typename Dtype>
+class ArgMaxLayerTest : public ::testing::Test {
+ protected:
+  ArgMaxLayerTest()
+      : blob_bottom_(new Blob<Dtype>(20, 10, 1, 1)),
+        blob_top_(new Blob<Dtype>()) {
+    Caffe::set_random_seed(1701);
+    // fill the values
+    FillerParameter filler_param;
+    GaussianFiller<Dtype> filler(filler_param);
+    filler.Fill(this->blob_bottom_);
+    blob_bottom_vec_.push_back(blob_bottom_);
+    blob_top_vec_.push_back(blob_top_);
+  }
+  virtual ~ArgMaxLayerTest() { delete blob_bottom_; delete blob_top_; }
+  Blob<Dtype>* const blob_bottom_;
+  Blob<Dtype>* const blob_top_;
+  vector<Blob<Dtype>*> blob_bottom_vec_;
+  vector<Blob<Dtype>*> blob_top_vec_;
+};
+
+typedef ::testing::Types<float, double> Dtypes;
+TYPED_TEST_CASE(ArgMaxLayerTest, Dtypes);
+
+
+TYPED_TEST(ArgMaxLayerTest, TestSetup) {
+  LayerParameter layer_param;
+  ArgMaxLayer<TypeParam> layer(layer_param);
+  layer.SetUp(this->blob_bottom_vec_, &(this->blob_top_vec_));
+  EXPECT_EQ(this->blob_top_->num(), this->blob_bottom_->num());
+  EXPECT_EQ(this->blob_top_->channels(), 1);
+}
+
+TYPED_TEST(ArgMaxLayerTest, TestSetupMaxVal) {
+  LayerParameter layer_param;
+  ArgMaxParameter* argmax_param = layer_param.mutable_argmax_param();
+  argmax_param->set_out_max_val(true);
+  ArgMaxLayer<TypeParam> layer(layer_param);
+  layer.SetUp(this->blob_bottom_vec_, &(this->blob_top_vec_));
+  EXPECT_EQ(this->blob_top_->num(), this->blob_bottom_->num());
+  EXPECT_EQ(this->blob_top_->channels(), 2);
+}
+
+TYPED_TEST(ArgMaxLayerTest, TestCPU) {
+  LayerParameter layer_param;
+  Caffe::set_mode(Caffe::CPU);
+  ArgMaxLayer<TypeParam> layer(layer_param);
+  layer.SetUp(this->blob_bottom_vec_, &(this->blob_top_vec_));
+  layer.Forward(this->blob_bottom_vec_, &(this->blob_top_vec_));
+  // Now, check values
+  const TypeParam* bottom_data = this->blob_bottom_->cpu_data();
+  const TypeParam* top_data = this->blob_top_->cpu_data();
+  int max_ind;
+  TypeParam max_val;
+  int num = this->blob_bottom_->num();
+  int dim = this->blob_bottom_->count() / num;
+  for (int i = 0; i < num; ++i) {
+    EXPECT_GE(top_data[i], 0);
+    EXPECT_LE(top_data[i], dim - 1);
+    max_ind = top_data[i];
+    max_val = bottom_data[i * dim + max_ind];
+    for (int j = 0; j < dim; ++j) {
+      EXPECT_LE(bottom_data[i * dim + j], max_val);
+    }
+  }
+}
+
+TYPED_TEST(ArgMaxLayerTest, TestCPUMaxVal) {
+  LayerParameter layer_param;
+  Caffe::set_mode(Caffe::CPU);
+  ArgMaxParameter* argmax_param = layer_param.mutable_argmax_param();
+  argmax_param->set_out_max_val(true);
+  ArgMaxLayer<TypeParam> layer(layer_param);
+  layer.SetUp(this->blob_bottom_vec_, &(this->blob_top_vec_));
+  layer.Forward(this->blob_bottom_vec_, &(this->blob_top_vec_));
+  // Now, check values
+  const TypeParam* bottom_data = this->blob_bottom_->cpu_data();
+  const TypeParam* top_data = this->blob_top_->cpu_data();
+  int max_ind;
+  TypeParam max_val;
+  int num = this->blob_bottom_->num();
+  int dim = this->blob_bottom_->count() / num;
+  for (int i = 0; i < num; ++i) {
+    // Entries are interleaved (index, value) pairs, so index by i * 2.
+    EXPECT_GE(top_data[i * 2], 0);
+    EXPECT_LE(top_data[i * 2], dim - 1);
+    max_ind = top_data[i * 2];
+    max_val = top_data[i * 2 + 1];
+    EXPECT_EQ(bottom_data[i * dim + max_ind], max_val);
+    for (int j = 0; j < dim; ++j) {
+      EXPECT_LE(bottom_data[i * dim + j], max_val);
+    }
+  }
+}
+
+}  // namespace caffe