From 5ed7a88a2ac5578d95a768493df5eccf184eefeb Mon Sep 17 00:00:00 2001 From: Pavan Yalamanchili Date: Sat, 1 Jul 2017 23:33:03 -0700 Subject: [PATCH 01/20] Reorganizing the CMake files --- AFBuildMacros.cmake | 65 ------------------------------ CMakeLists.txt | 31 ++++---------- {test => examples}/Activations.cpp | 0 examples/CMakeLists.txt | 22 ++++++++++ {test => examples}/FFNet.cpp | 1 + {test => examples}/Node.cpp | 0 {test => examples}/Weights.cpp | 0 {test => examples}/perceptron.cpp | 0 8 files changed, 30 insertions(+), 89 deletions(-) delete mode 100644 AFBuildMacros.cmake rename {test => examples}/Activations.cpp (100%) create mode 100644 examples/CMakeLists.txt rename {test => examples}/FFNet.cpp (98%) rename {test => examples}/Node.cpp (100%) rename {test => examples}/Weights.cpp (100%) rename {test => examples}/perceptron.cpp (100%) diff --git a/AFBuildMacros.cmake b/AFBuildMacros.cmake deleted file mode 100644 index 64ea17e..0000000 --- a/AFBuildMacros.cmake +++ /dev/null @@ -1,65 +0,0 @@ -# A macro to build an ArrayFire example -# For most uses only FIND_PACKAGE(ArrayFire REQUIRED), ADD_EXECUTABLE(...) -# and TARGET_LINK_LIBRARIES(... ${ARRAYFIRE_LIBRARIES}) are needed -MACRO(BUILD_SRC SRC_NAME SRC_SOURCE BACKEND_NAME BACKEND_LIBRARIES OTHER_LIBRARIES OUT_DIR_NAME) - - ADD_EXECUTABLE(example_${SRC_NAME}_${BACKEND_NAME} ${SRC_SOURCE}) - - TARGET_LINK_LIBRARIES(example_${SRC_NAME}_${BACKEND_NAME} - ${BACKEND_LIBRARIES} ${OTHER_LIBRARIES}) - - SET_TARGET_PROPERTIES(example_${SRC_NAME}_${BACKEND_NAME} - PROPERTIES - OUTPUT_NAME ${SRC_NAME}_${BACKEND_NAME} - RUNTIME_OUTPUT_DIRECTORY ${OUT_DIR_NAME} - FOLDER "${BACKEND_NAME}") -ENDMACRO() - -# A macro to build a list of files -# For most uses only FIND_PACKAGE(ArrayFire REQUIRED), ADD_EXECUTABLE(...) -# and TARGET_LINK_LIBRARIES(... 
${ARRAYFIRE_LIBRARIES}) are needed -MACRO(BUILD_BACKEND FILES BACKEND_NAME BACKEND_LIBRARIES OTHER_LIBRARIES) - - FOREACH(FILE ${FILES}) - GET_FILENAME_COMPONENT(SRC ${FILE} NAME_WE) - GET_FILENAME_COMPONENT(FULL_DIR_NAME ${FILE} PATH) - GET_FILENAME_COMPONENT(DIR_NAME ${FULL_DIR_NAME} NAME) - - BUILD_SRC(${SRC} ${FILE} ${BACKEND_NAME} - "${BACKEND_LIBRARIES}" - "${OTHER_LIBRARIES}" ${DIR_NAME}) - ENDFOREACH() -ENDMACRO() - -MACRO(BUILD_ALL FILES) - FIND_PACKAGE(ArrayFire REQUIRED) - FIND_PACKAGE(CUDA QUIET) - FIND_PACKAGE(OpenCL QUIET) - - INCLUDE_DIRECTORIES( - "${CMAKE_CURRENT_SOURCE_DIR}/include" - ${ArrayFire_INCLUDE_DIRS} - ) - - IF (${ArrayFire_CPU_FOUND}) - MESSAGE(STATUS "CPU backend is ON.") - BUILD_BACKEND("${FILES}" cpu ${ArrayFire_CPU_LIBRARIES} "") - ENDIF() - - IF (${OpenCL_FOUND} AND ${ArrayFire_OpenCL_FOUND}) - MESSAGE(STATUS "OPENCL backend is ON.") - BUILD_BACKEND("${FILES}" opencl ${ArrayFire_OpenCL_LIBRARIES} "${OpenCL_LIBRARIES}") - ENDIF() - - IF (${CUDA_FOUND} AND ${ArrayFire_CUDA_FOUND}) - FIND_LIBRARY( CUDA_NVVM_LIBRARY - NAMES "nvvm" - PATH_SUFFIXES "nvvm/lib64" "nvvm/lib" - PATHS ${CUDA_TOOLKIT_ROOT_DIR} - DOC "CUDA NVVM Library" - ) - - MESSAGE(STATUS "CUDA backend is ON.") - BUILD_BACKEND("${FILES}" cuda ${ArrayFire_CUDA_LIBRARIES} "${CUDA_CUBLAS_LIBRARIES};${CUDA_LIBRARIES};${CUDA_cusolver_LIBRARY};${CUDA_CUFFT_LIBRARIES};${CUDA_NVVM_LIBRARY};${CUDA_CUDA_LIBRARY}") - ENDIF() -ENDMACRO() diff --git a/CMakeLists.txt b/CMakeLists.txt index d03e555..104d635 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,26 +1,9 @@ -CMAKE_MINIMUM_REQUIRED(VERSION 2.8) -PROJECT(ARRAYFIRE_ML) +cmake_minimum_required(VERSION 3.5.2) -SET_PROPERTY(GLOBAL PROPERTY USE_FOLDERS ON) -ADD_DEFINITIONS(-Wall -std=c++11 -fvisibility=hidden) +project(ArrayFireML + VERSION 0.1.0 + LANGUAGES C CXX) -OPTION(BUILD_TEST "Build Tests" ON) - -# Header files -IF(NOT DEFINED AFML_INSTALL_INC_DIR) - SET(AFML_INSTALL_INC_DIR "include" CACHE PATH "Installation path for headers") -ENDIF() - -IF (BUILD_TEST) - FILE(GLOB FILES "test/*.cpp") - INCLUDE("${CMAKE_CURRENT_SOURCE_DIR}/AFBuildMacros.cmake") - BUILD_ALL("${FILES}") -ENDIF() - -INSTALL(DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}/include/" DESTINATION "${AFML_INSTALL_INC_DIR}" - COMPONENT headers - FILES_MATCHING - PATTERN "*.h" - PATTERN "*.hpp" - PATTERN ".gitignore" EXCLUDE -) +find_package(ArrayFire REQUIRED) +set(ArrayFireML_INCLUDE_DIRS ${CMAKE_CURRENT_SOURCE_DIR}/include) +add_subdirectory(examples) diff --git a/test/Activations.cpp b/examples/Activations.cpp similarity index 100% rename from test/Activations.cpp rename to examples/Activations.cpp diff --git a/examples/CMakeLists.txt b/examples/CMakeLists.txt new file mode 100644 index 0000000..fd22342 --- /dev/null +++ b/examples/CMakeLists.txt @@ -0,0 +1,22 @@ +function(build_example SRC) + get_filename_component(src_name ${SRC} NAME_WE) + set(target "${src_name}") + add_executable(${target} ${SRC}) + target_include_directories(${target} + PRIVATE + ${ArrayFire_INCLUDE_DIRS} + ${ArrayFireML_INCLUDE_DIRS} + ) + target_link_libraries(${target} + PRIVATE + af + ) + target_compile_features(${target} + PRIVATE cxx_range_for) +endfunction(build_example) + +build_example(Activations.cpp) +build_example(FFNet.cpp) +build_example(Node.cpp) +build_example(perceptron.cpp) +build_example(Weights.cpp) diff --git a/test/FFNet.cpp b/examples/FFNet.cpp similarity index 98% rename from test/FFNet.cpp rename to examples/FFNet.cpp index b9c9b62..93fb6bf 100644 --- a/test/FFNet.cpp +++ b/examples/FFNet.cpp @@ 
-15,6 +15,7 @@ using namespace afml::nn; int main() { + af::info(); const int inputSize = 2; const int hiddenSize = 3; const int outputSize = 1; diff --git a/test/Node.cpp b/examples/Node.cpp similarity index 100% rename from test/Node.cpp rename to examples/Node.cpp diff --git a/test/Weights.cpp b/examples/Weights.cpp similarity index 100% rename from test/Weights.cpp rename to examples/Weights.cpp diff --git a/test/perceptron.cpp b/examples/perceptron.cpp similarity index 100% rename from test/perceptron.cpp rename to examples/perceptron.cpp From dfa8fda802688db5abe6cdb58d186f16b470f058 Mon Sep 17 00:00:00 2001 From: Pavan Yalamanchili Date: Sun, 2 Jul 2017 01:01:04 -0700 Subject: [PATCH 02/20] Reorganizing the include files and namespace --- examples/Activations.cpp | 8 ++++---- examples/FFNet.cpp | 6 +++--- examples/Node.cpp | 4 ++-- examples/Weights.cpp | 4 ++-- examples/perceptron.cpp | 6 +++--- include/{afml.h => af/nn.h} | 7 ++++++- include/{afml/nn.h => af/nn/Activations.hpp} | 11 +++++------ .../{afml => af}/nn/Activations/Activation.hpp | 6 +++--- include/{afml => af}/nn/Activations/ReLU.hpp | 6 +++--- include/{afml => af}/nn/Activations/Sigmoid.hpp | 4 ++-- include/{afml => af}/nn/Activations/Tanh.hpp | 4 ++-- include/{afml => af}/nn/Activations/Threshold.hpp | 4 ++-- include/{afml => af}/nn/Networks.hpp | 2 +- include/{afml => af}/nn/Networks/FFNet.hpp | 10 +++++----- include/{afml => af}/nn/Nodes.hpp | 4 ++-- include/{afml => af}/nn/Nodes/Linear.hpp | 8 ++++---- include/{afml => af}/nn/Nodes/Node.hpp | 6 +++--- include/{afml => af}/nn/Weights.hpp | 4 ++-- include/{afml/util => af/nn}/common.hpp | 2 +- include/afml/nn/Activations.hpp | 15 --------------- 20 files changed, 55 insertions(+), 66 deletions(-) rename include/{afml.h => af/nn.h} (70%) rename include/{afml/nn.h => af/nn/Activations.hpp} (62%) rename include/{afml => af}/nn/Activations/Activation.hpp (93%) rename include/{afml => af}/nn/Activations/ReLU.hpp (79%) rename include/{afml => af}/nn/Activations/Sigmoid.hpp (94%) rename include/{afml => af}/nn/Activations/Tanh.hpp (93%) rename include/{afml => af}/nn/Activations/Threshold.hpp (94%) rename include/{afml => af}/nn/Networks.hpp (90%) rename include/{afml => af}/nn/Networks/FFNet.hpp (95%) rename include/{afml => af}/nn/Nodes.hpp (83%) rename include/{afml => af}/nn/Nodes/Linear.hpp (94%) rename include/{afml => af}/nn/Nodes/Node.hpp (97%) rename include/{afml => af}/nn/Weights.hpp (98%) rename include/{afml/util => af/nn}/common.hpp (97%) delete mode 100644 include/afml/nn/Activations.hpp diff --git a/examples/Activations.cpp b/examples/Activations.cpp index 098c864..594bd5f 100644 --- a/examples/Activations.cpp +++ b/examples/Activations.cpp @@ -7,16 +7,16 @@ * http://arrayfire.com/licenses/BSD-3-Clause ********************************************************/ -#include +#include -using namespace afml::nn; +using namespace af::nn; int main() { const int num = 5; - afml::ArrayVector in = {100 * af::randu(num, 1) - 50}; - afml::ArrayVector grad = {100 * af::randu(num, 1)}; + af::ArrayVector in = {100 * af::randu(num, 1) - 50}; + af::ArrayVector grad = {100 * af::randu(num, 1)}; ReLU r = ReLU(num, 0); Sigmoid s = Sigmoid(num); diff --git a/examples/FFNet.cpp b/examples/FFNet.cpp index 93fb6bf..ec69ed1 100644 --- a/examples/FFNet.cpp +++ b/examples/FFNet.cpp @@ -7,11 +7,11 @@ * http://arrayfire.com/licenses/BSD-3-Clause ********************************************************/ -#include +#include using namespace af; -using namespace afml; -using namespace 
afml::nn; +using namespace af; +using namespace af::nn; int main() { diff --git a/examples/Node.cpp b/examples/Node.cpp index 33a0a90..eb4229a 100644 --- a/examples/Node.cpp +++ b/examples/Node.cpp @@ -7,9 +7,9 @@ * http://arrayfire.com/licenses/BSD-3-Clause ********************************************************/ -#include +#include -using namespace afml::nn; +using namespace af::nn; int main() { diff --git a/examples/Weights.cpp b/examples/Weights.cpp index 8f65895..9e83c06 100644 --- a/examples/Weights.cpp +++ b/examples/Weights.cpp @@ -7,9 +7,9 @@ * http://arrayfire.com/licenses/BSD-3-Clause ********************************************************/ -#include +#include -using namespace afml::nn; +using namespace af::nn; int main() { diff --git a/examples/perceptron.cpp b/examples/perceptron.cpp index 213378c..9975a03 100644 --- a/examples/perceptron.cpp +++ b/examples/perceptron.cpp @@ -7,10 +7,10 @@ * http://arrayfire.com/licenses/BSD-3-Clause ********************************************************/ -#include +#include -using namespace afml; -using namespace afml::nn; +using namespace af; +using namespace af::nn; int main() { diff --git a/include/afml.h b/include/af/nn.h similarity index 70% rename from include/afml.h rename to include/af/nn.h index 279f7aa..8ee2134 100644 --- a/include/afml.h +++ b/include/af/nn.h @@ -8,4 +8,9 @@ ********************************************************/ #pragma once -#include + +#include +#include +#include +#include +#include diff --git a/include/afml/nn.h b/include/af/nn/Activations.hpp similarity index 62% rename from include/afml/nn.h rename to include/af/nn/Activations.hpp index 98a7a4b..4972238 100644 --- a/include/afml/nn.h +++ b/include/af/nn/Activations.hpp @@ -6,11 +6,10 @@ * The complete license agreement can be obtained at: * http://arrayfire.com/licenses/BSD-3-Clause ********************************************************/ - #pragma once -#include -#include -#include -#include -#include +#include +#include +#include +#include +#include diff --git a/include/afml/nn/Activations/Activation.hpp b/include/af/nn/Activations/Activation.hpp similarity index 93% rename from include/afml/nn/Activations/Activation.hpp rename to include/af/nn/Activations/Activation.hpp index 52e0346..d98eaf3 100644 --- a/include/afml/nn/Activations/Activation.hpp +++ b/include/af/nn/Activations/Activation.hpp @@ -8,10 +8,10 @@ ********************************************************/ #pragma once -#include -#include +#include +#include -namespace afml +namespace af { namespace nn diff --git a/include/afml/nn/Activations/ReLU.hpp b/include/af/nn/Activations/ReLU.hpp similarity index 79% rename from include/afml/nn/Activations/ReLU.hpp rename to include/af/nn/Activations/ReLU.hpp index 55b3703..3c47684 100644 --- a/include/afml/nn/Activations/ReLU.hpp +++ b/include/af/nn/Activations/ReLU.hpp @@ -8,10 +8,10 @@ ********************************************************/ #pragma once -#include -#include +#include +#include -namespace afml +namespace af { namespace nn { diff --git a/include/afml/nn/Activations/Sigmoid.hpp b/include/af/nn/Activations/Sigmoid.hpp similarity index 94% rename from include/afml/nn/Activations/Sigmoid.hpp rename to include/af/nn/Activations/Sigmoid.hpp index 6a45cac..1fd31d5 100644 --- a/include/afml/nn/Activations/Sigmoid.hpp +++ b/include/af/nn/Activations/Sigmoid.hpp @@ -8,9 +8,9 @@ ********************************************************/ #pragma once -#include +#include -namespace afml +namespace af { namespace nn { diff --git 
a/include/afml/nn/Activations/Tanh.hpp b/include/af/nn/Activations/Tanh.hpp similarity index 93% rename from include/afml/nn/Activations/Tanh.hpp rename to include/af/nn/Activations/Tanh.hpp index da5e7ef..abbcd07 100644 --- a/include/afml/nn/Activations/Tanh.hpp +++ b/include/af/nn/Activations/Tanh.hpp @@ -8,9 +8,9 @@ ********************************************************/ #pragma once -#include +#include -namespace afml +namespace af { namespace nn { diff --git a/include/afml/nn/Activations/Threshold.hpp b/include/af/nn/Activations/Threshold.hpp similarity index 94% rename from include/afml/nn/Activations/Threshold.hpp rename to include/af/nn/Activations/Threshold.hpp index 75dfc86..bded798 100644 --- a/include/afml/nn/Activations/Threshold.hpp +++ b/include/af/nn/Activations/Threshold.hpp @@ -8,9 +8,9 @@ ********************************************************/ #pragma once -#include +#include -namespace afml +namespace af { namespace nn { diff --git a/include/afml/nn/Networks.hpp b/include/af/nn/Networks.hpp similarity index 90% rename from include/afml/nn/Networks.hpp rename to include/af/nn/Networks.hpp index d6a08ca..2517121 100644 --- a/include/afml/nn/Networks.hpp +++ b/include/af/nn/Networks.hpp @@ -8,4 +8,4 @@ ********************************************************/ #pragma once -#include +#include diff --git a/include/afml/nn/Networks/FFNet.hpp b/include/af/nn/Networks/FFNet.hpp similarity index 95% rename from include/afml/nn/Networks/FFNet.hpp rename to include/af/nn/Networks/FFNet.hpp index 2308c02..1e00f18 100644 --- a/include/afml/nn/Networks/FFNet.hpp +++ b/include/af/nn/Networks/FFNet.hpp @@ -8,12 +8,12 @@ ********************************************************/ #pragma once -#include -#include -#include -#include +#include +#include +#include +#include -namespace afml +namespace af { namespace nn { diff --git a/include/afml/nn/Nodes.hpp b/include/af/nn/Nodes.hpp similarity index 83% rename from include/afml/nn/Nodes.hpp rename to include/af/nn/Nodes.hpp index df6bffc..a4f7c99 100644 --- a/include/afml/nn/Nodes.hpp +++ b/include/af/nn/Nodes.hpp @@ -8,5 +8,5 @@ ********************************************************/ #pragma once -#include -#include +#include +#include diff --git a/include/afml/nn/Nodes/Linear.hpp b/include/af/nn/Nodes/Linear.hpp similarity index 94% rename from include/afml/nn/Nodes/Linear.hpp rename to include/af/nn/Nodes/Linear.hpp index 479ac2b..9ad2c85 100644 --- a/include/afml/nn/Nodes/Linear.hpp +++ b/include/af/nn/Nodes/Linear.hpp @@ -8,11 +8,11 @@ ********************************************************/ #pragma once -#include -#include -#include +#include +#include +#include -namespace afml +namespace af { namespace nn { diff --git a/include/afml/nn/Nodes/Node.hpp b/include/af/nn/Nodes/Node.hpp similarity index 97% rename from include/afml/nn/Nodes/Node.hpp rename to include/af/nn/Nodes/Node.hpp index 4e522d0..ec4eb12 100644 --- a/include/afml/nn/Nodes/Node.hpp +++ b/include/af/nn/Nodes/Node.hpp @@ -8,13 +8,13 @@ ********************************************************/ #pragma once -#include -#include +#include +#include #include #include -namespace afml +namespace af { namespace nn { diff --git a/include/afml/nn/Weights.hpp b/include/af/nn/Weights.hpp similarity index 98% rename from include/afml/nn/Weights.hpp rename to include/af/nn/Weights.hpp index 7250ba7..3efe645 100644 --- a/include/afml/nn/Weights.hpp +++ b/include/af/nn/Weights.hpp @@ -8,9 +8,9 @@ ********************************************************/ #pragma once -#include 
+#include -namespace afml +namespace af { namespace nn { diff --git a/include/afml/util/common.hpp b/include/af/nn/common.hpp similarity index 97% rename from include/afml/util/common.hpp rename to include/af/nn/common.hpp index a9d7123..ae6f9c7 100644 --- a/include/afml/util/common.hpp +++ b/include/af/nn/common.hpp @@ -11,7 +11,7 @@ #include #include -namespace afml +namespace af { const int MAX_NAME_SIZE = 32; diff --git a/include/afml/nn/Activations.hpp b/include/afml/nn/Activations.hpp deleted file mode 100644 index 46d7907..0000000 --- a/include/afml/nn/Activations.hpp +++ /dev/null @@ -1,15 +0,0 @@ -/******************************************************* - * Copyright (c) 2015, ArrayFire - * All rights reserved. - * - * This file is distributed under 3-clause BSD license. - * The complete license agreement can be obtained at: - * http://arrayfire.com/licenses/BSD-3-Clause - ********************************************************/ -#pragma once - -#include -#include -#include -#include -#include From 562b8609a28db082c41155c277d60a85ae267070 Mon Sep 17 00:00:00 2001 From: Pavan Yalamanchili Date: Sun, 2 Jul 2017 11:54:46 -0700 Subject: [PATCH 03/20] First attempt at implementing autograd --- examples/CMakeLists.txt | 1 + examples/autograd.cpp | 44 ++++++++ include/af/autograd.h | 11 ++ include/af/autograd/Functions.hpp | 38 +++++++ include/af/autograd/Grad.hpp | 26 +++++ include/af/autograd/Variable.hpp | 176 ++++++++++++++++++++++++++++++ 6 files changed, 296 insertions(+) create mode 100644 examples/autograd.cpp create mode 100644 include/af/autograd.h create mode 100644 include/af/autograd/Functions.hpp create mode 100644 include/af/autograd/Grad.hpp create mode 100644 include/af/autograd/Variable.hpp diff --git a/examples/CMakeLists.txt b/examples/CMakeLists.txt index fd22342..9a39b82 100644 --- a/examples/CMakeLists.txt +++ b/examples/CMakeLists.txt @@ -20,3 +20,4 @@ build_example(FFNet.cpp) build_example(Node.cpp) build_example(perceptron.cpp) build_example(Weights.cpp) +build_example(autograd.cpp) diff --git a/examples/autograd.cpp b/examples/autograd.cpp new file mode 100644 index 0000000..2f0f037 --- /dev/null +++ b/examples/autograd.cpp @@ -0,0 +1,44 @@ +/******************************************************* + * Copyright (c) 2017, ArrayFire + * All rights reserved. + * + * This file is distributed under 3-clause BSD license. + * The complete license agreement can be obtained at: + * http://arrayfire.com/licenses/BSD-3-Clause + ********************************************************/ + +#include + +using af::autograd::Variable; +using af::autograd::backward; +void test1() +{ + auto x = Variable(af::randu(5)); + af_print(x.getData()); + auto y = x * x; + af_print(y.getData()); + auto dy = Variable(af::constant(1.0, 5)); + backward(y, dy); + af_print(x.getGrad() - 2 * x.getData()); +} + +void test2() +{ + auto x = Variable(af::randu(5)); + af_print(x.getData()); + auto y = Variable(af::randu(5)); + af_print(y.getData()); + auto z = x * x + x * y + y * y; + auto dz = Variable(af::constant(1.0, 5)); + backward(z, dz); + af_print(x.getGrad() - 2 * x.getData() - y.getData()); + af_print(y.getGrad() - 2 * y.getData() - x.getData()); +} + +int main() +{ + af::info(); + test1(); + test2(); + return 0; +} diff --git a/include/af/autograd.h b/include/af/autograd.h new file mode 100644 index 0000000..e85625c --- /dev/null +++ b/include/af/autograd.h @@ -0,0 +1,11 @@ +/******************************************************* + * Copyright (c) 2017, ArrayFire + * All rights reserved. 
+ * + * This file is distributed under 3-clause BSD license. + * The complete license agreement can be obtained at: + * http://arrayfire.com/licenses/BSD-3-Clause + ********************************************************/ +#include +#include +#include diff --git a/include/af/autograd/Functions.hpp b/include/af/autograd/Functions.hpp new file mode 100644 index 0000000..0985ae8 --- /dev/null +++ b/include/af/autograd/Functions.hpp @@ -0,0 +1,38 @@ +/******************************************************* + * Copyright (c) 2017, ArrayFire + * All rights reserved. + * + * This file is distributed under 3-clause BSD license. + * The complete license agreement can be obtained at: + * http://arrayfire.com/licenses/BSD-3-Clause + ********************************************************/ +#pragma once + +#include + +namespace af { + namespace autograd { + + Variable operator +(const Variable lhs, const Variable rhs) + { + auto result = lhs.getData() + rhs.getData(); + auto backward = [](std::vector inputs, Variable grad_output) { + inputs[0].addGrad(grad_output); + inputs[1].addGrad(grad_output); + }; + return Variable(result, {lhs, rhs}, backward); + } + + Variable operator *(const Variable lhs, const Variable rhs) + { + auto result = lhs.getData() * rhs.getData(); + auto backward = [](std::vector inputs, Variable grad_output) { + inputs[0].addGrad(grad_output * inputs[1]); + inputs[1].addGrad(grad_output * inputs[0]); + }; + return Variable(result, {lhs, rhs}, backward); + } + + } + namespace ag = autograd; +} diff --git a/include/af/autograd/Grad.hpp b/include/af/autograd/Grad.hpp new file mode 100644 index 0000000..e1c2114 --- /dev/null +++ b/include/af/autograd/Grad.hpp @@ -0,0 +1,26 @@ +/******************************************************* + * Copyright (c) 2017, ArrayFire + * All rights reserved. + * + * This file is distributed under 3-clause BSD license. + * The complete license agreement can be obtained at: + * http://arrayfire.com/licenses/BSD-3-Clause + ********************************************************/ +#pragma once + +#include + +namespace af { + namespace autograd { + + void backward(Variable var, Variable grad) + { + var.addGrad(grad); + Variable::DAG_t dag = var.build(); + for (auto iter = dag.rbegin(); iter != dag.rend(); iter++) { + iter->backward(); + } + } + } + namespace ag = autograd; +} diff --git a/include/af/autograd/Variable.hpp b/include/af/autograd/Variable.hpp new file mode 100644 index 0000000..6d87250 --- /dev/null +++ b/include/af/autograd/Variable.hpp @@ -0,0 +1,176 @@ +/******************************************************* + * Copyright (c) 2017, ArrayFire + * All rights reserved. + * + * This file is distributed under 3-clause BSD license. 
+ * The complete license agreement can be obtained at: + * http://arrayfire.com/licenses/BSD-3-Clause + ********************************************************/ + +#pragma once + +#include +#include +#include +#include +#include + +#include + +namespace af { + namespace autograd { + class Variable + { + public: + typedef std::function, Variable)> BackwardFunc_t; + typedef std::unordered_map Cache_t; + typedef std::vector DAG_t; + + private: + class Shared { + public: + Shared() : + m_data(), + m_grad(), + m_inputs(), + m_grad_parts(), + m_backward(nullptr) + {} + + Shared(af::array data) : + m_data(data), + m_grad(af::constant(0, data.dims(), data.type())), + m_inputs(), + m_grad_parts(), + m_backward(nullptr) + {} + + Shared(af::array data, std::vector inputs, BackwardFunc_t backward) : + m_data(data), + m_grad(af::constant(0, data.dims(), data.type())), + m_inputs(inputs.begin(), inputs.end()), + m_grad_parts(), + m_backward(backward) + {} + + af::array getData() const + { + return m_data; + } + + af::array getGrad() const + { + return m_grad; + } + + void addGrad(Variable grad) + { + m_grad_parts.push_back(grad); + } + + std::vector getGradParts() + { + return m_grad_parts; + } + + std::vector getInputs() + { + return m_inputs; + } + + void evalGrad() + { + m_grad = m_grad_parts[0].getData(); + for (int i = 1; i < (int)m_grad_parts.size(); i++) { + m_grad += m_grad_parts[i].getData(); + } + af::eval(m_grad); + } + + void backward() + { + this->evalGrad(); + if (m_backward) m_backward(m_inputs, m_grad); + } + + private: + af::array m_data; + af::array m_grad; + std::vector m_inputs; + std::vector m_grad_parts; + BackwardFunc_t m_backward; + }; + + public: + + Variable() : + m_shared(new Shared()) + { + } + + Variable(af::array data) : + m_shared(new Shared(data)) + {} + + Variable(af::array data, + std::vector inputs, + BackwardFunc_t backward) : + m_shared(new Shared(data, inputs, backward)) + {} + + af::array getData() const + { + return m_shared->getData(); + } + + af::array getGrad() const + { + return m_shared->getGrad(); + } + + void addGrad(Variable child_grad) + { + m_shared->addGrad(child_grad); + } + + std::vector getInputs() const + { + return m_shared->getInputs(); + } + + void evalGrad() + { + m_shared->evalGrad(); + } + + void backward() + { + m_shared->backward(); + } + + DAG_t build() + { + Cache_t cache; + DAG_t dag; + this->buildGraph(cache, dag); + return dag; + } + + void buildGraph(Cache_t &cache, DAG_t &dag) + { + std::ptrdiff_t id = (std::ptrdiff_t)m_shared.get(); + if (cache.find(id) != cache.end()) { + return; + } + for (auto input : m_shared->getInputs()) { + input.buildGraph(cache, dag); + } + cache[id] = true; + dag.push_back(*this); + } + private: + std::shared_ptr m_shared; + }; + } + namespace ag = autograd; +} From 3f832a0c4d9c15cdd618151dc4273588cef4d2e6 Mon Sep 17 00:00:00 2001 From: Pavan Yalamanchili Date: Sun, 2 Jul 2017 15:59:59 -0700 Subject: [PATCH 04/20] Store gradients as autograd::Variable instead of af::array --- examples/autograd.cpp | 6 ++-- include/af/autograd/Variable.hpp | 47 +++++++++++++++++--------------- 2 files changed, 28 insertions(+), 25 deletions(-) diff --git a/examples/autograd.cpp b/examples/autograd.cpp index 2f0f037..56763f1 100644 --- a/examples/autograd.cpp +++ b/examples/autograd.cpp @@ -19,7 +19,7 @@ void test1() af_print(y.getData()); auto dy = Variable(af::constant(1.0, 5)); backward(y, dy); - af_print(x.getGrad() - 2 * x.getData()); + af_print(x.getGrad().getData() - 2 * x.getData()); } void test2() @@ -31,8 +31,8 @@ 
void test2() auto z = x * x + x * y + y * y; auto dz = Variable(af::constant(1.0, 5)); backward(z, dz); - af_print(x.getGrad() - 2 * x.getData() - y.getData()); - af_print(y.getGrad() - 2 * y.getData() - x.getData()); + af_print(x.getGrad().getData() - 2 * x.getData() - y.getData()); + af_print(y.getGrad().getData() - 2 * y.getData() - x.getData()); } int main() diff --git a/include/af/autograd/Variable.hpp b/include/af/autograd/Variable.hpp index 6d87250..9b25b86 100644 --- a/include/af/autograd/Variable.hpp +++ b/include/af/autograd/Variable.hpp @@ -14,11 +14,17 @@ #include #include #include +#include #include namespace af { namespace autograd { + + // Forward declare the function + class Variable; + Variable operator +(const Variable lhs, const Variable rhs); + class Variable { public: @@ -31,25 +37,22 @@ namespace af { public: Shared() : m_data(), - m_grad(), m_inputs(), - m_grad_parts(), + m_grads(), m_backward(nullptr) {} Shared(af::array data) : m_data(data), - m_grad(af::constant(0, data.dims(), data.type())), m_inputs(), - m_grad_parts(), + m_grads(), m_backward(nullptr) {} Shared(af::array data, std::vector inputs, BackwardFunc_t backward) : m_data(data), - m_grad(af::constant(0, data.dims(), data.type())), m_inputs(inputs.begin(), inputs.end()), - m_grad_parts(), + m_grads(), m_backward(backward) {} @@ -58,19 +61,17 @@ namespace af { return m_data; } - af::array getGrad() const + Variable getGrad() const { - return m_grad; + if (m_grads.size() == 0) { + throw std::runtime_error("Gradient hasn't been calculated"); + } + return m_grads[0]; } void addGrad(Variable grad) { - m_grad_parts.push_back(grad); - } - - std::vector getGradParts() - { - return m_grad_parts; + m_grads.push_back(grad); } std::vector getInputs() @@ -80,24 +81,26 @@ namespace af { void evalGrad() { - m_grad = m_grad_parts[0].getData(); - for (int i = 1; i < (int)m_grad_parts.size(); i++) { - m_grad += m_grad_parts[i].getData(); + if (m_grads.size() == 1) return; + Variable grad = m_grads[0]; + for (int i = 1; i < (int)m_grads.size(); i++) { + grad = grad + m_grads[i]; } - af::eval(m_grad); + grad.getData().eval(); + m_grads.clear(); + m_grads.push_back(grad); } void backward() { this->evalGrad(); - if (m_backward) m_backward(m_inputs, m_grad); + if (m_backward) m_backward(m_inputs, m_grads[0]); } private: af::array m_data; - af::array m_grad; std::vector m_inputs; - std::vector m_grad_parts; + std::vector m_grads; BackwardFunc_t m_backward; }; @@ -123,7 +126,7 @@ namespace af { return m_shared->getData(); } - af::array getGrad() const + Variable getGrad() const { return m_shared->getGrad(); } From c94ee3d0c2fbb8f6a5b54c713d6d678d2bfff976 Mon Sep 17 00:00:00 2001 From: Pavan Yalamanchili Date: Mon, 3 Jul 2017 22:24:46 -0700 Subject: [PATCH 05/20] Refactor autograd::Variable, option to disable grad calculations - autograd::Variable::Shared now a thin layer without methods - Variable::BackwardFunc_t renamed to Variable::GradFunc_t - Variable::getData renamed to Variable::array - Variable::getGrad renamed to Variable::grad - Variable::backward renamed to Variable::calcGradInputs --- examples/FFNet.cpp | 1 - examples/autograd.cpp | 46 +++++++--- include/af/autograd/Functions.hpp | 12 +-- include/af/autograd/Grad.hpp | 2 +- include/af/autograd/Variable.hpp | 146 ++++++++++++++++-------------- 5 files changed, 118 insertions(+), 89 deletions(-) diff --git a/examples/FFNet.cpp b/examples/FFNet.cpp index ec69ed1..f4afec0 100644 --- a/examples/FFNet.cpp +++ b/examples/FFNet.cpp @@ -9,7 +9,6 @@ #include -using namespace 
af; using namespace af; using namespace af::nn; diff --git a/examples/autograd.cpp b/examples/autograd.cpp index 56763f1..c37c0a4 100644 --- a/examples/autograd.cpp +++ b/examples/autograd.cpp @@ -13,26 +13,47 @@ using af::autograd::Variable; using af::autograd::backward; void test1() { - auto x = Variable(af::randu(5)); - af_print(x.getData()); + auto x = Variable(af::randu(5), true); + af_print(x.array()); auto y = x * x; - af_print(y.getData()); - auto dy = Variable(af::constant(1.0, 5)); + af_print(y.array()); + auto dy = Variable(af::constant(1.0, 5), false); backward(y, dy); - af_print(x.getGrad().getData() - 2 * x.getData()); + auto dx = x.grad(); + af_print(dx.array() - 2 * x.array()); } void test2() { - auto x = Variable(af::randu(5)); - af_print(x.getData()); - auto y = Variable(af::randu(5)); - af_print(y.getData()); + auto x = Variable(af::randu(5), true); + af_print(x.array()); + auto y = Variable(af::randu(5), true); + af_print(y.array()); auto z = x * x + x * y + y * y; - auto dz = Variable(af::constant(1.0, 5)); + auto dz = Variable(af::constant(1.0, 5), false); backward(z, dz); - af_print(x.getGrad().getData() - 2 * x.getData() - y.getData()); - af_print(y.getGrad().getData() - 2 * y.getData() - x.getData()); + auto dx = x.grad(); + auto dy = y.grad(); + af_print(dx.array() - 2 * x.array() - y.array()); + af_print(dy.array() - 2 * y.array() - x.array()); +} + +void test3() +{ + auto x = Variable(af::randu(5), false); + af_print(x.array()); + auto y = Variable(af::randu(5), true); + af_print(y.array()); + auto z = x * x + x * y + y * y; + auto dz = Variable(af::constant(1.0, 5), false); + backward(z, dz); + auto dy = y.grad(); + af_print(dy.array() - 2 * y.array() - x.array()); + try { + auto dx = x.grad(); + } catch(af::exception &ex) { + std::cout << ex.what() << std::endl; + } } int main() @@ -40,5 +61,6 @@ int main() af::info(); test1(); test2(); + test3(); return 0; } diff --git a/include/af/autograd/Functions.hpp b/include/af/autograd/Functions.hpp index 0985ae8..8bb5edb 100644 --- a/include/af/autograd/Functions.hpp +++ b/include/af/autograd/Functions.hpp @@ -15,22 +15,22 @@ namespace af { Variable operator +(const Variable lhs, const Variable rhs) { - auto result = lhs.getData() + rhs.getData(); - auto backward = [](std::vector inputs, Variable grad_output) { + auto result = lhs.array() + rhs.array(); + auto grad_func = [](std::vector inputs, Variable grad_output) { inputs[0].addGrad(grad_output); inputs[1].addGrad(grad_output); }; - return Variable(result, {lhs, rhs}, backward); + return Variable(result, {lhs, rhs}, grad_func); } Variable operator *(const Variable lhs, const Variable rhs) { - auto result = lhs.getData() * rhs.getData(); - auto backward = [](std::vector inputs, Variable grad_output) { + auto result = lhs.array() * rhs.array(); + auto grad_func = [](std::vector inputs, Variable grad_output) { inputs[0].addGrad(grad_output * inputs[1]); inputs[1].addGrad(grad_output * inputs[0]); }; - return Variable(result, {lhs, rhs}, backward); + return Variable(result, {lhs, rhs}, grad_func); } } diff --git a/include/af/autograd/Grad.hpp b/include/af/autograd/Grad.hpp index e1c2114..2e1d4e9 100644 --- a/include/af/autograd/Grad.hpp +++ b/include/af/autograd/Grad.hpp @@ -18,7 +18,7 @@ namespace af { var.addGrad(grad); Variable::DAG_t dag = var.build(); for (auto iter = dag.rbegin(); iter != dag.rend(); iter++) { - iter->backward(); + iter->calcGradInputs(); } } } diff --git a/include/af/autograd/Variable.hpp b/include/af/autograd/Variable.hpp index 9b25b86..143b530 
100644 --- a/include/af/autograd/Variable.hpp +++ b/include/af/autograd/Variable.hpp @@ -28,80 +28,44 @@ namespace af { class Variable { public: - typedef std::function, Variable)> BackwardFunc_t; + typedef std::function, Variable)> GradFunc_t; typedef std::unordered_map Cache_t; typedef std::vector DAG_t; private: - class Shared { - public: + struct Shared { Shared() : + m_calc_grad(true), m_data(), m_inputs(), m_grads(), - m_backward(nullptr) + m_grad_func(nullptr) {} - Shared(af::array data) : + Shared(af::array data, bool calc_grad) : + m_calc_grad(calc_grad), m_data(data), m_inputs(), m_grads(), - m_backward(nullptr) + m_grad_func(nullptr) {} - Shared(af::array data, std::vector inputs, BackwardFunc_t backward) : + Shared(af::array data, + std::vector inputs, + GradFunc_t grad_func, + bool calc_grad) : + m_calc_grad(calc_grad), m_data(data), m_inputs(inputs.begin(), inputs.end()), m_grads(), - m_backward(backward) + m_grad_func(grad_func) {} - af::array getData() const - { - return m_data; - } - - Variable getGrad() const - { - if (m_grads.size() == 0) { - throw std::runtime_error("Gradient hasn't been calculated"); - } - return m_grads[0]; - } - - void addGrad(Variable grad) - { - m_grads.push_back(grad); - } - - std::vector getInputs() - { - return m_inputs; - } - - void evalGrad() - { - if (m_grads.size() == 1) return; - Variable grad = m_grads[0]; - for (int i = 1; i < (int)m_grads.size(); i++) { - grad = grad + m_grads[i]; - } - grad.getData().eval(); - m_grads.clear(); - m_grads.push_back(grad); - } - - void backward() - { - this->evalGrad(); - if (m_backward) m_backward(m_inputs, m_grads[0]); - } - - private: + bool m_calc_grad; af::array m_data; std::vector m_inputs; std::vector m_grads; - BackwardFunc_t m_backward; + GradFunc_t m_grad_func; }; public: @@ -111,62 +75,106 @@ namespace af { { } - Variable(af::array data) : - m_shared(new Shared(data)) + Variable(af::array data, bool calc_grad) : + m_shared(new Shared(data, calc_grad)) {} Variable(af::array data, std::vector inputs, - BackwardFunc_t backward) : - m_shared(new Shared(data, inputs, backward)) - {} + GradFunc_t grad_func) : + m_shared(nullptr) + { + bool calc_grad = false; + for (auto input : inputs) { + calc_grad |= input.isCalcGrad(); + } + if (calc_grad) { + m_shared = std::shared_ptr(new Shared(data, inputs, grad_func, true)); + } else { + m_shared = std::shared_ptr(new Shared(data, false)); + } + } + + af::array array() const + { + return m_shared->m_data; + } - af::array getData() const + Variable grad() const { - return m_shared->getData(); + if (!m_shared->m_calc_grad) { + throw af::exception("Gradient calclation disabled."); + } + if (m_shared->m_grads.size() == 0) { + throw af::exception("Gradient hasn't been calculated yet."); + } + return m_shared->m_grads[0]; } - Variable getGrad() const + bool isCalcGrad() { - return m_shared->getGrad(); + return m_shared->m_calc_grad; + } + + void setCalcGrad(bool calc_grad) + { + m_shared->m_calc_grad = calc_grad; + if (!calc_grad) { + m_shared->m_grad_func = nullptr; + m_shared->m_inputs.clear(); + m_shared->m_grads.clear(); + } } void addGrad(Variable child_grad) { - m_shared->addGrad(child_grad); + if (m_shared->m_calc_grad) { + m_shared->m_grads.push_back(child_grad); + } } std::vector getInputs() const { - return m_shared->getInputs(); + return m_shared->m_inputs; } void evalGrad() { - m_shared->evalGrad(); + // Flag asking not to calculate gradients + if (!m_shared->m_calc_grad) return; + Variable grad = m_shared->m_grads[0]; + for (unsigned i = 1; i < 
m_shared->m_grads.size(); i++) { + grad = grad + m_shared->m_grads[i]; + } + grad.array().eval(); + m_shared->m_grads.clear(); + m_shared->m_grads.push_back(grad); } - void backward() + void calcGradInputs() { - m_shared->backward(); + evalGrad(); + if (m_shared->m_grad_func) { + m_shared->m_grad_func(m_shared->m_inputs, m_shared->m_grads[0]); + } } DAG_t build() { Cache_t cache; DAG_t dag; - this->buildGraph(cache, dag); + this->buildSubGraph(cache, dag); return dag; } - void buildGraph(Cache_t &cache, DAG_t &dag) + void buildSubGraph(Cache_t &cache, DAG_t &dag) { std::ptrdiff_t id = (std::ptrdiff_t)m_shared.get(); if (cache.find(id) != cache.end()) { return; } - for (auto input : m_shared->getInputs()) { - input.buildGraph(cache, dag); + for (auto input : m_shared->m_inputs) { + input.buildSubGraph(cache, dag); } cache[id] = true; dag.push_back(*this); From 7bb0b6c5ba149580f02cb15dbd7a6bc41bdfc47d Mon Sep 17 00:00:00 2001 From: Pavan Yalamanchili Date: Tue, 4 Jul 2017 17:18:39 -0700 Subject: [PATCH 06/20] Changing autograd::backward function to Variable::backward method --- examples/autograd.cpp | 7 +++---- include/af/autograd.h | 1 - include/af/autograd/Grad.hpp | 26 -------------------------- include/af/autograd/Variable.hpp | 9 +++++++++ 4 files changed, 12 insertions(+), 31 deletions(-) delete mode 100644 include/af/autograd/Grad.hpp diff --git a/examples/autograd.cpp b/examples/autograd.cpp index c37c0a4..244c2b0 100644 --- a/examples/autograd.cpp +++ b/examples/autograd.cpp @@ -10,7 +10,6 @@ #include using af::autograd::Variable; -using af::autograd::backward; void test1() { auto x = Variable(af::randu(5), true); @@ -18,7 +17,7 @@ void test1() auto y = x * x; af_print(y.array()); auto dy = Variable(af::constant(1.0, 5), false); - backward(y, dy); + y.backward(dy); auto dx = x.grad(); af_print(dx.array() - 2 * x.array()); } @@ -31,7 +30,7 @@ void test2() af_print(y.array()); auto z = x * x + x * y + y * y; auto dz = Variable(af::constant(1.0, 5), false); - backward(z, dz); + z.backward(dz); auto dx = x.grad(); auto dy = y.grad(); af_print(dx.array() - 2 * x.array() - y.array()); @@ -46,7 +45,7 @@ void test3() af_print(y.array()); auto z = x * x + x * y + y * y; auto dz = Variable(af::constant(1.0, 5), false); - backward(z, dz); + z.backward(dz); auto dy = y.grad(); af_print(dy.array() - 2 * y.array() - x.array()); try { diff --git a/include/af/autograd.h b/include/af/autograd.h index e85625c..ceda2b1 100644 --- a/include/af/autograd.h +++ b/include/af/autograd.h @@ -8,4 +8,3 @@ ********************************************************/ #include #include -#include diff --git a/include/af/autograd/Grad.hpp b/include/af/autograd/Grad.hpp deleted file mode 100644 index 2e1d4e9..0000000 --- a/include/af/autograd/Grad.hpp +++ /dev/null @@ -1,26 +0,0 @@ -/******************************************************* - * Copyright (c) 2017, ArrayFire - * All rights reserved. - * - * This file is distributed under 3-clause BSD license. 
- * The complete license agreement can be obtained at: - * http://arrayfire.com/licenses/BSD-3-Clause - ********************************************************/ -#pragma once - -#include - -namespace af { - namespace autograd { - - void backward(Variable var, Variable grad) - { - var.addGrad(grad); - Variable::DAG_t dag = var.build(); - for (auto iter = dag.rbegin(); iter != dag.rend(); iter++) { - iter->calcGradInputs(); - } - } - } - namespace ag = autograd; -} diff --git a/include/af/autograd/Variable.hpp b/include/af/autograd/Variable.hpp index 143b530..ea8bd4b 100644 --- a/include/af/autograd/Variable.hpp +++ b/include/af/autograd/Variable.hpp @@ -159,6 +159,15 @@ namespace af { } } + void backward(Variable grad) + { + this->addGrad(grad); + DAG_t dag = this->build(); + for (auto iter = dag.rbegin(); iter != dag.rend(); iter++) { + iter->calcGradInputs(); + } + } + DAG_t build() { Cache_t cache; From a316af0727a7aeeadf9018d7c4bc946f766d066a Mon Sep 17 00:00:00 2001 From: Pavan Yalamanchili Date: Tue, 4 Jul 2017 22:11:42 -0700 Subject: [PATCH 07/20] Moving autograd from header only lib to a compiled lib --- CMakeLists.txt | 31 +++++- examples/CMakeLists.txt | 7 +- include/af/autograd/Functions.hpp | 25 +---- include/af/autograd/Variable.hpp | 172 +++++------------------------- src/autograd/Functions.cpp | 37 +++++++ src/autograd/Variable.cpp | 157 +++++++++++++++++++++++++++ 6 files changed, 253 insertions(+), 176 deletions(-) create mode 100644 src/autograd/Functions.cpp create mode 100644 src/autograd/Variable.cpp diff --git a/CMakeLists.txt b/CMakeLists.txt index 104d635..699baf6 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,9 +1,36 @@ -cmake_minimum_required(VERSION 3.5.2) +cmake_minimum_required(VERSION 3.5.1) project(ArrayFireML VERSION 0.1.0 LANGUAGES C CXX) find_package(ArrayFire REQUIRED) -set(ArrayFireML_INCLUDE_DIRS ${CMAKE_CURRENT_SOURCE_DIR}/include) + +add_library(afml SHARED "") + +target_sources(afml + PRIVATE + src/autograd/Variable.cpp + src/autograd/Functions.cpp + ) + +target_include_directories(afml + PUBLIC + ${ArrayFire_INCLUDE_DIRS} + ${CMAKE_CURRENT_SOURCE_DIR}/include + ) + +target_link_libraries(afml + PUBLIC + af + ) + +set_target_properties(afml + PROPERTIES + VERSION "${ArrayFireML_VERSION}" + SOVERSION "${ArrayFireML_VERSION_MAJOR}" + CXX_STANDARD 11 + ) + + add_subdirectory(examples) diff --git a/examples/CMakeLists.txt b/examples/CMakeLists.txt index 9a39b82..6e92389 100644 --- a/examples/CMakeLists.txt +++ b/examples/CMakeLists.txt @@ -2,14 +2,9 @@ function(build_example SRC) get_filename_component(src_name ${SRC} NAME_WE) set(target "${src_name}") add_executable(${target} ${SRC}) - target_include_directories(${target} - PRIVATE - ${ArrayFire_INCLUDE_DIRS} - ${ArrayFireML_INCLUDE_DIRS} - ) target_link_libraries(${target} PRIVATE - af + afml ) target_compile_features(${target} PRIVATE cxx_range_for) diff --git a/include/af/autograd/Functions.hpp b/include/af/autograd/Functions.hpp index 8bb5edb..902a9f6 100644 --- a/include/af/autograd/Functions.hpp +++ b/include/af/autograd/Functions.hpp @@ -8,31 +8,12 @@ ********************************************************/ #pragma once -#include - namespace af { namespace autograd { - Variable operator +(const Variable lhs, const Variable rhs) - { - auto result = lhs.array() + rhs.array(); - auto grad_func = [](std::vector inputs, Variable grad_output) { - inputs[0].addGrad(grad_output); - inputs[1].addGrad(grad_output); - }; - return Variable(result, {lhs, rhs}, grad_func); - } - - Variable operator 
*(const Variable lhs, const Variable rhs) - { - auto result = lhs.array() * rhs.array(); - auto grad_func = [](std::vector inputs, Variable grad_output) { - inputs[0].addGrad(grad_output * inputs[1]); - inputs[1].addGrad(grad_output * inputs[0]); - }; - return Variable(result, {lhs, rhs}, grad_func); - } + class Variable; + Variable operator +(const Variable lhs, const Variable rhs); + Variable operator *(const Variable lhs, const Variable rhs); } - namespace ag = autograd; } diff --git a/include/af/autograd/Variable.hpp b/include/af/autograd/Variable.hpp index ea8bd4b..5d7ca36 100644 --- a/include/af/autograd/Variable.hpp +++ b/include/af/autograd/Variable.hpp @@ -14,17 +14,11 @@ #include #include #include -#include #include namespace af { namespace autograd { - - // Forward declare the function - class Variable; - Variable operator +(const Variable lhs, const Variable rhs); - class Variable { public: @@ -34,32 +28,12 @@ namespace af { private: struct Shared { - Shared() : - m_calc_grad(true), - m_data(), - m_inputs(), - m_grads(), - m_grad_func(nullptr) - {} - - Shared(af::array data, bool calc_grad) : - m_calc_grad(calc_grad), - m_data(data), - m_inputs(), - m_grads(), - m_grad_func(nullptr) - {} - + Shared(); + Shared(af::array data, bool calc_grad); Shared(af::array data, std::vector inputs, GradFunc_t grad_func, - bool calc_grad) : - m_calc_grad(calc_grad), - m_data(data), - m_inputs(inputs.begin(), inputs.end()), - m_grads(), - m_grad_func(grad_func) - {} + bool calc_grad); bool m_calc_grad; af::array m_data; @@ -70,127 +44,33 @@ namespace af { public: - Variable() : - m_shared(new Shared()) - { - } - - Variable(af::array data, bool calc_grad) : - m_shared(new Shared(data, calc_grad)) - {} - + Variable(); + Variable(af::array data, bool calc_grad); Variable(af::array data, std::vector inputs, - GradFunc_t grad_func) : - m_shared(nullptr) - { - bool calc_grad = false; - for (auto input : inputs) { - calc_grad |= input.isCalcGrad(); - } - if (calc_grad) { - m_shared = std::shared_ptr(new Shared(data, inputs, grad_func, true)); - } else { - m_shared = std::shared_ptr(new Shared(data, false)); - } - } - - af::array array() const - { - return m_shared->m_data; - } - - Variable grad() const - { - if (!m_shared->m_calc_grad) { - throw af::exception("Gradient calclation disabled."); - } - if (m_shared->m_grads.size() == 0) { - throw af::exception("Gradient hasn't been calculated yet."); - } - return m_shared->m_grads[0]; - } - - bool isCalcGrad() - { - return m_shared->m_calc_grad; - } - - void setCalcGrad(bool calc_grad) - { - m_shared->m_calc_grad = calc_grad; - if (!calc_grad) { - m_shared->m_grad_func = nullptr; - m_shared->m_inputs.clear(); - m_shared->m_grads.clear(); - } - } - - void addGrad(Variable child_grad) - { - if (m_shared->m_calc_grad) { - m_shared->m_grads.push_back(child_grad); - } - } - - std::vector getInputs() const - { - return m_shared->m_inputs; - } - - void evalGrad() - { - // Flag asking not to calculate gradients - if (!m_shared->m_calc_grad) return; - Variable grad = m_shared->m_grads[0]; - for (unsigned i = 1; i < m_shared->m_grads.size(); i++) { - grad = grad + m_shared->m_grads[i]; - } - grad.array().eval(); - m_shared->m_grads.clear(); - m_shared->m_grads.push_back(grad); - } - - void calcGradInputs() - { - evalGrad(); - if (m_shared->m_grad_func) { - m_shared->m_grad_func(m_shared->m_inputs, m_shared->m_grads[0]); - } - } - - void backward(Variable grad) - { - this->addGrad(grad); - DAG_t dag = this->build(); - for (auto iter = dag.rbegin(); iter != 
dag.rend(); iter++) { - iter->calcGradInputs(); - } - } - - DAG_t build() - { - Cache_t cache; - DAG_t dag; - this->buildSubGraph(cache, dag); - return dag; - } - - void buildSubGraph(Cache_t &cache, DAG_t &dag) - { - std::ptrdiff_t id = (std::ptrdiff_t)m_shared.get(); - if (cache.find(id) != cache.end()) { - return; - } - for (auto input : m_shared->m_inputs) { - input.buildSubGraph(cache, dag); - } - cache[id] = true; - dag.push_back(*this); - } + GradFunc_t grad_func); + + af::array array() const; + + Variable grad() const; + + bool isCalcGrad(); + + void setCalcGrad(bool calc_grad); + + void addGrad(Variable child_grad); + + void evalGrad(); + + void calcGradInputs(); + + void backward(Variable grad); + + DAG_t build(); + + void buildSubGraph(Cache_t &cache, DAG_t &dag); private: std::shared_ptr m_shared; }; } - namespace ag = autograd; } diff --git a/src/autograd/Functions.cpp b/src/autograd/Functions.cpp new file mode 100644 index 0000000..d24a78d --- /dev/null +++ b/src/autograd/Functions.cpp @@ -0,0 +1,37 @@ +/******************************************************* + * Copyright (c) 2017, ArrayFire + * All rights reserved. + * + * This file is distributed under 3-clause BSD license. + * The complete license agreement can be obtained at: + * http://arrayfire.com/licenses/BSD-3-Clause + ********************************************************/ + +#include +#include + +namespace af { + namespace autograd { + + Variable operator +(const Variable lhs, const Variable rhs) + { + auto result = lhs.array() + rhs.array(); + auto grad_func = [](std::vector inputs, Variable grad_output) { + inputs[0].addGrad(grad_output); + inputs[1].addGrad(grad_output); + }; + return Variable(result, {lhs, rhs}, grad_func); + } + + Variable operator *(const Variable lhs, const Variable rhs) + { + auto result = lhs.array() * rhs.array(); + auto grad_func = [](std::vector inputs, Variable grad_output) { + inputs[0].addGrad(grad_output * inputs[1]); + inputs[1].addGrad(grad_output * inputs[0]); + }; + return Variable(result, {lhs, rhs}, grad_func); + } + + } +} diff --git a/src/autograd/Variable.cpp b/src/autograd/Variable.cpp new file mode 100644 index 0000000..118e199 --- /dev/null +++ b/src/autograd/Variable.cpp @@ -0,0 +1,157 @@ +/******************************************************* + * Copyright (c) 2017, ArrayFire + * All rights reserved. + * + * This file is distributed under 3-clause BSD license. 
+ * The complete license agreement can be obtained at: + * http://arrayfire.com/licenses/BSD-3-Clause + ********************************************************/ + +#include +#include + +namespace af { + namespace autograd { + + Variable::Shared::Shared() : + m_calc_grad(true), + m_data(), + m_inputs(), + m_grads(), + m_grad_func(nullptr) + {} + + Variable::Shared::Shared(af::array data, bool calc_grad) : + m_calc_grad(calc_grad), + m_data(data), + m_inputs(), + m_grads(), + m_grad_func(nullptr) + {} + + Variable::Shared::Shared(af::array data, + std::vector inputs, + GradFunc_t grad_func, + bool calc_grad) : + m_calc_grad(calc_grad), + m_data(data), + m_inputs(inputs.begin(), inputs.end()), + m_grads(), + m_grad_func(grad_func) + {} + + Variable::Variable() : + m_shared(new Shared()) + { + } + + Variable::Variable(af::array data, bool calc_grad) : + m_shared(new Shared(data, calc_grad)) + {} + + Variable::Variable(af::array data, + std::vector inputs, + GradFunc_t grad_func) : + m_shared(nullptr) + { + bool calc_grad = false; + for (auto input : inputs) { + calc_grad |= input.isCalcGrad(); + } + if (calc_grad) { + m_shared = std::shared_ptr(new Shared(data, inputs, grad_func, true)); + } else { + m_shared = std::shared_ptr(new Shared(data, false)); + } + } + + af::array Variable::array() const + { + return m_shared->m_data; + } + + Variable Variable::grad() const + { + if (!m_shared->m_calc_grad) { + throw af::exception("Gradient calclation disabled."); + } + if (m_shared->m_grads.size() == 0) { + throw af::exception("Gradient hasn't been calculated yet."); + } + return m_shared->m_grads[0]; + } + + bool Variable::isCalcGrad() + { + return m_shared->m_calc_grad; + } + + void Variable::setCalcGrad(bool calc_grad) + { + m_shared->m_calc_grad = calc_grad; + if (!calc_grad) { + m_shared->m_grad_func = nullptr; + m_shared->m_inputs.clear(); + m_shared->m_grads.clear(); + } + } + + void Variable::addGrad(Variable child_grad) + { + if (m_shared->m_calc_grad) { + m_shared->m_grads.push_back(child_grad); + } + } + + void Variable::evalGrad() + { + // Flag asking not to calculate gradients + if (!m_shared->m_calc_grad) return; + Variable grad = m_shared->m_grads[0]; + for (unsigned i = 1; i < m_shared->m_grads.size(); i++) { + grad = grad + m_shared->m_grads[i]; + } + grad.array().eval(); + m_shared->m_grads.clear(); + m_shared->m_grads.push_back(grad); + } + + void Variable::calcGradInputs() + { + evalGrad(); + if (m_shared->m_grad_func) { + m_shared->m_grad_func(m_shared->m_inputs, m_shared->m_grads[0]); + } + } + + void Variable::backward(Variable grad) + { + this->addGrad(grad); + Variable::DAG_t dag = this->build(); + for (auto iter = dag.rbegin(); iter != dag.rend(); iter++) { + iter->calcGradInputs(); + } + } + + Variable::DAG_t Variable::build() + { + Cache_t cache; + Variable::DAG_t dag; + this->buildSubGraph(cache, dag); + return dag; + } + + void Variable::buildSubGraph(Cache_t &cache, Variable::DAG_t &dag) + { + std::ptrdiff_t id = (std::ptrdiff_t)m_shared.get(); + if (cache.find(id) != cache.end()) { + return; + } + for (auto input : m_shared->m_inputs) { + input.buildSubGraph(cache, dag); + } + cache[id] = true; + dag.push_back(*this); + } + } +} From d7edafc6e896331bee7982deb6d297f6783c61c9 Mon Sep 17 00:00:00 2001 From: Pavan Yalamanchili Date: Tue, 4 Jul 2017 23:36:53 -0700 Subject: [PATCH 08/20] Adding negate, reciprocal, subtract and divide --- examples/autograd.cpp | 62 +++++++++++++++++++++++-------- include/af/autograd/Functions.hpp | 9 ++++- include/af/autograd/Variable.hpp | 
20 +++++----- src/autograd/Functions.cpp | 49 ++++++++++++++++++++++-- src/autograd/Variable.cpp | 36 ++++++++++-------- 5 files changed, 128 insertions(+), 48 deletions(-) diff --git a/examples/autograd.cpp b/examples/autograd.cpp index 244c2b0..40b1dc9 100644 --- a/examples/autograd.cpp +++ b/examples/autograd.cpp @@ -9,57 +9,87 @@ #include <af/autograd.h> +#define VERIFY(VAL) do { \ + auto res = af::allTrue<bool>(af::abs(VAL) < 1E-5); \ + printf("%s:%d %s\n", __FUNCTION__, __LINE__, \ + res ? "PASS" : "FAIL"); \ + } while(0) + using af::autograd::Variable; -void test1() +void test_multiply() { auto x = Variable(af::randu(5), true); - af_print(x.array()); auto y = x * x; - af_print(y.array()); auto dy = Variable(af::constant(1.0, 5), false); y.backward(dy); auto dx = x.grad(); - af_print(dx.array() - 2 * x.array()); + VERIFY(dx.array() - 2 * x.array()); } -void test2() +void test_multipl_add() { auto x = Variable(af::randu(5), true); - af_print(x.array()); auto y = Variable(af::randu(5), true); - af_print(y.array()); auto z = x * x + x * y + y * y; auto dz = Variable(af::constant(1.0, 5), false); z.backward(dz); auto dx = x.grad(); auto dy = y.grad(); - af_print(dx.array() - 2 * x.array() - y.array()); - af_print(dy.array() - 2 * y.array() - x.array()); + VERIFY(dx.array() - 2 * x.array() - y.array()); + VERIFY(dy.array() - 2 * y.array() - x.array()); } -void test3() +void test_no_calc_grad() { auto x = Variable(af::randu(5), false); - af_print(x.array()); auto y = Variable(af::randu(5), true); - af_print(y.array()); auto z = x * x + x * y + y * y; auto dz = Variable(af::constant(1.0, 5), false); z.backward(dz); auto dy = y.grad(); - af_print(dy.array() - 2 * y.array() - x.array()); + VERIFY(dy.array() - 2 * y.array() - x.array()); try { auto dx = x.grad(); } catch(af::exception &ex) { std::cout << ex.what() << std::endl; + return; } + printf("%s:%d FAIL (expected exception was not thrown)\n", __FUNCTION__, __LINE__); +} + +void test_multiply_sub() +{ + auto x = Variable(af::randu(5), true); + auto y = Variable(af::randu(5), true); + auto z = x * x - x * y; + auto dz = Variable(af::constant(1.0, 5), false); + z.backward(dz); + auto dx = x.grad(); + auto dy = y.grad(); + VERIFY(dx.array() - (2 * x.array() - y.array())); + VERIFY(dy.array() - (-x.array())); +} + +void test_divide_add() +{ + auto x = Variable(af::randu(5), true); + auto y = Variable(af::randu(5), true); + auto z = x + x / y + y; + auto dz = Variable(af::constant(1.0, 5), false); + z.backward(dz); + auto dx = x.grad(); + auto dy = y.grad(); + VERIFY(dx.array() - (1.0 + 1.0 / y.array())); + VERIFY(dy.array() - (1.0 - x.array() / (y.array() * y.array()))); } int main() { af::info(); - test1(); - test2(); - test3(); + test_multiply(); + test_multipl_add(); + test_no_calc_grad(); + test_multiply_sub(); + test_divide_add(); return 0; } diff --git a/include/af/autograd/Functions.hpp b/include/af/autograd/Functions.hpp index 902a9f6..2fa493b 100644 --- a/include/af/autograd/Functions.hpp +++ b/include/af/autograd/Functions.hpp @@ -13,7 +13,12 @@ namespace af { class Variable; - Variable operator +(const Variable lhs, const Variable rhs); - Variable operator *(const Variable lhs, const Variable rhs); + Variable operator +(const Variable &lhs, const Variable &rhs); + Variable operator *(const Variable &lhs, const Variable &rhs); + Variable operator -(const Variable &lhs, const Variable &rhs); + Variable operator /(const Variable &lhs, const Variable &rhs); + + Variable negate(const Variable &input); + Variable reciprocal(const Variable &input); } } diff --git a/include/af/autograd/Variable.hpp 
b/include/af/autograd/Variable.hpp index 5d7ca36..74dd61e 100644 --- a/include/af/autograd/Variable.hpp +++ b/include/af/autograd/Variable.hpp @@ -22,16 +22,16 @@ namespace af { class Variable { public: - typedef std::function, Variable)> GradFunc_t; + typedef std::function &, const Variable &)> GradFunc_t; typedef std::unordered_map Cache_t; typedef std::vector DAG_t; private: struct Shared { Shared(); - Shared(af::array data, bool calc_grad); - Shared(af::array data, - std::vector inputs, + Shared(const af::array &data, bool calc_grad); + Shared(const af::array &data, + const std::vector &inputs, GradFunc_t grad_func, bool calc_grad); @@ -45,26 +45,26 @@ namespace af { public: Variable(); - Variable(af::array data, bool calc_grad); - Variable(af::array data, - std::vector inputs, + Variable(const af::array &data, bool calc_grad); + Variable(const af::array &data, + const std::vector &inputs, GradFunc_t grad_func); af::array array() const; Variable grad() const; - bool isCalcGrad(); + bool isCalcGrad() const; void setCalcGrad(bool calc_grad); - void addGrad(Variable child_grad); + void addGrad(const Variable &child_grad); void evalGrad(); void calcGradInputs(); - void backward(Variable grad); + void backward(const Variable &grad); DAG_t build(); diff --git a/src/autograd/Functions.cpp b/src/autograd/Functions.cpp index d24a78d..e6692ad 100644 --- a/src/autograd/Functions.cpp +++ b/src/autograd/Functions.cpp @@ -13,25 +13,66 @@ namespace af { namespace autograd { - Variable operator +(const Variable lhs, const Variable rhs) + Variable operator +(const Variable &lhs, const Variable &rhs) { auto result = lhs.array() + rhs.array(); - auto grad_func = [](std::vector inputs, Variable grad_output) { + auto grad_func = [](std::vector &inputs, const Variable &grad_output) { inputs[0].addGrad(grad_output); inputs[1].addGrad(grad_output); }; return Variable(result, {lhs, rhs}, grad_func); } - Variable operator *(const Variable lhs, const Variable rhs) + Variable operator -(const Variable &lhs, const Variable &rhs) + { + auto result = lhs.array() - rhs.array(); + auto grad_func = [](std::vector &inputs, const Variable &grad_output) { + inputs[0].addGrad(grad_output); + inputs[1].addGrad(negate(grad_output)); + }; + return Variable(result, {lhs, rhs}, grad_func); + } + + Variable operator *(const Variable &lhs, const Variable &rhs) { auto result = lhs.array() * rhs.array(); - auto grad_func = [](std::vector inputs, Variable grad_output) { + auto grad_func = [](std::vector &inputs, const Variable &grad_output) { inputs[0].addGrad(grad_output * inputs[1]); inputs[1].addGrad(grad_output * inputs[0]); }; return Variable(result, {lhs, rhs}, grad_func); } + Variable operator /(const Variable &lhs, const Variable &rhs) + { + auto result = lhs.array() / rhs.array(); + auto grad_func = [](std::vector &inputs, const Variable &grad_output) { + auto inputs_1_rec = reciprocal(inputs[1]); + auto grad_input_0 = grad_output * inputs_1_rec; + inputs[0].addGrad(grad_input_0); + inputs[1].addGrad(grad_input_0 * negate(inputs[0]) * inputs_1_rec); + }; + return Variable(result, {lhs, rhs}, grad_func); + } + + Variable negate(const Variable &input) + { + auto result = 0.0 - input.array(); + auto grad_func = [](std::vector &inputs, const Variable &grad_output) { + inputs[0].addGrad(negate(grad_output)); + }; + return Variable(result, {input}, grad_func); + } + + Variable reciprocal(const Variable &input) + { + auto result = 1.0 / input.array(); + auto grad_func = [](std::vector &inputs, const Variable &grad_output) { + auto 
res = reciprocal(inputs[0]); + inputs[0].addGrad(negate(grad_output) * res * res); + }; + return Variable(result, {input}, grad_func); + } + } } diff --git a/src/autograd/Variable.cpp b/src/autograd/Variable.cpp index 118e199..3a7a41b 100644 --- a/src/autograd/Variable.cpp +++ b/src/autograd/Variable.cpp @@ -21,7 +21,7 @@ namespace af { m_grad_func(nullptr) {} - Variable::Shared::Shared(af::array data, bool calc_grad) : + Variable::Shared::Shared(const af::array &data, bool calc_grad) : m_calc_grad(calc_grad), m_data(data), m_inputs(), @@ -29,8 +29,8 @@ namespace af { m_grad_func(nullptr) {} - Variable::Shared::Shared(af::array data, - std::vector<Variable> inputs, + Variable::Shared::Shared(const af::array &data, + const std::vector<Variable> &inputs, GradFunc_t grad_func, bool calc_grad) : m_calc_grad(calc_grad), @@ -45,13 +45,13 @@ namespace af { { } - Variable::Variable(af::array data, bool calc_grad) : + Variable::Variable(const af::array &data, bool calc_grad) : m_shared(new Shared(data, calc_grad)) {} - Variable::Variable(af::array data, - std::vector<Variable> inputs, - GradFunc_t grad_func) : + Variable::Variable(const af::array &data, + const std::vector<Variable> &inputs, + GradFunc_t grad_func) : m_shared(nullptr) { bool calc_grad = false; @@ -81,7 +81,7 @@ namespace af { return m_shared->m_grads[0]; } - bool Variable::isCalcGrad() + bool Variable::isCalcGrad() const { return m_shared->m_calc_grad; } @@ -96,7 +96,7 @@ namespace af { } } - void Variable::addGrad(Variable child_grad) + void Variable::addGrad(const Variable &child_grad) { if (m_shared->m_calc_grad) { m_shared->m_grads.push_back(child_grad); @@ -107,13 +107,17 @@ { // Flag asking not to calculate gradients if (!m_shared->m_calc_grad) return; - Variable grad = m_shared->m_grads[0]; - for (unsigned i = 1; i < m_shared->m_grads.size(); i++) { - grad = grad + m_shared->m_grads[i]; + + // Best not to evaluate the JIT immediately if there's only a single gradient + if (m_shared->m_grads.size() > 1) { + Variable grad = m_shared->m_grads[0]; + for (unsigned i = 1; i < m_shared->m_grads.size(); i++) { + grad = grad + m_shared->m_grads[i]; + } + grad.array().eval(); + m_shared->m_grads.clear(); + m_shared->m_grads.push_back(grad); } - grad.array().eval(); - m_shared->m_grads.clear(); - m_shared->m_grads.push_back(grad); } void Variable::calcGradInputs() @@ -124,7 +128,7 @@ namespace af { } } - void Variable::backward(Variable grad) + void Variable::backward(const Variable &grad) { this->addGrad(grad); Variable::DAG_t dag = this->build(); From 664cf7c3ea5a2fd1f609e095fef38493103c29f9 Mon Sep 17 00:00:00 2001 From: Pavan Yalamanchili Date: Tue, 4 Jul 2017 23:48:31 -0700 Subject: [PATCH 09/20] Add scalar support for operators --- examples/autograd.cpp | 14 ++++++++++++++ include/af/autograd/Functions.hpp | 10 ++++++++++ src/autograd/Functions.cpp | 24 ++++++++++++++++++++++++ 3 files changed, 48 insertions(+) diff --git a/examples/autograd.cpp b/examples/autograd.cpp index 40b1dc9..2286233 100644 --- a/examples/autograd.cpp +++ b/examples/autograd.cpp @@ -83,6 +83,19 @@ void test_divide_add() VERIFY(dy.array() - (1.0 - x.array() / (y.array() * y.array()))); } +void test_multiply_add_scalar() +{ + auto x = Variable(af::randu(5), true); + auto y = Variable(af::randu(5), true); + auto z = 2 * x + x * y + y; + auto dz = Variable(af::constant(1.0, 5), false); + z.backward(dz); + auto dx = x.grad(); + auto dy = y.grad(); + VERIFY(dx.array() - (2.0 + y.array())); + VERIFY(dy.array() - (1.0 + x.array())); +} + int main() { af::info(); @@ -91,5 +104,6 @@ int
main() test_no_calc_grad(); test_multiply_sub(); test_divide_add(); + test_multiply_add_scalar(); return 0; } diff --git a/include/af/autograd/Functions.hpp b/include/af/autograd/Functions.hpp index 2fa493b..d4203cd 100644 --- a/include/af/autograd/Functions.hpp +++ b/include/af/autograd/Functions.hpp @@ -18,6 +18,16 @@ namespace af { Variable operator -(const Variable &lhs, const Variable &rhs); Variable operator /(const Variable &lhs, const Variable &rhs); + Variable operator +(const double &lhs, const Variable &rhs); + Variable operator *(const double &lhs, const Variable &rhs); + Variable operator -(const double &lhs, const Variable &rhs); + Variable operator /(const double &lhs, const Variable &rhs); + + Variable operator +(const Variable &lhs, const double &rhs); + Variable operator *(const Variable &lhs, const double &rhs); + Variable operator -(const Variable &lhs, const double &rhs); + Variable operator /(const Variable &lhs, const double &rhs); + Variable negate(const Variable &input); Variable reciprocal(const Variable &input); } diff --git a/src/autograd/Functions.cpp b/src/autograd/Functions.cpp index e6692ad..55c3d33 100644 --- a/src/autograd/Functions.cpp +++ b/src/autograd/Functions.cpp @@ -74,5 +74,29 @@ namespace af { return Variable(result, {input}, grad_func); } + +#define INSTANTIATE_OPERATOR(OP) \ + Variable operator OP(const double &lhs_val, const Variable &rhs) \ + { \ + auto lhs = Variable( \ + af::constant(lhs_val, \ + rhs.array().dims(), \ + rhs.array().type()), \ + false); \ + return lhs OP rhs; \ + } \ + Variable operator OP(const Variable &lhs, const double &rhs_val) \ + { \ + auto rhs = Variable( \ + af::constant(rhs_val, \ + lhs.array().dims(), lhs.array().type()), \ + false); \ + return lhs OP rhs; \ + } \ + + INSTANTIATE_OPERATOR(+) + INSTANTIATE_OPERATOR(-) + INSTANTIATE_OPERATOR(*) + INSTANTIATE_OPERATOR(/) } } From 45b21da2b4327fb596395b681350d066c88b34bb Mon Sep 17 00:00:00 2001 From: Pavan Yalamanchili Date: Wed, 5 Jul 2017 00:08:25 -0700 Subject: [PATCH 10/20] Adding exp, sin, cos, tanh, and sigmoid functions --- examples/autograd.cpp | 35 ++++++++++++ include/af/autograd/Functions.hpp | 6 +++ src/autograd/Functions.cpp | 89 ++++++++++++++++++++++++------- 3 files changed, 110 insertions(+), 20 deletions(-) diff --git a/examples/autograd.cpp b/examples/autograd.cpp index 2286233..8b96bca 100644 --- a/examples/autograd.cpp +++ b/examples/autograd.cpp @@ -96,6 +96,38 @@ void test_multiply_add_scalar() VERIFY(dy.array() - (1.0 + x.array())); } +void test_exp() +{ + auto x = Variable(af::randu(5), true); + auto y = exp(x); + auto dy = Variable(af::constant(1.0, 5), false); + y.backward(dy); + auto dx = x.grad(); + VERIFY(dx.array() - (af::exp(x.array()))); +} + +void test_sigmoid() +{ + auto x = Variable(af::randu(5), true); + auto y = sigmoid(x); + auto dy = Variable(af::constant(1.0, 5), false); + y.backward(dy); + auto dx = x.grad(); + VERIFY(dx.array() - (y.array() * (1 - y.array()))); + VERIFY(dx.array() - (af::sigmoid(x.array()) * (1 - af::sigmoid(x.array())))); +} + +void test_tanh() +{ + auto x = Variable(af::randu(5), true); + auto y = tanh(x); + auto dy = Variable(af::constant(1.0, 5), false); + y.backward(dy); + auto dx = x.grad(); + VERIFY(dx.array() - (1 - y.array() * y.array())); + VERIFY(dx.array() - (1 + af::tanh(x.array())) * (1 - af::tanh(x.array()))); +} + int main() { af::info(); @@ -105,5 +137,8 @@ int main() test_multiply_sub(); test_divide_add(); test_multiply_add_scalar(); + test_exp(); + test_sigmoid(); + test_tanh(); return 0; } 
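The three tests above check the chain rule against the usual closed-form derivatives: d/dx exp(x) = exp(x), d/dx sigmoid(x) = sigmoid(x) * (1 - sigmoid(x)), and d/dx tanh(x) = 1 - tanh(x)^2. The short sketch below shows how the pieces added in PATCH 09 and PATCH 10 compose for a user of the library; it is illustrative only and not part of the patch, and it assumes an umbrella header such as <af/autograd.h> exposes Variable and the functions declared above.

    // Sketch: for y = sigmoid(2 * x + 1) the expected gradient is dy/dx = 2 * y * (1 - y).
    // Uses the scalar operator overloads (PATCH 09) and sigmoid() (PATCH 10).
    #include <arrayfire.h>
    #include <af/autograd.h>   // assumed umbrella header for af::autograd

    using af::autograd::Variable;

    void sigmoid_affine_gradient()
    {
        auto x  = Variable(af::randu(5), true);
        auto y  = sigmoid(2.0 * x + 1.0);
        auto dy = Variable(af::constant(1.0, 5), false);
        y.backward(dy);
        // Should print values that are numerically zero.
        af_print(x.grad().array() - 2.0 * y.array() * (1.0 - y.array()));
    }

Because each operator and each of the new functions records its own grad_func, the chain rule for the composed expression falls out of backward() without any extra bookkeeping.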
diff --git a/include/af/autograd/Functions.hpp b/include/af/autograd/Functions.hpp index d4203cd..de45495 100644 --- a/include/af/autograd/Functions.hpp +++ b/include/af/autograd/Functions.hpp @@ -30,5 +30,11 @@ namespace af { Variable negate(const Variable &input); Variable reciprocal(const Variable &input); + + Variable exp(const Variable &input); + Variable sin(const Variable &input); + Variable cos(const Variable &input); + Variable tanh(const Variable &input); + Variable sigmoid(const Variable &input); } } diff --git a/src/autograd/Functions.cpp b/src/autograd/Functions.cpp index 55c3d33..8b7d876 100644 --- a/src/autograd/Functions.cpp +++ b/src/autograd/Functions.cpp @@ -55,26 +55,6 @@ namespace af { return Variable(result, {lhs, rhs}, grad_func); } - Variable negate(const Variable &input) - { - auto result = 0.0 - input.array(); - auto grad_func = [](std::vector &inputs, const Variable &grad_output) { - inputs[0].addGrad(negate(grad_output)); - }; - return Variable(result, {input}, grad_func); - } - - Variable reciprocal(const Variable &input) - { - auto result = 1.0 / input.array(); - auto grad_func = [](std::vector &inputs, const Variable &grad_output) { - auto res = reciprocal(inputs[0]); - inputs[0].addGrad(negate(grad_output) * res * res); - }; - return Variable(result, {input}, grad_func); - } - - #define INSTANTIATE_OPERATOR(OP) \ Variable operator OP(const double &lhs_val, const Variable &rhs) \ { \ @@ -98,5 +78,74 @@ namespace af { INSTANTIATE_OPERATOR(-) INSTANTIATE_OPERATOR(*) INSTANTIATE_OPERATOR(/) + +#undef INSTANTIATE_OPERATOR + + Variable negate(const Variable &input) + { + auto result = 0.0 - input.array(); + auto grad_func = [](std::vector &inputs, const Variable &grad_output) { + inputs[0].addGrad(negate(grad_output)); + }; + return Variable(result, {input}, grad_func); + } + + Variable reciprocal(const Variable &input) + { + auto result = 1.0 / input.array(); + auto grad_func = [](std::vector &inputs, const Variable &grad_output) { + auto res = reciprocal(inputs[0]); + inputs[0].addGrad(negate(grad_output) * res * res); + }; + return Variable(result, {input}, grad_func); + } + + Variable exp(const Variable &input) + { + auto result = exp(input.array()); + auto grad_func = [](std::vector &inputs, const Variable &grad_output) { + inputs[0].addGrad(exp(inputs[0])); + }; + return Variable(result, {input}, grad_func); + } + + Variable sin(const Variable &input) + { + auto result = sin(input.array()); + auto grad_func = [](std::vector &inputs, const Variable &grad_output) { + inputs[0].addGrad(cos(inputs[0])); + }; + return Variable(result, {input}, grad_func); + } + + Variable cos(const Variable &input) + { + auto result = cos(input.array()); + auto grad_func = [](std::vector &inputs, const Variable &grad_output) { + inputs[0].addGrad(negate(sin(inputs[0]))); + }; + return Variable(result, {input}, grad_func); + } + + Variable tanh(const Variable &input) + { + auto result = tanh(input.array()); + auto grad_func = [](std::vector &inputs, const Variable &grad_output) { + auto tmp = tanh(inputs[0]); + inputs[0].addGrad(1.0 - tmp * tmp); + }; + return Variable(result, {input}, grad_func); + } + + Variable sigmoid(const Variable &input) + { + auto result = sigmoid(input.array()); + auto grad_func = [](std::vector &inputs, const Variable &grad_output) { + auto tmp = sigmoid(inputs[0]); + inputs[0].addGrad(tmp * (1 - tmp)); + }; + return Variable(result, {input}, grad_func); + } + } } From 3b985b0670dc5711cbe002496236abc850182528 Mon Sep 17 00:00:00 2001 From: Pavan 
Yalamanchili Date: Wed, 5 Jul 2017 00:34:36 -0700 Subject: [PATCH 11/20] Adding expandAs, reduceAs, and transpose --- examples/autograd.cpp | 28 +++++++++++++++++++++++ include/af/autograd/Functions.hpp | 4 ++++ src/autograd/Functions.cpp | 37 +++++++++++++++++++++++++++++++ 3 files changed, 69 insertions(+) diff --git a/examples/autograd.cpp b/examples/autograd.cpp index 8b96bca..702992b 100644 --- a/examples/autograd.cpp +++ b/examples/autograd.cpp @@ -128,6 +128,32 @@ void test_tanh() VERIFY(dx.array() - (1 + af::tanh(x.array())) * (1 - af::tanh(x.array()))); } +void test_expand() +{ + auto x = Variable(af::randu(5), true); + auto y = Variable(af::randu(5, 2), true); + auto z = y * expandAs(x, y); + auto dz = Variable(af::constant(1.0, 5, 2), false); + z.backward(dz); + auto dy = y.grad(); + auto dx = x.grad(); + VERIFY(dy.array() - af::tile(x.array(), 1, 2)); + VERIFY(dx.array() - af::sum(y.array(), 1)); +} + +void test_reduce() +{ + auto x = Variable(af::randu(5), true); + auto y = Variable(af::randu(5, 2), true); + auto z = x * reduceAs(y, x); + auto dz = Variable(af::constant(1.0, 5), false); + z.backward(dz); + auto dy = y.grad(); + auto dx = x.grad(); + VERIFY(dy.array() - af::tile(x.array(), 1, 2)); + VERIFY(dx.array() - af::sum(y.array(), 1)); +} + int main() { af::info(); @@ -140,5 +166,7 @@ int main() test_exp(); test_sigmoid(); test_tanh(); + test_expand(); + test_reduce(); return 0; } diff --git a/include/af/autograd/Functions.hpp b/include/af/autograd/Functions.hpp index de45495..49b0954 100644 --- a/include/af/autograd/Functions.hpp +++ b/include/af/autograd/Functions.hpp @@ -36,5 +36,9 @@ namespace af { Variable cos(const Variable &input); Variable tanh(const Variable &input); Variable sigmoid(const Variable &input); + + Variable transpose(const Variable &input); + Variable expandAs(const Variable &input, const Variable &reference); + Variable reduceAs(const Variable &input, const Variable &reference); } } diff --git a/src/autograd/Functions.cpp b/src/autograd/Functions.cpp index 8b7d876..653a16d 100644 --- a/src/autograd/Functions.cpp +++ b/src/autograd/Functions.cpp @@ -147,5 +147,42 @@ namespace af { return Variable(result, {input}, grad_func); } + Variable transpose(const Variable &input) + { + auto result = transpose(input.array()); + auto grad_func = [](std::vector &inputs, const Variable &grad_output) { + inputs[0].addGrad(transpose(grad_output)); + }; + return Variable(result, {input}, grad_func); + } + + Variable expandAs(const Variable &input, const Variable &reference) + { + dim4 dims(1,1,1,1); + dim4 idims = input.array().dims(); + dim4 rdims = reference.array().dims(); + for (int i = 0; i < 4; i++) { + dims[i] = rdims[i] / idims[i]; + } + auto result = tile(input.array(), dims); + auto grad_func = [](std::vector &inputs, const Variable &grad_output) { + inputs[0].addGrad(reduceAs(grad_output, inputs[0])); + }; + return Variable(result, {input}, grad_func); + } + + Variable reduceAs(const Variable &input, const Variable &reference) + { + dim4 idims = input.array().dims(); + dim4 rdims = reference.array().dims(); + auto result = input.array(); + for (int i = 0; i < 4; i++) { + if (idims[i] != rdims[i]) result = sum(result, i); + } + auto grad_func = [](std::vector &inputs, const Variable &grad_output) { + inputs[0].addGrad(expandAs(grad_output, inputs[0])); + }; + return Variable(result, {input}, grad_func); + } } } From 9b052736e7d8a450cc9c2c2a299695786a89bc25 Mon Sep 17 00:00:00 2001 From: Pavan Yalamanchili Date: Wed, 5 Jul 2017 01:07:10 -0700 Subject: 
[PATCH 12/20] Adding matmul, matmulTN, and matmulNT functions --- include/af/autograd/Functions.hpp | 4 ++ src/autograd/Functions.cpp | 63 +++++++++++++++++++++++++++++++ 2 files changed, 67 insertions(+) diff --git a/include/af/autograd/Functions.hpp b/include/af/autograd/Functions.hpp index 49b0954..17a190e 100644 --- a/include/af/autograd/Functions.hpp +++ b/include/af/autograd/Functions.hpp @@ -40,5 +40,9 @@ namespace af { Variable transpose(const Variable &input); Variable expandAs(const Variable &input, const Variable &reference); Variable reduceAs(const Variable &input, const Variable &reference); + + Variable matmul(const Variable &lhs, const Variable &rhs); + Variable matmulTN(const Variable &lhs, const Variable &rhs); + Variable matmulNT(const Variable &lhs, const Variable &rhs); } } diff --git a/src/autograd/Functions.cpp b/src/autograd/Functions.cpp index 653a16d..fcabaaa 100644 --- a/src/autograd/Functions.cpp +++ b/src/autograd/Functions.cpp @@ -184,5 +184,68 @@ namespace af { }; return Variable(result, {input}, grad_func); } + + Variable matmul(const Variable &lhs, const Variable &rhs) + { + // lhs:Input[0] -- [M, N] + // rhs:Input[1] -- [N, K] + //matmul(lhs, rhs) + // -- matmul([M, N], [N, K]) -- [M, K] + // result:grad_output -- [M, K] + auto result = matmul(lhs.array(), rhs.array()); + auto grad_func = [](std::vector &inputs, const Variable &grad_output) { + // matmulNT(grad_output, inputs[1]) + // -- matmulNT([M, K], [N, K]) + // -- matmul([M, K], [K, N]) -- [M, K] + inputs[0].addGrad(matmulNT(grad_output, inputs[1])); + // matmulTN(inputs[0], grad_output) + // -- matmulTN([M, N], [M, K]) + // -- matmul([N, M], [M, K]) -- [N, K] + inputs[1].addGrad(matmulTN(inputs[0], grad_output)); + }; + return Variable(result, {lhs, rhs}, grad_func); + } + + Variable matmulTN(const Variable &lhs, const Variable &rhs) + { + // lhs:Input[0] -- [N, M] + // rhs:Input[1] -- [N, K] + // matmulTN(lhs, rhs) + // -- matmulTN([N, M], [N, K]) + // -- matmul([M, N], [N, K]) -- [M, K] + // result:grad_output -- [M, K] + auto result = matmulTN(lhs.array(), rhs.array()); + auto grad_func = [](std::vector &inputs, const Variable &grad_output) { + // matmulNT(inputs[1], grad_output) + // -- matmulNT([N, K], [M, K]) + // -- matmul([N, K], [K, M]) -- [N, M] + inputs[0].addGrad(matmulNT(inputs[1], grad_output)); + // matmul(inputs[0], grad_output) + // -- matmulNT([N, M], [M, K]) -- [N, K] + inputs[1].addGrad(matmul(inputs[0], grad_output)); + }; + return Variable(result, {lhs, rhs}, grad_func); + } + + Variable matmulNT(const Variable &lhs, const Variable &rhs) + { + // lhs:Input[0] -- [M, N] + // rhs:Input[1] -- [K, N] + // matmulNT(lhs, rhs) + // -- matmulNT([M, N], [K, N]) + // -- matmul([M, N], [N, K]) -- [M, K] + // result:grad_output -- [M, K] + auto result = matmulNT(lhs.array(), rhs.array()); + auto grad_func = [](std::vector &inputs, const Variable &grad_output) { + // matmul(grad_output, inputs[1]) + // -- matmul([M, K], [K, N]) -- [M, N] + inputs[0].addGrad(matmul(grad_output, inputs[1])); + // matmulTN(grad_output, inputs[0]) + // -- matmulTN([M, K], [M, N]) + // -- matmul([K, M], [M, N]) -- [K, N] + inputs[1].addGrad(matmulTN(grad_output, inputs[0])); + }; + return Variable(result, {lhs, rhs}, grad_func); + } } } From 49b8917f272577b60d6a1f0e778e3c3867ee3e82 Mon Sep 17 00:00:00 2001 From: Pavan Yalamanchili Date: Wed, 5 Jul 2017 01:22:52 -0700 Subject: [PATCH 13/20] Add option to explicitly request higher order gradients. 
- Disabled by default - can be enabled by passing true as second argument to backward --- include/af/autograd/Variable.hpp | 11 +++++------ src/autograd/Variable.cpp | 23 ++++++++++++++++------- 2 files changed, 21 insertions(+), 13 deletions(-) diff --git a/include/af/autograd/Variable.hpp b/include/af/autograd/Variable.hpp index 74dd61e..e4a14b7 100644 --- a/include/af/autograd/Variable.hpp +++ b/include/af/autograd/Variable.hpp @@ -60,16 +60,15 @@ namespace af { void addGrad(const Variable &child_grad); - void evalGrad(); + void calcGradInputs(bool retain_grad_graph = false); - void calcGradInputs(); - - void backward(const Variable &grad); - - DAG_t build(); + void backward(const Variable &grad, bool retain_grad_graph = false); void buildSubGraph(Cache_t &cache, DAG_t &dag); private: + void evalGrad(bool retain_grad_graph = false); + + DAG_t build(); std::shared_ptr m_shared; }; } diff --git a/src/autograd/Variable.cpp b/src/autograd/Variable.cpp index 3a7a41b..a50443a 100644 --- a/src/autograd/Variable.cpp +++ b/src/autograd/Variable.cpp @@ -103,24 +103,33 @@ namespace af { } } - void Variable::evalGrad() + void Variable::evalGrad(bool retain_grad_graph) { // Flag asking not to calculate gradients if (!m_shared->m_calc_grad) return; // Best not to evaluate the JIT immediately if theres only a single gradient + Variable grad = m_shared->m_grads[0]; if (m_shared->m_grads.size() > 1) { - Variable grad = m_shared->m_grads[0]; for (unsigned i = 1; i < m_shared->m_grads.size(); i++) { grad = grad + m_shared->m_grads[i]; } grad.array().eval(); - m_shared->m_grads.clear(); - m_shared->m_grads.push_back(grad); + m_shared->m_grads.resize(1); + } + + // Remove the graph if not needed + if (!retain_grad_graph) { + // This can be done by extracting af::array and ignoring everything else + auto grad_data = grad.array(); + // Since there's no graph leading this, set calc_grad to false + grad = Variable(grad_data, false); } + + m_shared->m_grads[0] = grad; } - void Variable::calcGradInputs() + void Variable::calcGradInputs(bool retain_grad_graph) { evalGrad(); if (m_shared->m_grad_func) { @@ -128,12 +137,12 @@ namespace af { } } - void Variable::backward(const Variable &grad) + void Variable::backward(const Variable &grad, bool retain_grad_graph) { this->addGrad(grad); Variable::DAG_t dag = this->build(); for (auto iter = dag.rbegin(); iter != dag.rend(); iter++) { - iter->calcGradInputs(); + iter->calcGradInputs(retain_grad_graph); } } From 8bf7f1bb3896b0a5f02d1c9236dba0bf340ea76e Mon Sep 17 00:00:00 2001 From: Pavan Yalamanchili Date: Wed, 5 Jul 2017 19:28:18 -0700 Subject: [PATCH 14/20] Convert Variable::build and Variable::buildSubGraph to static functions --- include/af/autograd/Variable.hpp | 11 +++++++++-- src/autograd/Variable.cpp | 28 +++++++++++++++++++--------- 2 files changed, 28 insertions(+), 11 deletions(-) diff --git a/include/af/autograd/Variable.hpp b/include/af/autograd/Variable.hpp index e4a14b7..3c7d176 100644 --- a/include/af/autograd/Variable.hpp +++ b/include/af/autograd/Variable.hpp @@ -54,6 +54,8 @@ namespace af { Variable grad() const; + std::ptrdiff_t id() const; + bool isCalcGrad() const; void setCalcGrad(bool calc_grad); @@ -64,11 +66,16 @@ namespace af { void backward(const Variable &grad, bool retain_grad_graph = false); - void buildSubGraph(Cache_t &cache, DAG_t &dag); + private: void evalGrad(bool retain_grad_graph = false); - DAG_t build(); + std::vector getInputs() const; + + static void buildSubGraph(Cache_t &cache, DAG_t &dag, const Variable &var); + + static 
DAG_t build(const Variable &var); + std::shared_ptr m_shared; }; } diff --git a/src/autograd/Variable.cpp b/src/autograd/Variable.cpp index a50443a..df6f9e5 100644 --- a/src/autograd/Variable.cpp +++ b/src/autograd/Variable.cpp @@ -81,6 +81,16 @@ namespace af { return m_shared->m_grads[0]; } + std::ptrdiff_t Variable::id() const + { + return (std::ptrdiff_t)m_shared.get(); + } + + std::vector Variable::getInputs() const + { + return m_shared->m_inputs; + } + bool Variable::isCalcGrad() const { return m_shared->m_calc_grad; @@ -140,31 +150,31 @@ namespace af { void Variable::backward(const Variable &grad, bool retain_grad_graph) { this->addGrad(grad); - Variable::DAG_t dag = this->build(); + Variable::DAG_t dag = Variable::build(*this); for (auto iter = dag.rbegin(); iter != dag.rend(); iter++) { iter->calcGradInputs(retain_grad_graph); } } - Variable::DAG_t Variable::build() + Variable::DAG_t Variable::build(const Variable &var) { Cache_t cache; - Variable::DAG_t dag; - this->buildSubGraph(cache, dag); + Variable::DAG_t dag; + Variable::buildSubGraph(cache, dag, var); return dag; } - void Variable::buildSubGraph(Cache_t &cache, Variable::DAG_t &dag) + void Variable::buildSubGraph(Cache_t &cache, Variable::DAG_t &dag, const Variable &var) { - std::ptrdiff_t id = (std::ptrdiff_t)m_shared.get(); + std::ptrdiff_t id = var.id(); if (cache.find(id) != cache.end()) { return; } - for (auto input : m_shared->m_inputs) { - input.buildSubGraph(cache, dag); + for (auto input : var.getInputs()) { + Variable::buildSubGraph(cache, dag, input); } cache[id] = true; - dag.push_back(*this); + dag.push_back(var); } } } From 5eda60030805bf43755a53a0f7ac79dbec44a03f Mon Sep 17 00:00:00 2001 From: Pavan Yalamanchili Date: Wed, 5 Jul 2017 23:33:57 -0700 Subject: [PATCH 15/20] Overhaul of af::nn to use af::autograd - Implemented baseclass nn::Module - Added basic modules: nn::Linear, nn::Sigmoid, nn:Tanh - Added container modules: nn:Container, nn:Sequential - Deleted unnecessary examples, cleaned up perceptron.cpp --- CMakeLists.txt | 7 +- examples/Activations.cpp | 33 ---- examples/CMakeLists.txt | 8 +- examples/FFNet.cpp | 57 ------- examples/Node.cpp | 21 --- examples/Weights.cpp | 20 --- examples/perceptron.cpp | 43 ++++-- include/af/autograd/Variable.hpp | 6 +- include/af/nn.h | 9 +- include/af/nn/Activations.hpp | 15 -- include/af/nn/Activations/Activation.hpp | 54 ------- include/af/nn/Activations/Sigmoid.hpp | 43 ------ include/af/nn/Activations/Tanh.hpp | 40 ----- include/af/nn/Activations/Threshold.hpp | 42 ----- include/af/nn/{Networks.hpp => Modules.hpp} | 7 +- include/af/nn/Modules/Activations.hpp | 34 ++++ include/af/nn/Modules/Container.hpp | 57 +++++++ include/af/nn/Modules/Linear.hpp | 31 ++++ include/af/nn/Modules/Module.hpp | 41 +++++ include/af/nn/Networks/FFNet.hpp | 104 ------------- include/af/nn/Nodes.hpp | 12 -- include/af/nn/Nodes/Linear.hpp | 68 -------- include/af/nn/Nodes/Node.hpp | 115 -------------- .../af/nn/{Activations/ReLU.hpp => Types.hpp} | 18 ++- include/af/nn/Weights.hpp | 146 ------------------ include/af/nn/common.hpp | 20 --- src/autograd/Variable.cpp | 13 +- src/nn/Modules/Activations.cpp | 33 ++++ src/nn/Modules/Container.cpp | 42 +++++ src/nn/Modules/Linear.cpp | 59 +++++++ src/nn/Modules/Module.cpp | 48 ++++++ src/nn/Types.cpp | 36 +++++ 32 files changed, 452 insertions(+), 830 deletions(-) delete mode 100644 examples/Activations.cpp delete mode 100644 examples/FFNet.cpp delete mode 100644 examples/Node.cpp delete mode 100644 examples/Weights.cpp delete mode 100644 
include/af/nn/Activations.hpp delete mode 100644 include/af/nn/Activations/Activation.hpp delete mode 100644 include/af/nn/Activations/Sigmoid.hpp delete mode 100644 include/af/nn/Activations/Tanh.hpp delete mode 100644 include/af/nn/Activations/Threshold.hpp rename include/af/nn/{Networks.hpp => Modules.hpp} (62%) create mode 100644 include/af/nn/Modules/Activations.hpp create mode 100644 include/af/nn/Modules/Container.hpp create mode 100644 include/af/nn/Modules/Linear.hpp create mode 100644 include/af/nn/Modules/Module.hpp delete mode 100644 include/af/nn/Networks/FFNet.hpp delete mode 100644 include/af/nn/Nodes.hpp delete mode 100644 include/af/nn/Nodes/Linear.hpp delete mode 100644 include/af/nn/Nodes/Node.hpp rename include/af/nn/{Activations/ReLU.hpp => Types.hpp} (50%) delete mode 100644 include/af/nn/Weights.hpp delete mode 100644 include/af/nn/common.hpp create mode 100644 src/nn/Modules/Activations.cpp create mode 100644 src/nn/Modules/Container.cpp create mode 100644 src/nn/Modules/Linear.cpp create mode 100644 src/nn/Modules/Module.cpp create mode 100644 src/nn/Types.cpp diff --git a/CMakeLists.txt b/CMakeLists.txt index 699baf6..c84783d 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -10,8 +10,13 @@ add_library(afml SHARED "") target_sources(afml PRIVATE - src/autograd/Variable.cpp src/autograd/Functions.cpp + src/autograd/Variable.cpp + src/nn/Modules/Activations.cpp + src/nn/Modules/Container.cpp + src/nn/Modules/Linear.cpp + src/nn/Modules/Module.cpp + src/nn/Types.cpp ) target_include_directories(afml diff --git a/examples/Activations.cpp b/examples/Activations.cpp deleted file mode 100644 index 594bd5f..0000000 --- a/examples/Activations.cpp +++ /dev/null @@ -1,33 +0,0 @@ -/******************************************************* - * Copyright (c) 2015, ArrayFire - * All rights reserved. - * - * This file is distributed under 3-clause BSD license. - * The complete license agreement can be obtained at: - * http://arrayfire.com/licenses/BSD-3-Clause - ********************************************************/ - -#include - -using namespace af::nn; - -int main() -{ - const int num = 5; - - af::ArrayVector in = {100 * af::randu(num, 1) - 50}; - af::ArrayVector grad = {100 * af::randu(num, 1)}; - - ReLU r = ReLU(num, 0); - Sigmoid s = Sigmoid(num); - Tanh t = Tanh(num); - - af_print(in[0]); - af_print(r.forward(in)[0]); - af_print(s.forward(in)[0]); - af_print(t.forward(in)[0]); - - af_print(r.backward(in, grad)[0]); - af_print(s.backward(in, grad)[0]); - af_print(t.backward(in, grad)[0]); -} diff --git a/examples/CMakeLists.txt b/examples/CMakeLists.txt index 6e92389..b1e2404 100644 --- a/examples/CMakeLists.txt +++ b/examples/CMakeLists.txt @@ -10,9 +10,9 @@ function(build_example SRC) PRIVATE cxx_range_for) endfunction(build_example) -build_example(Activations.cpp) -build_example(FFNet.cpp) -build_example(Node.cpp) +# build_example(Activations.cpp) +# build_example(FFNet.cpp) +# build_example(Node.cpp) build_example(perceptron.cpp) -build_example(Weights.cpp) +# build_example(Weights.cpp) build_example(autograd.cpp) diff --git a/examples/FFNet.cpp b/examples/FFNet.cpp deleted file mode 100644 index f4afec0..0000000 --- a/examples/FFNet.cpp +++ /dev/null @@ -1,57 +0,0 @@ -/******************************************************* - * Copyright (c) 2015, ArrayFire - * All rights reserved. - * - * This file is distributed under 3-clause BSD license. 
- * The complete license agreement can be obtained at: - * http://arrayfire.com/licenses/BSD-3-Clause - ********************************************************/ - -#include - -using namespace af; -using namespace af::nn; - -int main() -{ - af::info(); - const int inputSize = 2; - const int hiddenSize = 3; - const int outputSize = 1; - const int numSamples = 4; - const double lr = 0.8; - - float hInput[] = {1, 1, - 0, 0, - 0, 1, - 1, 0}; - - float hOutput[] = {0, - 0, - 1, - 1}; - - af::array in(inputSize, numSamples, hInput); - af::array out(outputSize, numSamples, hOutput); - - - FFNet network(inputSize); - network.addLinearNode(hiddenSize, 5).addActivationNode(); - network.addLinearNode(outputSize, 5).addActivationNode(); - - for (int i = 0; i < 1000; i++) { - - ArrayVector data = network.forward({in}); - double err = af::norm(data[0] - out); - - data[0] = out - data[0]; - - if ((i + 1) % 100 == 0) { - printf("Error at iteration(%d) : %2.10lf\n", i + 1, err); - } - network.backward({in}, data); - network.update(lr); - } - - af_print(af::round(network.forward({in})[0])); -} diff --git a/examples/Node.cpp b/examples/Node.cpp deleted file mode 100644 index eb4229a..0000000 --- a/examples/Node.cpp +++ /dev/null @@ -1,21 +0,0 @@ -/******************************************************* - * Copyright (c) 2015, ArrayFire - * All rights reserved. - * - * This file is distributed under 3-clause BSD license. - * The complete license agreement can be obtained at: - * http://arrayfire.com/licenses/BSD-3-Clause - ********************************************************/ - -#include - -using namespace af::nn; - -int main() -{ - int inSize = 10; - int outSize = 2; - - Node n(1, &inSize, 1, &outSize, "test"); - n.info(); -} diff --git a/examples/Weights.cpp b/examples/Weights.cpp deleted file mode 100644 index 9e83c06..0000000 --- a/examples/Weights.cpp +++ /dev/null @@ -1,20 +0,0 @@ -/******************************************************* - * Copyright (c) 2015, ArrayFire - * All rights reserved. - * - * This file is distributed under 3-clause BSD license. 
- * The complete license agreement can be obtained at: - * http://arrayfire.com/licenses/BSD-3-Clause - ********************************************************/ - -#include - -using namespace af::nn; - -int main() -{ - Weights w(10, 1, 0.05); - af_print(w); - - return 0; -} diff --git a/examples/perceptron.cpp b/examples/perceptron.cpp index 9975a03..799c52a 100644 --- a/examples/perceptron.cpp +++ b/examples/perceptron.cpp @@ -7,17 +7,19 @@ * http://arrayfire.com/licenses/BSD-3-Clause ********************************************************/ +#include #include using namespace af; using namespace af::nn; +using namespace af::autograd; int main() { const int inputSize = 2; const int outputSize = 1; const int numSamples = 4; - const double lr = 10; + const double lr = 0.005; float hInput[] = {1, 1, 0, 0, @@ -29,29 +31,36 @@ int main() 1, 1}; - af::array in(inputSize, numSamples, hInput); - af::array out(outputSize, numSamples, hOutput); + auto in = af::array(inputSize, numSamples, hInput); + auto out = af::array(outputSize, numSamples, hOutput); - std::vector perceptron; - perceptron.emplace_back(new LinearNode(inputSize, outputSize, 10)); - perceptron.emplace_back(new Sigmoid(inputSize)); + nn::Sequential perceptron; + perceptron.add(nn::Linear(inputSize, outputSize)); + perceptron.add(nn::Sigmoid()); + + Variable result; for (int i = 0; i < 10; i++) { - ArrayVector data = {in}; - std::vector inputs(2); - for (int n = 0; n < 2; n++) { - inputs[n] = data; - data = perceptron[n]->forward(data); - } + // Forward propagation + result = perceptron.forward(nn::input(in)); - data[0] = out - data[0]; + // Calculate loss + // TODO: Use loss function + af::array diff = out - result.array(); + printf("Error at iteration(%d) : %lf\n", i + 1, af::max(af::abs(diff))); - printf("Error at iteration(%d) : %lf\n", i + 1, af::sum(af::abs(data[0])) / numSamples); + // Backward propagation + auto d_result = Variable(diff, false); + result.backward(d_result); - for (int n = 1; n >= 0; n--) { - data = perceptron[n]->backward(inputs[n], data); - perceptron[n]->update(lr); + // Update parameters + // TODO: Should use optimizer + for (auto param : perceptron.parameters()) { + param.array() += lr * param.grad().array(); + param.array().eval(); } } + af_print(result.array()); + return 0; } diff --git a/include/af/autograd/Variable.hpp b/include/af/autograd/Variable.hpp index 3c7d176..8ce10e2 100644 --- a/include/af/autograd/Variable.hpp +++ b/include/af/autograd/Variable.hpp @@ -50,7 +50,7 @@ namespace af { const std::vector &inputs, GradFunc_t grad_func); - af::array array() const; + af::array& array() const; Variable grad() const; @@ -58,6 +58,10 @@ namespace af { bool isCalcGrad() const; + bool isGradAvailable() const; + + void zeroGrad(); + void setCalcGrad(bool calc_grad); void addGrad(const Variable &child_grad); diff --git a/include/af/nn.h b/include/af/nn.h index 8ee2134..88333fc 100644 --- a/include/af/nn.h +++ b/include/af/nn.h @@ -1,5 +1,5 @@ /******************************************************* - * Copyright (c) 2015, ArrayFire + * Copyright (c) 2017, ArrayFire * All rights reserved. * * This file is distributed under 3-clause BSD license. 
@@ -9,8 +9,5 @@ #pragma once -#include -#include -#include -#include -#include +#include +#include diff --git a/include/af/nn/Activations.hpp b/include/af/nn/Activations.hpp deleted file mode 100644 index 4972238..0000000 --- a/include/af/nn/Activations.hpp +++ /dev/null @@ -1,15 +0,0 @@ -/******************************************************* - * Copyright (c) 2015, ArrayFire - * All rights reserved. - * - * This file is distributed under 3-clause BSD license. - * The complete license agreement can be obtained at: - * http://arrayfire.com/licenses/BSD-3-Clause - ********************************************************/ -#pragma once - -#include -#include -#include -#include -#include diff --git a/include/af/nn/Activations/Activation.hpp b/include/af/nn/Activations/Activation.hpp deleted file mode 100644 index d98eaf3..0000000 --- a/include/af/nn/Activations/Activation.hpp +++ /dev/null @@ -1,54 +0,0 @@ -/******************************************************* - * Copyright (c) 2015, ArrayFire - * All rights reserved. - * - * This file is distributed under 3-clause BSD license. - * The complete license agreement can be obtained at: - * http://arrayfire.com/licenses/BSD-3-Clause - ********************************************************/ -#pragma once - -#include -#include - -namespace af -{ - - namespace nn - { - class ActivationNode : public Node - { - protected: - - virtual af::array fn(const af::array &val) - { - return val; - } - - virtual af::array dfn(const af::array &val) - { - return af::constant(1, val.dims()); - } - - public: - - ActivationNode(int size, const char *name="none") : - Node(1, &size, 1, &size, name) - { - } - - ArrayVector forward(const ArrayVector &input) - { - return { fn(input[0]) }; - } - - ArrayVector backward(const ArrayVector &input, - const ArrayVector &gradOutput) - { - return { gradOutput[0] * dfn(input[0]) }; - } - }; - - typedef ActivationNode Activation; - } -} diff --git a/include/af/nn/Activations/Sigmoid.hpp b/include/af/nn/Activations/Sigmoid.hpp deleted file mode 100644 index 1fd31d5..0000000 --- a/include/af/nn/Activations/Sigmoid.hpp +++ /dev/null @@ -1,43 +0,0 @@ -/******************************************************* - * Copyright (c) 2015, ArrayFire - * All rights reserved. - * - * This file is distributed under 3-clause BSD license. - * The complete license agreement can be obtained at: - * http://arrayfire.com/licenses/BSD-3-Clause - ********************************************************/ -#pragma once - -#include - -namespace af -{ - namespace nn - { - class SigmoidNode : public ActivationNode - { - private: - - af::array fn(const af::array &input) - { - // TODO: replace with af::sigmoid - return 1 / (1 + af::exp(-input)); - } - - af::array dfn(const af::array &input) - { - af::array output = fn(input); - return output * (1 - output); - } - - public: - - SigmoidNode(int size, const char *name="none") : - ActivationNode(size, name) - { - } - }; - - typedef SigmoidNode Sigmoid; - } -} diff --git a/include/af/nn/Activations/Tanh.hpp b/include/af/nn/Activations/Tanh.hpp deleted file mode 100644 index abbcd07..0000000 --- a/include/af/nn/Activations/Tanh.hpp +++ /dev/null @@ -1,40 +0,0 @@ -/******************************************************* - * Copyright (c) 2015, ArrayFire - * All rights reserved. - * - * This file is distributed under 3-clause BSD license. 
- * The complete license agreement can be obtained at: - * http://arrayfire.com/licenses/BSD-3-Clause - ********************************************************/ -#pragma once - -#include - -namespace af -{ - namespace nn - { - class TanhNode : public ActivationNode - { - private: - - af::array fn(const af::array &input) - { - return af::tanh(input); - } - - af::array dfn(const af::array &input) - { - af::array output = fn(input); - return (1 - output * output); - } - public: - TanhNode(int size, const char *name="none") : - ActivationNode(size, name) - { - } - }; - - typedef TanhNode Tanh; - } -} diff --git a/include/af/nn/Activations/Threshold.hpp b/include/af/nn/Activations/Threshold.hpp deleted file mode 100644 index bded798..0000000 --- a/include/af/nn/Activations/Threshold.hpp +++ /dev/null @@ -1,42 +0,0 @@ -/******************************************************* - * Copyright (c) 2015, ArrayFire - * All rights reserved. - * - * This file is distributed under 3-clause BSD license. - * The complete license agreement can be obtained at: - * http://arrayfire.com/licenses/BSD-3-Clause - ********************************************************/ -#pragma once - -#include - -namespace af -{ - namespace nn - { - class ThresholdNode : public ActivationNode - { - private: - float mVal; - - af::array fn(const af::array &input) - { - af::array cond = (input >= mVal); - return (cond) * input + (1 - cond) * mVal; - } - - af::array dfn(const af::array &input) - { - return (input >= mVal).as(input.type()); - } - public: - ThresholdNode(int size, float val, const char *name="none") : - ActivationNode(size, name), - mVal(val) - { - } - }; - - typedef ThresholdNode Threshold; - } -} diff --git a/include/af/nn/Networks.hpp b/include/af/nn/Modules.hpp similarity index 62% rename from include/af/nn/Networks.hpp rename to include/af/nn/Modules.hpp index 2517121..310e4e9 100644 --- a/include/af/nn/Networks.hpp +++ b/include/af/nn/Modules.hpp @@ -1,5 +1,5 @@ /******************************************************* - * Copyright (c) 2015, ArrayFire + * Copyright (c) 2017, ArrayFire * All rights reserved. * * This file is distributed under 3-clause BSD license. @@ -8,4 +8,7 @@ ********************************************************/ #pragma once -#include +#include +#include +#include +#include diff --git a/include/af/nn/Modules/Activations.hpp b/include/af/nn/Modules/Activations.hpp new file mode 100644 index 0000000..1530cd9 --- /dev/null +++ b/include/af/nn/Modules/Activations.hpp @@ -0,0 +1,34 @@ +/******************************************************* + * Copyright (c) 2017, ArrayFire + * All rights reserved. + * + * This file is distributed under 3-clause BSD license. + * The complete license agreement can be obtained at: + * http://arrayfire.com/licenses/BSD-3-Clause + ********************************************************/ +#pragma once + +#include +#include + +namespace af +{ + namespace nn + { + class Sigmoid : public Module + { + public: + Sigmoid(); + + autograd::Variable forward(const autograd::Variable &input); + }; + + class Tanh : public Module + { + public: + Tanh(); + + autograd::Variable forward(const autograd::Variable &input); + }; + } +} diff --git a/include/af/nn/Modules/Container.hpp b/include/af/nn/Modules/Container.hpp new file mode 100644 index 0000000..2ee8c0e --- /dev/null +++ b/include/af/nn/Modules/Container.hpp @@ -0,0 +1,57 @@ +/******************************************************* + * Copyright (c) 2017, ArrayFire + * All rights reserved. 
+ * + * This file is distributed under 3-clause BSD license. + * The complete license agreement can be obtained at: + * http://arrayfire.com/licenses/BSD-3-Clause + ********************************************************/ +#pragma once + +#include +#include +#include + +namespace af +{ + namespace nn + { + + typedef std::shared_ptr ModulePtr; + + class Container : public Module + { + protected: + + std::vector m_modules; + + Container(); + + public: + + template + void add(T module) + { + m_modules.emplace_back(new T(module)); + for (auto param : module.parameters()) { + m_parameters.push_back(param); + } + } + + ModulePtr get(int id); + + std::vector modules(); + + virtual autograd::Variable forward(const autograd::Variable &input) = 0; + }; + + class Sequential : public Container + { + public: + + Sequential(); + + virtual autograd::Variable forward(const autograd::Variable &input); + }; + } +} diff --git a/include/af/nn/Modules/Linear.hpp b/include/af/nn/Modules/Linear.hpp new file mode 100644 index 0000000..f7a1ecc --- /dev/null +++ b/include/af/nn/Modules/Linear.hpp @@ -0,0 +1,31 @@ +/******************************************************* + * Copyright (c) 2017, ArrayFire + * All rights reserved. + * + * This file is distributed under 3-clause BSD license. + * The complete license agreement can be obtained at: + * http://arrayfire.com/licenses/BSD-3-Clause + ********************************************************/ +#pragma once + +#include + +namespace af +{ + namespace nn + { + class Linear : public Module + { + private: + bool m_bias; + public: + Linear(int input_size, int output_size, bool bias = true, float spread = 0.05); + + Linear(const autograd::Variable &w); + + Linear(const autograd::Variable &w, const autograd::Variable &b); + + autograd::Variable forward(const autograd::Variable &input); + }; + } +} diff --git a/include/af/nn/Modules/Module.hpp b/include/af/nn/Modules/Module.hpp new file mode 100644 index 0000000..a35db1f --- /dev/null +++ b/include/af/nn/Modules/Module.hpp @@ -0,0 +1,41 @@ +/******************************************************* + * Copyright (c) 2017, ArrayFire + * All rights reserved. + * + * This file is distributed under 3-clause BSD license. + * The complete license agreement can be obtained at: + * http://arrayfire.com/licenses/BSD-3-Clause + ********************************************************/ +#pragma once + +#include +#include + +#include + +namespace af +{ + namespace nn + { + + class Module + { + protected: + std::vector m_parameters; + + Module(); + + Module(const std::vector ¶meters); + + void setParams(const std::vector ¶meters); + + public: + + std::vector parameters(); + + void zeroGrad(); + + virtual autograd::Variable forward(const autograd::Variable &input) = 0; + }; + } +} diff --git a/include/af/nn/Networks/FFNet.hpp b/include/af/nn/Networks/FFNet.hpp deleted file mode 100644 index 1e00f18..0000000 --- a/include/af/nn/Networks/FFNet.hpp +++ /dev/null @@ -1,104 +0,0 @@ -/******************************************************* - * Copyright (c) 2015, ArrayFire - * All rights reserved. - * - * This file is distributed under 3-clause BSD license. 
- * The complete license agreement can be obtained at: - * http://arrayfire.com/licenses/BSD-3-Clause - ********************************************************/ -#pragma once - -#include -#include -#include -#include - -namespace af -{ - namespace nn - { - class FeedForwardNetwork : public Node - { - private: - IntVector mNodeSizes; - std::vector mNodes; - std::vector mData; - - template - FeedForwardNetwork& addNodePtr(NodeType *nodePtr) - { - mNodes.emplace_back(nodePtr); - - // TODO: Throw exception of node.getOutSizes() has >1 length - int size = nodePtr->getOutSizes()[0]; - mNodeSizes.push_back(size); - this->setOutSizes(1, &size); - return *this; - } - - public: - - FeedForwardNetwork(const int inputSize, const char *name="none") : - Node(1, &inputSize, name), - mNodeSizes(1), - mNodes(0), - mData(0) - { - mNodeSizes[0] = inputSize; - } - - template - FeedForwardNetwork& addNode(const NodeType &node) - { - return addNodePtr(new NodeType(node)); - } - - - FeedForwardNetwork& addLinearNode(const int size, const float spread = 0.05) - { - return addNodePtr(new LinearNode(mNodeSizes.back(), size, spread)); - } - - template - FeedForwardNetwork& addActivationNode() - { - int size = (int)mNodeSizes.back(); - - // Ensure ActivationType is derived from ActivationNode - ActivationNode *node = new ActivationType(size); - - return addNodePtr(node); - } - - ArrayVector forward(const ArrayVector &input) - { - mData.resize(mNodeSizes.size()); - mData[0] = input; - for (int i = 0; i < (int)mNodes.size(); i++) { - mData[i + 1] = mNodes[i]->forward(mData[i]); - } - return mData.back(); - } - - ArrayVector backward(const ArrayVector &input, - const ArrayVector &gradOutput) - { - //TODO: Assert input coming is same as the stored input - ArrayVector currGradOutput = gradOutput; - for (int i = (int)mNodes.size() - 1; i >= 0; i--) { - currGradOutput = mNodes[i]->backward(mData[i], currGradOutput); - } - return currGradOutput; - } - - void update(float lr) - { - for(int i = 0; i < (int)mNodes.size(); i++) { - mNodes[i]->update(lr); - } - } - }; - - typedef FeedForwardNetwork FFNet; - } -} diff --git a/include/af/nn/Nodes.hpp b/include/af/nn/Nodes.hpp deleted file mode 100644 index a4f7c99..0000000 --- a/include/af/nn/Nodes.hpp +++ /dev/null @@ -1,12 +0,0 @@ -/******************************************************* - * Copyright (c) 2015, ArrayFire - * All rights reserved. - * - * This file is distributed under 3-clause BSD license. - * The complete license agreement can be obtained at: - * http://arrayfire.com/licenses/BSD-3-Clause - ********************************************************/ -#pragma once - -#include -#include diff --git a/include/af/nn/Nodes/Linear.hpp b/include/af/nn/Nodes/Linear.hpp deleted file mode 100644 index 9ad2c85..0000000 --- a/include/af/nn/Nodes/Linear.hpp +++ /dev/null @@ -1,68 +0,0 @@ -/******************************************************* - * Copyright (c) 2015, ArrayFire - * All rights reserved. - * - * This file is distributed under 3-clause BSD license. 
- * The complete license agreement can be obtained at: - * http://arrayfire.com/licenses/BSD-3-Clause - ********************************************************/ -#pragma once - -#include -#include -#include - -namespace af -{ - namespace nn - { - class LinearNode : public Node - { - private: - - Weights mWeight, mBias; - Weights mWeightDiff, mBiasDiff; - - public: - - LinearNode(const int inputSize, const int outputSize, - float spread = 0.05, - const char *name="none") : - Node(1, &inputSize, 1, &outputSize, name), - mWeight(inputSize, outputSize, spread), - mBias(1, outputSize, spread), - mWeightDiff(), mBiasDiff() - { - } - - ArrayVector forward(const ArrayVector &input) - { - return {af::matmul(mWeight, input[0]) + - af::tile(mBias, 1, input[0].dims(1))}; - } - - ArrayVector backward(const ArrayVector &input, - const ArrayVector &gradOutput) - { - float m = input[0].dims(1); - - mWeightDiff = af::matmulNT(gradOutput[0], input[0]) / m; - mBiasDiff = af::sum(gradOutput[0], 1) / m; - - return { af::matmulTN(mWeight, gradOutput[0]) }; - } - - void update(float lr) - { - mWeight += lr * mWeightDiff; - mBias += lr * mBiasDiff; - - mWeight.eval(); - mBias.eval(); - - mWeightDiff.reset(); - mBiasDiff.reset(); - } - }; - } -} diff --git a/include/af/nn/Nodes/Node.hpp b/include/af/nn/Nodes/Node.hpp deleted file mode 100644 index ec4eb12..0000000 --- a/include/af/nn/Nodes/Node.hpp +++ /dev/null @@ -1,115 +0,0 @@ -/******************************************************* - * Copyright (c) 2015, ArrayFire - * All rights reserved. - * - * This file is distributed under 3-clause BSD license. - * The complete license agreement can be obtained at: - * http://arrayfire.com/licenses/BSD-3-Clause - ********************************************************/ -#pragma once - -#include -#include - -#include -#include - -namespace af -{ - namespace nn - { - - class Node - { - private: - IntVector mInputSizes; - IntVector mOutputSizes; - - char mName[MAX_NAME_SIZE]; - - void set(const int *inputSizes, const int *outputSizes, - const char *name, const int count) - { - for (int i = 0; i < (int)mInputSizes.size(); i++) mInputSizes[i] = inputSizes[i]; - for (int i = 0; i < (int)mOutputSizes.size(); i++) mOutputSizes[i] = outputSizes[i]; - - int len = std::min(count, MAX_NAME_SIZE - 1); - std::memcpy(mName, name, len); - mName[len] = 0; - } - - protected: - void setOutSizes(const int numOutputs, const int *outputSizes) - { - mOutputSizes.resize(numOutputs); - for (int i = 0; i < numOutputs; i++) { - mOutputSizes[i] = outputSizes[i]; - } - } - - Node(const int numInputs, const int *inputSizes, const char *name): - mInputSizes(numInputs), mOutputSizes(numInputs) - { - set(inputSizes, inputSizes, name, (int)strlen(name)); - } - - public: - - Node(const int numInputs, const int *inputSizes, - const int numOutputs, const int *outputSizes, const char *name) - : mInputSizes(numInputs), mOutputSizes(numOutputs) - { - set(inputSizes, outputSizes, name, (int)strlen(name)); - } - - Node(const std::vector &inputSizes, - const std::vector &outputSizes, - const std::string &name) - : mInputSizes((int)inputSizes.size()), mOutputSizes((int)outputSizes.size()) - { - set(&inputSizes[0], &outputSizes[0], name.c_str(), (int)name.size()); - } - - virtual ArrayVector forward(const ArrayVector &input) - { - return input; - } - - virtual ArrayVector backward(const ArrayVector &input, - const ArrayVector &gradOutput) - { - return gradOutput; - } - - virtual void update(float lr) {} - - //TODO: Add a method that actually returns this 
information to the user - virtual void info() - { - std::cout << "Name: " << mName << std::endl; - std::cout << "Input sizes: " << std::endl; - - for (int i = 0; i < (int)mInputSizes.size(); i++) { - std::cout << mInputSizes[i] << std::endl; - } - - std::cout << "Output sizes: " << std::endl; - for (int i = 0; i < (int)mOutputSizes.size(); i++) { - std::cout << mOutputSizes[i] << std::endl; - } - } - - IntVector getInSizes() const - { - return mInputSizes; - } - - IntVector getOutSizes() const - { - return mOutputSizes; - } - }; - - typedef std::shared_ptr NodePtr; - } -} diff --git a/include/af/nn/Activations/ReLU.hpp b/include/af/nn/Types.hpp similarity index 50% rename from include/af/nn/Activations/ReLU.hpp rename to include/af/nn/Types.hpp index 3c47684..6e7e101 100644 --- a/include/af/nn/Activations/ReLU.hpp +++ b/include/af/nn/Types.hpp @@ -1,5 +1,5 @@ /******************************************************* - * Copyright (c) 2015, ArrayFire + * Copyright (c) 2017, ArrayFire * All rights reserved. * * This file is distributed under 3-clause BSD license. @@ -8,13 +8,15 @@ ********************************************************/ #pragma once -#include -#include +#include -namespace af -{ - namespace nn - { - typedef ThresholdNode ReLU; +namespace af { + namespace nn { + + autograd::Variable input(const af::array &arr); + + autograd::Variable parameter(const af::array &arr); + + autograd::Variable weight(int input_size, int output_size, float spread = 0.05); } } diff --git a/include/af/nn/Weights.hpp b/include/af/nn/Weights.hpp deleted file mode 100644 index 3efe645..0000000 --- a/include/af/nn/Weights.hpp +++ /dev/null @@ -1,146 +0,0 @@ -/******************************************************* - * Copyright (c) 2015, ArrayFire - * All rights reserved. - * - * This file is distributed under 3-clause BSD license. 
- * The complete license agreement can be obtained at: - * http://arrayfire.com/licenses/BSD-3-Clause - ********************************************************/ -#pragma once - -#include - -namespace af -{ - namespace nn - { - class Weights - { - ArrayVector mData; - - public: - - Weights() : mData(1) - { - } - - - Weights(int inputSize, int outputSize, float spread) : mData(1) - { - mData[0] = af::randu(outputSize, inputSize) * spread - spread / 2; //Weights - } - - Weights(const af::array &weights) : mData(1) - { - mData[0] = weights; - } - - operator af::array() const - { - return mData[0]; - } - - Weights operator+(const Weights &other) const - { - return mData[0] + other; - } - - Weights operator*(const Weights &other) const - { - return mData[0] * other; - } - - Weights operator/(const Weights &other) const - { - return mData[0] / other; - } - - Weights operator-(const Weights &other) const - { - return mData[0] - other; - } - - Weights operator+=(const Weights &other) - { - mData[0] += other; - return *this; - } - - Weights operator/=(float val) - { - mData[0] /= val; - return *this; - } - - Weights operator*=(const Weights &other) - { - mData[0] *= other; - return *this; - } - - Weights operator-=(float val) - { - mData[0] -= val; - return *this; - } - - void reset() - { - mData[0] = af::constant(0, mData[0].dims()); - } - - void eval() - { - mData[0].eval(); - } - }; - - Weights operator *(const Weights &lhs, const double &rhs) - { - const af::array lhs_arr = lhs; - return lhs_arr * rhs; - } - - Weights operator +(const Weights &lhs, const double &rhs) - { - const af::array lhs_arr = lhs; - return lhs_arr + rhs; - } - - Weights operator /(const Weights &lhs, const double &rhs) - { - const af::array lhs_arr = lhs; - return lhs_arr / rhs; - } - - Weights operator -(const Weights &lhs, const double &rhs) - { - const af::array lhs_arr = lhs; - return lhs_arr - rhs; - } - - Weights operator *(const double &lhs, const Weights &rhs) - { - const af::array rhs_arr = rhs; - return lhs * rhs_arr; - } - - Weights operator +(const double &lhs, const Weights &rhs) - { - const af::array rhs_arr = rhs; - return lhs + rhs_arr; - } - - Weights operator /(const double &lhs, const Weights &rhs) - { - const af::array rhs_arr = rhs; - return lhs / rhs_arr; - } - - Weights operator -(const double &lhs, const Weights &rhs) - { - const af::array rhs_arr = rhs; - return lhs - rhs_arr; - } - } -} diff --git a/include/af/nn/common.hpp b/include/af/nn/common.hpp deleted file mode 100644 index ae6f9c7..0000000 --- a/include/af/nn/common.hpp +++ /dev/null @@ -1,20 +0,0 @@ -/******************************************************* - * Copyright (c) 2015, ArrayFire - * All rights reserved. - * - * This file is distributed under 3-clause BSD license. 
- * The complete license agreement can be obtained at: - * http://arrayfire.com/licenses/BSD-3-Clause - ********************************************************/ -#pragma once - -#include -#include - -namespace af -{ - const int MAX_NAME_SIZE = 32; - - typedef std::vector IntVector; - typedef std::vector ArrayVector; -} diff --git a/src/autograd/Variable.cpp b/src/autograd/Variable.cpp index df6f9e5..ad89ce1 100644 --- a/src/autograd/Variable.cpp +++ b/src/autograd/Variable.cpp @@ -65,7 +65,7 @@ namespace af { } } - af::array Variable::array() const + af::array& Variable::array() const { return m_shared->m_data; } @@ -96,6 +96,17 @@ namespace af { return m_shared->m_calc_grad; } + bool Variable::isGradAvailable() const + { + if (!m_shared->m_calc_grad) return false; + return m_shared->m_grads.size() >= 1; + } + + void Variable::zeroGrad() + { + m_shared->m_grads.clear(); + } + void Variable::setCalcGrad(bool calc_grad) { m_shared->m_calc_grad = calc_grad; diff --git a/src/nn/Modules/Activations.cpp b/src/nn/Modules/Activations.cpp new file mode 100644 index 0000000..0d1ca6e --- /dev/null +++ b/src/nn/Modules/Activations.cpp @@ -0,0 +1,33 @@ +/******************************************************* + * Copyright (c) 2017, ArrayFire + * All rights reserved. + * + * This file is distributed under 3-clause BSD license. + * The complete license agreement can be obtained at: + * http://arrayfire.com/licenses/BSD-3-Clause + ********************************************************/ + +#include +#include + +namespace af +{ + namespace nn + { + using namespace autograd; + + Sigmoid::Sigmoid() {} + + Variable Sigmoid::forward(const Variable &input) + { + return sigmoid(input); + } + + Tanh::Tanh() {} + + Variable Tanh::forward(const Variable &input) + { + return tanh(input); + } + } +} diff --git a/src/nn/Modules/Container.cpp b/src/nn/Modules/Container.cpp new file mode 100644 index 0000000..fbccc22 --- /dev/null +++ b/src/nn/Modules/Container.cpp @@ -0,0 +1,42 @@ +/******************************************************* + * Copyright (c) 2017, ArrayFire + * All rights reserved. + * + * This file is distributed under 3-clause BSD license. + * The complete license agreement can be obtained at: + * http://arrayfire.com/licenses/BSD-3-Clause + ********************************************************/ + +#include +#include + +namespace af +{ + namespace nn + { + using namespace autograd; + + Container::Container() {} + + ModulePtr Container::get(int id) + { + return m_modules[id]; + } + + std::vector Container::modules() + { + return m_modules; + } + + Sequential::Sequential() {} + + Variable Sequential::forward(const Variable &input) + { + Variable output = input; + for(auto module : m_modules) { + output = module->forward(output); + } + return output; + } + } +} diff --git a/src/nn/Modules/Linear.cpp b/src/nn/Modules/Linear.cpp new file mode 100644 index 0000000..c289ea5 --- /dev/null +++ b/src/nn/Modules/Linear.cpp @@ -0,0 +1,59 @@ +/******************************************************* + * Copyright (c) 2017, ArrayFire + * All rights reserved. + * + * This file is distributed under 3-clause BSD license. 
+ * The complete license agreement can be obtained at:
+ * http://arrayfire.com/licenses/BSD-3-Clause
+ ********************************************************/
+#include
+
+#include
+#include
+
+namespace af
+{
+    namespace nn
+    {
+        using namespace autograd;
+
+        Linear::Linear(int input_size, int output_size, bool bias, float spread) :
+            m_bias(bias)
+        {
+            auto w = nn::weight(input_size, output_size, spread);
+            if (bias) {
+                auto b = nn::weight(1, output_size, spread);
+                setParams({w, b});
+            } else {
+                setParams({w});
+            }
+        }
+
+        Linear::Linear(const Variable &w) :
+            m_bias(false),
+            Module({w})
+        {
+        }
+
+        Linear::Linear(const Variable &w, const Variable &b) :
+            m_bias(true),
+            Module({w, b})
+        {
+            if (b.array().dims(0) != w.array().dims(0)) {
+                throw af::exception("nn:Linear: Dimension mismatch between weight and bias.");
+            }
+            if (b.array().dims(1) != 1) {
+                throw af::exception("nn::Linear: Bias must be a vector.");
+            }
+        }
+
+        Variable Linear::forward(const Variable &input)
+        {
+            auto res = matmul(m_parameters[0], input);
+            if (m_bias) {
+                res = res + expandAs(m_parameters[1], res);
+            }
+            return res;
+        }
+    }
+}
diff --git a/src/nn/Modules/Module.cpp b/src/nn/Modules/Module.cpp
new file mode 100644
index 0000000..0232197
--- /dev/null
+++ b/src/nn/Modules/Module.cpp
@@ -0,0 +1,48 @@
+/*******************************************************
+ * Copyright (c) 2015, ArrayFire
+ * All rights reserved.
+ *
+ * This file is distributed under 3-clause BSD license.
+ * The complete license agreement can be obtained at:
+ * http://arrayfire.com/licenses/BSD-3-Clause
+ ********************************************************/
+
+#include
+
+namespace af
+{
+    namespace nn
+    {
+        using autograd::Variable;
+        Module::Module() :
+            m_parameters()
+        {
+        }
+
+        Module::Module(const std::vector<Variable> &parameters) :
+            m_parameters(parameters.begin(), parameters.end())
+        {
+        }
+
+        void Module::setParams(const std::vector<Variable> &parameters)
+        {
+            m_parameters.clear();
+            for(auto parameter : parameters) {
+                m_parameters.push_back(parameter);
+            }
+        }
+
+
+        std::vector<Variable> Module::parameters()
+        {
+            return m_parameters;
+        }
+
+        void Module::zeroGrad()
+        {
+            for (auto &parameter : m_parameters) {
+                parameter.zeroGrad();
+            }
+        }
+    }
+}
diff --git a/src/nn/Types.cpp b/src/nn/Types.cpp
new file mode 100644
index 0000000..698b497
--- /dev/null
+++ b/src/nn/Types.cpp
@@ -0,0 +1,36 @@
+/*******************************************************
+ * Copyright (c) 2017, ArrayFire
+ * All rights reserved.
+ *
+ * This file is distributed under 3-clause BSD license.
+ * The complete license agreement can be obtained at:
+ * http://arrayfire.com/licenses/BSD-3-Clause
+ ********************************************************/
+
+#include
+
+#include
+
+namespace af {
+    namespace nn {
+
+        using autograd::Variable;
+
+        Variable input(const af::array &arr)
+        {
+            return Variable(arr, false);
+        }
+
+        Variable parameter(const af::array &arr)
+        {
+            return Variable(arr, true);
+        }
+
+        Variable weight(int input_size, int output_size, float spread)
+        {
+            auto w = af::randu(output_size, input_size) * spread - spread / 2;
+            w.eval();
+            return parameter(w);
+        }
+    }
+}
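A quick orientation before the follow-up fixes: the classes added above are meant to be composed through the Container/Sequential interface, with nn::input() wrapping data Variables and nn::weight() creating trainable parameters. A minimal usage sketch, assuming only the interfaces shown in this patch, that the bias/spread arguments of Linear have defaults in its header (not shown here), and that the relevant af/nn and af/autograd headers are included:

    af::nn::Sequential model;
    model.add(af::nn::Linear(2, 1));             // parameters created internally via nn::weight()
    model.add(af::nn::Sigmoid());

    auto x    = af::nn::input(af::randu(2, 1));  // Variable with gradient tracking disabled
    auto pred = model.forward(x);                // Sequential::forward runs each sub-module in order

The perceptron example later in this series uses exactly this pattern.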
From 9aefea4a47ec3fc333a6f2a8e4d2c6bdc5169493 Mon Sep 17 00:00:00 2001
From: Pavan Yalamanchili
Date: Thu, 6 Jul 2017 00:47:59 -0700
Subject: [PATCH 16/20] Fixing bugs in backward pass for activation functions

---
 src/autograd/Functions.cpp | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/src/autograd/Functions.cpp b/src/autograd/Functions.cpp
index fcabaaa..71048b6 100644
--- a/src/autograd/Functions.cpp
+++ b/src/autograd/Functions.cpp
@@ -104,7 +104,7 @@ namespace af {
     {
         auto result = exp(input.array());
         auto grad_func = [](std::vector<Variable> &inputs, const Variable &grad_output) {
-            inputs[0].addGrad(exp(inputs[0]));
+            inputs[0].addGrad(grad_output * exp(inputs[0]));
         };
         return Variable(result, {input}, grad_func);
     }
@@ -113,7 +113,7 @@ namespace af {
     {
         auto result = sin(input.array());
         auto grad_func = [](std::vector<Variable> &inputs, const Variable &grad_output) {
-            inputs[0].addGrad(cos(inputs[0]));
+            inputs[0].addGrad(grad_output * cos(inputs[0]));
         };
         return Variable(result, {input}, grad_func);
     }
@@ -122,7 +122,7 @@ namespace af {
     {
         auto result = cos(input.array());
         auto grad_func = [](std::vector<Variable> &inputs, const Variable &grad_output) {
-            inputs[0].addGrad(negate(sin(inputs[0])));
+            inputs[0].addGrad(grad_output * negate(sin(inputs[0])));
         };
         return Variable(result, {input}, grad_func);
     }
@@ -132,7 +132,7 @@ namespace af {
     {
         auto result = tanh(input.array());
         auto grad_func = [](std::vector<Variable> &inputs, const Variable &grad_output) {
             auto tmp = tanh(inputs[0]);
-            inputs[0].addGrad(1.0 - tmp * tmp);
+            inputs[0].addGrad(grad_output * (1.0 - tmp * tmp));
         };
         return Variable(result, {input}, grad_func);
     }
@@ -142,7 +142,7 @@ namespace af {
     {
         auto result = sigmoid(input.array());
         auto grad_func = [](std::vector<Variable> &inputs, const Variable &grad_output) {
             auto tmp = sigmoid(inputs[0]);
-            inputs[0].addGrad(tmp * (1 - tmp));
+            inputs[0].addGrad(grad_output * tmp * (1 - tmp));
         };
         return Variable(result, {input}, grad_func);
     }
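The bug fixed above is a missing chain-rule factor: each backward lambda was pushing only the local derivative f'(x) into its input and ignoring the gradient arriving from downstream. With grad_output = dL/dy for y = f(x), the value accumulated into the input must be

    dL/dx = grad_output * f'(x)

which for the functions touched here works out to

    exp:     grad_output * exp(x)
    sin:     grad_output * cos(x)
    cos:     grad_output * (-sin(x))
    tanh:    grad_output * (1 - tanh(x)^2)
    sigmoid: grad_output * sigmoid(x) * (1 - sigmoid(x))

exactly the expressions now passed to addGrad().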
From 6d5751a853f2909b23b5aeb99aba54ced71dd18b Mon Sep 17 00:00:00 2001
From: Pavan Yalamanchili
Date: Thu, 6 Jul 2017 00:48:23 -0700
Subject: [PATCH 17/20] Fixing perceptron example to use smaller batch size

- Trying to solve for the entire batch was a bad idea
---
 examples/perceptron.cpp | 52 +++++++++++++++++++++++++++--------------
 1 file changed, 35 insertions(+), 17 deletions(-)

diff --git a/examples/perceptron.cpp b/examples/perceptron.cpp
index 799c52a..0ad8cc0 100644
--- a/examples/perceptron.cpp
+++ b/examples/perceptron.cpp
@@ -18,8 +18,8 @@ int main()
 {
     const int inputSize = 2;
     const int outputSize = 1;
-    const int numSamples = 4;
     const double lr = 0.005;
+    const int numSamples = 4;
     float hInput[] = {1, 1,
                       0, 0,
@@ -40,27 +40,45 @@ int main()
     perceptron.add(nn::Sigmoid());
 
     Variable result;
-    for (int i = 0; i < 10; i++) {
+    for (int i = 0; i < 100; i++) {
+        for (int j = 0; j < numSamples; j++) {
+
+            af::array in_j = in(af::span, j);
+            af::array out_j = out(af::span, j);
 
-        // Forward propagation
-        result = perceptron.forward(nn::input(in));
-
-        // Calculate loss
-        // TODO: Use loss function
-        af::array diff = out - result.array();
-        printf("Error at iteration(%d) : %lf\n", i + 1, af::max(af::abs(diff)));
+            // Forward propagation
+            result = perceptron.forward(nn::input(in_j));
+
+            // Calculate loss
+            // TODO: Use loss function
+            af::array diff = out_j - result.array();
+
+            // Backward propagation
+            auto d_result = Variable(diff, false);
+            result.backward(d_result);
+
+            // Update parameters
+            // TODO: Should use optimizer
+            for (auto param : perceptron.parameters()) {
+                param.array() += lr * param.grad().array();
+                param.array().eval();
+            }
+        }
 
-        // Backward propagation
-        auto d_result = Variable(diff, false);
-        result.backward(d_result);
+        if ((i + 1) % 10 == 0) {
+            // Forward propagation
+            result = perceptron.forward(nn::input(in));
 
-        // Update parameters
-        // TODO: Should use optimizer
-        for (auto param : perceptron.parameters()) {
-            param.array() += lr * param.grad().array();
-            param.array().eval();
+            // Calculate loss
+            // TODO: Use loss function
+            af::array diff = out - result.array();
+            printf("Average Error at iteration(%d) : %lf\n", i + 1, af::mean(af::abs(diff)));
+            printf("Predicted\n");
+            af_print(result.array());
+            printf("Expected\n");
+            af_print(out);
+            printf("\n\n");
         }
     }
 
-    af_print(result.array());
     return 0;
 }

From a01504b84ca5aaa9cb59b4ff79f568ddc4454a1d Mon Sep 17 00:00:00 2001
From: Pavan Yalamanchili
Date: Thu, 6 Jul 2017 01:10:53 -0700
Subject: [PATCH 18/20] Adding model.eval() and model.train()

---
 examples/perceptron.cpp          | 10 +++++++---
 include/af/nn/Modules/Module.hpp |  4 ++++
 src/nn/Modules/Module.cpp        | 13 +++++++++++++
 3 files changed, 24 insertions(+), 3 deletions(-)

diff --git a/examples/perceptron.cpp b/examples/perceptron.cpp
index 0ad8cc0..96c14af 100644
--- a/examples/perceptron.cpp
+++ b/examples/perceptron.cpp
@@ -18,7 +18,7 @@ int main()
 {
     const int inputSize = 2;
     const int outputSize = 1;
-    const double lr = 0.005;
+    const double lr = 0.1;
     const int numSamples = 4;
     float hInput[] = {1, 1,
@@ -40,8 +40,10 @@ int main()
     perceptron.add(nn::Sigmoid());
 
     Variable result;
-    for (int i = 0; i < 100; i++) {
+    for (int i = 0; i < 1000; i++) {
         for (int j = 0; j < numSamples; j++) {
+            perceptron.train();
+            perceptron.zeroGrad();
 
             af::array in_j = in(af::span, j);
             af::array out_j = out(af::span, j);
@@ -65,7 +67,9 @@ int main()
             }
         }
 
-        if ((i + 1) % 10 == 0) {
+        if ((i + 1) % 100 == 0) {
+            perceptron.eval();
+
             // Forward propagation
             result = perceptron.forward(nn::input(in));
diff --git a/include/af/nn/Modules/Module.hpp b/include/af/nn/Modules/Module.hpp
index a35db1f..fb1e946 100644
--- a/include/af/nn/Modules/Module.hpp
+++ b/include/af/nn/Modules/Module.hpp
@@ -35,6 +35,10 @@ namespace af
 
             void zeroGrad();
 
+            void train();
+
+            void eval();
+
             virtual autograd::Variable forward(const autograd::Variable &input) = 0;
         };
     }
diff --git a/src/nn/Modules/Module.cpp b/src/nn/Modules/Module.cpp
index 0232197..9db16b9 100644
--- a/src/nn/Modules/Module.cpp
+++ b/src/nn/Modules/Module.cpp
@@ -32,6 +32,19 @@ namespace af
             }
         }
 
+        void Module::train()
+        {
+            for (auto parameter : m_parameters) {
+                parameter.setCalcGrad(true);
+            }
+        }
+
+        void Module::eval()
+        {
+            for (auto parameter : m_parameters) {
+                parameter.setCalcGrad(false);
+            }
+        }
 
         std::vector<Variable> Module::parameters()
         {
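The example still updates the weights by hand, and both patches above leave the "TODO: Should use optimizer" comment in place. A hypothetical helper along those lines, built only on the Module/Variable interfaces introduced in this series (the SGD struct and its name are illustrative, not part of these patches; note the example seeds backward() with out - result, which is why its manual update uses +=, whereas a conventional SGD stepping against dL/dparam gradients subtracts):

    // Illustrative only: a minimal SGD-style step over a module's parameters.
    struct SGD {
        double lr;
        explicit SGD(double lr) : lr(lr) {}

        void step(af::nn::Module &model)
        {
            for (auto &param : model.parameters()) {
                if (!param.isGradAvailable()) continue;      // Variable API added earlier in this series
                param.array() -= lr * param.grad().array();  // assumes the gradients are dL/dparam
                param.array().eval();
            }
            model.zeroGrad();
        }
    };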
From 2776aa2c03ae66e502ac6a10a21c9cd673eff5a8 Mon Sep 17 00:00:00 2001
From: Pavan Yalamanchili
Date: Thu, 6 Jul 2017 08:46:40 -0700
Subject: [PATCH 19/20] Formatting changes

---
 examples/perceptron.cpp          | 2 +-
 include/af/autograd/Variable.hpp | 2 +-
 src/nn/Modules/Module.cpp        | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/examples/perceptron.cpp b/examples/perceptron.cpp
index 96c14af..ffe82d7 100644
--- a/examples/perceptron.cpp
+++ b/examples/perceptron.cpp
@@ -1,5 +1,5 @@
 /*******************************************************
- * Copyright (c) 2015, ArrayFire
+ * Copyright (c) 2017, ArrayFire
  * All rights reserved.
  *
  * This file is distributed under 3-clause BSD license.
diff --git a/include/af/autograd/Variable.hpp b/include/af/autograd/Variable.hpp
index 8ce10e2..f4deb5f 100644
--- a/include/af/autograd/Variable.hpp
+++ b/include/af/autograd/Variable.hpp
@@ -42,7 +42,7 @@ namespace af {
             GradFunc_t m_grad_func;
         };
 
-        public:
+    public:
 
         Variable();
         Variable(const af::array &data, bool calc_grad);
diff --git a/src/nn/Modules/Module.cpp b/src/nn/Modules/Module.cpp
index 9db16b9..e350f1d 100644
--- a/src/nn/Modules/Module.cpp
+++ b/src/nn/Modules/Module.cpp
@@ -1,5 +1,5 @@
 /*******************************************************
- * Copyright (c) 2015, ArrayFire
+ * Copyright (c) 2017, ArrayFire
  * All rights reserved.
  *
  * This file is distributed under 3-clause BSD license.

From 04cd450b347b1e782fbdf1ed25033c143daa2af6 Mon Sep 17 00:00:00 2001
From: Pavan Yalamanchili
Date: Thu, 6 Jul 2017 08:50:48 -0700
Subject: [PATCH 20/20] Use references while iterating when possible

---
 examples/perceptron.cpp          | 2 +-
 include/af/autograd/Variable.hpp | 4 ++--
 src/autograd/Variable.cpp        | 8 ++++----
 src/nn/Modules/Container.cpp     | 2 +-
 src/nn/Modules/Module.cpp        | 4 ++--
 5 files changed, 10 insertions(+), 10 deletions(-)

diff --git a/examples/perceptron.cpp b/examples/perceptron.cpp
index ffe82d7..d8e7f39 100644
--- a/examples/perceptron.cpp
+++ b/examples/perceptron.cpp
@@ -61,7 +61,7 @@ int main()
 
             // Update parameters
             // TODO: Should use optimizer
-            for (auto param : perceptron.parameters()) {
+            for (auto &param : perceptron.parameters()) {
                 param.array() += lr * param.grad().array();
                 param.array().eval();
             }
diff --git a/include/af/autograd/Variable.hpp b/include/af/autograd/Variable.hpp
index f4deb5f..330c37f 100644
--- a/include/af/autograd/Variable.hpp
+++ b/include/af/autograd/Variable.hpp
@@ -52,7 +52,7 @@ namespace af {
 
         af::array& array() const;
 
-        Variable grad() const;
+        Variable& grad() const;
 
         std::ptrdiff_t id() const;
 
@@ -74,7 +74,7 @@ namespace af {
     private:
         void evalGrad(bool retain_grad_graph = false);
 
-        std::vector<Variable> getInputs() const;
+        std::vector<Variable>& getInputs() const;
 
         static void buildSubGraph(Cache_t &cache, DAG_t &dag, const Variable &var);
 
diff --git a/src/autograd/Variable.cpp b/src/autograd/Variable.cpp
index ad89ce1..9ff55c5 100644
--- a/src/autograd/Variable.cpp
+++ b/src/autograd/Variable.cpp
@@ -55,7 +55,7 @@ namespace af {
         m_shared(nullptr)
     {
         bool calc_grad = false;
-        for (auto input : inputs) {
+        for (const auto &input : inputs) {
            calc_grad |= input.isCalcGrad();
        }
        if (calc_grad) {
@@ -70,7 +70,7 @@ namespace af {
         return m_shared->m_data;
     }
 
-    Variable Variable::grad() const
+    Variable& Variable::grad() const
     {
         if (!m_shared->m_calc_grad) {
             throw af::exception("Gradient calclation disabled.");
@@ -86,7 +86,7 @@ namespace af {
         return (std::ptrdiff_t)m_shared.get();
     }
 
-    std::vector<Variable> Variable::getInputs() const
+    std::vector<Variable>& Variable::getInputs() const
     {
         return m_shared->m_inputs;
     }
@@ -181,7 +181,7 @@ namespace af {
         if (cache.find(id) != cache.end()) {
             return;
         }
-        for (auto input : var.getInputs()) {
+        for (const auto &input : var.getInputs()) {
             Variable::buildSubGraph(cache, dag, input);
         }
         cache[id] = true;
diff --git a/src/nn/Modules/Container.cpp b/src/nn/Modules/Container.cpp
index fbccc22..9078631 100644
--- a/src/nn/Modules/Container.cpp
+++ b/src/nn/Modules/Container.cpp
@@ -33,7 +33,7 @@ namespace af
 
         Variable Sequential::forward(const Variable &input)
         {
             Variable output = input;
-            for(auto module : m_modules) {
+            for (auto &module : m_modules) {
                 output = module->forward(output);
             }
             return output;
diff --git a/src/nn/Modules/Module.cpp b/src/nn/Modules/Module.cpp
index e350f1d..5126218 100644
--- a/src/nn/Modules/Module.cpp
+++ b/src/nn/Modules/Module.cpp
@@ -34,14 +34,14 @@ namespace af
 
         void Module::train()
         {
-            for (auto parameter : m_parameters) {
+            for (auto &parameter : m_parameters) {
                 parameter.setCalcGrad(true);
             }
         }
 
         void Module::eval()
         {
-            for (auto parameter : m_parameters) {
+            for (auto &parameter : m_parameters) {
                 parameter.setCalcGrad(false);
             }
         }
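One reading note on the final state of these loops: Module::parameters() returns its std::vector<Variable> by value, so for (auto &param : perceptron.parameters()) binds references into a temporary copy of that vector. The updates still reach the model because a copied Variable appears to share its underlying storage (the m_shared state visible in src/autograd/Variable.cpp), which is also what the per-sample update loop in the perceptron example relies on. A small sketch of that assumption:

    // Sketch, assuming Variable copies alias the same underlying af::array
    // (as the perceptron example's update loop relies on).
    auto params = perceptron.parameters();   // vector returned by value
    params[0].array() *= 0.5;                // still scales the model's first parameter,
                                             // not just a local copy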