From 5ed7a88a2ac5578d95a768493df5eccf184eefeb Mon Sep 17 00:00:00 2001 From: Pavan Yalamanchili Date: Sat, 1 Jul 2017 23:33:03 -0700 Subject: [PATCH 01/20] Reorganizing the CMake files --- AFBuildMacros.cmake | 65 ------------------------------ CMakeLists.txt | 31 ++++---------- {test => examples}/Activations.cpp | 0 examples/CMakeLists.txt | 22 ++++++++++ {test => examples}/FFNet.cpp | 1 + {test => examples}/Node.cpp | 0 {test => examples}/Weights.cpp | 0 {test => examples}/perceptron.cpp | 0 8 files changed, 30 insertions(+), 89 deletions(-) delete mode 100644 AFBuildMacros.cmake rename {test => examples}/Activations.cpp (100%) create mode 100644 examples/CMakeLists.txt rename {test => examples}/FFNet.cpp (98%) rename {test => examples}/Node.cpp (100%) rename {test => examples}/Weights.cpp (100%) rename {test => examples}/perceptron.cpp (100%) diff --git a/AFBuildMacros.cmake b/AFBuildMacros.cmake deleted file mode 100644 index 64ea17e..0000000 --- a/AFBuildMacros.cmake +++ /dev/null @@ -1,65 +0,0 @@ -# A macro to build an ArrayFire example -# For most uses only FIND_PACKAGE(ArrayFire REQUIRED), ADD_EXECUTABLE(...) -# and TARGET_LINK_LIBRARIES(... ${ARRAYFIRE_LIBRARIES}) are needed -MACRO(BUILD_SRC SRC_NAME SRC_SOURCE BACKEND_NAME BACKEND_LIBRARIES OTHER_LIBRARIES OUT_DIR_NAME) - - ADD_EXECUTABLE(example_${SRC_NAME}_${BACKEND_NAME} ${SRC_SOURCE}) - - TARGET_LINK_LIBRARIES(example_${SRC_NAME}_${BACKEND_NAME} - ${BACKEND_LIBRARIES} ${OTHER_LIBRARIES}) - - SET_TARGET_PROPERTIES(example_${SRC_NAME}_${BACKEND_NAME} - PROPERTIES - OUTPUT_NAME ${SRC_NAME}_${BACKEND_NAME} - RUNTIME_OUTPUT_DIRECTORY ${OUT_DIR_NAME} - FOLDER "${BACKEND_NAME}") -ENDMACRO() - -# A macro to build a list of files -# For most uses only FIND_PACKAGE(ArrayFire REQUIRED), ADD_EXECUTABLE(...) -# and TARGET_LINK_LIBRARIES(... 
${ARRAYFIRE_LIBRARIES}) are needed -MACRO(BUILD_BACKEND FILES BACKEND_NAME BACKEND_LIBRARIES OTHER_LIBRARIES) - - FOREACH(FILE ${FILES}) - GET_FILENAME_COMPONENT(SRC ${FILE} NAME_WE) - GET_FILENAME_COMPONENT(FULL_DIR_NAME ${FILE} PATH) - GET_FILENAME_COMPONENT(DIR_NAME ${FULL_DIR_NAME} NAME) - - BUILD_SRC(${SRC} ${FILE} ${BACKEND_NAME} - "${BACKEND_LIBRARIES}" - "${OTHER_LIBRARIES}" ${DIR_NAME}) - ENDFOREACH() -ENDMACRO() - -MACRO(BUILD_ALL FILES) - FIND_PACKAGE(ArrayFire REQUIRED) - FIND_PACKAGE(CUDA QUIET) - FIND_PACKAGE(OpenCL QUIET) - - INCLUDE_DIRECTORIES( - "${CMAKE_CURRENT_SOURCE_DIR}/include" - ${ArrayFire_INCLUDE_DIRS} - ) - - IF (${ArrayFire_CPU_FOUND}) - MESSAGE(STATUS "CPU backend is ON.") - BUILD_BACKEND("${FILES}" cpu ${ArrayFire_CPU_LIBRARIES} "") - ENDIF() - - IF (${OpenCL_FOUND} AND ${ArrayFire_OpenCL_FOUND}) - MESSAGE(STATUS "OPENCL backend is ON.") - BUILD_BACKEND("${FILES}" opencl ${ArrayFire_OpenCL_LIBRARIES} "${OpenCL_LIBRARIES}") - ENDIF() - - IF (${CUDA_FOUND} AND ${ArrayFire_CUDA_FOUND}) - FIND_LIBRARY( CUDA_NVVM_LIBRARY - NAMES "nvvm" - PATH_SUFFIXES "nvvm/lib64" "nvvm/lib" - PATHS ${CUDA_TOOLKIT_ROOT_DIR} - DOC "CUDA NVVM Library" - ) - - MESSAGE(STATUS "CUDA backend is ON.") - BUILD_BACKEND("${FILES}" cuda ${ArrayFire_CUDA_LIBRARIES} "${CUDA_CUBLAS_LIBRARIES};${CUDA_LIBRARIES};${CUDA_cusolver_LIBRARY};${CUDA_CUFFT_LIBRARIES};${CUDA_NVVM_LIBRARY};${CUDA_CUDA_LIBRARY}") - ENDIF() -ENDMACRO() diff --git a/CMakeLists.txt b/CMakeLists.txt index d03e555..104d635 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,26 +1,9 @@ -CMAKE_MINIMUM_REQUIRED(VERSION 2.8) -PROJECT(ARRAYFIRE_ML) +cmake_minimum_required(VERSION 3.5.2) -SET_PROPERTY(GLOBAL PROPERTY USE_FOLDERS ON) -ADD_DEFINITIONS(-Wall -std=c++11 -fvisibility=hidden) +project(ArrayFireML + VERSION 0.1.0 + LANGUAGES C CXX) -OPTION(BUILD_TEST "Build Tests" ON) - -# Header files -IF(NOT DEFINED AFML_INSTALL_INC_DIR) - SET(AFML_INSTALL_INC_DIR "include" CACHE PATH "Installation path for headers") -ENDIF() - -IF (BUILD_TEST) - FILE(GLOB FILES "test/*.cpp") - INCLUDE("${CMAKE_CURRENT_SOURCE_DIR}/AFBuildMacros.cmake") - BUILD_ALL("${FILES}") -ENDIF() - -INSTALL(DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}/include/" DESTINATION "${AFML_INSTALL_INC_DIR}" - COMPONENT headers - FILES_MATCHING - PATTERN "*.h" - PATTERN "*.hpp" - PATTERN ".gitignore" EXCLUDE -) +find_package(ArrayFire REQUIRED) +set(ArrayFireML_INCLUDE_DIRS ${CMAKE_CURRENT_SOURCE_DIR}/include) +add_subdirectory(examples) diff --git a/test/Activations.cpp b/examples/Activations.cpp similarity index 100% rename from test/Activations.cpp rename to examples/Activations.cpp diff --git a/examples/CMakeLists.txt b/examples/CMakeLists.txt new file mode 100644 index 0000000..fd22342 --- /dev/null +++ b/examples/CMakeLists.txt @@ -0,0 +1,22 @@ +function(build_example SRC) + get_filename_component(src_name ${SRC} NAME_WE) + set(target "${src_name}") + add_executable(${target} ${SRC}) + target_include_directories(${target} + PRIVATE + ${ArrayFire_INCLUDE_DIRS} + ${ArrayFireML_INCLUDE_DIRS} + ) + target_link_libraries(${target} + PRIVATE + af + ) + target_compile_features(${target} + PRIVATE cxx_range_for) +endfunction(build_example) + +build_example(Activations.cpp) +build_example(FFNet.cpp) +build_example(Node.cpp) +build_example(perceptron.cpp) +build_example(Weights.cpp) diff --git a/test/FFNet.cpp b/examples/FFNet.cpp similarity index 98% rename from test/FFNet.cpp rename to examples/FFNet.cpp index b9c9b62..93fb6bf 100644 --- a/test/FFNet.cpp +++ b/examples/FFNet.cpp @@ 
-15,6 +15,7 @@ using namespace afml::nn; int main() { + af::info(); const int inputSize = 2; const int hiddenSize = 3; const int outputSize = 1; diff --git a/test/Node.cpp b/examples/Node.cpp similarity index 100% rename from test/Node.cpp rename to examples/Node.cpp diff --git a/test/Weights.cpp b/examples/Weights.cpp similarity index 100% rename from test/Weights.cpp rename to examples/Weights.cpp diff --git a/test/perceptron.cpp b/examples/perceptron.cpp similarity index 100% rename from test/perceptron.cpp rename to examples/perceptron.cpp From dfa8fda802688db5abe6cdb58d186f16b470f058 Mon Sep 17 00:00:00 2001 From: Pavan Yalamanchili Date: Sun, 2 Jul 2017 01:01:04 -0700 Subject: [PATCH 02/20] Reorganizing the include files and namespace --- examples/Activations.cpp | 8 ++++---- examples/FFNet.cpp | 6 +++--- examples/Node.cpp | 4 ++-- examples/Weights.cpp | 4 ++-- examples/perceptron.cpp | 6 +++--- include/{afml.h => af/nn.h} | 7 ++++++- include/{afml/nn.h => af/nn/Activations.hpp} | 11 +++++------ .../{afml => af}/nn/Activations/Activation.hpp | 6 +++--- include/{afml => af}/nn/Activations/ReLU.hpp | 6 +++--- include/{afml => af}/nn/Activations/Sigmoid.hpp | 4 ++-- include/{afml => af}/nn/Activations/Tanh.hpp | 4 ++-- include/{afml => af}/nn/Activations/Threshold.hpp | 4 ++-- include/{afml => af}/nn/Networks.hpp | 2 +- include/{afml => af}/nn/Networks/FFNet.hpp | 10 +++++----- include/{afml => af}/nn/Nodes.hpp | 4 ++-- include/{afml => af}/nn/Nodes/Linear.hpp | 8 ++++---- include/{afml => af}/nn/Nodes/Node.hpp | 6 +++--- include/{afml => af}/nn/Weights.hpp | 4 ++-- include/{afml/util => af/nn}/common.hpp | 2 +- include/afml/nn/Activations.hpp | 15 --------------- 20 files changed, 55 insertions(+), 66 deletions(-) rename include/{afml.h => af/nn.h} (70%) rename include/{afml/nn.h => af/nn/Activations.hpp} (62%) rename include/{afml => af}/nn/Activations/Activation.hpp (93%) rename include/{afml => af}/nn/Activations/ReLU.hpp (79%) rename include/{afml => af}/nn/Activations/Sigmoid.hpp (94%) rename include/{afml => af}/nn/Activations/Tanh.hpp (93%) rename include/{afml => af}/nn/Activations/Threshold.hpp (94%) rename include/{afml => af}/nn/Networks.hpp (90%) rename include/{afml => af}/nn/Networks/FFNet.hpp (95%) rename include/{afml => af}/nn/Nodes.hpp (83%) rename include/{afml => af}/nn/Nodes/Linear.hpp (94%) rename include/{afml => af}/nn/Nodes/Node.hpp (97%) rename include/{afml => af}/nn/Weights.hpp (98%) rename include/{afml/util => af/nn}/common.hpp (97%) delete mode 100644 include/afml/nn/Activations.hpp diff --git a/examples/Activations.cpp b/examples/Activations.cpp index 098c864..594bd5f 100644 --- a/examples/Activations.cpp +++ b/examples/Activations.cpp @@ -7,16 +7,16 @@ * http://arrayfire.com/licenses/BSD-3-Clause ********************************************************/ -#include +#include -using namespace afml::nn; +using namespace af::nn; int main() { const int num = 5; - afml::ArrayVector in = {100 * af::randu(num, 1) - 50}; - afml::ArrayVector grad = {100 * af::randu(num, 1)}; + af::ArrayVector in = {100 * af::randu(num, 1) - 50}; + af::ArrayVector grad = {100 * af::randu(num, 1)}; ReLU r = ReLU(num, 0); Sigmoid s = Sigmoid(num); diff --git a/examples/FFNet.cpp b/examples/FFNet.cpp index 93fb6bf..ec69ed1 100644 --- a/examples/FFNet.cpp +++ b/examples/FFNet.cpp @@ -7,11 +7,11 @@ * http://arrayfire.com/licenses/BSD-3-Clause ********************************************************/ -#include +#include using namespace af; -using namespace afml; -using namespace 
afml::nn; +using namespace af; +using namespace af::nn; int main() { diff --git a/examples/Node.cpp b/examples/Node.cpp index 33a0a90..eb4229a 100644 --- a/examples/Node.cpp +++ b/examples/Node.cpp @@ -7,9 +7,9 @@ * http://arrayfire.com/licenses/BSD-3-Clause ********************************************************/ -#include +#include -using namespace afml::nn; +using namespace af::nn; int main() { diff --git a/examples/Weights.cpp b/examples/Weights.cpp index 8f65895..9e83c06 100644 --- a/examples/Weights.cpp +++ b/examples/Weights.cpp @@ -7,9 +7,9 @@ * http://arrayfire.com/licenses/BSD-3-Clause ********************************************************/ -#include +#include -using namespace afml::nn; +using namespace af::nn; int main() { diff --git a/examples/perceptron.cpp b/examples/perceptron.cpp index 213378c..9975a03 100644 --- a/examples/perceptron.cpp +++ b/examples/perceptron.cpp @@ -7,10 +7,10 @@ * http://arrayfire.com/licenses/BSD-3-Clause ********************************************************/ -#include +#include -using namespace afml; -using namespace afml::nn; +using namespace af; +using namespace af::nn; int main() { diff --git a/include/afml.h b/include/af/nn.h similarity index 70% rename from include/afml.h rename to include/af/nn.h index 279f7aa..8ee2134 100644 --- a/include/afml.h +++ b/include/af/nn.h @@ -8,4 +8,9 @@ ********************************************************/ #pragma once -#include + +#include +#include +#include +#include +#include diff --git a/include/afml/nn.h b/include/af/nn/Activations.hpp similarity index 62% rename from include/afml/nn.h rename to include/af/nn/Activations.hpp index 98a7a4b..4972238 100644 --- a/include/afml/nn.h +++ b/include/af/nn/Activations.hpp @@ -6,11 +6,10 @@ * The complete license agreement can be obtained at: * http://arrayfire.com/licenses/BSD-3-Clause ********************************************************/ - #pragma once -#include -#include -#include -#include -#include +#include +#include +#include +#include +#include diff --git a/include/afml/nn/Activations/Activation.hpp b/include/af/nn/Activations/Activation.hpp similarity index 93% rename from include/afml/nn/Activations/Activation.hpp rename to include/af/nn/Activations/Activation.hpp index 52e0346..d98eaf3 100644 --- a/include/afml/nn/Activations/Activation.hpp +++ b/include/af/nn/Activations/Activation.hpp @@ -8,10 +8,10 @@ ********************************************************/ #pragma once -#include -#include +#include +#include -namespace afml +namespace af { namespace nn diff --git a/include/afml/nn/Activations/ReLU.hpp b/include/af/nn/Activations/ReLU.hpp similarity index 79% rename from include/afml/nn/Activations/ReLU.hpp rename to include/af/nn/Activations/ReLU.hpp index 55b3703..3c47684 100644 --- a/include/afml/nn/Activations/ReLU.hpp +++ b/include/af/nn/Activations/ReLU.hpp @@ -8,10 +8,10 @@ ********************************************************/ #pragma once -#include -#include +#include +#include -namespace afml +namespace af { namespace nn { diff --git a/include/afml/nn/Activations/Sigmoid.hpp b/include/af/nn/Activations/Sigmoid.hpp similarity index 94% rename from include/afml/nn/Activations/Sigmoid.hpp rename to include/af/nn/Activations/Sigmoid.hpp index 6a45cac..1fd31d5 100644 --- a/include/afml/nn/Activations/Sigmoid.hpp +++ b/include/af/nn/Activations/Sigmoid.hpp @@ -8,9 +8,9 @@ ********************************************************/ #pragma once -#include +#include -namespace afml +namespace af { namespace nn { diff --git 
a/include/afml/nn/Activations/Tanh.hpp b/include/af/nn/Activations/Tanh.hpp similarity index 93% rename from include/afml/nn/Activations/Tanh.hpp rename to include/af/nn/Activations/Tanh.hpp index da5e7ef..abbcd07 100644 --- a/include/afml/nn/Activations/Tanh.hpp +++ b/include/af/nn/Activations/Tanh.hpp @@ -8,9 +8,9 @@ ********************************************************/ #pragma once -#include +#include -namespace afml +namespace af { namespace nn { diff --git a/include/afml/nn/Activations/Threshold.hpp b/include/af/nn/Activations/Threshold.hpp similarity index 94% rename from include/afml/nn/Activations/Threshold.hpp rename to include/af/nn/Activations/Threshold.hpp index 75dfc86..bded798 100644 --- a/include/afml/nn/Activations/Threshold.hpp +++ b/include/af/nn/Activations/Threshold.hpp @@ -8,9 +8,9 @@ ********************************************************/ #pragma once -#include +#include -namespace afml +namespace af { namespace nn { diff --git a/include/afml/nn/Networks.hpp b/include/af/nn/Networks.hpp similarity index 90% rename from include/afml/nn/Networks.hpp rename to include/af/nn/Networks.hpp index d6a08ca..2517121 100644 --- a/include/afml/nn/Networks.hpp +++ b/include/af/nn/Networks.hpp @@ -8,4 +8,4 @@ ********************************************************/ #pragma once -#include +#include diff --git a/include/afml/nn/Networks/FFNet.hpp b/include/af/nn/Networks/FFNet.hpp similarity index 95% rename from include/afml/nn/Networks/FFNet.hpp rename to include/af/nn/Networks/FFNet.hpp index 2308c02..1e00f18 100644 --- a/include/afml/nn/Networks/FFNet.hpp +++ b/include/af/nn/Networks/FFNet.hpp @@ -8,12 +8,12 @@ ********************************************************/ #pragma once -#include -#include -#include -#include +#include +#include +#include +#include -namespace afml +namespace af { namespace nn { diff --git a/include/afml/nn/Nodes.hpp b/include/af/nn/Nodes.hpp similarity index 83% rename from include/afml/nn/Nodes.hpp rename to include/af/nn/Nodes.hpp index df6bffc..a4f7c99 100644 --- a/include/afml/nn/Nodes.hpp +++ b/include/af/nn/Nodes.hpp @@ -8,5 +8,5 @@ ********************************************************/ #pragma once -#include -#include +#include +#include diff --git a/include/afml/nn/Nodes/Linear.hpp b/include/af/nn/Nodes/Linear.hpp similarity index 94% rename from include/afml/nn/Nodes/Linear.hpp rename to include/af/nn/Nodes/Linear.hpp index 479ac2b..9ad2c85 100644 --- a/include/afml/nn/Nodes/Linear.hpp +++ b/include/af/nn/Nodes/Linear.hpp @@ -8,11 +8,11 @@ ********************************************************/ #pragma once -#include -#include -#include +#include +#include +#include -namespace afml +namespace af { namespace nn { diff --git a/include/afml/nn/Nodes/Node.hpp b/include/af/nn/Nodes/Node.hpp similarity index 97% rename from include/afml/nn/Nodes/Node.hpp rename to include/af/nn/Nodes/Node.hpp index 4e522d0..ec4eb12 100644 --- a/include/afml/nn/Nodes/Node.hpp +++ b/include/af/nn/Nodes/Node.hpp @@ -8,13 +8,13 @@ ********************************************************/ #pragma once -#include -#include +#include +#include #include #include -namespace afml +namespace af { namespace nn { diff --git a/include/afml/nn/Weights.hpp b/include/af/nn/Weights.hpp similarity index 98% rename from include/afml/nn/Weights.hpp rename to include/af/nn/Weights.hpp index 7250ba7..3efe645 100644 --- a/include/afml/nn/Weights.hpp +++ b/include/af/nn/Weights.hpp @@ -8,9 +8,9 @@ ********************************************************/ #pragma once -#include 
+#include -namespace afml +namespace af { namespace nn { diff --git a/include/afml/util/common.hpp b/include/af/nn/common.hpp similarity index 97% rename from include/afml/util/common.hpp rename to include/af/nn/common.hpp index a9d7123..ae6f9c7 100644 --- a/include/afml/util/common.hpp +++ b/include/af/nn/common.hpp @@ -11,7 +11,7 @@ #include #include -namespace afml +namespace af { const int MAX_NAME_SIZE = 32; diff --git a/include/afml/nn/Activations.hpp b/include/afml/nn/Activations.hpp deleted file mode 100644 index 46d7907..0000000 --- a/include/afml/nn/Activations.hpp +++ /dev/null @@ -1,15 +0,0 @@ -/******************************************************* - * Copyright (c) 2015, ArrayFire - * All rights reserved. - * - * This file is distributed under 3-clause BSD license. - * The complete license agreement can be obtained at: - * http://arrayfire.com/licenses/BSD-3-Clause - ********************************************************/ -#pragma once - -#include -#include -#include -#include -#include From 562b8609a28db082c41155c277d60a85ae267070 Mon Sep 17 00:00:00 2001 From: Pavan Yalamanchili Date: Sun, 2 Jul 2017 11:54:46 -0700 Subject: [PATCH 03/20] First attempt at implementing autograd --- examples/CMakeLists.txt | 1 + examples/autograd.cpp | 44 ++++++++ include/af/autograd.h | 11 ++ include/af/autograd/Functions.hpp | 38 +++++++ include/af/autograd/Grad.hpp | 26 +++++ include/af/autograd/Variable.hpp | 176 ++++++++++++++++++++++++++++++ 6 files changed, 296 insertions(+) create mode 100644 examples/autograd.cpp create mode 100644 include/af/autograd.h create mode 100644 include/af/autograd/Functions.hpp create mode 100644 include/af/autograd/Grad.hpp create mode 100644 include/af/autograd/Variable.hpp diff --git a/examples/CMakeLists.txt b/examples/CMakeLists.txt index fd22342..9a39b82 100644 --- a/examples/CMakeLists.txt +++ b/examples/CMakeLists.txt @@ -20,3 +20,4 @@ build_example(FFNet.cpp) build_example(Node.cpp) build_example(perceptron.cpp) build_example(Weights.cpp) +build_example(autograd.cpp) diff --git a/examples/autograd.cpp b/examples/autograd.cpp new file mode 100644 index 0000000..2f0f037 --- /dev/null +++ b/examples/autograd.cpp @@ -0,0 +1,44 @@ +/******************************************************* + * Copyright (c) 2017, ArrayFire + * All rights reserved. + * + * This file is distributed under 3-clause BSD license. + * The complete license agreement can be obtained at: + * http://arrayfire.com/licenses/BSD-3-Clause + ********************************************************/ + +#include + +using af::autograd::Variable; +using af::autograd::backward; +void test1() +{ + auto x = Variable(af::randu(5)); + af_print(x.getData()); + auto y = x * x; + af_print(y.getData()); + auto dy = Variable(af::constant(1.0, 5)); + backward(y, dy); + af_print(x.getGrad() - 2 * x.getData()); +} + +void test2() +{ + auto x = Variable(af::randu(5)); + af_print(x.getData()); + auto y = Variable(af::randu(5)); + af_print(y.getData()); + auto z = x * x + x * y + y * y; + auto dz = Variable(af::constant(1.0, 5)); + backward(z, dz); + af_print(x.getGrad() - 2 * x.getData() - y.getData()); + af_print(y.getGrad() - 2 * y.getData() - x.getData()); +} + +int main() +{ + af::info(); + test1(); + test2(); + return 0; +} diff --git a/include/af/autograd.h b/include/af/autograd.h new file mode 100644 index 0000000..e85625c --- /dev/null +++ b/include/af/autograd.h @@ -0,0 +1,11 @@ +/******************************************************* + * Copyright (c) 2017, ArrayFire + * All rights reserved. 
+ * + * This file is distributed under 3-clause BSD license. + * The complete license agreement can be obtained at: + * http://arrayfire.com/licenses/BSD-3-Clause + ********************************************************/ +#include +#include +#include diff --git a/include/af/autograd/Functions.hpp b/include/af/autograd/Functions.hpp new file mode 100644 index 0000000..0985ae8 --- /dev/null +++ b/include/af/autograd/Functions.hpp @@ -0,0 +1,38 @@ +/******************************************************* + * Copyright (c) 2017, ArrayFire + * All rights reserved. + * + * This file is distributed under 3-clause BSD license. + * The complete license agreement can be obtained at: + * http://arrayfire.com/licenses/BSD-3-Clause + ********************************************************/ +#pragma once + +#include + +namespace af { + namespace autograd { + + Variable operator +(const Variable lhs, const Variable rhs) + { + auto result = lhs.getData() + rhs.getData(); + auto backward = [](std::vector inputs, Variable grad_output) { + inputs[0].addGrad(grad_output); + inputs[1].addGrad(grad_output); + }; + return Variable(result, {lhs, rhs}, backward); + } + + Variable operator *(const Variable lhs, const Variable rhs) + { + auto result = lhs.getData() * rhs.getData(); + auto backward = [](std::vector inputs, Variable grad_output) { + inputs[0].addGrad(grad_output * inputs[1]); + inputs[1].addGrad(grad_output * inputs[0]); + }; + return Variable(result, {lhs, rhs}, backward); + } + + } + namespace ag = autograd; +} diff --git a/include/af/autograd/Grad.hpp b/include/af/autograd/Grad.hpp new file mode 100644 index 0000000..e1c2114 --- /dev/null +++ b/include/af/autograd/Grad.hpp @@ -0,0 +1,26 @@ +/******************************************************* + * Copyright (c) 2017, ArrayFire + * All rights reserved. + * + * This file is distributed under 3-clause BSD license. + * The complete license agreement can be obtained at: + * http://arrayfire.com/licenses/BSD-3-Clause + ********************************************************/ +#pragma once + +#include + +namespace af { + namespace autograd { + + void backward(Variable var, Variable grad) + { + var.addGrad(grad); + Variable::DAG_t dag = var.build(); + for (auto iter = dag.rbegin(); iter != dag.rend(); iter++) { + iter->backward(); + } + } + } + namespace ag = autograd; +} diff --git a/include/af/autograd/Variable.hpp b/include/af/autograd/Variable.hpp new file mode 100644 index 0000000..6d87250 --- /dev/null +++ b/include/af/autograd/Variable.hpp @@ -0,0 +1,176 @@ +/******************************************************* + * Copyright (c) 2017, ArrayFire + * All rights reserved. + * + * This file is distributed under 3-clause BSD license. 
+ * The complete license agreement can be obtained at: + * http://arrayfire.com/licenses/BSD-3-Clause + ********************************************************/ + +#pragma once + +#include +#include +#include +#include +#include + +#include + +namespace af { + namespace autograd { + class Variable + { + public: + typedef std::function, Variable)> BackwardFunc_t; + typedef std::unordered_map Cache_t; + typedef std::vector DAG_t; + + private: + class Shared { + public: + Shared() : + m_data(), + m_grad(), + m_inputs(), + m_grad_parts(), + m_backward(nullptr) + {} + + Shared(af::array data) : + m_data(data), + m_grad(af::constant(0, data.dims(), data.type())), + m_inputs(), + m_grad_parts(), + m_backward(nullptr) + {} + + Shared(af::array data, std::vector inputs, BackwardFunc_t backward) : + m_data(data), + m_grad(af::constant(0, data.dims(), data.type())), + m_inputs(inputs.begin(), inputs.end()), + m_grad_parts(), + m_backward(backward) + {} + + af::array getData() const + { + return m_data; + } + + af::array getGrad() const + { + return m_grad; + } + + void addGrad(Variable grad) + { + m_grad_parts.push_back(grad); + } + + std::vector getGradParts() + { + return m_grad_parts; + } + + std::vector getInputs() + { + return m_inputs; + } + + void evalGrad() + { + m_grad = m_grad_parts[0].getData(); + for (int i = 1; i < (int)m_grad_parts.size(); i++) { + m_grad += m_grad_parts[i].getData(); + } + af::eval(m_grad); + } + + void backward() + { + this->evalGrad(); + if (m_backward) m_backward(m_inputs, m_grad); + } + + private: + af::array m_data; + af::array m_grad; + std::vector m_inputs; + std::vector m_grad_parts; + BackwardFunc_t m_backward; + }; + + public: + + Variable() : + m_shared(new Shared()) + { + } + + Variable(af::array data) : + m_shared(new Shared(data)) + {} + + Variable(af::array data, + std::vector inputs, + BackwardFunc_t backward) : + m_shared(new Shared(data, inputs, backward)) + {} + + af::array getData() const + { + return m_shared->getData(); + } + + af::array getGrad() const + { + return m_shared->getGrad(); + } + + void addGrad(Variable child_grad) + { + m_shared->addGrad(child_grad); + } + + std::vector getInputs() const + { + return m_shared->getInputs(); + } + + void evalGrad() + { + m_shared->evalGrad(); + } + + void backward() + { + m_shared->backward(); + } + + DAG_t build() + { + Cache_t cache; + DAG_t dag; + this->buildGraph(cache, dag); + return dag; + } + + void buildGraph(Cache_t &cache, DAG_t &dag) + { + std::ptrdiff_t id = (std::ptrdiff_t)m_shared.get(); + if (cache.find(id) != cache.end()) { + return; + } + for (auto input : m_shared->getInputs()) { + input.buildGraph(cache, dag); + } + cache[id] = true; + dag.push_back(*this); + } + private: + std::shared_ptr m_shared; + }; + } + namespace ag = autograd; +} From 3f832a0c4d9c15cdd618151dc4273588cef4d2e6 Mon Sep 17 00:00:00 2001 From: Pavan Yalamanchili Date: Sun, 2 Jul 2017 15:59:59 -0700 Subject: [PATCH 04/20] Store gradients as autograd::Variable instead of af::array --- examples/autograd.cpp | 6 ++-- include/af/autograd/Variable.hpp | 47 +++++++++++++++++--------------- 2 files changed, 28 insertions(+), 25 deletions(-) diff --git a/examples/autograd.cpp b/examples/autograd.cpp index 2f0f037..56763f1 100644 --- a/examples/autograd.cpp +++ b/examples/autograd.cpp @@ -19,7 +19,7 @@ void test1() af_print(y.getData()); auto dy = Variable(af::constant(1.0, 5)); backward(y, dy); - af_print(x.getGrad() - 2 * x.getData()); + af_print(x.getGrad().getData() - 2 * x.getData()); } void test2() @@ -31,8 +31,8 @@ 
void test2() auto z = x * x + x * y + y * y; auto dz = Variable(af::constant(1.0, 5)); backward(z, dz); - af_print(x.getGrad() - 2 * x.getData() - y.getData()); - af_print(y.getGrad() - 2 * y.getData() - x.getData()); + af_print(x.getGrad().getData() - 2 * x.getData() - y.getData()); + af_print(y.getGrad().getData() - 2 * y.getData() - x.getData()); } int main() diff --git a/include/af/autograd/Variable.hpp b/include/af/autograd/Variable.hpp index 6d87250..9b25b86 100644 --- a/include/af/autograd/Variable.hpp +++ b/include/af/autograd/Variable.hpp @@ -14,11 +14,17 @@ #include #include #include +#include #include namespace af { namespace autograd { + + // Forward declare the function + class Variable; + Variable operator +(const Variable lhs, const Variable rhs); + class Variable { public: @@ -31,25 +37,22 @@ namespace af { public: Shared() : m_data(), - m_grad(), m_inputs(), - m_grad_parts(), + m_grads(), m_backward(nullptr) {} Shared(af::array data) : m_data(data), - m_grad(af::constant(0, data.dims(), data.type())), m_inputs(), - m_grad_parts(), + m_grads(), m_backward(nullptr) {} Shared(af::array data, std::vector inputs, BackwardFunc_t backward) : m_data(data), - m_grad(af::constant(0, data.dims(), data.type())), m_inputs(inputs.begin(), inputs.end()), - m_grad_parts(), + m_grads(), m_backward(backward) {} @@ -58,19 +61,17 @@ namespace af { return m_data; } - af::array getGrad() const + Variable getGrad() const { - return m_grad; + if (m_grads.size() == 0) { + throw std::runtime_error("Gradient hasn't been calculated"); + } + return m_grads[0]; } void addGrad(Variable grad) { - m_grad_parts.push_back(grad); - } - - std::vector getGradParts() - { - return m_grad_parts; + m_grads.push_back(grad); } std::vector getInputs() @@ -80,24 +81,26 @@ namespace af { void evalGrad() { - m_grad = m_grad_parts[0].getData(); - for (int i = 1; i < (int)m_grad_parts.size(); i++) { - m_grad += m_grad_parts[i].getData(); + if (m_grads.size() == 1) return; + Variable grad = m_grads[0]; + for (int i = 1; i < (int)m_grads.size(); i++) { + grad = grad + m_grads[i]; } - af::eval(m_grad); + grad.getData().eval(); + m_grads.clear(); + m_grads.push_back(grad); } void backward() { this->evalGrad(); - if (m_backward) m_backward(m_inputs, m_grad); + if (m_backward) m_backward(m_inputs, m_grads[0]); } private: af::array m_data; - af::array m_grad; std::vector m_inputs; - std::vector m_grad_parts; + std::vector m_grads; BackwardFunc_t m_backward; }; @@ -123,7 +126,7 @@ namespace af { return m_shared->getData(); } - af::array getGrad() const + Variable getGrad() const { return m_shared->getGrad(); } From c94ee3d0c2fbb8f6a5b54c713d6d678d2bfff976 Mon Sep 17 00:00:00 2001 From: Pavan Yalamanchili Date: Mon, 3 Jul 2017 22:24:46 -0700 Subject: [PATCH 05/20] Refactor autograd::Variable, option to disable grad calculations - autograd::Variable::Shared now a thin layer without methods - Variable::BackwardFunc_t renamed to Variable::GradFunc_t - Variable::getData renamed to Variable::array - Variable::getGrad renamed to Variable::grad - Variable::backward renamed to Variable::calcGradInputs --- examples/FFNet.cpp | 1 - examples/autograd.cpp | 46 +++++++--- include/af/autograd/Functions.hpp | 12 +-- include/af/autograd/Grad.hpp | 2 +- include/af/autograd/Variable.hpp | 146 ++++++++++++++++-------------- 5 files changed, 118 insertions(+), 89 deletions(-) diff --git a/examples/FFNet.cpp b/examples/FFNet.cpp index ec69ed1..f4afec0 100644 --- a/examples/FFNet.cpp +++ b/examples/FFNet.cpp @@ -9,7 +9,6 @@ #include -using namespace 
af; using namespace af; using namespace af::nn; diff --git a/examples/autograd.cpp b/examples/autograd.cpp index 56763f1..c37c0a4 100644 --- a/examples/autograd.cpp +++ b/examples/autograd.cpp @@ -13,26 +13,47 @@ using af::autograd::Variable; using af::autograd::backward; void test1() { - auto x = Variable(af::randu(5)); - af_print(x.getData()); + auto x = Variable(af::randu(5), true); + af_print(x.array()); auto y = x * x; - af_print(y.getData()); - auto dy = Variable(af::constant(1.0, 5)); + af_print(y.array()); + auto dy = Variable(af::constant(1.0, 5), false); backward(y, dy); - af_print(x.getGrad().getData() - 2 * x.getData()); + auto dx = x.grad(); + af_print(dx.array() - 2 * x.array()); } void test2() { - auto x = Variable(af::randu(5)); - af_print(x.getData()); - auto y = Variable(af::randu(5)); - af_print(y.getData()); + auto x = Variable(af::randu(5), true); + af_print(x.array()); + auto y = Variable(af::randu(5), true); + af_print(y.array()); auto z = x * x + x * y + y * y; - auto dz = Variable(af::constant(1.0, 5)); + auto dz = Variable(af::constant(1.0, 5), false); backward(z, dz); - af_print(x.getGrad().getData() - 2 * x.getData() - y.getData()); - af_print(y.getGrad().getData() - 2 * y.getData() - x.getData()); + auto dx = x.grad(); + auto dy = y.grad(); + af_print(dx.array() - 2 * x.array() - y.array()); + af_print(dy.array() - 2 * y.array() - x.array()); +} + +void test3() +{ + auto x = Variable(af::randu(5), false); + af_print(x.array()); + auto y = Variable(af::randu(5), true); + af_print(y.array()); + auto z = x * x + x * y + y * y; + auto dz = Variable(af::constant(1.0, 5), false); + backward(z, dz); + auto dy = y.grad(); + af_print(dy.array() - 2 * y.array() - x.array()); + try { + auto dx = x.grad(); + } catch(af::exception &ex) { + std::cout << ex.what() << std::endl; + } } int main() @@ -40,5 +61,6 @@ int main() af::info(); test1(); test2(); + test3(); return 0; } diff --git a/include/af/autograd/Functions.hpp b/include/af/autograd/Functions.hpp index 0985ae8..8bb5edb 100644 --- a/include/af/autograd/Functions.hpp +++ b/include/af/autograd/Functions.hpp @@ -15,22 +15,22 @@ namespace af { Variable operator +(const Variable lhs, const Variable rhs) { - auto result = lhs.getData() + rhs.getData(); - auto backward = [](std::vector inputs, Variable grad_output) { + auto result = lhs.array() + rhs.array(); + auto grad_func = [](std::vector inputs, Variable grad_output) { inputs[0].addGrad(grad_output); inputs[1].addGrad(grad_output); }; - return Variable(result, {lhs, rhs}, backward); + return Variable(result, {lhs, rhs}, grad_func); } Variable operator *(const Variable lhs, const Variable rhs) { - auto result = lhs.getData() * rhs.getData(); - auto backward = [](std::vector inputs, Variable grad_output) { + auto result = lhs.array() * rhs.array(); + auto grad_func = [](std::vector inputs, Variable grad_output) { inputs[0].addGrad(grad_output * inputs[1]); inputs[1].addGrad(grad_output * inputs[0]); }; - return Variable(result, {lhs, rhs}, backward); + return Variable(result, {lhs, rhs}, grad_func); } } diff --git a/include/af/autograd/Grad.hpp b/include/af/autograd/Grad.hpp index e1c2114..2e1d4e9 100644 --- a/include/af/autograd/Grad.hpp +++ b/include/af/autograd/Grad.hpp @@ -18,7 +18,7 @@ namespace af { var.addGrad(grad); Variable::DAG_t dag = var.build(); for (auto iter = dag.rbegin(); iter != dag.rend(); iter++) { - iter->backward(); + iter->calcGradInputs(); } } } diff --git a/include/af/autograd/Variable.hpp b/include/af/autograd/Variable.hpp index 9b25b86..143b530 
100644 --- a/include/af/autograd/Variable.hpp +++ b/include/af/autograd/Variable.hpp @@ -28,80 +28,44 @@ namespace af { class Variable { public: - typedef std::function, Variable)> BackwardFunc_t; + typedef std::function, Variable)> GradFunc_t; typedef std::unordered_map Cache_t; typedef std::vector DAG_t; private: - class Shared { - public: + struct Shared { Shared() : + m_calc_grad(true), m_data(), m_inputs(), m_grads(), - m_backward(nullptr) + m_grad_func(nullptr) {} - Shared(af::array data) : + Shared(af::array data, bool calc_grad) : + m_calc_grad(calc_grad), m_data(data), m_inputs(), m_grads(), - m_backward(nullptr) + m_grad_func(nullptr) {} - Shared(af::array data, std::vector inputs, BackwardFunc_t backward) : + Shared(af::array data, + std::vector inputs, + GradFunc_t grad_func, + bool calc_grad) : + m_calc_grad(calc_grad), m_data(data), m_inputs(inputs.begin(), inputs.end()), m_grads(), - m_backward(backward) + m_grad_func(grad_func) {} - af::array getData() const - { - return m_data; - } - - Variable getGrad() const - { - if (m_grads.size() == 0) { - throw std::runtime_error("Gradient hasn't been calculated"); - } - return m_grads[0]; - } - - void addGrad(Variable grad) - { - m_grads.push_back(grad); - } - - std::vector getInputs() - { - return m_inputs; - } - - void evalGrad() - { - if (m_grads.size() == 1) return; - Variable grad = m_grads[0]; - for (int i = 1; i < (int)m_grads.size(); i++) { - grad = grad + m_grads[i]; - } - grad.getData().eval(); - m_grads.clear(); - m_grads.push_back(grad); - } - - void backward() - { - this->evalGrad(); - if (m_backward) m_backward(m_inputs, m_grads[0]); - } - - private: + bool m_calc_grad; af::array m_data; std::vector m_inputs; std::vector m_grads; - BackwardFunc_t m_backward; + GradFunc_t m_grad_func; }; public: @@ -111,62 +75,106 @@ namespace af { { } - Variable(af::array data) : - m_shared(new Shared(data)) + Variable(af::array data, bool calc_grad) : + m_shared(new Shared(data, calc_grad)) {} Variable(af::array data, std::vector inputs, - BackwardFunc_t backward) : - m_shared(new Shared(data, inputs, backward)) - {} + GradFunc_t grad_func) : + m_shared(nullptr) + { + bool calc_grad = false; + for (auto input : inputs) { + calc_grad |= input.isCalcGrad(); + } + if (calc_grad) { + m_shared = std::shared_ptr(new Shared(data, inputs, grad_func, true)); + } else { + m_shared = std::shared_ptr(new Shared(data, false)); + } + } + + af::array array() const + { + return m_shared->m_data; + } - af::array getData() const + Variable grad() const { - return m_shared->getData(); + if (!m_shared->m_calc_grad) { + throw af::exception("Gradient calclation disabled."); + } + if (m_shared->m_grads.size() == 0) { + throw af::exception("Gradient hasn't been calculated yet."); + } + return m_shared->m_grads[0]; } - Variable getGrad() const + bool isCalcGrad() { - return m_shared->getGrad(); + return m_shared->m_calc_grad; + } + + void setCalcGrad(bool calc_grad) + { + m_shared->m_calc_grad = calc_grad; + if (!calc_grad) { + m_shared->m_grad_func = nullptr; + m_shared->m_inputs.clear(); + m_shared->m_grads.clear(); + } } void addGrad(Variable child_grad) { - m_shared->addGrad(child_grad); + if (m_shared->m_calc_grad) { + m_shared->m_grads.push_back(child_grad); + } } std::vector getInputs() const { - return m_shared->getInputs(); + return m_shared->m_inputs; } void evalGrad() { - m_shared->evalGrad(); + // Flag asking not to calculate gradients + if (!m_shared->m_calc_grad) return; + Variable grad = m_shared->m_grads[0]; + for (unsigned i = 1; i < 
m_shared->m_grads.size(); i++) { + grad = grad + m_shared->m_grads[i]; + } + grad.array().eval(); + m_shared->m_grads.clear(); + m_shared->m_grads.push_back(grad); } - void backward() + void calcGradInputs() { - m_shared->backward(); + evalGrad(); + if (m_shared->m_grad_func) { + m_shared->m_grad_func(m_shared->m_inputs, m_shared->m_grads[0]); + } } DAG_t build() { Cache_t cache; DAG_t dag; - this->buildGraph(cache, dag); + this->buildSubGraph(cache, dag); return dag; } - void buildGraph(Cache_t &cache, DAG_t &dag) + void buildSubGraph(Cache_t &cache, DAG_t &dag) { std::ptrdiff_t id = (std::ptrdiff_t)m_shared.get(); if (cache.find(id) != cache.end()) { return; } - for (auto input : m_shared->getInputs()) { - input.buildGraph(cache, dag); + for (auto input : m_shared->m_inputs) { + input.buildSubGraph(cache, dag); } cache[id] = true; dag.push_back(*this); From 7bb0b6c5ba149580f02cb15dbd7a6bc41bdfc47d Mon Sep 17 00:00:00 2001 From: Pavan Yalamanchili Date: Tue, 4 Jul 2017 17:18:39 -0700 Subject: [PATCH 06/20] Changing autograd::backward function to Variable::backward method --- examples/autograd.cpp | 7 +++---- include/af/autograd.h | 1 - include/af/autograd/Grad.hpp | 26 -------------------------- include/af/autograd/Variable.hpp | 9 +++++++++ 4 files changed, 12 insertions(+), 31 deletions(-) delete mode 100644 include/af/autograd/Grad.hpp diff --git a/examples/autograd.cpp b/examples/autograd.cpp index c37c0a4..244c2b0 100644 --- a/examples/autograd.cpp +++ b/examples/autograd.cpp @@ -10,7 +10,6 @@ #include using af::autograd::Variable; -using af::autograd::backward; void test1() { auto x = Variable(af::randu(5), true); @@ -18,7 +17,7 @@ void test1() auto y = x * x; af_print(y.array()); auto dy = Variable(af::constant(1.0, 5), false); - backward(y, dy); + y.backward(dy); auto dx = x.grad(); af_print(dx.array() - 2 * x.array()); } @@ -31,7 +30,7 @@ void test2() af_print(y.array()); auto z = x * x + x * y + y * y; auto dz = Variable(af::constant(1.0, 5), false); - backward(z, dz); + z.backward(dz); auto dx = x.grad(); auto dy = y.grad(); af_print(dx.array() - 2 * x.array() - y.array()); @@ -46,7 +45,7 @@ void test3() af_print(y.array()); auto z = x * x + x * y + y * y; auto dz = Variable(af::constant(1.0, 5), false); - backward(z, dz); + z.backward(dz); auto dy = y.grad(); af_print(dy.array() - 2 * y.array() - x.array()); try { diff --git a/include/af/autograd.h b/include/af/autograd.h index e85625c..ceda2b1 100644 --- a/include/af/autograd.h +++ b/include/af/autograd.h @@ -8,4 +8,3 @@ ********************************************************/ #include #include -#include diff --git a/include/af/autograd/Grad.hpp b/include/af/autograd/Grad.hpp deleted file mode 100644 index 2e1d4e9..0000000 --- a/include/af/autograd/Grad.hpp +++ /dev/null @@ -1,26 +0,0 @@ -/******************************************************* - * Copyright (c) 2017, ArrayFire - * All rights reserved. - * - * This file is distributed under 3-clause BSD license. 
- * The complete license agreement can be obtained at: - * http://arrayfire.com/licenses/BSD-3-Clause - ********************************************************/ -#pragma once - -#include - -namespace af { - namespace autograd { - - void backward(Variable var, Variable grad) - { - var.addGrad(grad); - Variable::DAG_t dag = var.build(); - for (auto iter = dag.rbegin(); iter != dag.rend(); iter++) { - iter->calcGradInputs(); - } - } - } - namespace ag = autograd; -} diff --git a/include/af/autograd/Variable.hpp b/include/af/autograd/Variable.hpp index 143b530..ea8bd4b 100644 --- a/include/af/autograd/Variable.hpp +++ b/include/af/autograd/Variable.hpp @@ -159,6 +159,15 @@ namespace af { } } + void backward(Variable grad) + { + this->addGrad(grad); + DAG_t dag = this->build(); + for (auto iter = dag.rbegin(); iter != dag.rend(); iter++) { + iter->calcGradInputs(); + } + } + DAG_t build() { Cache_t cache; From a316af0727a7aeeadf9018d7c4bc946f766d066a Mon Sep 17 00:00:00 2001 From: Pavan Yalamanchili Date: Tue, 4 Jul 2017 22:11:42 -0700 Subject: [PATCH 07/20] Moving autograd from header only lib to a compiled lib --- CMakeLists.txt | 31 +++++- examples/CMakeLists.txt | 7 +- include/af/autograd/Functions.hpp | 25 +---- include/af/autograd/Variable.hpp | 172 +++++------------------------- src/autograd/Functions.cpp | 37 +++++++ src/autograd/Variable.cpp | 157 +++++++++++++++++++++++++++ 6 files changed, 253 insertions(+), 176 deletions(-) create mode 100644 src/autograd/Functions.cpp create mode 100644 src/autograd/Variable.cpp diff --git a/CMakeLists.txt b/CMakeLists.txt index 104d635..699baf6 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,9 +1,36 @@ -cmake_minimum_required(VERSION 3.5.2) +cmake_minimum_required(VERSION 3.5.1) project(ArrayFireML VERSION 0.1.0 LANGUAGES C CXX) find_package(ArrayFire REQUIRED) -set(ArrayFireML_INCLUDE_DIRS ${CMAKE_CURRENT_SOURCE_DIR}/include) + +add_library(afml SHARED "") + +target_sources(afml + PRIVATE + src/autograd/Variable.cpp + src/autograd/Functions.cpp + ) + +target_include_directories(afml + PUBLIC + ${ArrayFire_INCLUDE_DIRS} + ${CMAKE_CURRENT_SOURCE_DIR}/include + ) + +target_link_libraries(afml + PUBLIC + af + ) + +set_target_properties(afml + PROPERTIES + VERSION "${ArrayFireML_VERSION}" + SOVERSION "${ArrayFireML_VERSION_MAJOR}" + CXX_STANDARD 11 + ) + + add_subdirectory(examples) diff --git a/examples/CMakeLists.txt b/examples/CMakeLists.txt index 9a39b82..6e92389 100644 --- a/examples/CMakeLists.txt +++ b/examples/CMakeLists.txt @@ -2,14 +2,9 @@ function(build_example SRC) get_filename_component(src_name ${SRC} NAME_WE) set(target "${src_name}") add_executable(${target} ${SRC}) - target_include_directories(${target} - PRIVATE - ${ArrayFire_INCLUDE_DIRS} - ${ArrayFireML_INCLUDE_DIRS} - ) target_link_libraries(${target} PRIVATE - af + afml ) target_compile_features(${target} PRIVATE cxx_range_for) diff --git a/include/af/autograd/Functions.hpp b/include/af/autograd/Functions.hpp index 8bb5edb..902a9f6 100644 --- a/include/af/autograd/Functions.hpp +++ b/include/af/autograd/Functions.hpp @@ -8,31 +8,12 @@ ********************************************************/ #pragma once -#include - namespace af { namespace autograd { - Variable operator +(const Variable lhs, const Variable rhs) - { - auto result = lhs.array() + rhs.array(); - auto grad_func = [](std::vector inputs, Variable grad_output) { - inputs[0].addGrad(grad_output); - inputs[1].addGrad(grad_output); - }; - return Variable(result, {lhs, rhs}, grad_func); - } - - Variable operator 
*(const Variable lhs, const Variable rhs) - { - auto result = lhs.array() * rhs.array(); - auto grad_func = [](std::vector inputs, Variable grad_output) { - inputs[0].addGrad(grad_output * inputs[1]); - inputs[1].addGrad(grad_output * inputs[0]); - }; - return Variable(result, {lhs, rhs}, grad_func); - } + class Variable; + Variable operator +(const Variable lhs, const Variable rhs); + Variable operator *(const Variable lhs, const Variable rhs); } - namespace ag = autograd; } diff --git a/include/af/autograd/Variable.hpp b/include/af/autograd/Variable.hpp index ea8bd4b..5d7ca36 100644 --- a/include/af/autograd/Variable.hpp +++ b/include/af/autograd/Variable.hpp @@ -14,17 +14,11 @@ #include #include #include -#include #include namespace af { namespace autograd { - - // Forward declare the function - class Variable; - Variable operator +(const Variable lhs, const Variable rhs); - class Variable { public: @@ -34,32 +28,12 @@ namespace af { private: struct Shared { - Shared() : - m_calc_grad(true), - m_data(), - m_inputs(), - m_grads(), - m_grad_func(nullptr) - {} - - Shared(af::array data, bool calc_grad) : - m_calc_grad(calc_grad), - m_data(data), - m_inputs(), - m_grads(), - m_grad_func(nullptr) - {} - + Shared(); + Shared(af::array data, bool calc_grad); Shared(af::array data, std::vector inputs, GradFunc_t grad_func, - bool calc_grad) : - m_calc_grad(calc_grad), - m_data(data), - m_inputs(inputs.begin(), inputs.end()), - m_grads(), - m_grad_func(grad_func) - {} + bool calc_grad); bool m_calc_grad; af::array m_data; @@ -70,127 +44,33 @@ namespace af { public: - Variable() : - m_shared(new Shared()) - { - } - - Variable(af::array data, bool calc_grad) : - m_shared(new Shared(data, calc_grad)) - {} - + Variable(); + Variable(af::array data, bool calc_grad); Variable(af::array data, std::vector inputs, - GradFunc_t grad_func) : - m_shared(nullptr) - { - bool calc_grad = false; - for (auto input : inputs) { - calc_grad |= input.isCalcGrad(); - } - if (calc_grad) { - m_shared = std::shared_ptr(new Shared(data, inputs, grad_func, true)); - } else { - m_shared = std::shared_ptr(new Shared(data, false)); - } - } - - af::array array() const - { - return m_shared->m_data; - } - - Variable grad() const - { - if (!m_shared->m_calc_grad) { - throw af::exception("Gradient calclation disabled."); - } - if (m_shared->m_grads.size() == 0) { - throw af::exception("Gradient hasn't been calculated yet."); - } - return m_shared->m_grads[0]; - } - - bool isCalcGrad() - { - return m_shared->m_calc_grad; - } - - void setCalcGrad(bool calc_grad) - { - m_shared->m_calc_grad = calc_grad; - if (!calc_grad) { - m_shared->m_grad_func = nullptr; - m_shared->m_inputs.clear(); - m_shared->m_grads.clear(); - } - } - - void addGrad(Variable child_grad) - { - if (m_shared->m_calc_grad) { - m_shared->m_grads.push_back(child_grad); - } - } - - std::vector getInputs() const - { - return m_shared->m_inputs; - } - - void evalGrad() - { - // Flag asking not to calculate gradients - if (!m_shared->m_calc_grad) return; - Variable grad = m_shared->m_grads[0]; - for (unsigned i = 1; i < m_shared->m_grads.size(); i++) { - grad = grad + m_shared->m_grads[i]; - } - grad.array().eval(); - m_shared->m_grads.clear(); - m_shared->m_grads.push_back(grad); - } - - void calcGradInputs() - { - evalGrad(); - if (m_shared->m_grad_func) { - m_shared->m_grad_func(m_shared->m_inputs, m_shared->m_grads[0]); - } - } - - void backward(Variable grad) - { - this->addGrad(grad); - DAG_t dag = this->build(); - for (auto iter = dag.rbegin(); iter != 
dag.rend(); iter++) { - iter->calcGradInputs(); - } - } - - DAG_t build() - { - Cache_t cache; - DAG_t dag; - this->buildSubGraph(cache, dag); - return dag; - } - - void buildSubGraph(Cache_t &cache, DAG_t &dag) - { - std::ptrdiff_t id = (std::ptrdiff_t)m_shared.get(); - if (cache.find(id) != cache.end()) { - return; - } - for (auto input : m_shared->m_inputs) { - input.buildSubGraph(cache, dag); - } - cache[id] = true; - dag.push_back(*this); - } + GradFunc_t grad_func); + + af::array array() const; + + Variable grad() const; + + bool isCalcGrad(); + + void setCalcGrad(bool calc_grad); + + void addGrad(Variable child_grad); + + void evalGrad(); + + void calcGradInputs(); + + void backward(Variable grad); + + DAG_t build(); + + void buildSubGraph(Cache_t &cache, DAG_t &dag); private: std::shared_ptr m_shared; }; } - namespace ag = autograd; } diff --git a/src/autograd/Functions.cpp b/src/autograd/Functions.cpp new file mode 100644 index 0000000..d24a78d --- /dev/null +++ b/src/autograd/Functions.cpp @@ -0,0 +1,37 @@ +/******************************************************* + * Copyright (c) 2017, ArrayFire + * All rights reserved. + * + * This file is distributed under 3-clause BSD license. + * The complete license agreement can be obtained at: + * http://arrayfire.com/licenses/BSD-3-Clause + ********************************************************/ + +#include +#include + +namespace af { + namespace autograd { + + Variable operator +(const Variable lhs, const Variable rhs) + { + auto result = lhs.array() + rhs.array(); + auto grad_func = [](std::vector inputs, Variable grad_output) { + inputs[0].addGrad(grad_output); + inputs[1].addGrad(grad_output); + }; + return Variable(result, {lhs, rhs}, grad_func); + } + + Variable operator *(const Variable lhs, const Variable rhs) + { + auto result = lhs.array() * rhs.array(); + auto grad_func = [](std::vector inputs, Variable grad_output) { + inputs[0].addGrad(grad_output * inputs[1]); + inputs[1].addGrad(grad_output * inputs[0]); + }; + return Variable(result, {lhs, rhs}, grad_func); + } + + } +} diff --git a/src/autograd/Variable.cpp b/src/autograd/Variable.cpp new file mode 100644 index 0000000..118e199 --- /dev/null +++ b/src/autograd/Variable.cpp @@ -0,0 +1,157 @@ +/******************************************************* + * Copyright (c) 2017, ArrayFire + * All rights reserved. + * + * This file is distributed under 3-clause BSD license. 
+ * The complete license agreement can be obtained at: + * http://arrayfire.com/licenses/BSD-3-Clause + ********************************************************/ + +#include +#include + +namespace af { + namespace autograd { + + Variable::Shared::Shared() : + m_calc_grad(true), + m_data(), + m_inputs(), + m_grads(), + m_grad_func(nullptr) + {} + + Variable::Shared::Shared(af::array data, bool calc_grad) : + m_calc_grad(calc_grad), + m_data(data), + m_inputs(), + m_grads(), + m_grad_func(nullptr) + {} + + Variable::Shared::Shared(af::array data, + std::vector inputs, + GradFunc_t grad_func, + bool calc_grad) : + m_calc_grad(calc_grad), + m_data(data), + m_inputs(inputs.begin(), inputs.end()), + m_grads(), + m_grad_func(grad_func) + {} + + Variable::Variable() : + m_shared(new Shared()) + { + } + + Variable::Variable(af::array data, bool calc_grad) : + m_shared(new Shared(data, calc_grad)) + {} + + Variable::Variable(af::array data, + std::vector inputs, + GradFunc_t grad_func) : + m_shared(nullptr) + { + bool calc_grad = false; + for (auto input : inputs) { + calc_grad |= input.isCalcGrad(); + } + if (calc_grad) { + m_shared = std::shared_ptr(new Shared(data, inputs, grad_func, true)); + } else { + m_shared = std::shared_ptr(new Shared(data, false)); + } + } + + af::array Variable::array() const + { + return m_shared->m_data; + } + + Variable Variable::grad() const + { + if (!m_shared->m_calc_grad) { + throw af::exception("Gradient calclation disabled."); + } + if (m_shared->m_grads.size() == 0) { + throw af::exception("Gradient hasn't been calculated yet."); + } + return m_shared->m_grads[0]; + } + + bool Variable::isCalcGrad() + { + return m_shared->m_calc_grad; + } + + void Variable::setCalcGrad(bool calc_grad) + { + m_shared->m_calc_grad = calc_grad; + if (!calc_grad) { + m_shared->m_grad_func = nullptr; + m_shared->m_inputs.clear(); + m_shared->m_grads.clear(); + } + } + + void Variable::addGrad(Variable child_grad) + { + if (m_shared->m_calc_grad) { + m_shared->m_grads.push_back(child_grad); + } + } + + void Variable::evalGrad() + { + // Flag asking not to calculate gradients + if (!m_shared->m_calc_grad) return; + Variable grad = m_shared->m_grads[0]; + for (unsigned i = 1; i < m_shared->m_grads.size(); i++) { + grad = grad + m_shared->m_grads[i]; + } + grad.array().eval(); + m_shared->m_grads.clear(); + m_shared->m_grads.push_back(grad); + } + + void Variable::calcGradInputs() + { + evalGrad(); + if (m_shared->m_grad_func) { + m_shared->m_grad_func(m_shared->m_inputs, m_shared->m_grads[0]); + } + } + + void Variable::backward(Variable grad) + { + this->addGrad(grad); + Variable::DAG_t dag = this->build(); + for (auto iter = dag.rbegin(); iter != dag.rend(); iter++) { + iter->calcGradInputs(); + } + } + + Variable::DAG_t Variable::build() + { + Cache_t cache; + Variable::DAG_t dag; + this->buildSubGraph(cache, dag); + return dag; + } + + void Variable::buildSubGraph(Cache_t &cache, Variable::DAG_t &dag) + { + std::ptrdiff_t id = (std::ptrdiff_t)m_shared.get(); + if (cache.find(id) != cache.end()) { + return; + } + for (auto input : m_shared->m_inputs) { + input.buildSubGraph(cache, dag); + } + cache[id] = true; + dag.push_back(*this); + } + } +} From d7edafc6e896331bee7982deb6d297f6783c61c9 Mon Sep 17 00:00:00 2001 From: Pavan Yalamanchili Date: Tue, 4 Jul 2017 23:36:53 -0700 Subject: [PATCH 08/20] Adding negate, reciprocal, subtract and divide --- examples/autograd.cpp | 62 +++++++++++++++++++++++-------- include/af/autograd/Functions.hpp | 9 ++++- include/af/autograd/Variable.hpp | 
20 +++++----- src/autograd/Functions.cpp | 49 ++++++++++++++++++++++-- src/autograd/Variable.cpp | 36 ++++++++++-------- 5 files changed, 128 insertions(+), 48 deletions(-) diff --git a/examples/autograd.cpp b/examples/autograd.cpp index 244c2b0..40b1dc9 100644 --- a/examples/autograd.cpp +++ b/examples/autograd.cpp @@ -9,57 +9,87 @@ #include <af/autograd.h> +#define VERIFY(VAL) do { \ + auto res = af::allTrue<bool>(af::abs(VAL) < 1E-5); \ + printf("%s:%d %s\n", __FUNCTION__, __LINE__, \ + res ? "PASS" : "FAIL"); \ + } while(0) + using af::autograd::Variable; -void test1() +void test_multiply() { auto x = Variable(af::randu(5), true); - af_print(x.array()); auto y = x * x; - af_print(y.array()); auto dy = Variable(af::constant(1.0, 5), false); y.backward(dy); auto dx = x.grad(); - af_print(dx.array() - 2 * x.array()); + VERIFY(dx.array() - 2 * x.array()); } -void test2() +void test_multipl_add() { auto x = Variable(af::randu(5), true); - af_print(x.array()); auto y = Variable(af::randu(5), true); - af_print(y.array()); auto z = x * x + x * y + y * y; auto dz = Variable(af::constant(1.0, 5), false); z.backward(dz); auto dx = x.grad(); auto dy = y.grad(); - af_print(dx.array() - 2 * x.array() - y.array()); - af_print(dy.array() - 2 * y.array() - x.array()); + VERIFY(dx.array() - 2 * x.array() - y.array()); + VERIFY(dy.array() - 2 * y.array() - x.array()); } -void test3() +void test_no_calc_grad() { auto x = Variable(af::randu(5), false); - af_print(x.array()); auto y = Variable(af::randu(5), true); - af_print(y.array()); auto z = x * x + x * y + y * y; auto dz = Variable(af::constant(1.0, 5), false); z.backward(dz); auto dy = y.grad(); - af_print(dy.array() - 2 * y.array() - x.array()); + VERIFY(dy.array() - 2 * y.array() - x.array()); try { auto dx = x.grad(); } catch(af::exception &ex) { std::cout << ex.what() << std::endl; + return; } + printf("%s:%d FAIL (expected exception was not thrown)\n", __FUNCTION__, __LINE__); +} + +void test_multiply_sub() +{ + auto x = Variable(af::randu(5), true); + auto y = Variable(af::randu(5), true); + auto z = x * x - x * y; + auto dz = Variable(af::constant(1.0, 5), false); + z.backward(dz); + auto dx = x.grad(); + auto dy = y.grad(); + VERIFY(dx.array() - (2 * x.array() - y.array())); + VERIFY(dy.array() - (-x.array())); +} + +void test_divide_add() +{ + auto x = Variable(af::randu(5), true); + auto y = Variable(af::randu(5), true); + auto z = x + x / y + y; + auto dz = Variable(af::constant(1.0, 5), false); + z.backward(dz); + auto dx = x.grad(); + auto dy = y.grad(); + VERIFY(dx.array() - (1.0 + 1.0 / y.array())); + VERIFY(dy.array() - (1.0 - x.array() / (y.array() * y.array()))); } int main() { af::info(); - test1(); - test2(); - test3(); + test_multiply(); + test_multipl_add(); + test_no_calc_grad(); + test_multiply_sub(); + test_divide_add(); return 0; } diff --git a/include/af/autograd/Functions.hpp b/include/af/autograd/Functions.hpp index 902a9f6..2fa493b 100644 --- a/include/af/autograd/Functions.hpp +++ b/include/af/autograd/Functions.hpp @@ -13,7 +13,12 @@ namespace af { class Variable; - Variable operator +(const Variable lhs, const Variable rhs); - Variable operator *(const Variable lhs, const Variable rhs); + Variable operator +(const Variable &lhs, const Variable &rhs); + Variable operator *(const Variable &lhs, const Variable &rhs); + Variable operator -(const Variable &lhs, const Variable &rhs); + Variable operator /(const Variable &lhs, const Variable &rhs); + + Variable negate(const Variable &input); + Variable reciprocal(const Variable &input); } } diff --git a/include/af/autograd/Variable.hpp 
b/include/af/autograd/Variable.hpp index 5d7ca36..74dd61e 100644 --- a/include/af/autograd/Variable.hpp +++ b/include/af/autograd/Variable.hpp @@ -22,16 +22,16 @@ namespace af { class Variable { public: - typedef std::function, Variable)> GradFunc_t; + typedef std::function &, const Variable &)> GradFunc_t; typedef std::unordered_map Cache_t; typedef std::vector DAG_t; private: struct Shared { Shared(); - Shared(af::array data, bool calc_grad); - Shared(af::array data, - std::vector inputs, + Shared(const af::array &data, bool calc_grad); + Shared(const af::array &data, + const std::vector &inputs, GradFunc_t grad_func, bool calc_grad); @@ -45,26 +45,26 @@ namespace af { public: Variable(); - Variable(af::array data, bool calc_grad); - Variable(af::array data, - std::vector inputs, + Variable(const af::array &data, bool calc_grad); + Variable(const af::array &data, + const std::vector &inputs, GradFunc_t grad_func); af::array array() const; Variable grad() const; - bool isCalcGrad(); + bool isCalcGrad() const; void setCalcGrad(bool calc_grad); - void addGrad(Variable child_grad); + void addGrad(const Variable &child_grad); void evalGrad(); void calcGradInputs(); - void backward(Variable grad); + void backward(const Variable &grad); DAG_t build(); diff --git a/src/autograd/Functions.cpp b/src/autograd/Functions.cpp index d24a78d..e6692ad 100644 --- a/src/autograd/Functions.cpp +++ b/src/autograd/Functions.cpp @@ -13,25 +13,66 @@ namespace af { namespace autograd { - Variable operator +(const Variable lhs, const Variable rhs) + Variable operator +(const Variable &lhs, const Variable &rhs) { auto result = lhs.array() + rhs.array(); - auto grad_func = [](std::vector inputs, Variable grad_output) { + auto grad_func = [](std::vector &inputs, const Variable &grad_output) { inputs[0].addGrad(grad_output); inputs[1].addGrad(grad_output); }; return Variable(result, {lhs, rhs}, grad_func); } - Variable operator *(const Variable lhs, const Variable rhs) + Variable operator -(const Variable &lhs, const Variable &rhs) + { + auto result = lhs.array() - rhs.array(); + auto grad_func = [](std::vector &inputs, const Variable &grad_output) { + inputs[0].addGrad(grad_output); + inputs[1].addGrad(negate(grad_output)); + }; + return Variable(result, {lhs, rhs}, grad_func); + } + + Variable operator *(const Variable &lhs, const Variable &rhs) { auto result = lhs.array() * rhs.array(); - auto grad_func = [](std::vector inputs, Variable grad_output) { + auto grad_func = [](std::vector &inputs, const Variable &grad_output) { inputs[0].addGrad(grad_output * inputs[1]); inputs[1].addGrad(grad_output * inputs[0]); }; return Variable(result, {lhs, rhs}, grad_func); } + Variable operator /(const Variable &lhs, const Variable &rhs) + { + auto result = lhs.array() / rhs.array(); + auto grad_func = [](std::vector &inputs, const Variable &grad_output) { + auto inputs_1_rec = reciprocal(inputs[1]); + auto grad_input_0 = grad_output * inputs_1_rec; + inputs[0].addGrad(grad_input_0); + inputs[1].addGrad(grad_input_0 * negate(inputs[0]) * inputs_1_rec); + }; + return Variable(result, {lhs, rhs}, grad_func); + } + + Variable negate(const Variable &input) + { + auto result = 0.0 - input.array(); + auto grad_func = [](std::vector &inputs, const Variable &grad_output) { + inputs[0].addGrad(negate(grad_output)); + }; + return Variable(result, {input}, grad_func); + } + + Variable reciprocal(const Variable &input) + { + auto result = 1.0 / input.array(); + auto grad_func = [](std::vector &inputs, const Variable &grad_output) { + auto 
res = reciprocal(inputs[0]); + inputs[0].addGrad(negate(grad_output) * res * res); + }; + return Variable(result, {input}, grad_func); + } + } } diff --git a/src/autograd/Variable.cpp b/src/autograd/Variable.cpp index 118e199..3a7a41b 100644 --- a/src/autograd/Variable.cpp +++ b/src/autograd/Variable.cpp @@ -21,7 +21,7 @@ namespace af { m_grad_func(nullptr) {} - Variable::Shared::Shared(af::array data, bool calc_grad) : + Variable::Shared::Shared(const af::array &data, bool calc_grad) : m_calc_grad(calc_grad), m_data(data), m_inputs(), @@ -29,8 +29,8 @@ namespace af { m_grad_func(nullptr) {} - Variable::Shared::Shared(af::array data, - std::vector<Variable> inputs, + Variable::Shared::Shared(const af::array &data, + const std::vector<Variable> &inputs, GradFunc_t grad_func, bool calc_grad) : m_calc_grad(calc_grad), @@ -45,13 +45,13 @@ namespace af { { } - Variable::Variable(af::array data, bool calc_grad) : + Variable::Variable(const af::array &data, bool calc_grad) : m_shared(new Shared(data, calc_grad)) {} - Variable::Variable(af::array data, - std::vector<Variable> inputs, - GradFunc_t grad_func) : + Variable::Variable(const af::array &data, + const std::vector<Variable> &inputs, + GradFunc_t grad_func) : m_shared(nullptr) { bool calc_grad = false; @@ -81,7 +81,7 @@ namespace af { return m_shared->m_grads[0]; } - bool Variable::isCalcGrad() + bool Variable::isCalcGrad() const { return m_shared->m_calc_grad; } @@ -96,7 +96,7 @@ namespace af { } } - void Variable::addGrad(Variable child_grad) + void Variable::addGrad(const Variable &child_grad) { if (m_shared->m_calc_grad) { m_shared->m_grads.push_back(child_grad); @@ -107,13 +107,17 @@ { // Flag asking not to calculate gradients if (!m_shared->m_calc_grad) return; - Variable grad = m_shared->m_grads[0]; - for (unsigned i = 1; i < m_shared->m_grads.size(); i++) { - grad = grad + m_shared->m_grads[i]; + + // Best not to evaluate the JIT immediately if there's only a single gradient + if (m_shared->m_grads.size() > 1) { + Variable grad = m_shared->m_grads[0]; + for (unsigned i = 1; i < m_shared->m_grads.size(); i++) { + grad = grad + m_shared->m_grads[i]; + } + grad.array().eval(); + m_shared->m_grads.clear(); + m_shared->m_grads.push_back(grad); } - grad.array().eval(); - m_shared->m_grads.clear(); - m_shared->m_grads.push_back(grad); } void Variable::calcGradInputs() @@ -124,7 +128,7 @@ namespace af { } } - void Variable::backward(Variable grad) + void Variable::backward(const Variable &grad) { this->addGrad(grad); Variable::DAG_t dag = this->build(); From 664cf7c3ea5a2fd1f609e095fef38493103c29f9 Mon Sep 17 00:00:00 2001 From: Pavan Yalamanchili Date: Tue, 4 Jul 2017 23:48:31 -0700 Subject: [PATCH 09/20] Add scalar support for operators --- examples/autograd.cpp | 14 ++++++++++++++ include/af/autograd/Functions.hpp | 10 ++++++++++ src/autograd/Functions.cpp | 24 ++++++++++++++++++++++++ 3 files changed, 48 insertions(+) diff --git a/examples/autograd.cpp b/examples/autograd.cpp index 40b1dc9..2286233 100644 --- a/examples/autograd.cpp +++ b/examples/autograd.cpp @@ -83,6 +83,19 @@ void test_divide_add() VERIFY(dy.array() - (1.0 - x.array() / (y.array() * y.array()))); } +void test_multiply_add_scalar() +{ + auto x = Variable(af::randu(5), true); + auto y = Variable(af::randu(5), true); + auto z = 2 * x + x * y + y; + auto dz = Variable(af::constant(1.0, 5), false); + z.backward(dz); + auto dx = x.grad(); + auto dy = y.grad(); + VERIFY(dx.array() - (2.0 + y.array())); + VERIFY(dy.array() - (1.0 + x.array())); +} + int main() { af::info(); @@ -91,5 +104,6 @@ int
main() test_no_calc_grad(); test_multiply_sub(); test_divide_add(); + test_multiply_add_scalar(); return 0; } diff --git a/include/af/autograd/Functions.hpp b/include/af/autograd/Functions.hpp index 2fa493b..d4203cd 100644 --- a/include/af/autograd/Functions.hpp +++ b/include/af/autograd/Functions.hpp @@ -18,6 +18,16 @@ namespace af { Variable operator -(const Variable &lhs, const Variable &rhs); Variable operator /(const Variable &lhs, const Variable &rhs); + Variable operator +(const double &lhs, const Variable &rhs); + Variable operator *(const double &lhs, const Variable &rhs); + Variable operator -(const double &lhs, const Variable &rhs); + Variable operator /(const double &lhs, const Variable &rhs); + + Variable operator +(const Variable &lhs, const double &rhs); + Variable operator *(const Variable &lhs, const double &rhs); + Variable operator -(const Variable &lhs, const double &rhs); + Variable operator /(const Variable &lhs, const double &rhs); + Variable negate(const Variable &input); Variable reciprocal(const Variable &input); } diff --git a/src/autograd/Functions.cpp b/src/autograd/Functions.cpp index e6692ad..55c3d33 100644 --- a/src/autograd/Functions.cpp +++ b/src/autograd/Functions.cpp @@ -74,5 +74,29 @@ namespace af { return Variable(result, {input}, grad_func); } + +#define INSTANTIATE_OPERATOR(OP) \ + Variable operator OP(const double &lhs_val, const Variable &rhs) \ + { \ + auto lhs = Variable( \ + af::constant(lhs_val, \ + rhs.array().dims(), \ + rhs.array().type()), \ + false); \ + return lhs OP rhs; \ + } \ + Variable operator OP(const Variable &lhs, const double &rhs_val) \ + { \ + auto rhs = Variable( \ + af::constant(rhs_val, \ + lhs.array().dims(), lhs.array().type()), \ + false); \ + return lhs OP rhs; \ + } \ + + INSTANTIATE_OPERATOR(+) + INSTANTIATE_OPERATOR(-) + INSTANTIATE_OPERATOR(*) + INSTANTIATE_OPERATOR(/) } } From 45b21da2b4327fb596395b681350d066c88b34bb Mon Sep 17 00:00:00 2001 From: Pavan Yalamanchili Date: Wed, 5 Jul 2017 00:08:25 -0700 Subject: [PATCH 10/20] Adding exp, sin, cos, tanh, and sigmoid functions --- examples/autograd.cpp | 35 ++++++++++++ include/af/autograd/Functions.hpp | 6 +++ src/autograd/Functions.cpp | 89 ++++++++++++++++++++++++------- 3 files changed, 110 insertions(+), 20 deletions(-) diff --git a/examples/autograd.cpp b/examples/autograd.cpp index 2286233..8b96bca 100644 --- a/examples/autograd.cpp +++ b/examples/autograd.cpp @@ -96,6 +96,38 @@ void test_multiply_add_scalar() VERIFY(dy.array() - (1.0 + x.array())); } +void test_exp() +{ + auto x = Variable(af::randu(5), true); + auto y = exp(x); + auto dy = Variable(af::constant(1.0, 5), false); + y.backward(dy); + auto dx = x.grad(); + VERIFY(dx.array() - (af::exp(x.array()))); +} + +void test_sigmoid() +{ + auto x = Variable(af::randu(5), true); + auto y = sigmoid(x); + auto dy = Variable(af::constant(1.0, 5), false); + y.backward(dy); + auto dx = x.grad(); + VERIFY(dx.array() - (y.array() * (1 - y.array()))); + VERIFY(dx.array() - (af::sigmoid(x.array()) * (1 - af::sigmoid(x.array())))); +} + +void test_tanh() +{ + auto x = Variable(af::randu(5), true); + auto y = tanh(x); + auto dy = Variable(af::constant(1.0, 5), false); + y.backward(dy); + auto dx = x.grad(); + VERIFY(dx.array() - (1 - y.array() * y.array())); + VERIFY(dx.array() - (1 + af::tanh(x.array())) * (1 - af::tanh(x.array()))); +} + int main() { af::info(); @@ -105,5 +137,8 @@ int main() test_multiply_sub(); test_divide_add(); test_multiply_add_scalar(); + test_exp(); + test_sigmoid(); + test_tanh(); return 0; } 
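The three tests above check the chain rule against the usual closed-form derivatives: d/dx exp(x) = exp(x), d/dx sigmoid(x) = sigmoid(x) * (1 - sigmoid(x)), and d/dx tanh(x) = 1 - tanh(x)^2. The short sketch below shows how the pieces added in PATCH 09 and PATCH 10 compose for a user of the library; it is illustrative only and not part of the patch, and it assumes an umbrella header such as <af/autograd.h> exposes Variable and the functions declared above.

    // Sketch: for y = sigmoid(2 * x + 1) the expected gradient is dy/dx = 2 * y * (1 - y).
    // Uses the scalar operator overloads (PATCH 09) and sigmoid() (PATCH 10).
    #include <arrayfire.h>
    #include <af/autograd.h>   // assumed umbrella header for af::autograd

    using af::autograd::Variable;

    void sigmoid_affine_gradient()
    {
        auto x  = Variable(af::randu(5), true);
        auto y  = sigmoid(2.0 * x + 1.0);
        auto dy = Variable(af::constant(1.0, 5), false);
        y.backward(dy);
        // Should print values that are numerically zero.
        af_print(x.grad().array() - 2.0 * y.array() * (1.0 - y.array()));
    }

Because each operator and each of the new functions records its own grad_func, the chain rule for the composed expression falls out of backward() without any extra bookkeeping.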
diff --git a/include/af/autograd/Functions.hpp b/include/af/autograd/Functions.hpp index d4203cd..de45495 100644 --- a/include/af/autograd/Functions.hpp +++ b/include/af/autograd/Functions.hpp @@ -30,5 +30,11 @@ namespace af { Variable negate(const Variable &input); Variable reciprocal(const Variable &input); + + Variable exp(const Variable &input); + Variable sin(const Variable &input); + Variable cos(const Variable &input); + Variable tanh(const Variable &input); + Variable sigmoid(const Variable &input); } } diff --git a/src/autograd/Functions.cpp b/src/autograd/Functions.cpp index 55c3d33..8b7d876 100644 --- a/src/autograd/Functions.cpp +++ b/src/autograd/Functions.cpp @@ -55,26 +55,6 @@ namespace af { return Variable(result, {lhs, rhs}, grad_func); } - Variable negate(const Variable &input) - { - auto result = 0.0 - input.array(); - auto grad_func = [](std::vector &inputs, const Variable &grad_output) { - inputs[0].addGrad(negate(grad_output)); - }; - return Variable(result, {input}, grad_func); - } - - Variable reciprocal(const Variable &input) - { - auto result = 1.0 / input.array(); - auto grad_func = [](std::vector &inputs, const Variable &grad_output) { - auto res = reciprocal(inputs[0]); - inputs[0].addGrad(negate(grad_output) * res * res); - }; - return Variable(result, {input}, grad_func); - } - - #define INSTANTIATE_OPERATOR(OP) \ Variable operator OP(const double &lhs_val, const Variable &rhs) \ { \ @@ -98,5 +78,74 @@ namespace af { INSTANTIATE_OPERATOR(-) INSTANTIATE_OPERATOR(*) INSTANTIATE_OPERATOR(/) + +#undef INSTANTIATE_OPERATOR + + Variable negate(const Variable &input) + { + auto result = 0.0 - input.array(); + auto grad_func = [](std::vector &inputs, const Variable &grad_output) { + inputs[0].addGrad(negate(grad_output)); + }; + return Variable(result, {input}, grad_func); + } + + Variable reciprocal(const Variable &input) + { + auto result = 1.0 / input.array(); + auto grad_func = [](std::vector &inputs, const Variable &grad_output) { + auto res = reciprocal(inputs[0]); + inputs[0].addGrad(negate(grad_output) * res * res); + }; + return Variable(result, {input}, grad_func); + } + + Variable exp(const Variable &input) + { + auto result = exp(input.array()); + auto grad_func = [](std::vector &inputs, const Variable &grad_output) { + inputs[0].addGrad(exp(inputs[0])); + }; + return Variable(result, {input}, grad_func); + } + + Variable sin(const Variable &input) + { + auto result = sin(input.array()); + auto grad_func = [](std::vector &inputs, const Variable &grad_output) { + inputs[0].addGrad(cos(inputs[0])); + }; + return Variable(result, {input}, grad_func); + } + + Variable cos(const Variable &input) + { + auto result = cos(input.array()); + auto grad_func = [](std::vector &inputs, const Variable &grad_output) { + inputs[0].addGrad(negate(sin(inputs[0]))); + }; + return Variable(result, {input}, grad_func); + } + + Variable tanh(const Variable &input) + { + auto result = tanh(input.array()); + auto grad_func = [](std::vector &inputs, const Variable &grad_output) { + auto tmp = tanh(inputs[0]); + inputs[0].addGrad(1.0 - tmp * tmp); + }; + return Variable(result, {input}, grad_func); + } + + Variable sigmoid(const Variable &input) + { + auto result = sigmoid(input.array()); + auto grad_func = [](std::vector &inputs, const Variable &grad_output) { + auto tmp = sigmoid(inputs[0]); + inputs[0].addGrad(tmp * (1 - tmp)); + }; + return Variable(result, {input}, grad_func); + } + } } From 3b985b0670dc5711cbe002496236abc850182528 Mon Sep 17 00:00:00 2001 From: Pavan 
Yalamanchili Date: Wed, 5 Jul 2017 00:34:36 -0700 Subject: [PATCH 11/20] Adding expandAs, reduceAs, and transpose --- examples/autograd.cpp | 28 +++++++++++++++++++++++ include/af/autograd/Functions.hpp | 4 ++++ src/autograd/Functions.cpp | 37 +++++++++++++++++++++++++++++++ 3 files changed, 69 insertions(+) diff --git a/examples/autograd.cpp b/examples/autograd.cpp index 8b96bca..702992b 100644 --- a/examples/autograd.cpp +++ b/examples/autograd.cpp @@ -128,6 +128,32 @@ void test_tanh() VERIFY(dx.array() - (1 + af::tanh(x.array())) * (1 - af::tanh(x.array()))); } +void test_expand() +{ + auto x = Variable(af::randu(5), true); + auto y = Variable(af::randu(5, 2), true); + auto z = y * expandAs(x, y); + auto dz = Variable(af::constant(1.0, 5, 2), false); + z.backward(dz); + auto dy = y.grad(); + auto dx = x.grad(); + VERIFY(dy.array() - af::tile(x.array(), 1, 2)); + VERIFY(dx.array() - af::sum(y.array(), 1)); +} + +void test_reduce() +{ + auto x = Variable(af::randu(5), true); + auto y = Variable(af::randu(5, 2), true); + auto z = x * reduceAs(y, x); + auto dz = Variable(af::constant(1.0, 5), false); + z.backward(dz); + auto dy = y.grad(); + auto dx = x.grad(); + VERIFY(dy.array() - af::tile(x.array(), 1, 2)); + VERIFY(dx.array() - af::sum(y.array(), 1)); +} + int main() { af::info(); @@ -140,5 +166,7 @@ int main() test_exp(); test_sigmoid(); test_tanh(); + test_expand(); + test_reduce(); return 0; } diff --git a/include/af/autograd/Functions.hpp b/include/af/autograd/Functions.hpp index de45495..49b0954 100644 --- a/include/af/autograd/Functions.hpp +++ b/include/af/autograd/Functions.hpp @@ -36,5 +36,9 @@ namespace af { Variable cos(const Variable &input); Variable tanh(const Variable &input); Variable sigmoid(const Variable &input); + + Variable transpose(const Variable &input); + Variable expandAs(const Variable &input, const Variable &reference); + Variable reduceAs(const Variable &input, const Variable &reference); } } diff --git a/src/autograd/Functions.cpp b/src/autograd/Functions.cpp index 8b7d876..653a16d 100644 --- a/src/autograd/Functions.cpp +++ b/src/autograd/Functions.cpp @@ -147,5 +147,42 @@ namespace af { return Variable(result, {input}, grad_func); } + Variable transpose(const Variable &input) + { + auto result = transpose(input.array()); + auto grad_func = [](std::vector &inputs, const Variable &grad_output) { + inputs[0].addGrad(transpose(grad_output)); + }; + return Variable(result, {input}, grad_func); + } + + Variable expandAs(const Variable &input, const Variable &reference) + { + dim4 dims(1,1,1,1); + dim4 idims = input.array().dims(); + dim4 rdims = reference.array().dims(); + for (int i = 0; i < 4; i++) { + dims[i] = rdims[i] / idims[i]; + } + auto result = tile(input.array(), dims); + auto grad_func = [](std::vector &inputs, const Variable &grad_output) { + inputs[0].addGrad(reduceAs(grad_output, inputs[0])); + }; + return Variable(result, {input}, grad_func); + } + + Variable reduceAs(const Variable &input, const Variable &reference) + { + dim4 idims = input.array().dims(); + dim4 rdims = reference.array().dims(); + auto result = input.array(); + for (int i = 0; i < 4; i++) { + if (idims[i] != rdims[i]) result = sum(result, i); + } + auto grad_func = [](std::vector &inputs, const Variable &grad_output) { + inputs[0].addGrad(expandAs(grad_output, inputs[0])); + }; + return Variable(result, {input}, grad_func); + } } } From 9b052736e7d8a450cc9c2c2a299695786a89bc25 Mon Sep 17 00:00:00 2001 From: Pavan Yalamanchili Date: Wed, 5 Jul 2017 01:07:10 -0700 Subject: 
[PATCH 12/20] Adding matmul, matmulTN, and matmulNT functions --- include/af/autograd/Functions.hpp | 4 ++ src/autograd/Functions.cpp | 63 +++++++++++++++++++++++++++++++ 2 files changed, 67 insertions(+) diff --git a/include/af/autograd/Functions.hpp b/include/af/autograd/Functions.hpp index 49b0954..17a190e 100644 --- a/include/af/autograd/Functions.hpp +++ b/include/af/autograd/Functions.hpp @@ -40,5 +40,9 @@ namespace af { Variable transpose(const Variable &input); Variable expandAs(const Variable &input, const Variable &reference); Variable reduceAs(const Variable &input, const Variable &reference); + + Variable matmul(const Variable &lhs, const Variable &rhs); + Variable matmulTN(const Variable &lhs, const Variable &rhs); + Variable matmulNT(const Variable &lhs, const Variable &rhs); } } diff --git a/src/autograd/Functions.cpp b/src/autograd/Functions.cpp index 653a16d..fcabaaa 100644 --- a/src/autograd/Functions.cpp +++ b/src/autograd/Functions.cpp @@ -184,5 +184,68 @@ namespace af { }; return Variable(result, {input}, grad_func); } + + Variable matmul(const Variable &lhs, const Variable &rhs) + { + // lhs:Input[0] -- [M, N] + // rhs:Input[1] -- [N, K] + //matmul(lhs, rhs) + // -- matmul([M, N], [N, K]) -- [M, K] + // result:grad_output -- [M, K] + auto result = matmul(lhs.array(), rhs.array()); + auto grad_func = [](std::vector &inputs, const Variable &grad_output) { + // matmulNT(grad_output, inputs[1]) + // -- matmulNT([M, K], [N, K]) + // -- matmul([M, K], [K, N]) -- [M, K] + inputs[0].addGrad(matmulNT(grad_output, inputs[1])); + // matmulTN(inputs[0], grad_output) + // -- matmulTN([M, N], [M, K]) + // -- matmul([N, M], [M, K]) -- [N, K] + inputs[1].addGrad(matmulTN(inputs[0], grad_output)); + }; + return Variable(result, {lhs, rhs}, grad_func); + } + + Variable matmulTN(const Variable &lhs, const Variable &rhs) + { + // lhs:Input[0] -- [N, M] + // rhs:Input[1] -- [N, K] + // matmulTN(lhs, rhs) + // -- matmulTN([N, M], [N, K]) + // -- matmul([M, N], [N, K]) -- [M, K] + // result:grad_output -- [M, K] + auto result = matmulTN(lhs.array(), rhs.array()); + auto grad_func = [](std::vector &inputs, const Variable &grad_output) { + // matmulNT(inputs[1], grad_output) + // -- matmulNT([N, K], [M, K]) + // -- matmul([N, K], [K, M]) -- [N, M] + inputs[0].addGrad(matmulNT(inputs[1], grad_output)); + // matmul(inputs[0], grad_output) + // -- matmulNT([N, M], [M, K]) -- [N, K] + inputs[1].addGrad(matmul(inputs[0], grad_output)); + }; + return Variable(result, {lhs, rhs}, grad_func); + } + + Variable matmulNT(const Variable &lhs, const Variable &rhs) + { + // lhs:Input[0] -- [M, N] + // rhs:Input[1] -- [K, N] + // matmulNT(lhs, rhs) + // -- matmulNT([M, N], [K, N]) + // -- matmul([M, N], [N, K]) -- [M, K] + // result:grad_output -- [M, K] + auto result = matmulNT(lhs.array(), rhs.array()); + auto grad_func = [](std::vector &inputs, const Variable &grad_output) { + // matmul(grad_output, inputs[1]) + // -- matmul([M, K], [K, N]) -- [M, N] + inputs[0].addGrad(matmul(grad_output, inputs[1])); + // matmulTN(grad_output, inputs[0]) + // -- matmulTN([M, K], [M, N]) + // -- matmul([K, M], [M, N]) -- [K, N] + inputs[1].addGrad(matmulTN(grad_output, inputs[0])); + }; + return Variable(result, {lhs, rhs}, grad_func); + } } } From 49b8917f272577b60d6a1f0e778e3c3867ee3e82 Mon Sep 17 00:00:00 2001 From: Pavan Yalamanchili Date: Wed, 5 Jul 2017 01:22:52 -0700 Subject: [PATCH 13/20] Add option to explicitly request higher order gradients. 
- Disabled by default - can be enabled by passing true as second argument to backward --- include/af/autograd/Variable.hpp | 11 +++++------ src/autograd/Variable.cpp | 23 ++++++++++++++++------- 2 files changed, 21 insertions(+), 13 deletions(-) diff --git a/include/af/autograd/Variable.hpp b/include/af/autograd/Variable.hpp index 74dd61e..e4a14b7 100644 --- a/include/af/autograd/Variable.hpp +++ b/include/af/autograd/Variable.hpp @@ -60,16 +60,15 @@ namespace af { void addGrad(const Variable &child_grad); - void evalGrad(); + void calcGradInputs(bool retain_grad_graph = false); - void calcGradInputs(); - - void backward(const Variable &grad); - - DAG_t build(); + void backward(const Variable &grad, bool retain_grad_graph = false); void buildSubGraph(Cache_t &cache, DAG_t &dag); private: + void evalGrad(bool retain_grad_graph = false); + + DAG_t build(); std::shared_ptr m_shared; }; } diff --git a/src/autograd/Variable.cpp b/src/autograd/Variable.cpp index 3a7a41b..a50443a 100644 --- a/src/autograd/Variable.cpp +++ b/src/autograd/Variable.cpp @@ -103,24 +103,33 @@ namespace af { } } - void Variable::evalGrad() + void Variable::evalGrad(bool retain_grad_graph) { // Flag asking not to calculate gradients if (!m_shared->m_calc_grad) return; // Best not to evaluate the JIT immediately if theres only a single gradient + Variable grad = m_shared->m_grads[0]; if (m_shared->m_grads.size() > 1) { - Variable grad = m_shared->m_grads[0]; for (unsigned i = 1; i < m_shared->m_grads.size(); i++) { grad = grad + m_shared->m_grads[i]; } grad.array().eval(); - m_shared->m_grads.clear(); - m_shared->m_grads.push_back(grad); + m_shared->m_grads.resize(1); + } + + // Remove the graph if not needed + if (!retain_grad_graph) { + // This can be done by extracting af::array and ignoring everything else + auto grad_data = grad.array(); + // Since there's no graph leading this, set calc_grad to false + grad = Variable(grad_data, false); } + + m_shared->m_grads[0] = grad; } - void Variable::calcGradInputs() + void Variable::calcGradInputs(bool retain_grad_graph) { evalGrad(); if (m_shared->m_grad_func) { @@ -128,12 +137,12 @@ namespace af { } } - void Variable::backward(const Variable &grad) + void Variable::backward(const Variable &grad, bool retain_grad_graph) { this->addGrad(grad); Variable::DAG_t dag = this->build(); for (auto iter = dag.rbegin(); iter != dag.rend(); iter++) { - iter->calcGradInputs(); + iter->calcGradInputs(retain_grad_graph); } } From 8bf7f1bb3896b0a5f02d1c9236dba0bf340ea76e Mon Sep 17 00:00:00 2001 From: Pavan Yalamanchili Date: Wed, 5 Jul 2017 19:28:18 -0700 Subject: [PATCH 14/20] Convert Variable::build and Variable::buildSubGraph to static functions --- include/af/autograd/Variable.hpp | 11 +++++++++-- src/autograd/Variable.cpp | 28 +++++++++++++++++++--------- 2 files changed, 28 insertions(+), 11 deletions(-) diff --git a/include/af/autograd/Variable.hpp b/include/af/autograd/Variable.hpp index e4a14b7..3c7d176 100644 --- a/include/af/autograd/Variable.hpp +++ b/include/af/autograd/Variable.hpp @@ -54,6 +54,8 @@ namespace af { Variable grad() const; + std::ptrdiff_t id() const; + bool isCalcGrad() const; void setCalcGrad(bool calc_grad); @@ -64,11 +66,16 @@ namespace af { void backward(const Variable &grad, bool retain_grad_graph = false); - void buildSubGraph(Cache_t &cache, DAG_t &dag); + private: void evalGrad(bool retain_grad_graph = false); - DAG_t build(); + std::vector getInputs() const; + + static void buildSubGraph(Cache_t &cache, DAG_t &dag, const Variable &var); + + static 
DAG_t build(const Variable &var); + std::shared_ptr m_shared; }; } diff --git a/src/autograd/Variable.cpp b/src/autograd/Variable.cpp index a50443a..df6f9e5 100644 --- a/src/autograd/Variable.cpp +++ b/src/autograd/Variable.cpp @@ -81,6 +81,16 @@ namespace af { return m_shared->m_grads[0]; } + std::ptrdiff_t Variable::id() const + { + return (std::ptrdiff_t)m_shared.get(); + } + + std::vector Variable::getInputs() const + { + return m_shared->m_inputs; + } + bool Variable::isCalcGrad() const { return m_shared->m_calc_grad; @@ -140,31 +150,31 @@ namespace af { void Variable::backward(const Variable &grad, bool retain_grad_graph) { this->addGrad(grad); - Variable::DAG_t dag = this->build(); + Variable::DAG_t dag = Variable::build(*this); for (auto iter = dag.rbegin(); iter != dag.rend(); iter++) { iter->calcGradInputs(retain_grad_graph); } } - Variable::DAG_t Variable::build() + Variable::DAG_t Variable::build(const Variable &var) { Cache_t cache; - Variable::DAG_t dag; - this->buildSubGraph(cache, dag); + Variable::DAG_t dag; + Variable::buildSubGraph(cache, dag, var); return dag; } - void Variable::buildSubGraph(Cache_t &cache, Variable::DAG_t &dag) + void Variable::buildSubGraph(Cache_t &cache, Variable::DAG_t &dag, const Variable &var) { - std::ptrdiff_t id = (std::ptrdiff_t)m_shared.get(); + std::ptrdiff_t id = var.id(); if (cache.find(id) != cache.end()) { return; } - for (auto input : m_shared->m_inputs) { - input.buildSubGraph(cache, dag); + for (auto input : var.getInputs()) { + Variable::buildSubGraph(cache, dag, input); } cache[id] = true; - dag.push_back(*this); + dag.push_back(var); } } } From 5eda60030805bf43755a53a0f7ac79dbec44a03f Mon Sep 17 00:00:00 2001 From: Pavan Yalamanchili Date: Wed, 5 Jul 2017 23:33:57 -0700 Subject: [PATCH 15/20] Overhaul of af::nn to use af::autograd - Implemented baseclass nn::Module - Added basic modules: nn::Linear, nn::Sigmoid, nn:Tanh - Added container modules: nn:Container, nn:Sequential - Deleted unnecessary examples, cleaned up perceptron.cpp --- CMakeLists.txt | 7 +- examples/Activations.cpp | 33 ---- examples/CMakeLists.txt | 8 +- examples/FFNet.cpp | 57 ------- examples/Node.cpp | 21 --- examples/Weights.cpp | 20 --- examples/perceptron.cpp | 43 ++++-- include/af/autograd/Variable.hpp | 6 +- include/af/nn.h | 9 +- include/af/nn/Activations.hpp | 15 -- include/af/nn/Activations/Activation.hpp | 54 ------- include/af/nn/Activations/Sigmoid.hpp | 43 ------ include/af/nn/Activations/Tanh.hpp | 40 ----- include/af/nn/Activations/Threshold.hpp | 42 ----- include/af/nn/{Networks.hpp => Modules.hpp} | 7 +- include/af/nn/Modules/Activations.hpp | 34 ++++ include/af/nn/Modules/Container.hpp | 57 +++++++ include/af/nn/Modules/Linear.hpp | 31 ++++ include/af/nn/Modules/Module.hpp | 41 +++++ include/af/nn/Networks/FFNet.hpp | 104 ------------- include/af/nn/Nodes.hpp | 12 -- include/af/nn/Nodes/Linear.hpp | 68 -------- include/af/nn/Nodes/Node.hpp | 115 -------------- .../af/nn/{Activations/ReLU.hpp => Types.hpp} | 18 ++- include/af/nn/Weights.hpp | 146 ------------------ include/af/nn/common.hpp | 20 --- src/autograd/Variable.cpp | 13 +- src/nn/Modules/Activations.cpp | 33 ++++ src/nn/Modules/Container.cpp | 42 +++++ src/nn/Modules/Linear.cpp | 59 +++++++ src/nn/Modules/Module.cpp | 48 ++++++ src/nn/Types.cpp | 36 +++++ 32 files changed, 452 insertions(+), 830 deletions(-) delete mode 100644 examples/Activations.cpp delete mode 100644 examples/FFNet.cpp delete mode 100644 examples/Node.cpp delete mode 100644 examples/Weights.cpp delete mode 100644 
include/af/nn/Activations.hpp delete mode 100644 include/af/nn/Activations/Activation.hpp delete mode 100644 include/af/nn/Activations/Sigmoid.hpp delete mode 100644 include/af/nn/Activations/Tanh.hpp delete mode 100644 include/af/nn/Activations/Threshold.hpp rename include/af/nn/{Networks.hpp => Modules.hpp} (62%) create mode 100644 include/af/nn/Modules/Activations.hpp create mode 100644 include/af/nn/Modules/Container.hpp create mode 100644 include/af/nn/Modules/Linear.hpp create mode 100644 include/af/nn/Modules/Module.hpp delete mode 100644 include/af/nn/Networks/FFNet.hpp delete mode 100644 include/af/nn/Nodes.hpp delete mode 100644 include/af/nn/Nodes/Linear.hpp delete mode 100644 include/af/nn/Nodes/Node.hpp rename include/af/nn/{Activations/ReLU.hpp => Types.hpp} (50%) delete mode 100644 include/af/nn/Weights.hpp delete mode 100644 include/af/nn/common.hpp create mode 100644 src/nn/Modules/Activations.cpp create mode 100644 src/nn/Modules/Container.cpp create mode 100644 src/nn/Modules/Linear.cpp create mode 100644 src/nn/Modules/Module.cpp create mode 100644 src/nn/Types.cpp diff --git a/CMakeLists.txt b/CMakeLists.txt index 699baf6..c84783d 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -10,8 +10,13 @@ add_library(afml SHARED "") target_sources(afml PRIVATE - src/autograd/Variable.cpp src/autograd/Functions.cpp + src/autograd/Variable.cpp + src/nn/Modules/Activations.cpp + src/nn/Modules/Container.cpp + src/nn/Modules/Linear.cpp + src/nn/Modules/Module.cpp + src/nn/Types.cpp ) target_include_directories(afml diff --git a/examples/Activations.cpp b/examples/Activations.cpp deleted file mode 100644 index 594bd5f..0000000 --- a/examples/Activations.cpp +++ /dev/null @@ -1,33 +0,0 @@ -/******************************************************* - * Copyright (c) 2015, ArrayFire - * All rights reserved. - * - * This file is distributed under 3-clause BSD license. - * The complete license agreement can be obtained at: - * http://arrayfire.com/licenses/BSD-3-Clause - ********************************************************/ - -#include - -using namespace af::nn; - -int main() -{ - const int num = 5; - - af::ArrayVector in = {100 * af::randu(num, 1) - 50}; - af::ArrayVector grad = {100 * af::randu(num, 1)}; - - ReLU r = ReLU(num, 0); - Sigmoid s = Sigmoid(num); - Tanh t = Tanh(num); - - af_print(in[0]); - af_print(r.forward(in)[0]); - af_print(s.forward(in)[0]); - af_print(t.forward(in)[0]); - - af_print(r.backward(in, grad)[0]); - af_print(s.backward(in, grad)[0]); - af_print(t.backward(in, grad)[0]); -} diff --git a/examples/CMakeLists.txt b/examples/CMakeLists.txt index 6e92389..b1e2404 100644 --- a/examples/CMakeLists.txt +++ b/examples/CMakeLists.txt @@ -10,9 +10,9 @@ function(build_example SRC) PRIVATE cxx_range_for) endfunction(build_example) -build_example(Activations.cpp) -build_example(FFNet.cpp) -build_example(Node.cpp) +# build_example(Activations.cpp) +# build_example(FFNet.cpp) +# build_example(Node.cpp) build_example(perceptron.cpp) -build_example(Weights.cpp) +# build_example(Weights.cpp) build_example(autograd.cpp) diff --git a/examples/FFNet.cpp b/examples/FFNet.cpp deleted file mode 100644 index f4afec0..0000000 --- a/examples/FFNet.cpp +++ /dev/null @@ -1,57 +0,0 @@ -/******************************************************* - * Copyright (c) 2015, ArrayFire - * All rights reserved. - * - * This file is distributed under 3-clause BSD license. 
- * The complete license agreement can be obtained at: - * http://arrayfire.com/licenses/BSD-3-Clause - ********************************************************/ - -#include - -using namespace af; -using namespace af::nn; - -int main() -{ - af::info(); - const int inputSize = 2; - const int hiddenSize = 3; - const int outputSize = 1; - const int numSamples = 4; - const double lr = 0.8; - - float hInput[] = {1, 1, - 0, 0, - 0, 1, - 1, 0}; - - float hOutput[] = {0, - 0, - 1, - 1}; - - af::array in(inputSize, numSamples, hInput); - af::array out(outputSize, numSamples, hOutput); - - - FFNet network(inputSize); - network.addLinearNode(hiddenSize, 5).addActivationNode(); - network.addLinearNode(outputSize, 5).addActivationNode(); - - for (int i = 0; i < 1000; i++) { - - ArrayVector data = network.forward({in}); - double err = af::norm(data[0] - out); - - data[0] = out - data[0]; - - if ((i + 1) % 100 == 0) { - printf("Error at iteration(%d) : %2.10lf\n", i + 1, err); - } - network.backward({in}, data); - network.update(lr); - } - - af_print(af::round(network.forward({in})[0])); -} diff --git a/examples/Node.cpp b/examples/Node.cpp deleted file mode 100644 index eb4229a..0000000 --- a/examples/Node.cpp +++ /dev/null @@ -1,21 +0,0 @@ -/******************************************************* - * Copyright (c) 2015, ArrayFire - * All rights reserved. - * - * This file is distributed under 3-clause BSD license. - * The complete license agreement can be obtained at: - * http://arrayfire.com/licenses/BSD-3-Clause - ********************************************************/ - -#include - -using namespace af::nn; - -int main() -{ - int inSize = 10; - int outSize = 2; - - Node n(1, &inSize, 1, &outSize, "test"); - n.info(); -} diff --git a/examples/Weights.cpp b/examples/Weights.cpp deleted file mode 100644 index 9e83c06..0000000 --- a/examples/Weights.cpp +++ /dev/null @@ -1,20 +0,0 @@ -/******************************************************* - * Copyright (c) 2015, ArrayFire - * All rights reserved. - * - * This file is distributed under 3-clause BSD license. 
- * The complete license agreement can be obtained at: - * http://arrayfire.com/licenses/BSD-3-Clause - ********************************************************/ - -#include - -using namespace af::nn; - -int main() -{ - Weights w(10, 1, 0.05); - af_print(w); - - return 0; -} diff --git a/examples/perceptron.cpp b/examples/perceptron.cpp index 9975a03..799c52a 100644 --- a/examples/perceptron.cpp +++ b/examples/perceptron.cpp @@ -7,17 +7,19 @@ * http://arrayfire.com/licenses/BSD-3-Clause ********************************************************/ +#include #include using namespace af; using namespace af::nn; +using namespace af::autograd; int main() { const int inputSize = 2; const int outputSize = 1; const int numSamples = 4; - const double lr = 10; + const double lr = 0.005; float hInput[] = {1, 1, 0, 0, @@ -29,29 +31,36 @@ int main() 1, 1}; - af::array in(inputSize, numSamples, hInput); - af::array out(outputSize, numSamples, hOutput); + auto in = af::array(inputSize, numSamples, hInput); + auto out = af::array(outputSize, numSamples, hOutput); - std::vector perceptron; - perceptron.emplace_back(new LinearNode(inputSize, outputSize, 10)); - perceptron.emplace_back(new Sigmoid(inputSize)); + nn::Sequential perceptron; + perceptron.add(nn::Linear(inputSize, outputSize)); + perceptron.add(nn::Sigmoid()); + + Variable result; for (int i = 0; i < 10; i++) { - ArrayVector data = {in}; - std::vector inputs(2); - for (int n = 0; n < 2; n++) { - inputs[n] = data; - data = perceptron[n]->forward(data); - } + // Forward propagation + result = perceptron.forward(nn::input(in)); - data[0] = out - data[0]; + // Calculate loss + // TODO: Use loss function + af::array diff = out - result.array(); + printf("Error at iteration(%d) : %lf\n", i + 1, af::max(af::abs(diff))); - printf("Error at iteration(%d) : %lf\n", i + 1, af::sum(af::abs(data[0])) / numSamples); + // Backward propagation + auto d_result = Variable(diff, false); + result.backward(d_result); - for (int n = 1; n >= 0; n--) { - data = perceptron[n]->backward(inputs[n], data); - perceptron[n]->update(lr); + // Update parameters + // TODO: Should use optimizer + for (auto param : perceptron.parameters()) { + param.array() += lr * param.grad().array(); + param.array().eval(); } } + af_print(result.array()); + return 0; } diff --git a/include/af/autograd/Variable.hpp b/include/af/autograd/Variable.hpp index 3c7d176..8ce10e2 100644 --- a/include/af/autograd/Variable.hpp +++ b/include/af/autograd/Variable.hpp @@ -50,7 +50,7 @@ namespace af { const std::vector &inputs, GradFunc_t grad_func); - af::array array() const; + af::array& array() const; Variable grad() const; @@ -58,6 +58,10 @@ namespace af { bool isCalcGrad() const; + bool isGradAvailable() const; + + void zeroGrad(); + void setCalcGrad(bool calc_grad); void addGrad(const Variable &child_grad); diff --git a/include/af/nn.h b/include/af/nn.h index 8ee2134..88333fc 100644 --- a/include/af/nn.h +++ b/include/af/nn.h @@ -1,5 +1,5 @@ /******************************************************* - * Copyright (c) 2015, ArrayFire + * Copyright (c) 2017, ArrayFire * All rights reserved. * * This file is distributed under 3-clause BSD license. 
@@ -9,8 +9,5 @@ #pragma once -#include -#include -#include -#include -#include +#include +#include diff --git a/include/af/nn/Activations.hpp b/include/af/nn/Activations.hpp deleted file mode 100644 index 4972238..0000000 --- a/include/af/nn/Activations.hpp +++ /dev/null @@ -1,15 +0,0 @@ -/******************************************************* - * Copyright (c) 2015, ArrayFire - * All rights reserved. - * - * This file is distributed under 3-clause BSD license. - * The complete license agreement can be obtained at: - * http://arrayfire.com/licenses/BSD-3-Clause - ********************************************************/ -#pragma once - -#include -#include -#include -#include -#include diff --git a/include/af/nn/Activations/Activation.hpp b/include/af/nn/Activations/Activation.hpp deleted file mode 100644 index d98eaf3..0000000 --- a/include/af/nn/Activations/Activation.hpp +++ /dev/null @@ -1,54 +0,0 @@ -/******************************************************* - * Copyright (c) 2015, ArrayFire - * All rights reserved. - * - * This file is distributed under 3-clause BSD license. - * The complete license agreement can be obtained at: - * http://arrayfire.com/licenses/BSD-3-Clause - ********************************************************/ -#pragma once - -#include -#include - -namespace af -{ - - namespace nn - { - class ActivationNode : public Node - { - protected: - - virtual af::array fn(const af::array &val) - { - return val; - } - - virtual af::array dfn(const af::array &val) - { - return af::constant(1, val.dims()); - } - - public: - - ActivationNode(int size, const char *name="none") : - Node(1, &size, 1, &size, name) - { - } - - ArrayVector forward(const ArrayVector &input) - { - return { fn(input[0]) }; - } - - ArrayVector backward(const ArrayVector &input, - const ArrayVector &gradOutput) - { - return { gradOutput[0] * dfn(input[0]) }; - } - }; - - typedef ActivationNode Activation; - } -} diff --git a/include/af/nn/Activations/Sigmoid.hpp b/include/af/nn/Activations/Sigmoid.hpp deleted file mode 100644 index 1fd31d5..0000000 --- a/include/af/nn/Activations/Sigmoid.hpp +++ /dev/null @@ -1,43 +0,0 @@ -/******************************************************* - * Copyright (c) 2015, ArrayFire - * All rights reserved. - * - * This file is distributed under 3-clause BSD license. - * The complete license agreement can be obtained at: - * http://arrayfire.com/licenses/BSD-3-Clause - ********************************************************/ -#pragma once - -#include - -namespace af -{ - namespace nn - { - class SigmoidNode : public ActivationNode - { - private: - - af::array fn(const af::array &input) - { - // TODO: replace with af::sigmoid - return 1 / (1 + af::exp(-input)); - } - - af::array dfn(const af::array &input) - { - af::array output = fn(input); - return output * (1 - output); - } - - public: - - SigmoidNode(int size, const char *name="none") : - ActivationNode(size, name) - { - } - }; - - typedef SigmoidNode Sigmoid; - } -} diff --git a/include/af/nn/Activations/Tanh.hpp b/include/af/nn/Activations/Tanh.hpp deleted file mode 100644 index abbcd07..0000000 --- a/include/af/nn/Activations/Tanh.hpp +++ /dev/null @@ -1,40 +0,0 @@ -/******************************************************* - * Copyright (c) 2015, ArrayFire - * All rights reserved. - * - * This file is distributed under 3-clause BSD license. 
- * The complete license agreement can be obtained at: - * http://arrayfire.com/licenses/BSD-3-Clause - ********************************************************/ -#pragma once - -#include - -namespace af -{ - namespace nn - { - class TanhNode : public ActivationNode - { - private: - - af::array fn(const af::array &input) - { - return af::tanh(input); - } - - af::array dfn(const af::array &input) - { - af::array output = fn(input); - return (1 - output * output); - } - public: - TanhNode(int size, const char *name="none") : - ActivationNode(size, name) - { - } - }; - - typedef TanhNode Tanh; - } -} diff --git a/include/af/nn/Activations/Threshold.hpp b/include/af/nn/Activations/Threshold.hpp deleted file mode 100644 index bded798..0000000 --- a/include/af/nn/Activations/Threshold.hpp +++ /dev/null @@ -1,42 +0,0 @@ -/******************************************************* - * Copyright (c) 2015, ArrayFire - * All rights reserved. - * - * This file is distributed under 3-clause BSD license. - * The complete license agreement can be obtained at: - * http://arrayfire.com/licenses/BSD-3-Clause - ********************************************************/ -#pragma once - -#include - -namespace af -{ - namespace nn - { - class ThresholdNode : public ActivationNode - { - private: - float mVal; - - af::array fn(const af::array &input) - { - af::array cond = (input >= mVal); - return (cond) * input + (1 - cond) * mVal; - } - - af::array dfn(const af::array &input) - { - return (input >= mVal).as(input.type()); - } - public: - ThresholdNode(int size, float val, const char *name="none") : - ActivationNode(size, name), - mVal(val) - { - } - }; - - typedef ThresholdNode Threshold; - } -} diff --git a/include/af/nn/Networks.hpp b/include/af/nn/Modules.hpp similarity index 62% rename from include/af/nn/Networks.hpp rename to include/af/nn/Modules.hpp index 2517121..310e4e9 100644 --- a/include/af/nn/Networks.hpp +++ b/include/af/nn/Modules.hpp @@ -1,5 +1,5 @@ /******************************************************* - * Copyright (c) 2015, ArrayFire + * Copyright (c) 2017, ArrayFire * All rights reserved. * * This file is distributed under 3-clause BSD license. @@ -8,4 +8,7 @@ ********************************************************/ #pragma once -#include +#include +#include +#include +#include diff --git a/include/af/nn/Modules/Activations.hpp b/include/af/nn/Modules/Activations.hpp new file mode 100644 index 0000000..1530cd9 --- /dev/null +++ b/include/af/nn/Modules/Activations.hpp @@ -0,0 +1,34 @@ +/******************************************************* + * Copyright (c) 2017, ArrayFire + * All rights reserved. + * + * This file is distributed under 3-clause BSD license. + * The complete license agreement can be obtained at: + * http://arrayfire.com/licenses/BSD-3-Clause + ********************************************************/ +#pragma once + +#include +#include + +namespace af +{ + namespace nn + { + class Sigmoid : public Module + { + public: + Sigmoid(); + + autograd::Variable forward(const autograd::Variable &input); + }; + + class Tanh : public Module + { + public: + Tanh(); + + autograd::Variable forward(const autograd::Variable &input); + }; + } +} diff --git a/include/af/nn/Modules/Container.hpp b/include/af/nn/Modules/Container.hpp new file mode 100644 index 0000000..2ee8c0e --- /dev/null +++ b/include/af/nn/Modules/Container.hpp @@ -0,0 +1,57 @@ +/******************************************************* + * Copyright (c) 2017, ArrayFire + * All rights reserved. 
+ * + * This file is distributed under 3-clause BSD license. + * The complete license agreement can be obtained at: + * http://arrayfire.com/licenses/BSD-3-Clause + ********************************************************/ +#pragma once + +#include +#include +#include + +namespace af +{ + namespace nn + { + + typedef std::shared_ptr ModulePtr; + + class Container : public Module + { + protected: + + std::vector m_modules; + + Container(); + + public: + + template + void add(T module) + { + m_modules.emplace_back(new T(module)); + for (auto param : module.parameters()) { + m_parameters.push_back(param); + } + } + + ModulePtr get(int id); + + std::vector modules(); + + virtual autograd::Variable forward(const autograd::Variable &input) = 0; + }; + + class Sequential : public Container + { + public: + + Sequential(); + + virtual autograd::Variable forward(const autograd::Variable &input); + }; + } +} diff --git a/include/af/nn/Modules/Linear.hpp b/include/af/nn/Modules/Linear.hpp new file mode 100644 index 0000000..f7a1ecc --- /dev/null +++ b/include/af/nn/Modules/Linear.hpp @@ -0,0 +1,31 @@ +/******************************************************* + * Copyright (c) 2017, ArrayFire + * All rights reserved. + * + * This file is distributed under 3-clause BSD license. + * The complete license agreement can be obtained at: + * http://arrayfire.com/licenses/BSD-3-Clause + ********************************************************/ +#pragma once + +#include + +namespace af +{ + namespace nn + { + class Linear : public Module + { + private: + bool m_bias; + public: + Linear(int input_size, int output_size, bool bias = true, float spread = 0.05); + + Linear(const autograd::Variable &w); + + Linear(const autograd::Variable &w, const autograd::Variable &b); + + autograd::Variable forward(const autograd::Variable &input); + }; + } +} diff --git a/include/af/nn/Modules/Module.hpp b/include/af/nn/Modules/Module.hpp new file mode 100644 index 0000000..a35db1f --- /dev/null +++ b/include/af/nn/Modules/Module.hpp @@ -0,0 +1,41 @@ +/******************************************************* + * Copyright (c) 2017, ArrayFire + * All rights reserved. + * + * This file is distributed under 3-clause BSD license. + * The complete license agreement can be obtained at: + * http://arrayfire.com/licenses/BSD-3-Clause + ********************************************************/ +#pragma once + +#include +#include + +#include + +namespace af +{ + namespace nn + { + + class Module + { + protected: + std::vector m_parameters; + + Module(); + + Module(const std::vector ¶meters); + + void setParams(const std::vector ¶meters); + + public: + + std::vector parameters(); + + void zeroGrad(); + + virtual autograd::Variable forward(const autograd::Variable &input) = 0; + }; + } +} diff --git a/include/af/nn/Networks/FFNet.hpp b/include/af/nn/Networks/FFNet.hpp deleted file mode 100644 index 1e00f18..0000000 --- a/include/af/nn/Networks/FFNet.hpp +++ /dev/null @@ -1,104 +0,0 @@ -/******************************************************* - * Copyright (c) 2015, ArrayFire - * All rights reserved. - * - * This file is distributed under 3-clause BSD license. 
- * The complete license agreement can be obtained at: - * http://arrayfire.com/licenses/BSD-3-Clause - ********************************************************/ -#pragma once - -#include -#include -#include -#include - -namespace af -{ - namespace nn - { - class FeedForwardNetwork : public Node - { - private: - IntVector mNodeSizes; - std::vector mNodes; - std::vector mData; - - template - FeedForwardNetwork& addNodePtr(NodeType *nodePtr) - { - mNodes.emplace_back(nodePtr); - - // TODO: Throw exception of node.getOutSizes() has >1 length - int size = nodePtr->getOutSizes()[0]; - mNodeSizes.push_back(size); - this->setOutSizes(1, &size); - return *this; - } - - public: - - FeedForwardNetwork(const int inputSize, const char *name="none") : - Node(1, &inputSize, name), - mNodeSizes(1), - mNodes(0), - mData(0) - { - mNodeSizes[0] = inputSize; - } - - template - FeedForwardNetwork& addNode(const NodeType &node) - { - return addNodePtr(new NodeType(node)); - } - - - FeedForwardNetwork& addLinearNode(const int size, const float spread = 0.05) - { - return addNodePtr(new LinearNode(mNodeSizes.back(), size, spread)); - } - - template - FeedForwardNetwork& addActivationNode() - { - int size = (int)mNodeSizes.back(); - - // Ensure ActivationType is derived from ActivationNode - ActivationNode *node = new ActivationType(size); - - return addNodePtr(node); - } - - ArrayVector forward(const ArrayVector &input) - { - mData.resize(mNodeSizes.size()); - mData[0] = input; - for (int i = 0; i < (int)mNodes.size(); i++) { - mData[i + 1] = mNodes[i]->forward(mData[i]); - } - return mData.back(); - } - - ArrayVector backward(const ArrayVector &input, - const ArrayVector &gradOutput) - { - //TODO: Assert input coming is same as the stored input - ArrayVector currGradOutput = gradOutput; - for (int i = (int)mNodes.size() - 1; i >= 0; i--) { - currGradOutput = mNodes[i]->backward(mData[i], currGradOutput); - } - return currGradOutput; - } - - void update(float lr) - { - for(int i = 0; i < (int)mNodes.size(); i++) { - mNodes[i]->update(lr); - } - } - }; - - typedef FeedForwardNetwork FFNet; - } -} diff --git a/include/af/nn/Nodes.hpp b/include/af/nn/Nodes.hpp deleted file mode 100644 index a4f7c99..0000000 --- a/include/af/nn/Nodes.hpp +++ /dev/null @@ -1,12 +0,0 @@ -/******************************************************* - * Copyright (c) 2015, ArrayFire - * All rights reserved. - * - * This file is distributed under 3-clause BSD license. - * The complete license agreement can be obtained at: - * http://arrayfire.com/licenses/BSD-3-Clause - ********************************************************/ -#pragma once - -#include -#include diff --git a/include/af/nn/Nodes/Linear.hpp b/include/af/nn/Nodes/Linear.hpp deleted file mode 100644 index 9ad2c85..0000000 --- a/include/af/nn/Nodes/Linear.hpp +++ /dev/null @@ -1,68 +0,0 @@ -/******************************************************* - * Copyright (c) 2015, ArrayFire - * All rights reserved. - * - * This file is distributed under 3-clause BSD license. 
- * The complete license agreement can be obtained at: - * http://arrayfire.com/licenses/BSD-3-Clause - ********************************************************/ -#pragma once - -#include -#include -#include - -namespace af -{ - namespace nn - { - class LinearNode : public Node - { - private: - - Weights mWeight, mBias; - Weights mWeightDiff, mBiasDiff; - - public: - - LinearNode(const int inputSize, const int outputSize, - float spread = 0.05, - const char *name="none") : - Node(1, &inputSize, 1, &outputSize, name), - mWeight(inputSize, outputSize, spread), - mBias(1, outputSize, spread), - mWeightDiff(), mBiasDiff() - { - } - - ArrayVector forward(const ArrayVector &input) - { - return {af::matmul(mWeight, input[0]) + - af::tile(mBias, 1, input[0].dims(1))}; - } - - ArrayVector backward(const ArrayVector &input, - const ArrayVector &gradOutput) - { - float m = input[0].dims(1); - - mWeightDiff = af::matmulNT(gradOutput[0], input[0]) / m; - mBiasDiff = af::sum(gradOutput[0], 1) / m; - - return { af::matmulTN(mWeight, gradOutput[0]) }; - } - - void update(float lr) - { - mWeight += lr * mWeightDiff; - mBias += lr * mBiasDiff; - - mWeight.eval(); - mBias.eval(); - - mWeightDiff.reset(); - mBiasDiff.reset(); - } - }; - } -} diff --git a/include/af/nn/Nodes/Node.hpp b/include/af/nn/Nodes/Node.hpp deleted file mode 100644 index ec4eb12..0000000 --- a/include/af/nn/Nodes/Node.hpp +++ /dev/null @@ -1,115 +0,0 @@ -/******************************************************* - * Copyright (c) 2015, ArrayFire - * All rights reserved. - * - * This file is distributed under 3-clause BSD license. - * The complete license agreement can be obtained at: - * http://arrayfire.com/licenses/BSD-3-Clause - ********************************************************/ -#pragma once - -#include -#include - -#include -#include - -namespace af -{ - namespace nn - { - - class Node - { - private: - IntVector mInputSizes; - IntVector mOutputSizes; - - char mName[MAX_NAME_SIZE]; - - void set(const int *inputSizes, const int *outputSizes, - const char *name, const int count) - { - for (int i = 0; i < (int)mInputSizes.size(); i++) mInputSizes[i] = inputSizes[i]; - for (int i = 0; i < (int)mOutputSizes.size(); i++) mOutputSizes[i] = outputSizes[i]; - - int len = std::min(count, MAX_NAME_SIZE - 1); - std::memcpy(mName, name, len); - mName[len] = 0; - } - - protected: - void setOutSizes(const int numOutputs, const int *outputSizes) - { - mOutputSizes.resize(numOutputs); - for (int i = 0; i < numOutputs; i++) { - mOutputSizes[i] = outputSizes[i]; - } - } - - Node(const int numInputs, const int *inputSizes, const char *name): - mInputSizes(numInputs), mOutputSizes(numInputs) - { - set(inputSizes, inputSizes, name, (int)strlen(name)); - } - - public: - - Node(const int numInputs, const int *inputSizes, - const int numOutputs, const int *outputSizes, const char *name) - : mInputSizes(numInputs), mOutputSizes(numOutputs) - { - set(inputSizes, outputSizes, name, (int)strlen(name)); - } - - Node(const std::vector &inputSizes, - const std::vector &outputSizes, - const std::string &name) - : mInputSizes((int)inputSizes.size()), mOutputSizes((int)outputSizes.size()) - { - set(&inputSizes[0], &outputSizes[0], name.c_str(), (int)name.size()); - } - - virtual ArrayVector forward(const ArrayVector &input) - { - return input; - } - - virtual ArrayVector backward(const ArrayVector &input, - const ArrayVector &gradOutput) - { - return gradOutput; - } - - virtual void update(float lr) {} - - //TODO: Add a method that actually returns this 
information to the user - virtual void info() - { - std::cout << "Name: " << mName << std::endl; - std::cout << "Input sizes: " << std::endl; - - for (int i = 0; i < (int)mInputSizes.size(); i++) { - std::cout << mInputSizes[i] << std::endl; - } - - std::cout << "Output sizes: " << std::endl; - for (int i = 0; i < (int)mOutputSizes.size(); i++) { - std::cout << mOutputSizes[i] << std::endl; - } - } - - IntVector getInSizes() const - { - return mInputSizes; - } - - IntVector getOutSizes() const - { - return mOutputSizes; - } - }; - - typedef std::shared_ptr NodePtr; - } -} diff --git a/include/af/nn/Activations/ReLU.hpp b/include/af/nn/Types.hpp similarity index 50% rename from include/af/nn/Activations/ReLU.hpp rename to include/af/nn/Types.hpp index 3c47684..6e7e101 100644 --- a/include/af/nn/Activations/ReLU.hpp +++ b/include/af/nn/Types.hpp @@ -1,5 +1,5 @@ /******************************************************* - * Copyright (c) 2015, ArrayFire + * Copyright (c) 2017, ArrayFire * All rights reserved. * * This file is distributed under 3-clause BSD license. @@ -8,13 +8,15 @@ ********************************************************/ #pragma once -#include -#include +#include -namespace af -{ - namespace nn - { - typedef ThresholdNode ReLU; +namespace af { + namespace nn { + + autograd::Variable input(const af::array &arr); + + autograd::Variable parameter(const af::array &arr); + + autograd::Variable weight(int input_size, int output_size, float spread = 0.05); } } diff --git a/include/af/nn/Weights.hpp b/include/af/nn/Weights.hpp deleted file mode 100644 index 3efe645..0000000 --- a/include/af/nn/Weights.hpp +++ /dev/null @@ -1,146 +0,0 @@ -/******************************************************* - * Copyright (c) 2015, ArrayFire - * All rights reserved. - * - * This file is distributed under 3-clause BSD license. 
- * The complete license agreement can be obtained at: - * http://arrayfire.com/licenses/BSD-3-Clause - ********************************************************/ -#pragma once - -#include - -namespace af -{ - namespace nn - { - class Weights - { - ArrayVector mData; - - public: - - Weights() : mData(1) - { - } - - - Weights(int inputSize, int outputSize, float spread) : mData(1) - { - mData[0] = af::randu(outputSize, inputSize) * spread - spread / 2; //Weights - } - - Weights(const af::array &weights) : mData(1) - { - mData[0] = weights; - } - - operator af::array() const - { - return mData[0]; - } - - Weights operator+(const Weights &other) const - { - return mData[0] + other; - } - - Weights operator*(const Weights &other) const - { - return mData[0] * other; - } - - Weights operator/(const Weights &other) const - { - return mData[0] / other; - } - - Weights operator-(const Weights &other) const - { - return mData[0] - other; - } - - Weights operator+=(const Weights &other) - { - mData[0] += other; - return *this; - } - - Weights operator/=(float val) - { - mData[0] /= val; - return *this; - } - - Weights operator*=(const Weights &other) - { - mData[0] *= other; - return *this; - } - - Weights operator-=(float val) - { - mData[0] -= val; - return *this; - } - - void reset() - { - mData[0] = af::constant(0, mData[0].dims()); - } - - void eval() - { - mData[0].eval(); - } - }; - - Weights operator *(const Weights &lhs, const double &rhs) - { - const af::array lhs_arr = lhs; - return lhs_arr * rhs; - } - - Weights operator +(const Weights &lhs, const double &rhs) - { - const af::array lhs_arr = lhs; - return lhs_arr + rhs; - } - - Weights operator /(const Weights &lhs, const double &rhs) - { - const af::array lhs_arr = lhs; - return lhs_arr / rhs; - } - - Weights operator -(const Weights &lhs, const double &rhs) - { - const af::array lhs_arr = lhs; - return lhs_arr - rhs; - } - - Weights operator *(const double &lhs, const Weights &rhs) - { - const af::array rhs_arr = rhs; - return lhs * rhs_arr; - } - - Weights operator +(const double &lhs, const Weights &rhs) - { - const af::array rhs_arr = rhs; - return lhs + rhs_arr; - } - - Weights operator /(const double &lhs, const Weights &rhs) - { - const af::array rhs_arr = rhs; - return lhs / rhs_arr; - } - - Weights operator -(const double &lhs, const Weights &rhs) - { - const af::array rhs_arr = rhs; - return lhs - rhs_arr; - } - } -} diff --git a/include/af/nn/common.hpp b/include/af/nn/common.hpp deleted file mode 100644 index ae6f9c7..0000000 --- a/include/af/nn/common.hpp +++ /dev/null @@ -1,20 +0,0 @@ -/******************************************************* - * Copyright (c) 2015, ArrayFire - * All rights reserved. - * - * This file is distributed under 3-clause BSD license. 
- * The complete license agreement can be obtained at: - * http://arrayfire.com/licenses/BSD-3-Clause - ********************************************************/ -#pragma once - -#include -#include - -namespace af -{ - const int MAX_NAME_SIZE = 32; - - typedef std::vector IntVector; - typedef std::vector ArrayVector; -} diff --git a/src/autograd/Variable.cpp b/src/autograd/Variable.cpp index df6f9e5..ad89ce1 100644 --- a/src/autograd/Variable.cpp +++ b/src/autograd/Variable.cpp @@ -65,7 +65,7 @@ namespace af { } } - af::array Variable::array() const + af::array& Variable::array() const { return m_shared->m_data; } @@ -96,6 +96,17 @@ namespace af { return m_shared->m_calc_grad; } + bool Variable::isGradAvailable() const + { + if (!m_shared->m_calc_grad) return false; + return m_shared->m_grads.size() >= 1; + } + + void Variable::zeroGrad() + { + m_shared->m_grads.clear(); + } + void Variable::setCalcGrad(bool calc_grad) { m_shared->m_calc_grad = calc_grad; diff --git a/src/nn/Modules/Activations.cpp b/src/nn/Modules/Activations.cpp new file mode 100644 index 0000000..0d1ca6e --- /dev/null +++ b/src/nn/Modules/Activations.cpp @@ -0,0 +1,33 @@ +/******************************************************* + * Copyright (c) 2017, ArrayFire + * All rights reserved. + * + * This file is distributed under 3-clause BSD license. + * The complete license agreement can be obtained at: + * http://arrayfire.com/licenses/BSD-3-Clause + ********************************************************/ + +#include +#include + +namespace af +{ + namespace nn + { + using namespace autograd; + + Sigmoid::Sigmoid() {} + + Variable Sigmoid::forward(const Variable &input) + { + return sigmoid(input); + } + + Tanh::Tanh() {} + + Variable Tanh::forward(const Variable &input) + { + return tanh(input); + } + } +} diff --git a/src/nn/Modules/Container.cpp b/src/nn/Modules/Container.cpp new file mode 100644 index 0000000..fbccc22 --- /dev/null +++ b/src/nn/Modules/Container.cpp @@ -0,0 +1,42 @@ +/******************************************************* + * Copyright (c) 2017, ArrayFire + * All rights reserved. + * + * This file is distributed under 3-clause BSD license. + * The complete license agreement can be obtained at: + * http://arrayfire.com/licenses/BSD-3-Clause + ********************************************************/ + +#include +#include + +namespace af +{ + namespace nn + { + using namespace autograd; + + Container::Container() {} + + ModulePtr Container::get(int id) + { + return m_modules[id]; + } + + std::vector Container::modules() + { + return m_modules; + } + + Sequential::Sequential() {} + + Variable Sequential::forward(const Variable &input) + { + Variable output = input; + for(auto module : m_modules) { + output = module->forward(output); + } + return output; + } + } +} diff --git a/src/nn/Modules/Linear.cpp b/src/nn/Modules/Linear.cpp new file mode 100644 index 0000000..c289ea5 --- /dev/null +++ b/src/nn/Modules/Linear.cpp @@ -0,0 +1,59 @@ +/******************************************************* + * Copyright (c) 2017, ArrayFire + * All rights reserved. + * + * This file is distributed under 3-clause BSD license. 
+ * The complete license agreement can be obtained at:
+ * http://arrayfire.com/licenses/BSD-3-Clause
+ ********************************************************/
+#include
+
+#include
+#include
+
+namespace af
+{
+    namespace nn
+    {
+        using namespace autograd;
+
+        Linear::Linear(int input_size, int output_size, bool bias, float spread) :
+            m_bias(bias)
+        {
+            auto w = nn::weight(input_size, output_size, spread);
+            if (bias) {
+                auto b = nn::weight(1, output_size, spread);
+                setParams({w, b});
+            } else {
+                setParams({w});
+            }
+        }
+
+        Linear::Linear(const Variable &w) :
+            m_bias(false),
+            Module({w})
+        {
+        }
+
+        Linear::Linear(const Variable &w, const Variable &b) :
+            m_bias(true),
+            Module({w, b})
+        {
+            if (b.array().dims(0) != w.array().dims(0)) {
+                throw af::exception("nn:Linear: Dimension mismatch between weight and bias.");
+            }
+            if (b.array().dims(1) != 1) {
+                throw af::exception("nn::Linear: Bias must be a vector.");
+            }
+        }
+
+        Variable Linear::forward(const Variable &input)
+        {
+            auto res = matmul(m_parameters[0], input);
+            if (m_bias) {
+                res = res + expandAs(m_parameters[1], res);
+            }
+            return res;
+        }
+    }
+}
diff --git a/src/nn/Modules/Module.cpp b/src/nn/Modules/Module.cpp
new file mode 100644
index 0000000..0232197
--- /dev/null
+++ b/src/nn/Modules/Module.cpp
@@ -0,0 +1,48 @@
+/*******************************************************
+ * Copyright (c) 2015, ArrayFire
+ * All rights reserved.
+ *
+ * This file is distributed under 3-clause BSD license.
+ * The complete license agreement can be obtained at:
+ * http://arrayfire.com/licenses/BSD-3-Clause
+ ********************************************************/
+
+#include
+
+namespace af
+{
+    namespace nn
+    {
+        using autograd::Variable;
+        Module::Module() :
+            m_parameters()
+        {
+        }
+
+        Module::Module(const std::vector<Variable> &parameters) :
+            m_parameters(parameters.begin(), parameters.end())
+        {
+        }
+
+        void Module::setParams(const std::vector<Variable> &parameters)
+        {
+            m_parameters.clear();
+            for(auto parameter : parameters) {
+                m_parameters.push_back(parameter);
+            }
+        }
+
+
+        std::vector<Variable> Module::parameters()
+        {
+            return m_parameters;
+        }
+
+        void Module::zeroGrad()
+        {
+            for (auto &parameter : m_parameters) {
+                parameter.zeroGrad();
+            }
+        }
+    }
+}
diff --git a/src/nn/Types.cpp b/src/nn/Types.cpp
new file mode 100644
index 0000000..698b497
--- /dev/null
+++ b/src/nn/Types.cpp
@@ -0,0 +1,36 @@
+/*******************************************************
+ * Copyright (c) 2017, ArrayFire
+ * All rights reserved.
+ *
+ * This file is distributed under 3-clause BSD license.
+ * The complete license agreement can be obtained at:
+ * http://arrayfire.com/licenses/BSD-3-Clause
+ ********************************************************/
+
+#include
+
+#include
+
+namespace af {
+    namespace nn {
+
+        using autograd::Variable;
+
+        Variable input(const af::array &arr)
+        {
+            return Variable(arr, false);
+        }
+
+        Variable parameter(const af::array &arr)
+        {
+            return Variable(arr, true);
+        }
+
+        Variable weight(int input_size, int output_size, float spread)
+        {
+            auto w = af::randu(output_size, input_size) * spread - spread / 2;
+            w.eval();
+            return parameter(w);
+        }
+    }
+}
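A quick orientation before the follow-up fixes: the classes added above are meant to be composed through the Container/Sequential interface, with nn::input() wrapping data Variables and nn::weight() creating trainable parameters. A minimal usage sketch, assuming only the interfaces shown in this patch, that the bias/spread arguments of Linear have defaults in its header (not shown here), and that the relevant af/nn and af/autograd headers are included:

    af::nn::Sequential model;
    model.add(af::nn::Linear(2, 1));             // parameters created internally via nn::weight()
    model.add(af::nn::Sigmoid());

    auto x    = af::nn::input(af::randu(2, 1));  // Variable with gradient tracking disabled
    auto pred = model.forward(x);                // Sequential::forward runs each sub-module in order

The perceptron example later in this series uses exactly this pattern.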
From 9aefea4a47ec3fc333a6f2a8e4d2c6bdc5169493 Mon Sep 17 00:00:00 2001
From: Pavan Yalamanchili
Date: Thu, 6 Jul 2017 00:47:59 -0700
Subject: [PATCH 16/20] Fixing bugs in backward pass for activation functions

---
 src/autograd/Functions.cpp | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/src/autograd/Functions.cpp b/src/autograd/Functions.cpp
index fcabaaa..71048b6 100644
--- a/src/autograd/Functions.cpp
+++ b/src/autograd/Functions.cpp
@@ -104,7 +104,7 @@ namespace af {
     {
         auto result = exp(input.array());
         auto grad_func = [](std::vector<Variable> &inputs, const Variable &grad_output) {
-            inputs[0].addGrad(exp(inputs[0]));
+            inputs[0].addGrad(grad_output * exp(inputs[0]));
         };
         return Variable(result, {input}, grad_func);
     }
@@ -113,7 +113,7 @@ namespace af {
     {
         auto result = sin(input.array());
         auto grad_func = [](std::vector<Variable> &inputs, const Variable &grad_output) {
-            inputs[0].addGrad(cos(inputs[0]));
+            inputs[0].addGrad(grad_output * cos(inputs[0]));
         };
         return Variable(result, {input}, grad_func);
     }
@@ -122,7 +122,7 @@ namespace af {
     {
         auto result = cos(input.array());
         auto grad_func = [](std::vector<Variable> &inputs, const Variable &grad_output) {
-            inputs[0].addGrad(negate(sin(inputs[0])));
+            inputs[0].addGrad(grad_output * negate(sin(inputs[0])));
         };
         return Variable(result, {input}, grad_func);
     }
@@ -132,7 +132,7 @@ namespace af {
     {
         auto result = tanh(input.array());
         auto grad_func = [](std::vector<Variable> &inputs, const Variable &grad_output) {
             auto tmp = tanh(inputs[0]);
-            inputs[0].addGrad(1.0 - tmp * tmp);
+            inputs[0].addGrad(grad_output * (1.0 - tmp * tmp));
         };
         return Variable(result, {input}, grad_func);
     }
@@ -142,7 +142,7 @@ namespace af {
     {
         auto result = sigmoid(input.array());
         auto grad_func = [](std::vector<Variable> &inputs, const Variable &grad_output) {
             auto tmp = sigmoid(inputs[0]);
-            inputs[0].addGrad(tmp * (1 - tmp));
+            inputs[0].addGrad(grad_output * tmp * (1 - tmp));
         };
         return Variable(result, {input}, grad_func);
     }
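The bug fixed above is a missing chain-rule factor: each backward lambda was pushing only the local derivative f'(x) into its input and ignoring the gradient arriving from downstream. With grad_output = dL/dy for y = f(x), the value accumulated into the input must be

    dL/dx = grad_output * f'(x)

which for the functions touched here works out to

    exp:     grad_output * exp(x)
    sin:     grad_output * cos(x)
    cos:     grad_output * (-sin(x))
    tanh:    grad_output * (1 - tanh(x)^2)
    sigmoid: grad_output * sigmoid(x) * (1 - sigmoid(x))

exactly the expressions now passed to addGrad().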
From 6d5751a853f2909b23b5aeb99aba54ced71dd18b Mon Sep 17 00:00:00 2001
From: Pavan Yalamanchili
Date: Thu, 6 Jul 2017 00:48:23 -0700
Subject: [PATCH 17/20] Fixing perceptron example to use smaller batch size

- Trying to solve for the entire batch was a bad idea
---
 examples/perceptron.cpp | 52 +++++++++++++++++++++++++++--------------
 1 file changed, 35 insertions(+), 17 deletions(-)

diff --git a/examples/perceptron.cpp b/examples/perceptron.cpp
index 799c52a..0ad8cc0 100644
--- a/examples/perceptron.cpp
+++ b/examples/perceptron.cpp
@@ -18,8 +18,8 @@ int main()
 {
     const int inputSize = 2;
     const int outputSize = 1;
-    const int numSamples = 4;
     const double lr = 0.005;
+    const int numSamples = 4;
     float hInput[] = {1, 1,
                       0, 0,
@@ -40,27 +40,45 @@ int main()
     perceptron.add(nn::Sigmoid());
 
     Variable result;
-    for (int i = 0; i < 10; i++) {
+    for (int i = 0; i < 100; i++) {
+        for (int j = 0; j < numSamples; j++) {
+
+            af::array in_j = in(af::span, j);
+            af::array out_j = out(af::span, j);
 
-        // Forward propagation
-        result = perceptron.forward(nn::input(in));
-
-        // Calculate loss
-        // TODO: Use loss function
-        af::array diff = out - result.array();
-        printf("Error at iteration(%d) : %lf\n", i + 1, af::max(af::abs(diff)));
+            // Forward propagation
+            result = perceptron.forward(nn::input(in_j));
+
+            // Calculate loss
+            // TODO: Use loss function
+            af::array diff = out_j - result.array();
+
+            // Backward propagation
+            auto d_result = Variable(diff, false);
+            result.backward(d_result);
+
+            // Update parameters
+            // TODO: Should use optimizer
+            for (auto param : perceptron.parameters()) {
+                param.array() += lr * param.grad().array();
+                param.array().eval();
+            }
+        }
 
-        // Backward propagation
-        auto d_result = Variable(diff, false);
-        result.backward(d_result);
+        if ((i + 1) % 10 == 0) {
+            // Forward propagation
+            result = perceptron.forward(nn::input(in));
 
-        // Update parameters
-        // TODO: Should use optimizer
-        for (auto param : perceptron.parameters()) {
-            param.array() += lr * param.grad().array();
-            param.array().eval();
+            // Calculate loss
+            // TODO: Use loss function
+            af::array diff = out - result.array();
+            printf("Average Error at iteration(%d) : %lf\n", i + 1, af::mean(af::abs(diff)));
+            printf("Predicted\n");
+            af_print(result.array());
+            printf("Expected\n");
+            af_print(out);
+            printf("\n\n");
         }
     }
 
-    af_print(result.array());
     return 0;
 }

From a01504b84ca5aaa9cb59b4ff79f568ddc4454a1d Mon Sep 17 00:00:00 2001
From: Pavan Yalamanchili
Date: Thu, 6 Jul 2017 01:10:53 -0700
Subject: [PATCH 18/20] Adding model.eval() and model.train()

---
 examples/perceptron.cpp          | 10 +++++++---
 include/af/nn/Modules/Module.hpp |  4 ++++
 src/nn/Modules/Module.cpp        | 13 +++++++++++++
 3 files changed, 24 insertions(+), 3 deletions(-)

diff --git a/examples/perceptron.cpp b/examples/perceptron.cpp
index 0ad8cc0..96c14af 100644
--- a/examples/perceptron.cpp
+++ b/examples/perceptron.cpp
@@ -18,7 +18,7 @@ int main()
 {
     const int inputSize = 2;
     const int outputSize = 1;
-    const double lr = 0.005;
+    const double lr = 0.1;
     const int numSamples = 4;
     float hInput[] = {1, 1,
@@ -40,8 +40,10 @@ int main()
     perceptron.add(nn::Sigmoid());
 
     Variable result;
-    for (int i = 0; i < 100; i++) {
+    for (int i = 0; i < 1000; i++) {
         for (int j = 0; j < numSamples; j++) {
+            perceptron.train();
+            perceptron.zeroGrad();
 
             af::array in_j = in(af::span, j);
             af::array out_j = out(af::span, j);
@@ -65,7 +67,9 @@ int main()
             }
         }
 
-        if ((i + 1) % 10 == 0) {
+        if ((i + 1) % 100 == 0) {
+            perceptron.eval();
+
             // Forward propagation
             result = perceptron.forward(nn::input(in));
diff --git a/include/af/nn/Modules/Module.hpp b/include/af/nn/Modules/Module.hpp
index a35db1f..fb1e946 100644
--- a/include/af/nn/Modules/Module.hpp
+++ b/include/af/nn/Modules/Module.hpp
@@ -35,6 +35,10 @@ namespace af
 
             void zeroGrad();
 
+            void train();
+
+            void eval();
+
             virtual autograd::Variable forward(const autograd::Variable &input) = 0;
         };
     }
diff --git a/src/nn/Modules/Module.cpp b/src/nn/Modules/Module.cpp
index 0232197..9db16b9 100644
--- a/src/nn/Modules/Module.cpp
+++ b/src/nn/Modules/Module.cpp
@@ -32,6 +32,19 @@ namespace af
             }
         }
 
+        void Module::train()
+        {
+            for (auto parameter : m_parameters) {
+                parameter.setCalcGrad(true);
+            }
+        }
+
+        void Module::eval()
+        {
+            for (auto parameter : m_parameters) {
+                parameter.setCalcGrad(false);
+            }
+        }
 
         std::vector<Variable> Module::parameters()
         {
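The example still updates the weights by hand, and both patches above leave the "TODO: Should use optimizer" comment in place. A hypothetical helper along those lines, built only on the Module/Variable interfaces introduced in this series (the SGD struct and its name are illustrative, not part of these patches; note the example seeds backward() with out - result, which is why its manual update uses +=, whereas a conventional SGD stepping against dL/dparam gradients subtracts):

    // Illustrative only: a minimal SGD-style step over a module's parameters.
    struct SGD {
        double lr;
        explicit SGD(double lr) : lr(lr) {}

        void step(af::nn::Module &model)
        {
            for (auto &param : model.parameters()) {
                if (!param.isGradAvailable()) continue;      // Variable API added earlier in this series
                param.array() -= lr * param.grad().array();  // assumes the gradients are dL/dparam
                param.array().eval();
            }
            model.zeroGrad();
        }
    };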
From 2776aa2c03ae66e502ac6a10a21c9cd673eff5a8 Mon Sep 17 00:00:00 2001
From: Pavan Yalamanchili
Date: Thu, 6 Jul 2017 08:46:40 -0700
Subject: [PATCH 19/20] Formatting changes

---
 examples/perceptron.cpp          | 2 +-
 include/af/autograd/Variable.hpp | 2 +-
 src/nn/Modules/Module.cpp        | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/examples/perceptron.cpp b/examples/perceptron.cpp
index 96c14af..ffe82d7 100644
--- a/examples/perceptron.cpp
+++ b/examples/perceptron.cpp
@@ -1,5 +1,5 @@
 /*******************************************************
- * Copyright (c) 2015, ArrayFire
+ * Copyright (c) 2017, ArrayFire
  * All rights reserved.
  *
  * This file is distributed under 3-clause BSD license.
diff --git a/include/af/autograd/Variable.hpp b/include/af/autograd/Variable.hpp
index 8ce10e2..f4deb5f 100644
--- a/include/af/autograd/Variable.hpp
+++ b/include/af/autograd/Variable.hpp
@@ -42,7 +42,7 @@ namespace af {
             GradFunc_t m_grad_func;
         };
 
-        public:
+    public:
 
         Variable();
         Variable(const af::array &data, bool calc_grad);
diff --git a/src/nn/Modules/Module.cpp b/src/nn/Modules/Module.cpp
index 9db16b9..e350f1d 100644
--- a/src/nn/Modules/Module.cpp
+++ b/src/nn/Modules/Module.cpp
@@ -1,5 +1,5 @@
 /*******************************************************
- * Copyright (c) 2015, ArrayFire
+ * Copyright (c) 2017, ArrayFire
  * All rights reserved.
  *
  * This file is distributed under 3-clause BSD license.

From 04cd450b347b1e782fbdf1ed25033c143daa2af6 Mon Sep 17 00:00:00 2001
From: Pavan Yalamanchili
Date: Thu, 6 Jul 2017 08:50:48 -0700
Subject: [PATCH 20/20] Use references while iterating when possible

---
 examples/perceptron.cpp          | 2 +-
 include/af/autograd/Variable.hpp | 4 ++--
 src/autograd/Variable.cpp        | 8 ++++----
 src/nn/Modules/Container.cpp     | 2 +-
 src/nn/Modules/Module.cpp        | 4 ++--
 5 files changed, 10 insertions(+), 10 deletions(-)

diff --git a/examples/perceptron.cpp b/examples/perceptron.cpp
index ffe82d7..d8e7f39 100644
--- a/examples/perceptron.cpp
+++ b/examples/perceptron.cpp
@@ -61,7 +61,7 @@ int main()
 
             // Update parameters
             // TODO: Should use optimizer
-            for (auto param : perceptron.parameters()) {
+            for (auto &param : perceptron.parameters()) {
                 param.array() += lr * param.grad().array();
                 param.array().eval();
             }
diff --git a/include/af/autograd/Variable.hpp b/include/af/autograd/Variable.hpp
index f4deb5f..330c37f 100644
--- a/include/af/autograd/Variable.hpp
+++ b/include/af/autograd/Variable.hpp
@@ -52,7 +52,7 @@ namespace af {
 
         af::array& array() const;
 
-        Variable grad() const;
+        Variable& grad() const;
 
         std::ptrdiff_t id() const;
 
@@ -74,7 +74,7 @@ namespace af {
     private:
         void evalGrad(bool retain_grad_graph = false);
 
-        std::vector<Variable> getInputs() const;
+        std::vector<Variable>& getInputs() const;
 
         static void buildSubGraph(Cache_t &cache, DAG_t &dag, const Variable &var);
 
diff --git a/src/autograd/Variable.cpp b/src/autograd/Variable.cpp
index ad89ce1..9ff55c5 100644
--- a/src/autograd/Variable.cpp
+++ b/src/autograd/Variable.cpp
@@ -55,7 +55,7 @@ namespace af {
         m_shared(nullptr)
     {
         bool calc_grad = false;
-        for (auto input : inputs) {
+        for (const auto &input : inputs) {
            calc_grad |= input.isCalcGrad();
        }
        if (calc_grad) {
@@ -70,7 +70,7 @@ namespace af {
         return m_shared->m_data;
     }
 
-    Variable Variable::grad() const
+    Variable& Variable::grad() const
     {
         if (!m_shared->m_calc_grad) {
             throw af::exception("Gradient calclation disabled.");
@@ -86,7 +86,7 @@ namespace af {
         return (std::ptrdiff_t)m_shared.get();
     }
 
-    std::vector<Variable> Variable::getInputs() const
+    std::vector<Variable>& Variable::getInputs() const
     {
         return m_shared->m_inputs;
     }
@@ -181,7 +181,7 @@ namespace af {
         if (cache.find(id) != cache.end()) {
             return;
         }
-        for (auto input : var.getInputs()) {
+        for (const auto &input : var.getInputs()) {
             Variable::buildSubGraph(cache, dag, input);
         }
         cache[id] = true;
diff --git a/src/nn/Modules/Container.cpp b/src/nn/Modules/Container.cpp
index fbccc22..9078631 100644
--- a/src/nn/Modules/Container.cpp
+++ b/src/nn/Modules/Container.cpp
@@ -33,7 +33,7 @@ namespace af
 
         Variable Sequential::forward(const Variable &input)
         {
             Variable output = input;
-            for(auto module : m_modules) {
+            for (auto &module : m_modules) {
                 output = module->forward(output);
             }
             return output;
diff --git a/src/nn/Modules/Module.cpp b/src/nn/Modules/Module.cpp
index e350f1d..5126218 100644
--- a/src/nn/Modules/Module.cpp
+++ b/src/nn/Modules/Module.cpp
@@ -34,14 +34,14 @@ namespace af
 
         void Module::train()
         {
-            for (auto parameter : m_parameters) {
+            for (auto &parameter : m_parameters) {
                 parameter.setCalcGrad(true);
             }
         }
 
         void Module::eval()
         {
-            for (auto parameter : m_parameters) {
+            for (auto &parameter : m_parameters) {
                 parameter.setCalcGrad(false);
             }
         }
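One reading note on the final state of these loops: Module::parameters() returns its std::vector<Variable> by value, so for (auto &param : perceptron.parameters()) binds references into a temporary copy of that vector. The updates still reach the model because a copied Variable appears to share its underlying storage (the m_shared state visible in src/autograd/Variable.cpp), which is also what the per-sample update loop in the perceptron example relies on. A small sketch of that assumption:

    // Sketch, assuming Variable copies alias the same underlying af::array
    // (as the perceptron example's update loop relies on).
    auto params = perceptron.parameters();   // vector returned by value
    params[0].array() *= 0.5;                // still scales the model's first parameter,
                                             // not just a local copy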