diff --git a/AFBuildMacros.cmake b/AFBuildMacros.cmake
deleted file mode 100644
index 64ea17e..0000000
--- a/AFBuildMacros.cmake
+++ /dev/null
@@ -1,65 +0,0 @@
-# A macro to build an ArrayFire example
-# For most uses only FIND_PACKAGE(ArrayFire REQUIRED), ADD_EXECUTABLE(...)
-# and TARGET_LINK_LIBRARIES(... ${ARRAYFIRE_LIBRARIES}) are needed
-MACRO(BUILD_SRC SRC_NAME SRC_SOURCE BACKEND_NAME BACKEND_LIBRARIES OTHER_LIBRARIES OUT_DIR_NAME)
-
-    ADD_EXECUTABLE(example_${SRC_NAME}_${BACKEND_NAME} ${SRC_SOURCE})
-
-    TARGET_LINK_LIBRARIES(example_${SRC_NAME}_${BACKEND_NAME}
-        ${BACKEND_LIBRARIES} ${OTHER_LIBRARIES})
-
-    SET_TARGET_PROPERTIES(example_${SRC_NAME}_${BACKEND_NAME}
-        PROPERTIES
-        OUTPUT_NAME ${SRC_NAME}_${BACKEND_NAME}
-        RUNTIME_OUTPUT_DIRECTORY ${OUT_DIR_NAME}
-        FOLDER "${BACKEND_NAME}")
-ENDMACRO()
-
-# A macro to build a list of files
-# For most uses only FIND_PACKAGE(ArrayFire REQUIRED), ADD_EXECUTABLE(...)
-# and TARGET_LINK_LIBRARIES(... ${ARRAYFIRE_LIBRARIES}) are needed
-MACRO(BUILD_BACKEND FILES BACKEND_NAME BACKEND_LIBRARIES OTHER_LIBRARIES)
-
-    FOREACH(FILE ${FILES})
-        GET_FILENAME_COMPONENT(SRC ${FILE} NAME_WE)
-        GET_FILENAME_COMPONENT(FULL_DIR_NAME ${FILE} PATH)
-        GET_FILENAME_COMPONENT(DIR_NAME ${FULL_DIR_NAME} NAME)
-
-        BUILD_SRC(${SRC} ${FILE} ${BACKEND_NAME}
-            "${BACKEND_LIBRARIES}"
-            "${OTHER_LIBRARIES}" ${DIR_NAME})
-    ENDFOREACH()
-ENDMACRO()
-
-MACRO(BUILD_ALL FILES)
-    FIND_PACKAGE(ArrayFire REQUIRED)
-    FIND_PACKAGE(CUDA QUIET)
-    FIND_PACKAGE(OpenCL QUIET)
-
-    INCLUDE_DIRECTORIES(
-        "${CMAKE_CURRENT_SOURCE_DIR}/include"
-        ${ArrayFire_INCLUDE_DIRS}
-        )
-
-    IF (${ArrayFire_CPU_FOUND})
-        MESSAGE(STATUS "CPU backend is ON.")
-        BUILD_BACKEND("${FILES}" cpu ${ArrayFire_CPU_LIBRARIES} "")
-    ENDIF()
-
-    IF (${OpenCL_FOUND} AND ${ArrayFire_OpenCL_FOUND})
-        MESSAGE(STATUS "OPENCL backend is ON.")
-        BUILD_BACKEND("${FILES}" opencl ${ArrayFire_OpenCL_LIBRARIES} "${OpenCL_LIBRARIES}")
-    ENDIF()
-
-    IF (${CUDA_FOUND} AND ${ArrayFire_CUDA_FOUND})
-        FIND_LIBRARY( CUDA_NVVM_LIBRARY
-            NAMES "nvvm"
-            PATH_SUFFIXES "nvvm/lib64" "nvvm/lib"
-            PATHS ${CUDA_TOOLKIT_ROOT_DIR}
-            DOC "CUDA NVVM Library"
-            )
-
-        MESSAGE(STATUS "CUDA backend is ON.")
-        BUILD_BACKEND("${FILES}" cuda ${ArrayFire_CUDA_LIBRARIES} "${CUDA_CUBLAS_LIBRARIES};${CUDA_LIBRARIES};${CUDA_cusolver_LIBRARY};${CUDA_CUFFT_LIBRARIES};${CUDA_NVVM_LIBRARY};${CUDA_CUDA_LIBRARY}")
-    ENDIF()
-ENDMACRO()
diff --git a/CMakeLists.txt b/CMakeLists.txt
index d03e555..c84783d 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -1,26 +1,41 @@
-CMAKE_MINIMUM_REQUIRED(VERSION 2.8)
-PROJECT(ARRAYFIRE_ML)
-
-SET_PROPERTY(GLOBAL PROPERTY USE_FOLDERS ON)
-ADD_DEFINITIONS(-Wall -std=c++11 -fvisibility=hidden)
-
-OPTION(BUILD_TEST "Build Tests" ON)
-
-# Header files
-IF(NOT DEFINED AFML_INSTALL_INC_DIR)
-  SET(AFML_INSTALL_INC_DIR "include" CACHE PATH "Installation path for headers")
-ENDIF()
-
-IF (BUILD_TEST)
-  FILE(GLOB FILES "test/*.cpp")
-  INCLUDE("${CMAKE_CURRENT_SOURCE_DIR}/AFBuildMacros.cmake")
-  BUILD_ALL("${FILES}")
-ENDIF()
-
-INSTALL(DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}/include/" DESTINATION "${AFML_INSTALL_INC_DIR}"
-    COMPONENT headers
-    FILES_MATCHING
-    PATTERN "*.h"
-    PATTERN "*.hpp"
-    PATTERN ".gitignore" EXCLUDE
-)
+cmake_minimum_required(VERSION 3.5.1)
+
+project(ArrayFireML
+  VERSION 0.1.0
+  LANGUAGES C CXX)
+
+find_package(ArrayFire REQUIRED)
+
+add_library(afml SHARED "")
+
+target_sources(afml
+  PRIVATE
+    src/autograd/Functions.cpp
+    src/autograd/Variable.cpp
+    src/nn/Modules/Activations.cpp
+    src/nn/Modules/Container.cpp
+    src/nn/Modules/Linear.cpp
+    src/nn/Modules/Module.cpp
+    src/nn/Types.cpp
+  )
+
+target_include_directories(afml
+  PUBLIC
+    ${ArrayFire_INCLUDE_DIRS}
+    ${CMAKE_CURRENT_SOURCE_DIR}/include
+  )
+
+target_link_libraries(afml
+  PUBLIC
+    af
+  )
+
+set_target_properties(afml
+  PROPERTIES
+    VERSION "${ArrayFireML_VERSION}"
+    SOVERSION "${ArrayFireML_VERSION_MAJOR}"
+    CXX_STANDARD 11
+  )
+
+
+add_subdirectory(examples)
diff --git a/examples/CMakeLists.txt b/examples/CMakeLists.txt
new file mode 100644
index 0000000..b1e2404
--- /dev/null
+++ b/examples/CMakeLists.txt
@@ -0,0 +1,18 @@
+function(build_example SRC)
+  get_filename_component(src_name ${SRC} NAME_WE)
+  set(target "${src_name}")
+  add_executable(${target} ${SRC})
+  target_link_libraries(${target}
+    PRIVATE
+      afml
+    )
+  target_compile_features(${target}
+    PRIVATE cxx_range_for)
+endfunction(build_example)
+
+# build_example(Activations.cpp)
+# build_example(FFNet.cpp)
+# build_example(Node.cpp)
+build_example(perceptron.cpp)
+# build_example(Weights.cpp)
+build_example(autograd.cpp)
diff --git a/examples/autograd.cpp b/examples/autograd.cpp
new file mode 100644
index 0000000..702992b
--- /dev/null
+++ b/examples/autograd.cpp
@@ -0,0 +1,175 @@
+/*******************************************************
+ * Copyright (c) 2017, ArrayFire
+ * All rights reserved.
+ *
+ * This file is distributed under 3-clause BSD license.
+ * The complete license agreement can be obtained at:
+ * http://arrayfire.com/licenses/BSD-3-Clause
+ ********************************************************/
+
+#include <af/autograd.h>
+
+#include <cstdio>
+#include <iostream>
+
+#define VERIFY(VAL) do {                                    \
+        auto res = af::allTrue<bool>(af::abs(VAL) < 1E-5);  \
+        printf("%s:%d %s\n", __FUNCTION__, __LINE__,        \
+               res ? "PASS" : "FAIL");                      \
+    } while(0)
+
+using af::autograd::Variable;
+void test_multiply()
+{
+    auto x = Variable(af::randu(5), true);
+    auto y = x * x;
+    auto dy = Variable(af::constant(1.0, 5), false);
+    y.backward(dy);
+    auto dx = x.grad();
+    VERIFY(dx.array() - 2 * x.array());
+}
+
+void test_multiply_add()
+{
+    auto x = Variable(af::randu(5), true);
+    auto y = Variable(af::randu(5), true);
+    auto z = x * x + x * y + y * y;
+    auto dz = Variable(af::constant(1.0, 5), false);
+    z.backward(dz);
+    auto dx = x.grad();
+    auto dy = y.grad();
+    VERIFY(dx.array() - 2 * x.array() - y.array());
+    VERIFY(dy.array() - 2 * y.array() - x.array());
+}
+
+void test_no_calc_grad()
+{
+    auto x = Variable(af::randu(5), false);
+    auto y = Variable(af::randu(5), true);
+    auto z = x * x + x * y + y * y;
+    auto dz = Variable(af::constant(1.0, 5), false);
+    z.backward(dz);
+    auto dy = y.grad();
+    VERIFY(dy.array() - 2 * y.array() - x.array());
+    try {
+        auto dx = x.grad();
+    } catch(af::exception &ex) {
+        std::cout << ex.what() << std::endl;
+        return;
+    }
+    printf("%s:%d No Gradient check Failed\n", __FUNCTION__, __LINE__);
+}
+
+void test_multiply_sub()
+{
+    auto x = Variable(af::randu(5), true);
+    auto y = Variable(af::randu(5), true);
+    auto z = x * x - x * y;
+    auto dz = Variable(af::constant(1.0, 5), false);
+    z.backward(dz);
+    auto dx = x.grad();
+    auto dy = y.grad();
+    VERIFY(dx.array() - (2 * x.array() - y.array()));
+    VERIFY(dy.array() - (-x.array()));
+}
+
+void test_divide_add()
+{
+    auto x = Variable(af::randu(5), true);
+    auto y = Variable(af::randu(5), true);
+    auto z = x + x / y + y;
+    auto dz = Variable(af::constant(1.0, 5), false);
+    z.backward(dz);
+    auto dx = x.grad();
+    auto dy = y.grad();
+    VERIFY(dx.array() - (1.0 + 1.0 / y.array()));
+    VERIFY(dy.array() - (1.0 - x.array() / (y.array() * y.array())));
+}
+
+void test_multiply_add_scalar()
+{
+    auto x = Variable(af::randu(5), true);
+    auto y = Variable(af::randu(5), true);
+    auto z = 2 * x + x * y + y;
+    auto dz = Variable(af::constant(1.0, 5), false);
+    z.backward(dz);
+    auto dx = x.grad();
+    auto dy = y.grad();
+    VERIFY(dx.array() - (2.0 + y.array()));
+    VERIFY(dy.array() - (1.0 + x.array()));
+}
+
+void test_exp()
+{
+    auto x = Variable(af::randu(5), true);
+    auto y = exp(x);
+    auto dy = Variable(af::constant(1.0, 5), false);
+    y.backward(dy);
+    auto dx = x.grad();
+    VERIFY(dx.array() - (af::exp(x.array())));
+}
+
+void test_sigmoid()
+{
+    auto x = Variable(af::randu(5), true);
+    auto y = sigmoid(x);
+    auto dy = Variable(af::constant(1.0, 5), false);
+    y.backward(dy);
+    auto dx = x.grad();
+    VERIFY(dx.array() - (y.array() * (1 - y.array())));
+    VERIFY(dx.array() - (af::sigmoid(x.array()) * (1 - af::sigmoid(x.array()))));
+}
+
+void test_tanh()
+{
+    auto x = Variable(af::randu(5), true);
+    auto y = tanh(x);
+    auto dy = Variable(af::constant(1.0, 5), false);
+    y.backward(dy);
+    auto dx = x.grad();
+    VERIFY(dx.array() - (1 - y.array() * y.array()));
+    VERIFY(dx.array() - (1 + af::tanh(x.array())) * (1 - af::tanh(x.array())));
+}
+
+void test_expand()
+{
+    auto x = Variable(af::randu(5), true);
+    auto y = Variable(af::randu(5, 2), true);
+    auto z = y * expandAs(x, y);
+    auto dz = Variable(af::constant(1.0, 5, 2), false);
+    z.backward(dz);
+    auto dy = y.grad();
+    auto dx = x.grad();
+    VERIFY(dy.array() - af::tile(x.array(), 1, 2));
+    VERIFY(dx.array() - af::sum(y.array(), 1));
+}
+
+void test_reduce()
+{
+    auto x = Variable(af::randu(5), true);
+    auto y = Variable(af::randu(5, 2), true);
+    auto z = x * reduceAs(y, x);
+    auto dz = Variable(af::constant(1.0, 5), false);
+    z.backward(dz);
+    auto dy = y.grad();
+    auto dx = x.grad();
+    VERIFY(dy.array() - af::tile(x.array(), 1, 2));
+    VERIFY(dx.array() - af::sum(y.array(), 1));
+}
+
+int main()
+{
+    af::info();
+    test_multiply();
+    test_multiply_add();
+    test_no_calc_grad();
+    test_multiply_sub();
+    test_divide_add();
+    test_multiply_add_scalar();
+    test_exp();
+    test_sigmoid();
+    test_tanh();
+    test_expand();
+    test_reduce();
+    return 0;
+}
diff --git a/examples/perceptron.cpp b/examples/perceptron.cpp
new file mode 100644
index 0000000..d8e7f39
--- /dev/null
+++ b/examples/perceptron.cpp
@@ -0,0 +1,89 @@
+/*******************************************************
+ * Copyright (c) 2017, ArrayFire
+ * All rights reserved.
+ *
+ * This file is distributed under 3-clause BSD license.
+ * The complete license agreement can be obtained at:
+ * http://arrayfire.com/licenses/BSD-3-Clause
+ ********************************************************/
+
+#include <af/autograd.h>
+#include <af/nn.h>
+#include <cstdio>
+
+using namespace af;
+using namespace af::nn;
+using namespace af::autograd;
+
+int main()
+{
+    const int inputSize  = 2;
+    const int outputSize = 1;
+    const double lr = 0.1;
+    const int numSamples = 4;
+
+    float hInput[] = {1, 1,
+                      0, 0,
+                      1, 0,
+                      0, 1};
+
+    float hOutput[] = {1,
+                       0,
+                       1,
+                       1};
+
+    auto in  = af::array(inputSize, numSamples, hInput);
+    auto out = af::array(outputSize, numSamples, hOutput);
+
+    nn::Sequential perceptron;
+
+    perceptron.add(nn::Linear(inputSize, outputSize));
+    perceptron.add(nn::Sigmoid());
+
+    Variable result;
+    for (int i = 0; i < 1000; i++) {
+        for (int j = 0; j < numSamples; j++) {
+            perceptron.train();
+            perceptron.zeroGrad();
+
+            af::array in_j  = in(af::span, j);
+            af::array out_j = out(af::span, j);
+
+            // Forward propagation
+            result = perceptron.forward(nn::input(in_j));
+
+            // Calculate loss
+            // TODO: Use loss function
+            af::array diff = out_j - result.array();
+
+            // Backward propagation
+            auto d_result = Variable(diff, false);
+            result.backward(d_result);
+
+            // Update parameters
+            // TODO: Should use optimizer
+            for (auto &param : perceptron.parameters()) {
+                param.array() += lr * param.grad().array();
+                param.array().eval();
+            }
+        }
+
+        if ((i + 1) % 100 == 0) {
+            perceptron.eval();
+
+            // Forward propagation
+            result = perceptron.forward(nn::input(in));
+
+            // Calculate loss
+            // TODO: Use loss function
+            af::array diff = out - result.array();
+            printf("Average Error at iteration(%d) : %lf\n", i + 1, af::mean<float>(af::abs(diff)));
+            printf("Predicted\n");
+            af_print(result.array());
+            printf("Expected\n");
+            af_print(out);
+            printf("\n\n");
+        }
+    }
+    return 0;
+}
diff --git a/include/afml/nn/Networks.hpp b/include/af/autograd.h
similarity index 73%
rename from include/afml/nn/Networks.hpp
rename to include/af/autograd.h
index d6a08ca..ceda2b1 100644
--- a/include/afml/nn/Networks.hpp
+++ b/include/af/autograd.h
@@ -1,11 +1,10 @@
 /*******************************************************
- * Copyright (c) 2015, ArrayFire
+ * Copyright (c) 2017, ArrayFire
  * All rights reserved.
  *
  * This file is distributed under 3-clause BSD license.
  * The complete license agreement can be obtained at:
  * http://arrayfire.com/licenses/BSD-3-Clause
  ********************************************************/
-#pragma once
-
-#include <afml/nn/Networks/FFNet.hpp>
+#include <af/autograd/Variable.hpp>
+#include <af/autograd/Functions.hpp>
diff --git a/include/af/autograd/Functions.hpp b/include/af/autograd/Functions.hpp
new file mode 100644
index 0000000..17a190e
--- /dev/null
+++ b/include/af/autograd/Functions.hpp
@@ -0,0 +1,48 @@
+/*******************************************************
+ * Copyright (c) 2017, ArrayFire
+ * All rights reserved.
+ *
+ * This file is distributed under 3-clause BSD license.
+ * The complete license agreement can be obtained at:
+ * http://arrayfire.com/licenses/BSD-3-Clause
+ ********************************************************/
+#pragma once
+
+namespace af {
+    namespace autograd {
+
+        class Variable;
+
+        Variable operator +(const Variable &lhs, const Variable &rhs);
+        Variable operator *(const Variable &lhs, const Variable &rhs);
+        Variable operator -(const Variable &lhs, const Variable &rhs);
+        Variable operator /(const Variable &lhs, const Variable &rhs);
+
+        Variable operator +(const double &lhs, const Variable &rhs);
+        Variable operator *(const double &lhs, const Variable &rhs);
+        Variable operator -(const double &lhs, const Variable &rhs);
+        Variable operator /(const double &lhs, const Variable &rhs);
+
+        Variable operator +(const Variable &lhs, const double &rhs);
+        Variable operator *(const Variable &lhs, const double &rhs);
+        Variable operator -(const Variable &lhs, const double &rhs);
+        Variable operator /(const Variable &lhs, const double &rhs);
+
+        Variable negate(const Variable &input);
+        Variable reciprocal(const Variable &input);
+
+        Variable exp(const Variable &input);
+        Variable sin(const Variable &input);
+        Variable cos(const Variable &input);
+        Variable tanh(const Variable &input);
+        Variable sigmoid(const Variable &input);
+
+        Variable transpose(const Variable &input);
+        Variable expandAs(const Variable &input, const Variable &reference);
+        Variable reduceAs(const Variable &input, const Variable &reference);
+
+        Variable matmul(const Variable &lhs, const Variable &rhs);
+        Variable matmulTN(const Variable &lhs, const Variable &rhs);
+        Variable matmulNT(const Variable &lhs, const Variable &rhs);
+    }
+}
diff --git a/include/af/autograd/Variable.hpp b/include/af/autograd/Variable.hpp
new file mode 100644
index 0000000..330c37f
--- /dev/null
+++ b/include/af/autograd/Variable.hpp
@@ -0,0 +1,86 @@
+/*******************************************************
+ * Copyright (c) 2017, ArrayFire
+ * All rights reserved.
+ *
+ * This file is distributed under 3-clause BSD license.
+ * The complete license agreement can be obtained at:
+ * http://arrayfire.com/licenses/BSD-3-Clause
+ ********************************************************/
+
+#pragma once
+
+#include <cstddef>
+#include <functional>
+#include <memory>
+#include <unordered_map>
+#include <vector>
+
+#include <arrayfire.h>
+
+namespace af {
+    namespace autograd {
+        class Variable
+        {
+        public:
+            typedef std::function<void(std::vector<Variable> &, const Variable &)> GradFunc_t;
+            typedef std::unordered_map<std::ptrdiff_t, bool> Cache_t;
+            typedef std::vector<Variable> DAG_t;
+
+        private:
+            struct Shared {
+                Shared();
+                Shared(const af::array &data, bool calc_grad);
+                Shared(const af::array &data,
+                       const std::vector<Variable> &inputs,
+                       GradFunc_t grad_func,
+                       bool calc_grad);
+
+                bool m_calc_grad;
+                af::array m_data;
+                std::vector<Variable> m_inputs;
+                std::vector<Variable> m_grads;
+                GradFunc_t m_grad_func;
+            };
+
+        public:
+
+            Variable();
+            Variable(const af::array &data, bool calc_grad);
+            Variable(const af::array &data,
+                     const std::vector<Variable> &inputs,
+                     GradFunc_t grad_func);
+
+            af::array& array() const;
+
+            Variable& grad() const;
+
+            std::ptrdiff_t id() const;
+
+            bool isCalcGrad() const;
+
+            bool isGradAvailable() const;
+
+            void zeroGrad();
+
+            void setCalcGrad(bool calc_grad);
+
+            void addGrad(const Variable &child_grad);
+
+            void calcGradInputs(bool retain_grad_graph = false);
+
+            void backward(const Variable &grad, bool retain_grad_graph = false);
+
+
+        private:
+            void evalGrad(bool retain_grad_graph = false);
+
+            std::vector<Variable>& getInputs() const;
+
+            static void buildSubGraph(Cache_t &cache, DAG_t &dag, const Variable &var);
+
+            static DAG_t build(const Variable &var);
+
+            std::shared_ptr<Shared> m_shared;
+        };
+    }
+}
diff --git a/include/afml.h b/include/af/nn.h
similarity index 77%
rename from include/afml.h
rename to include/af/nn.h
index 279f7aa..88333fc 100644
--- a/include/afml.h
+++ b/include/af/nn.h
@@ -1,5 +1,5 @@
 /*******************************************************
- * Copyright (c) 2015, ArrayFire
+ * Copyright (c) 2017, ArrayFire
  * All rights reserved.
  *
  * This file is distributed under 3-clause BSD license.
@@ -8,4 +8,6 @@
  ********************************************************/
 
 #pragma once
-#include <afml/nn.h>
+
+#include <af/nn/Modules.hpp>
+#include <af/nn/Types.hpp>
diff --git a/include/afml/nn/Nodes.hpp b/include/af/nn/Modules.hpp
similarity index 62%
rename from include/afml/nn/Nodes.hpp
rename to include/af/nn/Modules.hpp
index df6bffc..310e4e9 100644
--- a/include/afml/nn/Nodes.hpp
+++ b/include/af/nn/Modules.hpp
@@ -1,5 +1,5 @@
 /*******************************************************
- * Copyright (c) 2015, ArrayFire
+ * Copyright (c) 2017, ArrayFire
  * All rights reserved.
  *
  * This file is distributed under 3-clause BSD license.
@@ -8,5 +8,7 @@
  ********************************************************/
 #pragma once
 
-#include <afml/nn/Nodes/Node.hpp>
-#include <afml/nn/Nodes/Linear.hpp>
+#include <af/nn/Modules/Module.hpp>
+#include <af/nn/Modules/Container.hpp>
+#include <af/nn/Modules/Linear.hpp>
+#include <af/nn/Modules/Activations.hpp>
diff --git a/include/af/nn/Modules/Activations.hpp b/include/af/nn/Modules/Activations.hpp
new file mode 100644
index 0000000..1530cd9
--- /dev/null
+++ b/include/af/nn/Modules/Activations.hpp
@@ -0,0 +1,34 @@
+/*******************************************************
+ * Copyright (c) 2017, ArrayFire
+ * All rights reserved.
+ *
+ * This file is distributed under 3-clause BSD license.
+ * The complete license agreement can be obtained at:
+ * http://arrayfire.com/licenses/BSD-3-Clause
+ ********************************************************/
+#pragma once
+
+#include <af/autograd/Variable.hpp>
+#include <af/nn/Modules/Module.hpp>
+
+namespace af
+{
+    namespace nn
+    {
+        class Sigmoid : public Module
+        {
+        public:
+            Sigmoid();
+
+            autograd::Variable forward(const autograd::Variable &input);
+        };
+
+        class Tanh : public Module
+        {
+        public:
+            Tanh();
+
+            autograd::Variable forward(const autograd::Variable &input);
+        };
+    }
+}
diff --git a/include/af/nn/Modules/Container.hpp b/include/af/nn/Modules/Container.hpp
new file mode 100644
index 0000000..2ee8c0e
--- /dev/null
+++ b/include/af/nn/Modules/Container.hpp
@@ -0,0 +1,57 @@
+/*******************************************************
+ * Copyright (c) 2017, ArrayFire
+ * All rights reserved.
+ *
+ * This file is distributed under 3-clause BSD license.
+ * The complete license agreement can be obtained at:
+ * http://arrayfire.com/licenses/BSD-3-Clause
+ ********************************************************/
+#pragma once
+
+#include <af/nn/Modules/Module.hpp>
+#include <memory>
+#include <vector>
+
+namespace af
+{
+    namespace nn
+    {
+
+        typedef std::shared_ptr<Module> ModulePtr;
+
+        class Container : public Module
+        {
+        protected:
+
+            std::vector<ModulePtr> m_modules;
+
+            Container();
+
+        public:
+
+            template <typename T>
+            void add(T module)
+            {
+                m_modules.emplace_back(new T(module));
+                for (auto param : module.parameters()) {
+                    m_parameters.push_back(param);
+                }
+            }
+
+            ModulePtr get(int id);
+
+            std::vector<ModulePtr> modules();
+
+            virtual autograd::Variable forward(const autograd::Variable &input) = 0;
+        };
+
+        class Sequential : public Container
+        {
+        public:
+
+            Sequential();
+
+            virtual autograd::Variable forward(const autograd::Variable &input);
+        };
+    }
+}
diff --git a/include/af/nn/Modules/Linear.hpp b/include/af/nn/Modules/Linear.hpp
new file mode 100644
index 0000000..f7a1ecc
--- /dev/null
+++ b/include/af/nn/Modules/Linear.hpp
@@ -0,0 +1,31 @@
+/*******************************************************
+ * Copyright (c) 2017, ArrayFire
+ * All rights reserved.
+ *
+ * This file is distributed under 3-clause BSD license.
+ * The complete license agreement can be obtained at:
+ * http://arrayfire.com/licenses/BSD-3-Clause
+ ********************************************************/
+#pragma once
+
+#include <af/nn/Modules/Module.hpp>
+
+namespace af
+{
+    namespace nn
+    {
+        class Linear : public Module
+        {
+        private:
+            bool m_bias;
+        public:
+            Linear(int input_size, int output_size, bool bias = true, float spread = 0.05);
+
+            Linear(const autograd::Variable &w);
+
+            Linear(const autograd::Variable &w, const autograd::Variable &b);
+
+            autograd::Variable forward(const autograd::Variable &input);
+        };
+    }
+}
diff --git a/include/af/nn/Modules/Module.hpp b/include/af/nn/Modules/Module.hpp
new file mode 100644
index 0000000..fb1e946
--- /dev/null
+++ b/include/af/nn/Modules/Module.hpp
@@ -0,0 +1,45 @@
+/*******************************************************
+ * Copyright (c) 2017, ArrayFire
+ * All rights reserved.
+ *
+ * This file is distributed under 3-clause BSD license.
+ * The complete license agreement can be obtained at:
+ * http://arrayfire.com/licenses/BSD-3-Clause
+ ********************************************************/
+#pragma once
+
+#include <string>
+#include <vector>
+
+#include <af/autograd/Variable.hpp>
+
+namespace af
+{
+    namespace nn
+    {
+
+        class Module
+        {
+        protected:
+            std::vector<autograd::Variable> m_parameters;
+
+            Module();
+
+            Module(const std::vector<autograd::Variable> &parameters);
+
+            void setParams(const std::vector<autograd::Variable> &parameters);
+
+        public:
+
+            std::vector<autograd::Variable> parameters();
+
+            void zeroGrad();
+
+            void train();
+
+            void eval();
+
+            virtual autograd::Variable forward(const autograd::Variable &input) = 0;
+        };
+    }
+}
diff --git a/include/afml/nn/Activations/ReLU.hpp b/include/af/nn/Types.hpp
similarity index 50%
rename from include/afml/nn/Activations/ReLU.hpp
rename to include/af/nn/Types.hpp
index 55b3703..6e7e101 100644
--- a/include/afml/nn/Activations/ReLU.hpp
+++ b/include/af/nn/Types.hpp
@@ -1,5 +1,5 @@
 /*******************************************************
- * Copyright (c) 2015, ArrayFire
+ * Copyright (c) 2017, ArrayFire
  * All rights reserved.
  *
  * This file is distributed under 3-clause BSD license.
@@ -8,13 +8,15 @@
  ********************************************************/
 #pragma once
 
-#include <afml/nn/Activations/Threshold.hpp>
-#include <afml/util/common.hpp>
+#include <af/autograd/Variable.hpp>
 
-namespace afml
-{
-    namespace nn
-    {
-        typedef ThresholdNode ReLU;
+namespace af {
+    namespace nn {
+
+        autograd::Variable input(const af::array &arr);
+
+        autograd::Variable parameter(const af::array &arr);
+
+        autograd::Variable weight(int input_size, int output_size, float spread = 0.05);
     }
 }
diff --git a/include/afml/nn.h b/include/afml/nn.h
deleted file mode 100644
index 98a7a4b..0000000
--- a/include/afml/nn.h
+++ /dev/null
@@ -1,16 +0,0 @@
-/*******************************************************
- * Copyright (c) 2015, ArrayFire
- * All rights reserved.
- *
- * This file is distributed under 3-clause BSD license.
- * The complete license agreement can be obtained at:
- * http://arrayfire.com/licenses/BSD-3-Clause
- ********************************************************/
-
-#pragma once
-
-#include <afml/util/common.hpp>
-#include <afml/nn/Weights.hpp>
-#include <afml/nn/Nodes.hpp>
-#include <afml/nn/Activations.hpp>
-#include <afml/nn/Networks.hpp>
diff --git a/include/afml/nn/Activations.hpp b/include/afml/nn/Activations.hpp
deleted file mode 100644
index 46d7907..0000000
--- a/include/afml/nn/Activations.hpp
+++ /dev/null
@@ -1,15 +0,0 @@
-/*******************************************************
- * Copyright (c) 2015, ArrayFire
- * All rights reserved.
- *
- * This file is distributed under 3-clause BSD license.
- * The complete license agreement can be obtained at:
- * http://arrayfire.com/licenses/BSD-3-Clause
- ********************************************************/
-#pragma once
-
-#include <afml/nn/Activations/Activation.hpp>
-#include <afml/nn/Activations/ReLU.hpp>
-#include <afml/nn/Activations/Sigmoid.hpp>
-#include <afml/nn/Activations/Tanh.hpp>
-#include <afml/nn/Activations/Threshold.hpp>
diff --git a/include/afml/nn/Activations/Activation.hpp b/include/afml/nn/Activations/Activation.hpp
deleted file mode 100644
index 52e0346..0000000
--- a/include/afml/nn/Activations/Activation.hpp
+++ /dev/null
@@ -1,54 +0,0 @@
-/*******************************************************
- * Copyright (c) 2015, ArrayFire
- * All rights reserved.
- *
- * This file is distributed under 3-clause BSD license.
- * The complete license agreement can be obtained at:
- * http://arrayfire.com/licenses/BSD-3-Clause
- ********************************************************/
-#pragma once
-
-#include <afml/nn/Nodes/Node.hpp>
-#include <afml/util/common.hpp>
-
-namespace afml
-{
-
-    namespace nn
-    {
-        class ActivationNode : public Node
-        {
-        protected:
-
-            virtual af::array fn(const af::array &val)
-            {
-                return val;
-            }
-
-            virtual af::array dfn(const af::array &val)
-            {
-                return af::constant(1, val.dims());
-            }
-
-        public:
-
-            ActivationNode(int size, const char *name="none") :
-                Node(1, &size, 1, &size, name)
-            {
-            }
-
-            ArrayVector forward(const ArrayVector &input)
-            {
-                return { fn(input[0]) };
-            }
-
-            ArrayVector backward(const ArrayVector &input,
-                                 const ArrayVector &gradOutput)
-            {
-                return { gradOutput[0] * dfn(input[0]) };
-            }
-        };
-
-        typedef ActivationNode Activation;
-    }
-}
diff --git a/include/afml/nn/Activations/Sigmoid.hpp b/include/afml/nn/Activations/Sigmoid.hpp
deleted file mode 100644
index 6a45cac..0000000
--- a/include/afml/nn/Activations/Sigmoid.hpp
+++ /dev/null
@@ -1,43 +0,0 @@
-/*******************************************************
- * Copyright (c) 2015, ArrayFire
- * All rights reserved.
- *
- * This file is distributed under 3-clause BSD license.
- * The complete license agreement can be obtained at:
- * http://arrayfire.com/licenses/BSD-3-Clause
- ********************************************************/
-#pragma once
-
-#include <afml/nn/Activations/Activation.hpp>
-
-namespace afml
-{
-    namespace nn
-    {
-        class SigmoidNode : public ActivationNode
-        {
-        private:
-
-            af::array fn(const af::array &input)
-            {
-                // TODO: replace with af::sigmoid
-                return 1 / (1 + af::exp(-input));
-            }
-
-            af::array dfn(const af::array &input)
-            {
-                af::array output = fn(input);
-                return output * (1 - output);
-            }
-
-        public:
-
-            SigmoidNode(int size, const char *name="none") :
-                ActivationNode(size, name)
-            {
-            }
-        };
-
-        typedef SigmoidNode Sigmoid;
-    }
-}
diff --git a/include/afml/nn/Activations/Tanh.hpp b/include/afml/nn/Activations/Tanh.hpp
deleted file mode 100644
index da5e7ef..0000000
--- a/include/afml/nn/Activations/Tanh.hpp
+++ /dev/null
@@ -1,40 +0,0 @@
-/*******************************************************
- * Copyright (c) 2015, ArrayFire
- * All rights reserved.
- *
- * This file is distributed under 3-clause BSD license.
- * The complete license agreement can be obtained at:
- * http://arrayfire.com/licenses/BSD-3-Clause
- ********************************************************/
-#pragma once
-
-#include <afml/nn/Activations/Activation.hpp>
-
-namespace afml
-{
-    namespace nn
-    {
-        class TanhNode : public ActivationNode
-        {
-        private:
-
-            af::array fn(const af::array &input)
-            {
-                return af::tanh(input);
-            }
-
-            af::array dfn(const af::array &input)
-            {
-                af::array output = fn(input);
-                return (1 - output * output);
-            }
-        public:
-            TanhNode(int size, const char *name="none") :
-                ActivationNode(size, name)
-            {
-            }
-        };
-
-        typedef TanhNode Tanh;
-    }
-}
diff --git a/include/afml/nn/Activations/Threshold.hpp b/include/afml/nn/Activations/Threshold.hpp
deleted file mode 100644
index 75dfc86..0000000
--- a/include/afml/nn/Activations/Threshold.hpp
+++ /dev/null
@@ -1,42 +0,0 @@
-/*******************************************************
- * Copyright (c) 2015, ArrayFire
- * All rights reserved.
- *
- * This file is distributed under 3-clause BSD license.
- * The complete license agreement can be obtained at:
- * http://arrayfire.com/licenses/BSD-3-Clause
- ********************************************************/
-#pragma once
-
-#include <afml/nn/Activations/Activation.hpp>
-
-namespace afml
-{
-    namespace nn
-    {
-        class ThresholdNode : public ActivationNode
-        {
-        private:
-            float mVal;
-
-            af::array fn(const af::array &input)
-            {
-                af::array cond = (input >= mVal);
-                return (cond) * input + (1 - cond) * mVal;
-            }
-
-            af::array dfn(const af::array &input)
-            {
-                return (input >= mVal).as(input.type());
-            }
-        public:
-            ThresholdNode(int size, float val, const char *name="none") :
-                ActivationNode(size, name),
-                mVal(val)
-            {
-            }
-        };
-
-        typedef ThresholdNode Threshold;
-    }
-}
diff --git a/include/afml/nn/Networks/FFNet.hpp b/include/afml/nn/Networks/FFNet.hpp
deleted file mode 100644
index 2308c02..0000000
--- a/include/afml/nn/Networks/FFNet.hpp
+++ /dev/null
@@ -1,104 +0,0 @@
-/*******************************************************
- * Copyright (c) 2015, ArrayFire
- * All rights reserved.
- *
- * This file is distributed under 3-clause BSD license.
- * The complete license agreement can be obtained at:
- * http://arrayfire.com/licenses/BSD-3-Clause
- ********************************************************/
-#pragma once
-
-#include <afml/nn/Nodes/Node.hpp>
-#include <afml/nn/Nodes/Linear.hpp>
-#include <afml/nn/Activations/Activation.hpp>
-#include <afml/util/common.hpp>
-
-namespace afml
-{
-    namespace nn
-    {
-        class FeedForwardNetwork : public Node
-        {
-        private:
-            IntVector mNodeSizes;
-            std::vector<NodePtr> mNodes;
-            std::vector<ArrayVector> mData;
-
-            template <typename NodeType>
-            FeedForwardNetwork& addNodePtr(NodeType *nodePtr)
-            {
-                mNodes.emplace_back(nodePtr);
-
-                // TODO: Throw exception if node.getOutSizes() has >1 length
-                int size = nodePtr->getOutSizes()[0];
-                mNodeSizes.push_back(size);
-                this->setOutSizes(1, &size);
-                return *this;
-            }
-
-        public:
-
-            FeedForwardNetwork(const int inputSize, const char *name="none") :
-                Node(1, &inputSize, name),
-                mNodeSizes(1),
-                mNodes(0),
-                mData(0)
-            {
-                mNodeSizes[0] = inputSize;
-            }
-
-            template <typename NodeType>
-            FeedForwardNetwork& addNode(const NodeType &node)
-            {
-                return addNodePtr(new NodeType(node));
-            }
-
-
-            FeedForwardNetwork& addLinearNode(const int size, const float spread = 0.05)
-            {
-                return addNodePtr(new LinearNode(mNodeSizes.back(), size, spread));
-            }
-
-            template <typename ActivationType>
-            FeedForwardNetwork& addActivationNode()
-            {
-                int size = (int)mNodeSizes.back();
-
-                // Ensure ActivationType is derived from ActivationNode
-                ActivationNode *node = new ActivationType(size);
-
-                return addNodePtr(node);
-            }
-
-            ArrayVector forward(const ArrayVector &input)
-            {
-                mData.resize(mNodeSizes.size());
-                mData[0] = input;
-                for (int i = 0; i < (int)mNodes.size(); i++) {
-                    mData[i + 1] = mNodes[i]->forward(mData[i]);
-                }
-                return mData.back();
-            }
-
-            ArrayVector backward(const ArrayVector &input,
-                                 const ArrayVector &gradOutput)
-            {
-                //TODO: Assert input coming is same as the stored input
-                ArrayVector currGradOutput = gradOutput;
-                for (int i = (int)mNodes.size() - 1; i >= 0; i--) {
-                    currGradOutput = mNodes[i]->backward(mData[i], currGradOutput);
-                }
-                return currGradOutput;
-            }
-
-            void update(float lr)
-            {
-                for(int i = 0; i < (int)mNodes.size(); i++) {
-                    mNodes[i]->update(lr);
-                }
-            }
-        };
-
-        typedef FeedForwardNetwork FFNet;
-    }
-}
diff --git a/include/afml/nn/Nodes/Linear.hpp b/include/afml/nn/Nodes/Linear.hpp
deleted file mode 100644
index 479ac2b..0000000
--- a/include/afml/nn/Nodes/Linear.hpp
+++ /dev/null
@@ -1,68 +0,0 @@
-/*******************************************************
- * Copyright (c) 2015, ArrayFire
- * All rights reserved.
- *
- * This file is distributed under 3-clause BSD license.
- * The complete license agreement can be obtained at:
- * http://arrayfire.com/licenses/BSD-3-Clause
- ********************************************************/
-#pragma once
-
-#include <afml/nn/Nodes/Node.hpp>
-#include <afml/nn/Weights.hpp>
-#include <afml/util/common.hpp>
-
-namespace afml
-{
-    namespace nn
-    {
-        class LinearNode : public Node
-        {
-        private:
-
-            Weights mWeight, mBias;
-            Weights mWeightDiff, mBiasDiff;
-
-        public:
-
-            LinearNode(const int inputSize, const int outputSize,
-                       float spread = 0.05,
-                       const char *name="none") :
-                Node(1, &inputSize, 1, &outputSize, name),
-                mWeight(inputSize, outputSize, spread),
-                mBias(1, outputSize, spread),
-                mWeightDiff(), mBiasDiff()
-            {
-            }
-
-            ArrayVector forward(const ArrayVector &input)
-            {
-                return {af::matmul(mWeight, input[0]) +
-                        af::tile(mBias, 1, input[0].dims(1))};
-            }
-
-            ArrayVector backward(const ArrayVector &input,
-                                 const ArrayVector &gradOutput)
-            {
-                float m = input[0].dims(1);
-
-                mWeightDiff = af::matmulNT(gradOutput[0], input[0]) / m;
-                mBiasDiff = af::sum(gradOutput[0], 1) / m;
-
-                return { af::matmulTN(mWeight, gradOutput[0]) };
-            }
-
-            void update(float lr)
-            {
-                mWeight += lr * mWeightDiff;
-                mBias += lr * mBiasDiff;
-
-                mWeight.eval();
-                mBias.eval();
-
-                mWeightDiff.reset();
-                mBiasDiff.reset();
-            }
-        };
-    }
-}
diff --git a/include/afml/nn/Nodes/Node.hpp b/include/afml/nn/Nodes/Node.hpp
deleted file mode 100644
index 4e522d0..0000000
--- a/include/afml/nn/Nodes/Node.hpp
+++ /dev/null
@@ -1,115 +0,0 @@
-/*******************************************************
- * Copyright (c) 2015, ArrayFire
- * All rights reserved.
- *
- * This file is distributed under 3-clause BSD license.
- * The complete license agreement can be obtained at:
- * http://arrayfire.com/licenses/BSD-3-Clause
- ********************************************************/
-#pragma once
-
-#include <cstring>
-#include <iostream>
-
-#include <afml/util/common.hpp>
-#include <memory>
-
-namespace afml
-{
-    namespace nn
-    {
-
-        class Node
-        {
-        private:
-            IntVector mInputSizes;
-            IntVector mOutputSizes;
-
-            char mName[MAX_NAME_SIZE];
-
-            void set(const int *inputSizes, const int *outputSizes,
-                     const char *name, const int count)
-            {
-                for (int i = 0; i < (int)mInputSizes.size(); i++) mInputSizes[i] = inputSizes[i];
-                for (int i = 0; i < (int)mOutputSizes.size(); i++) mOutputSizes[i] = outputSizes[i];
-
-                int len = std::min(count, MAX_NAME_SIZE - 1);
-                std::memcpy(mName, name, len);
-                mName[len] = 0;
-            }
-
-        protected:
-            void setOutSizes(const int numOutputs, const int *outputSizes)
-            {
-                mOutputSizes.resize(numOutputs);
-                for (int i = 0; i < numOutputs; i++) {
-                    mOutputSizes[i] = outputSizes[i];
-                }
-            }
-
-            Node(const int numInputs, const int *inputSizes, const char *name):
-                mInputSizes(numInputs), mOutputSizes(numInputs)
-            {
-                set(inputSizes, inputSizes, name, (int)strlen(name));
-            }
-
-        public:
-
-            Node(const int numInputs, const int *inputSizes,
-                 const int numOutputs, const int *outputSizes, const char *name)
-                : mInputSizes(numInputs), mOutputSizes(numOutputs)
-            {
-                set(inputSizes, outputSizes, name, (int)strlen(name));
-            }
-
-            Node(const std::vector<int> &inputSizes,
-                 const std::vector<int> &outputSizes,
-                 const std::string &name)
-                : mInputSizes((int)inputSizes.size()), mOutputSizes((int)outputSizes.size())
-            {
-                set(&inputSizes[0], &outputSizes[0], name.c_str(), (int)name.size());
-            }
-
-            virtual ArrayVector forward(const ArrayVector &input)
-            {
-                return input;
-            }
-
-            virtual ArrayVector backward(const ArrayVector &input,
-                                         const ArrayVector &gradOutput)
-            {
-                return gradOutput;
-            }
-
-            virtual void update(float lr) {}
-
-            //TODO: Add a method that actually returns this information to the user
-            virtual void info()
-            {
-                std::cout << "Name: " << mName << std::endl;
-                std::cout << "Input sizes: " << std::endl;
-
-                for (int i = 0; i < (int)mInputSizes.size(); i++) {
-                    std::cout << mInputSizes[i] << std::endl;
-                }
-
-                std::cout << "Output sizes: " << std::endl;
-                for (int i = 0; i < (int)mOutputSizes.size(); i++) {
-                    std::cout << mOutputSizes[i] << std::endl;
-                }
-            }
-
-            IntVector getInSizes() const
-            {
-                return mInputSizes;
-            }
-
-            IntVector getOutSizes() const
-            {
-                return mOutputSizes;
-            }
-        };
-
-        typedef std::shared_ptr<Node> NodePtr;
-    }
-}
diff --git a/include/afml/nn/Weights.hpp b/include/afml/nn/Weights.hpp
deleted file mode 100644
index 7250ba7..0000000
--- a/include/afml/nn/Weights.hpp
+++ /dev/null
@@ -1,146 +0,0 @@
-/*******************************************************
- * Copyright (c) 2015, ArrayFire
- * All rights reserved.
- *
- * This file is distributed under 3-clause BSD license.
- * The complete license agreement can be obtained at:
- * http://arrayfire.com/licenses/BSD-3-Clause
- ********************************************************/
-#pragma once
-
-#include <afml/util/common.hpp>
-
-namespace afml
-{
-    namespace nn
-    {
-        class Weights
-        {
-            ArrayVector mData;
-
-        public:
-
-            Weights() : mData(1)
-            {
-            }
-
-
-            Weights(int inputSize, int outputSize, float spread) : mData(1)
-            {
-                mData[0] = af::randu(outputSize, inputSize) * spread - spread / 2; //Weights
-            }
-
-            Weights(const af::array &weights) : mData(1)
-            {
-                mData[0] = weights;
-            }
-
-            operator af::array() const
-            {
-                return mData[0];
-            }
-
-            Weights operator+(const Weights &other) const
-            {
-                return mData[0] + other;
-            }
-
-            Weights operator*(const Weights &other) const
-            {
-                return mData[0] * other;
-            }
-
-            Weights operator/(const Weights &other) const
-            {
-                return mData[0] / other;
-            }
-
-            Weights operator-(const Weights &other) const
-            {
-                return mData[0] - other;
-            }
-
-            Weights operator+=(const Weights &other)
-            {
-                mData[0] += other;
-                return *this;
-            }
-
-            Weights operator/=(float val)
-            {
-                mData[0] /= val;
-                return *this;
-            }
-
-            Weights operator*=(const Weights &other)
-            {
-                mData[0] *= other;
-                return *this;
-            }
-
-            Weights operator-=(float val)
-            {
-                mData[0] -= val;
-                return *this;
-            }
-
-            void reset()
-            {
-                mData[0] = af::constant(0, mData[0].dims());
-            }
-
-            void eval()
-            {
-                mData[0].eval();
-            }
-        };
-
-        Weights operator *(const Weights &lhs, const double &rhs)
-        {
-            const af::array lhs_arr = lhs;
-            return lhs_arr * rhs;
-        }
-
-        Weights operator +(const Weights &lhs, const double &rhs)
-        {
-            const af::array lhs_arr = lhs;
-            return lhs_arr + rhs;
-        }
-
-        Weights operator /(const Weights &lhs, const double &rhs)
-        {
-            const af::array lhs_arr = lhs;
-            return lhs_arr / rhs;
-        }
-
-        Weights operator -(const Weights &lhs, const double &rhs)
-        {
-            const af::array lhs_arr = lhs;
-            return lhs_arr - rhs;
-        }
-
-        Weights operator *(const double &lhs, const Weights &rhs)
-        {
-            const af::array rhs_arr = rhs;
-            return lhs * rhs_arr;
-        }
-
-        Weights operator +(const double &lhs, const Weights &rhs)
-        {
-            const af::array rhs_arr = rhs;
-            return lhs + rhs_arr;
-        }
-
-        Weights operator /(const double &lhs, const Weights &rhs)
-        {
-            const af::array rhs_arr = rhs;
-            return lhs / rhs_arr;
-        }
-
-        Weights operator -(const double &lhs, const Weights &rhs)
-        {
-            const af::array rhs_arr = rhs;
-            return lhs - rhs_arr;
-        }
-    }
-}
diff --git a/include/afml/util/common.hpp b/include/afml/util/common.hpp
deleted file mode 100644
index a9d7123..0000000
--- a/include/afml/util/common.hpp
+++ /dev/null
@@ -1,20 +0,0 @@
-/*******************************************************
- * Copyright (c) 2015, ArrayFire
- * All rights reserved.
- *
- * This file is distributed under 3-clause BSD license.
- * The complete license agreement can be obtained at:
- * http://arrayfire.com/licenses/BSD-3-Clause
- ********************************************************/
-#pragma once
-
-#include <arrayfire.h>
-#include <vector>
-
-namespace afml
-{
-    const int MAX_NAME_SIZE = 32;
-
-    typedef std::vector<int> IntVector;
-    typedef std::vector<af::array> ArrayVector;
-}
diff --git a/src/autograd/Functions.cpp b/src/autograd/Functions.cpp
new file mode 100644
index 0000000..71048b6
--- /dev/null
+++ b/src/autograd/Functions.cpp
@@ -0,0 +1,251 @@
+/*******************************************************
+ * Copyright (c) 2017, ArrayFire
+ * All rights reserved.
+ *
+ * This file is distributed under 3-clause BSD license.
+ * The complete license agreement can be obtained at:
+ * http://arrayfire.com/licenses/BSD-3-Clause
+ ********************************************************/
+
+#include <af/autograd/Functions.hpp>
+#include <af/autograd/Variable.hpp>
+
+namespace af {
+    namespace autograd {
+
+        Variable operator +(const Variable &lhs, const Variable &rhs)
+        {
+            auto result = lhs.array() + rhs.array();
+            auto grad_func = [](std::vector<Variable> &inputs, const Variable &grad_output) {
+                inputs[0].addGrad(grad_output);
+                inputs[1].addGrad(grad_output);
+            };
+            return Variable(result, {lhs, rhs}, grad_func);
+        }
+
+        Variable operator -(const Variable &lhs, const Variable &rhs)
+        {
+            auto result = lhs.array() - rhs.array();
+            auto grad_func = [](std::vector<Variable> &inputs, const Variable &grad_output) {
+                inputs[0].addGrad(grad_output);
+                inputs[1].addGrad(negate(grad_output));
+            };
+            return Variable(result, {lhs, rhs}, grad_func);
+        }
+
+        Variable operator *(const Variable &lhs, const Variable &rhs)
+        {
+            auto result = lhs.array() * rhs.array();
+            auto grad_func = [](std::vector<Variable> &inputs, const Variable &grad_output) {
+                inputs[0].addGrad(grad_output * inputs[1]);
+                inputs[1].addGrad(grad_output * inputs[0]);
+            };
+            return Variable(result, {lhs, rhs}, grad_func);
+        }
+
+        Variable operator /(const Variable &lhs, const Variable &rhs)
+        {
+            auto result = lhs.array() / rhs.array();
+            auto grad_func = [](std::vector<Variable> &inputs, const Variable &grad_output) {
+                auto inputs_1_rec = reciprocal(inputs[1]);
+                auto grad_input_0 = grad_output * inputs_1_rec;
+                inputs[0].addGrad(grad_input_0);
+                inputs[1].addGrad(grad_input_0 * negate(inputs[0]) * inputs_1_rec);
+            };
+            return Variable(result, {lhs, rhs}, grad_func);
+        }
+
+#define INSTANTIATE_OPERATOR(OP)                                          \
+        Variable operator OP(const double &lhs_val, const Variable &rhs) \
+        {                                                                \
+            auto lhs = Variable(                                         \
+                af::constant(lhs_val,                                    \
+                             rhs.array().dims(),                         \
+                             rhs.array().type()),                        \
+                false);                                                  \
+            return lhs OP rhs;                                           \
+        }                                                                \
+        Variable operator OP(const Variable &lhs, const double &rhs_val) \
+        {                                                                \
+            auto rhs = Variable(                                         \
+                af::constant(rhs_val,                                    \
+                             lhs.array().dims(), lhs.array().type()),    \
+                false);                                                  \
+            return lhs OP rhs;                                           \
+        }                                                                \
+
+        INSTANTIATE_OPERATOR(+)
+        INSTANTIATE_OPERATOR(-)
+        INSTANTIATE_OPERATOR(*)
+        INSTANTIATE_OPERATOR(/)
+
+#undef INSTANTIATE_OPERATOR
+
+        Variable negate(const Variable &input)
+        {
+            auto result = 0.0 - input.array();
+            auto grad_func = [](std::vector<Variable> &inputs, const Variable &grad_output) {
+                inputs[0].addGrad(negate(grad_output));
+            };
+            return Variable(result, {input}, grad_func);
+        }
+
+        Variable reciprocal(const Variable &input)
+        {
+            auto result = 1.0 / input.array();
+            auto grad_func = [](std::vector<Variable> &inputs, const Variable &grad_output) {
+                auto res = reciprocal(inputs[0]);
+                inputs[0].addGrad(negate(grad_output) * res * res);
+            };
+            return Variable(result, {input}, grad_func);
+        }
+
+        Variable exp(const Variable &input)
+        {
+            auto result = exp(input.array());
+            auto grad_func = [](std::vector<Variable> &inputs, const Variable &grad_output) {
+                inputs[0].addGrad(grad_output * exp(inputs[0]));
+            };
+            return Variable(result, {input}, grad_func);
+        }
+
+        Variable sin(const Variable &input)
+        {
+            auto result = sin(input.array());
+            auto grad_func = [](std::vector<Variable> &inputs, const Variable &grad_output) {
+                inputs[0].addGrad(grad_output * cos(inputs[0]));
+            };
+            return Variable(result, {input}, grad_func);
+        }
+
+        Variable cos(const Variable &input)
+        {
+            auto result = cos(input.array());
+            auto grad_func = [](std::vector<Variable> &inputs, const Variable &grad_output) {
+                inputs[0].addGrad(grad_output * negate(sin(inputs[0])));
+            };
+            return Variable(result, {input}, grad_func);
+        }
+
+        Variable tanh(const Variable &input)
+        {
+            auto result = tanh(input.array());
+            auto grad_func = [](std::vector<Variable> &inputs, const Variable &grad_output) {
+                auto tmp = tanh(inputs[0]);
+                inputs[0].addGrad(grad_output * (1.0 - tmp * tmp));
+            };
+            return Variable(result, {input}, grad_func);
+        }
+
+        Variable sigmoid(const Variable &input)
+        {
+            auto result = sigmoid(input.array());
+            auto grad_func = [](std::vector<Variable> &inputs, const Variable &grad_output) {
+                auto tmp = sigmoid(inputs[0]);
+                inputs[0].addGrad(grad_output * tmp * (1 - tmp));
+            };
+            return Variable(result, {input}, grad_func);
+        }
+
+        Variable transpose(const Variable &input)
+        {
+            auto result = transpose(input.array());
+            auto grad_func = [](std::vector<Variable> &inputs, const Variable &grad_output) {
+                inputs[0].addGrad(transpose(grad_output));
+            };
+            return Variable(result, {input}, grad_func);
+        }
+
+        Variable expandAs(const Variable &input, const Variable &reference)
+        {
+            dim4 dims(1,1,1,1);
+            dim4 idims = input.array().dims();
+            dim4 rdims = reference.array().dims();
+            for (int i = 0; i < 4; i++) {
+                dims[i] = rdims[i] / idims[i];
+            }
+            auto result = tile(input.array(), dims);
+            auto grad_func = [](std::vector<Variable> &inputs, const Variable &grad_output) {
+                inputs[0].addGrad(reduceAs(grad_output, inputs[0]));
+            };
+            return Variable(result, {input}, grad_func);
+        }
+
+        Variable reduceAs(const Variable &input, const Variable &reference)
+        {
+            dim4 idims = input.array().dims();
+            dim4 rdims = reference.array().dims();
+            auto result = input.array();
+            for (int i = 0; i < 4; i++) {
+                if (idims[i] != rdims[i]) result = sum(result, i);
+            }
+            auto grad_func = [](std::vector<Variable> &inputs, const Variable &grad_output) {
+                inputs[0].addGrad(expandAs(grad_output, inputs[0]));
+            };
+            return Variable(result, {input}, grad_func);
+        }
+
+        Variable matmul(const Variable &lhs, const Variable &rhs)
+        {
+            // lhs:Input[0] -- [M, N]
+            // rhs:Input[1] -- [N, K]
+            // matmul(lhs, rhs)
+            // -- matmul([M, N], [N, K]) -- [M, K]
+            // result:grad_output -- [M, K]
+            auto result = matmul(lhs.array(), rhs.array());
+            auto grad_func = [](std::vector<Variable> &inputs, const Variable &grad_output) {
+                // matmulNT(grad_output, inputs[1])
+                // -- matmulNT([M, K], [N, K])
+                // -- matmul([M, K], [K, N]) -- [M, N]
+                inputs[0].addGrad(matmulNT(grad_output, inputs[1]));
+                // matmulTN(inputs[0], grad_output)
+                // -- matmulTN([M, N], [M, K])
+                // -- matmul([N, M], [M, K]) -- [N, K]
+                inputs[1].addGrad(matmulTN(inputs[0], grad_output));
+            };
+            return Variable(result, {lhs, rhs}, grad_func);
+        }
+
+        Variable matmulTN(const Variable &lhs, const Variable &rhs)
+        {
+            // lhs:Input[0] -- [N, M]
+            // rhs:Input[1] -- [N, K]
+            // matmulTN(lhs, rhs)
+            // -- matmulTN([N, M], [N, K])
+            // -- matmul([M, N], [N, K]) -- [M, K]
+            // result:grad_output -- [M, K]
+            auto result = matmulTN(lhs.array(), rhs.array());
+            auto grad_func = [](std::vector<Variable> &inputs, const Variable &grad_output) {
+                // matmulNT(inputs[1], grad_output)
+                // -- matmulNT([N, K], [M, K])
+                // -- matmul([N, K], [K, M]) -- [N, M]
+                inputs[0].addGrad(matmulNT(inputs[1], grad_output));
+                // matmul(inputs[0], grad_output)
+                // -- matmul([N, M], [M, K]) -- [N, K]
+                inputs[1].addGrad(matmul(inputs[0], grad_output));
+            };
+            return Variable(result, {lhs, rhs}, grad_func);
+        }
+
+        Variable matmulNT(const Variable &lhs, const Variable &rhs)
+        {
+            // lhs:Input[0] -- [M, N]
+            // rhs:Input[1] -- [K, N]
+            // matmulNT(lhs, rhs)
+            // -- matmulNT([M, N], [K, N])
+            // -- matmul([M, N], [N, K]) -- [M, K]
+            // result:grad_output -- [M, K]
+            auto result = matmulNT(lhs.array(), rhs.array());
+            auto grad_func = [](std::vector<Variable> &inputs, const Variable &grad_output) {
+                // matmul(grad_output, inputs[1])
+                // -- matmul([M, K], [K, N]) -- [M, N]
+                inputs[0].addGrad(matmul(grad_output, inputs[1]));
+                // matmulTN(grad_output, inputs[0])
+                // -- matmulTN([M, K], [M, N])
+                // -- matmul([K, M], [M, N]) -- [K, N]
+                inputs[1].addGrad(matmulTN(grad_output, inputs[0]));
+            };
+            return Variable(result, {lhs, rhs}, grad_func);
+        }
+    }
+}
diff --git a/src/autograd/Variable.cpp b/src/autograd/Variable.cpp
new file mode 100644
index 0000000..9ff55c5
--- /dev/null
+++ b/src/autograd/Variable.cpp
@@ -0,0 +1,191 @@
+/*******************************************************
+ * Copyright (c) 2017, ArrayFire
+ * All rights reserved.
+ *
+ * This file is distributed under 3-clause BSD license.
+ * The complete license agreement can be obtained at:
+ * http://arrayfire.com/licenses/BSD-3-Clause
+ ********************************************************/
+
+#include <af/autograd/Variable.hpp>
+#include <af/autograd/Functions.hpp>
+
+namespace af {
+    namespace autograd {
+
+        Variable::Shared::Shared() :
+            m_calc_grad(true),
+            m_data(),
+            m_inputs(),
+            m_grads(),
+            m_grad_func(nullptr)
+        {}
+
+        Variable::Shared::Shared(const af::array &data, bool calc_grad) :
+            m_calc_grad(calc_grad),
+            m_data(data),
+            m_inputs(),
+            m_grads(),
+            m_grad_func(nullptr)
+        {}
+
+        Variable::Shared::Shared(const af::array &data,
+                                 const std::vector<Variable> &inputs,
+                                 GradFunc_t grad_func,
+                                 bool calc_grad) :
+            m_calc_grad(calc_grad),
+            m_data(data),
+            m_inputs(inputs.begin(), inputs.end()),
+            m_grads(),
+            m_grad_func(grad_func)
+        {}
+
+        Variable::Variable() :
+            m_shared(new Shared())
+        {
+        }
+
+        Variable::Variable(const af::array &data, bool calc_grad) :
+            m_shared(new Shared(data, calc_grad))
+        {}
+
+        Variable::Variable(const af::array &data,
+                           const std::vector<Variable> &inputs,
+                           GradFunc_t grad_func) :
+            m_shared(nullptr)
+        {
+            bool calc_grad = false;
+            for (const auto &input : inputs) {
+                calc_grad |= input.isCalcGrad();
+            }
+            if (calc_grad) {
+                m_shared = std::shared_ptr<Shared>(new Shared(data, inputs, grad_func, true));
+            } else {
+                m_shared = std::shared_ptr<Shared>(new Shared(data, false));
+            }
+        }
+
+        af::array& Variable::array() const
+        {
+            return m_shared->m_data;
+        }
+
+        Variable& Variable::grad() const
+        {
+            if (!m_shared->m_calc_grad) {
+                throw af::exception("Gradient calculation disabled.");
+            }
+            if (m_shared->m_grads.size() == 0) {
+                throw af::exception("Gradient hasn't been calculated yet.");
+            }
+            return m_shared->m_grads[0];
+        }
+
+        std::ptrdiff_t Variable::id() const
+        {
+            return (std::ptrdiff_t)m_shared.get();
+        }
+
+        std::vector<Variable>& Variable::getInputs() const
+        {
+            return m_shared->m_inputs;
+        }
+
+        bool Variable::isCalcGrad() const
+        {
+            return m_shared->m_calc_grad;
+        }
+
+        bool Variable::isGradAvailable() const
+        {
+            if (!m_shared->m_calc_grad) return false;
+            return m_shared->m_grads.size() >= 1;
+        }
+
+        void Variable::zeroGrad()
+        {
+            m_shared->m_grads.clear();
+        }
+
+        void Variable::setCalcGrad(bool calc_grad)
+        {
+            m_shared->m_calc_grad = calc_grad;
+            if (!calc_grad) {
+                m_shared->m_grad_func = nullptr;
+                m_shared->m_inputs.clear();
+                m_shared->m_grads.clear();
+            }
+        }
+
+        void Variable::addGrad(const Variable &child_grad)
+        {
+            if (m_shared->m_calc_grad) {
+                m_shared->m_grads.push_back(child_grad);
+            }
+        }
+
+        void Variable::evalGrad(bool retain_grad_graph)
+        {
+            // Flag asking not to calculate gradients
+            if (!m_shared->m_calc_grad) return;
+
+            // Best not to evaluate the JIT immediately if there's only a single gradient
+            Variable grad = m_shared->m_grads[0];
+            if (m_shared->m_grads.size() > 1) {
+                for (unsigned i = 1; i < m_shared->m_grads.size(); i++) {
+                    grad = grad + m_shared->m_grads[i];
+                }
+                grad.array().eval();
+                m_shared->m_grads.resize(1);
+            }
+
+            // Remove the graph if not needed
+            if (!retain_grad_graph) {
+                // This can be done by extracting af::array and ignoring everything else
+                auto grad_data = grad.array();
+                // Since there's no graph leading to this, set calc_grad to false
+                grad = Variable(grad_data, false);
+            }
+
+            m_shared->m_grads[0] = grad;
+        }
+
+        void Variable::calcGradInputs(bool retain_grad_graph)
+        {
+            evalGrad(retain_grad_graph);
+            if (m_shared->m_grad_func) {
+                m_shared->m_grad_func(m_shared->m_inputs, m_shared->m_grads[0]);
+            }
+        }
+
+        void Variable::backward(const Variable &grad, bool retain_grad_graph)
+        {
+            this->addGrad(grad);
+            Variable::DAG_t dag = Variable::build(*this);
+            for (auto iter = dag.rbegin(); iter != dag.rend(); iter++) {
+                iter->calcGradInputs(retain_grad_graph);
+            }
+        }
+
+        Variable::DAG_t Variable::build(const Variable &var)
+        {
+            Cache_t cache;
+            Variable::DAG_t dag;
+            Variable::buildSubGraph(cache, dag, var);
+            return dag;
+        }
+
+        void Variable::buildSubGraph(Cache_t &cache, Variable::DAG_t &dag, const Variable &var)
+        {
+            std::ptrdiff_t id = var.id();
+            if (cache.find(id) != cache.end()) {
+                return;
+            }
+            for (const auto &input : var.getInputs()) {
+                Variable::buildSubGraph(cache, dag, input);
+            }
+            cache[id] = true;
+            dag.push_back(var);
+        }
+    }
+}
diff --git a/src/nn/Modules/Activations.cpp b/src/nn/Modules/Activations.cpp
new file mode 100644
index 0000000..0d1ca6e
--- /dev/null
+++ b/src/nn/Modules/Activations.cpp
@@ -0,0 +1,33 @@
+/*******************************************************
+ * Copyright (c) 2017, ArrayFire
+ * All rights reserved.
+ *
+ * This file is distributed under 3-clause BSD license.
+ * The complete license agreement can be obtained at:
+ * http://arrayfire.com/licenses/BSD-3-Clause
+ ********************************************************/
+
+#include <af/autograd/Functions.hpp>
+#include <af/nn/Modules/Activations.hpp>
+
+namespace af
+{
+    namespace nn
+    {
+        using namespace autograd;
+
+        Sigmoid::Sigmoid() {}
+
+        Variable Sigmoid::forward(const Variable &input)
+        {
+            return sigmoid(input);
+        }
+
+        Tanh::Tanh() {}
+
+        Variable Tanh::forward(const Variable &input)
+        {
+            return tanh(input);
+        }
+    }
+}
diff --git a/src/nn/Modules/Container.cpp b/src/nn/Modules/Container.cpp
new file mode 100644
index 0000000..9078631
--- /dev/null
+++ b/src/nn/Modules/Container.cpp
@@ -0,0 +1,42 @@
+/*******************************************************
+ * Copyright (c) 2017, ArrayFire
+ * All rights reserved.
+ *
+ * This file is distributed under 3-clause BSD license.
+ * The complete license agreement can be obtained at:
+ * http://arrayfire.com/licenses/BSD-3-Clause
+ ********************************************************/
+
+#include <af/autograd/Variable.hpp>
+#include <af/nn/Modules/Container.hpp>
+
+namespace af
+{
+    namespace nn
+    {
+        using namespace autograd;
+
+        Container::Container() {}
+
+        ModulePtr Container::get(int id)
+        {
+            return m_modules[id];
+        }
+
+        std::vector<ModulePtr> Container::modules()
+        {
+            return m_modules;
+        }
+
+        Sequential::Sequential() {}
+
+        Variable Sequential::forward(const Variable &input)
+        {
+            Variable output = input;
+            for (auto &module : m_modules) {
+                output = module->forward(output);
+            }
+            return output;
+        }
+    }
+}
diff --git a/src/nn/Modules/Linear.cpp b/src/nn/Modules/Linear.cpp
new file mode 100644
index 0000000..c289ea5
--- /dev/null
+++ b/src/nn/Modules/Linear.cpp
@@ -0,0 +1,59 @@
+/*******************************************************
+ * Copyright (c) 2017, ArrayFire
+ * All rights reserved.
+ *
+ * This file is distributed under 3-clause BSD license.
+ * The complete license agreement can be obtained at:
+ * http://arrayfire.com/licenses/BSD-3-Clause
+ ********************************************************/
+#include <af/autograd/Functions.hpp>
+
+#include <af/nn/Modules/Linear.hpp>
+#include <af/nn/Types.hpp>
+
+namespace af
+{
+    namespace nn
+    {
+        using namespace autograd;
+
+        Linear::Linear(int input_size, int output_size, bool bias, float spread) :
+            m_bias(bias)
+        {
+            auto w = nn::weight(input_size, output_size, spread);
+            if (bias) {
+                auto b = nn::weight(1, output_size, spread);
+                setParams({w, b});
+            } else {
+                setParams({w});
+            }
+        }
+
+        Linear::Linear(const Variable &w) :
+            Module({w}),
+            m_bias(false)
+        {
+        }
+
+        Linear::Linear(const Variable &w, const Variable &b) :
+            Module({w, b}),
+            m_bias(true)
+        {
+            if (b.array().dims(0) != w.array().dims(0)) {
+                throw af::exception("nn::Linear: Dimension mismatch between weight and bias.");
+            }
+            if (b.array().dims(1) != 1) {
+                throw af::exception("nn::Linear: Bias must be a vector.");
+            }
+        }
+
+        Variable Linear::forward(const Variable &input)
+        {
+            auto res = matmul(m_parameters[0], input);
+            if (m_bias) {
+                res = res + expandAs(m_parameters[1], res);
+            }
+            return res;
+        }
+    }
+}
diff --git a/src/nn/Modules/Module.cpp b/src/nn/Modules/Module.cpp
new file mode 100644
index 0000000..5126218
--- /dev/null
+++ b/src/nn/Modules/Module.cpp
@@ -0,0 +1,61 @@
+/*******************************************************
+ * Copyright (c) 2017, ArrayFire
+ * All rights reserved.
+ *
+ * This file is distributed under 3-clause BSD license.
+ * The complete license agreement can be obtained at:
+ * http://arrayfire.com/licenses/BSD-3-Clause
+ ********************************************************/
+
+#include <af/nn/Modules/Module.hpp>
+
+namespace af
+{
+    namespace nn
+    {
+        using autograd::Variable;
+        Module::Module() :
+            m_parameters()
+        {
+        }
+
+        Module::Module(const std::vector<Variable> &parameters) :
+            m_parameters(parameters.begin(), parameters.end())
+        {
+        }
+
+        void Module::setParams(const std::vector<Variable> &parameters)
+        {
+            m_parameters.clear();
+            for(auto parameter : parameters) {
+                m_parameters.push_back(parameter);
+            }
+        }
+
+        void Module::train()
+        {
+            for (auto &parameter : m_parameters) {
+                parameter.setCalcGrad(true);
+            }
+        }
+
+        void Module::eval()
+        {
+            for (auto &parameter : m_parameters) {
+                parameter.setCalcGrad(false);
+            }
+        }
+
+        std::vector<Variable> Module::parameters()
+        {
+            return m_parameters;
+        }
+
+        void Module::zeroGrad()
+        {
+            for (auto &parameter : m_parameters) {
+                parameter.zeroGrad();
+            }
+        }
+    }
+}
diff --git a/src/nn/Types.cpp b/src/nn/Types.cpp
new file mode 100644
index 0000000..698b497
--- /dev/null
+++ b/src/nn/Types.cpp
@@ -0,0 +1,36 @@
+/*******************************************************
+ * Copyright (c) 2017, ArrayFire
+ * All rights reserved.
+ *
+ * This file is distributed under 3-clause BSD license.
+ * The complete license agreement can be obtained at:
+ * http://arrayfire.com/licenses/BSD-3-Clause
+ ********************************************************/
+
+#include <af/nn/Types.hpp>
+
+#include <af/autograd/Variable.hpp>
+
+namespace af {
+    namespace nn {
+
+        using autograd::Variable;
+
+        Variable input(const af::array &arr)
+        {
+            return Variable(arr, false);
+        }
+
+        Variable parameter(const af::array &arr)
+        {
+            return Variable(arr, true);
+        }
+
+        Variable weight(int input_size, int output_size, float spread)
+        {
+            auto w = af::randu(output_size, input_size) * spread - spread / 2;
+            w.eval();
+            return parameter(w);
+        }
+    }
+}
diff --git a/test/Activations.cpp b/test/Activations.cpp
deleted file mode 100644
index 098c864..0000000
--- a/test/Activations.cpp
+++ /dev/null
@@ -1,33 +0,0 @@
-/*******************************************************
- * Copyright (c) 2015, ArrayFire
- * All rights reserved.
- *
- * This file is distributed under 3-clause BSD license.
- * The complete license agreement can be obtained at:
- * http://arrayfire.com/licenses/BSD-3-Clause
- ********************************************************/
-
-#include <afml/nn.h>
-
-using namespace afml::nn;
-
-int main()
-{
-    const int num = 5;
-
-    afml::ArrayVector in = {100 * af::randu(num, 1) - 50};
-    afml::ArrayVector grad = {100 * af::randu(num, 1)};
-
-    ReLU r = ReLU(num, 0);
-    Sigmoid s = Sigmoid(num);
-    Tanh t = Tanh(num);
-
-    af_print(in[0]);
-    af_print(r.forward(in)[0]);
-    af_print(s.forward(in)[0]);
-    af_print(t.forward(in)[0]);
-
-    af_print(r.backward(in, grad)[0]);
-    af_print(s.backward(in, grad)[0]);
-    af_print(t.backward(in, grad)[0]);
-}
diff --git a/test/FFNet.cpp b/test/FFNet.cpp
deleted file mode 100644
index b9c9b62..0000000
--- a/test/FFNet.cpp
+++ /dev/null
@@ -1,57 +0,0 @@
-/*******************************************************
- * Copyright (c) 2015, ArrayFire
- * All rights reserved.
- *
- * This file is distributed under 3-clause BSD license.
- * The complete license agreement can be obtained at:
- * http://arrayfire.com/licenses/BSD-3-Clause
- ********************************************************/
-
-#include <afml/nn.h>
-
-using namespace af;
-using namespace afml;
-using namespace afml::nn;
-
-int main()
-{
-    const int inputSize = 2;
-    const int hiddenSize = 3;
-    const int outputSize = 1;
-    const int numSamples = 4;
-    const double lr = 0.8;
-
-    float hInput[] = {1, 1,
-                      0, 0,
-                      0, 1,
-                      1, 0};
-
-    float hOutput[] = {0,
-                       0,
-                       1,
-                       1};
-
-    af::array in(inputSize, numSamples, hInput);
-    af::array out(outputSize, numSamples, hOutput);
-
-
-    FFNet network(inputSize);
-    network.addLinearNode(hiddenSize, 5).addActivationNode<Sigmoid>();
-    network.addLinearNode(outputSize, 5).addActivationNode<Sigmoid>();
-
-    for (int i = 0; i < 1000; i++) {
-
-        ArrayVector data = network.forward({in});
-        double err = af::norm(data[0] - out);
-
-        data[0] = out - data[0];
-
-        if ((i + 1) % 100 == 0) {
-            printf("Error at iteration(%d) : %2.10lf\n", i + 1, err);
-        }
-        network.backward({in}, data);
-        network.update(lr);
-    }
-
-    af_print(af::round(network.forward({in})[0]));
-}
diff --git a/test/Node.cpp b/test/Node.cpp
deleted file mode 100644
index 33a0a90..0000000
--- a/test/Node.cpp
+++ /dev/null
@@ -1,21 +0,0 @@
-/*******************************************************
- * Copyright (c) 2015, ArrayFire
- * All rights reserved.
- *
- * This file is distributed under 3-clause BSD license.
- * The complete license agreement can be obtained at:
- * http://arrayfire.com/licenses/BSD-3-Clause
- ********************************************************/
-
-#include <afml/nn.h>
-
-using namespace afml::nn;
-
-int main()
-{
-    int inSize = 10;
-    int outSize = 2;
-
-    Node n(1, &inSize, 1, &outSize, "test");
-    n.info();
-}
diff --git a/test/Weights.cpp b/test/Weights.cpp
deleted file mode 100644
index 8f65895..0000000
--- a/test/Weights.cpp
+++ /dev/null
@@ -1,20 +0,0 @@
-/*******************************************************
- * Copyright (c) 2015, ArrayFire
- * All rights reserved.
- *
- * This file is distributed under 3-clause BSD license.
- * The complete license agreement can be obtained at:
- * http://arrayfire.com/licenses/BSD-3-Clause
- ********************************************************/
-
-#include <afml/nn.h>
-
-using namespace afml::nn;
-
-int main()
-{
-    Weights w(10, 1, 0.05);
-    af_print(w);
-
-    return 0;
-}
diff --git a/test/perceptron.cpp b/test/perceptron.cpp
deleted file mode 100644
index 213378c..0000000
--- a/test/perceptron.cpp
+++ /dev/null
@@ -1,57 +0,0 @@
-/*******************************************************
- * Copyright (c) 2015, ArrayFire
- * All rights reserved.
- *
- * This file is distributed under 3-clause BSD license.
- * The complete license agreement can be obtained at:
- * http://arrayfire.com/licenses/BSD-3-Clause
- ********************************************************/
-
-#include <afml/nn.h>
-
-using namespace afml;
-using namespace afml::nn;
-
-int main()
-{
-    const int inputSize = 2;
-    const int outputSize = 1;
-    const int numSamples = 4;
-    const double lr = 10;
-
-    float hInput[] = {1, 1,
-                      0, 0,
-                      1, 0,
-                      0, 1};
-
-    float hOutput[] = {1,
-                       0,
-                       1,
-                       1};
-
-    af::array in(inputSize, numSamples, hInput);
-    af::array out(outputSize, numSamples, hOutput);
-
-    std::vector<NodePtr> perceptron;
-    perceptron.emplace_back(new LinearNode(inputSize, outputSize, 10));
-    perceptron.emplace_back(new Sigmoid(inputSize));
-
-    for (int i = 0; i < 10; i++) {
-        ArrayVector data = {in};
-
-        std::vector<ArrayVector> inputs(2);
-        for (int n = 0; n < 2; n++) {
-            inputs[n] = data;
-            data = perceptron[n]->forward(data);
-        }
-
-        data[0] = out - data[0];
-
-        printf("Error at iteration(%d) : %lf\n", i + 1, af::sum<float>(af::abs(data[0])) / numSamples);
-
-        for (int n = 1; n >= 0; n--) {
-            data = perceptron[n]->backward(inputs[n], data);
-            perceptron[n]->update(lr);
-        }
-    }
-}
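
Editorial note (not part of the patch): examples/perceptron.cpp leaves two TODOs, a proper loss function and a proper optimizer. As a rough illustration of where the optimizer TODO could go, here is a minimal SGD-style sketch built only on the Variable/Module API this patch introduces; the SGD class name and step() interface are hypothetical, not in this PR. Mind the sign: the example adds lr * grad because it backpropagates (target - prediction), whereas a conventional optimizer subtracts the gradient of a loss.

    // Hypothetical helper, not part of this PR: vanilla SGD over a module's
    // parameters. Assumes backward() has already populated the gradients.
    #include <vector>
    #include <af/autograd.h>

    class SGD {
        std::vector<af::autograd::Variable> m_params;
        double m_lr;
    public:
        SGD(const std::vector<af::autograd::Variable> &params, double lr) :
            m_params(params), m_lr(lr) {}

        // One descent step: p <- p - lr * dL/dp for every parameter.
        void step() {
            for (auto &param : m_params) {
                param.array() -= m_lr * param.grad().array();
                param.array().eval();
            }
        }
    };

Because Variable keeps its state behind a shared_ptr, the copies held in m_params alias the module's own parameters, so something like "SGD optim(perceptron.parameters(), lr); ... optim.step();" would update the network in place.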
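A second note on extensibility: every primitive in src/autograd/Functions.cpp follows one pattern, compute the forward result eagerly with ArrayFire, then capture a grad_func lambda that routes the incoming gradient to each input via addGrad(). A sketch of a user-defined op in the same style, a log() that is not part of this patch, assuming af::log and the Variable(data, inputs, grad_func) constructor declared in Variable.hpp:

    #include <af/autograd.h>

    // Sketch only: d/dx log(x) = 1/x, so the backward pass scales the
    // incoming gradient by the reciprocal of the input (reciprocal() is
    // one of the primitives this patch adds).
    af::autograd::Variable log(const af::autograd::Variable &input)
    {
        auto result = af::log(input.array());
        auto grad_func = [](std::vector<af::autograd::Variable> &inputs,
                            const af::autograd::Variable &grad_output) {
            inputs[0].addGrad(grad_output * reciprocal(inputs[0]));
        };
        return af::autograd::Variable(result, {input}, grad_func);
    }

backward() then visits the recorded graph in reverse topological order (see Variable::build and buildSubGraph) and invokes each node's grad_func, exactly as it does for the built-in operators.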