-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathlinear.cpp
More file actions
90 lines (70 loc) · 2.04 KB
/
linear.cpp
File metadata and controls
90 lines (70 loc) · 2.04 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
#include "linear.h"
#include <ctime>
#include <iostream>
#include <tuple>
Linear::Linear(size_t input_size, size_t units){
    // Fully-connected layer: weights are (input_size x units), biases a
    // single row of `units` values.
    this->input_size = input_size;
    output_size = units;
    const std::vector<size_t> wShape{input_size, units};
    const std::vector<size_t> bShape{1, units};
    // NOTE(review): raw `new` with ownership presumably released in the
    // destructor (not visible here) — confirm in linear.h; unique_ptr
    // would make the ownership explicit.
    weights = new Tensor(wShape);
    biases = new Tensor(bShape);
}
void Linear::initRange(){
    // Delegate range initialisation to each parameter tensor in turn.
    (*weights).initRange();
    (*biases).initRange();
}
void Linear::initNormal(){
    // Normally-distributed initialisation of weights and biases.
    //
    // The seed is derived from the wall clock, offset by a per-process call
    // counter. BUGFIX: time(NULL) has one-second resolution, so previously
    // two layers initialised within the same second received identical
    // seeds — and therefore identical weights. The counter advances by 2
    // per call so neither `seed` nor `seed + 1` (the bias seed, kept
    // distinct from the weight seed) is ever reused.
    static int invocation = 0;
    const auto seed = static_cast<int>(time(nullptr)) + 2 * invocation++;
    weights->initNormal(seed);
    biases->initNormal(seed + 1);
}
void Linear::print(){
    // Write both parameter tensors to stdout, each preceded by a label.
    std::cout << "Weights:\n";
    (*weights).print();
    std::cout << "Biases:\n";
    (*biases).print();
}
Tensor Linear::forward(Tensor input){
    // Affine map Y = XW + B.
    // input: (batch, input_size) activations — assumed 2-D; W is
    // (input_size, units) and B is the (1, units) bias row.
    Tensor projected = input.matmul(*weights);
    return projected.add(*biases);
}
std::tuple<Tensor, Tensor> Linear::calculateGradient(Tensor input, Tensor nextGrads){
    // Backward pass for Y = XW + B.
    //
    // input:     (batch, input_size) activations passed to forward()
    // nextGrads: (batch, units) upstream gradient D = dL/dY
    // returns:   {dL/dW, dL/dB}
    //
    // NOTE(review): both accumulators rely on Tensor's shape-constructor
    // zero-filling `data` — confirm in Tensor.
    const size_t batch = nextGrads.shape[0];
    const size_t units = nextGrads.shape[1];
    // Bias gradient: Gb = sum over the batch of D.
    // BUGFIX: the batch loop previously ran over biases->shape[0], which is
    // always 1 (bias shape is {1, units}), so only the first sample's
    // gradient was ever accumulated; it must run over the batch dimension
    // of nextGrads, matching the weight-gradient loop below.
    Tensor gradBias = Tensor(biases->shape);
    for (size_t b = 0; b < batch; ++b) {
        // Over batch
        for (size_t j = 0; j < units; ++j) {
            // Over nodes
            gradBias.data[j] += nextGrads.data[b * units + j];
        }
    }
    // Weight gradient: Gw = X^T D, accumulated sample by sample.
    Tensor gradWeights = Tensor(weights->shape);
    for (size_t b = 0; b < batch; ++b) {
        // Over batch
        for (size_t i = 0; i < weights->shape[1]; ++i) {
            // Over nodes (output units)
            for (size_t j = 0; j < weights->shape[0]; ++j) {
                // Over params (input features)
                gradWeights.data[j * weights->shape[1] + i] +=
                    nextGrads.data[b * units + i] * input.data[b * input.shape[1] + j];
            }
        }
    }
    return std::make_tuple(gradWeights, gradBias);
}
void Linear::updateWeights(Tensor gradWeights, Tensor gradBiases, float learningRate){
    // Apply a scaled gradient step to every parameter in place.
    // NOTE(review): parameters are updated with `+=`; plain gradient
    // descent would subtract. This is only correct if the caller negates
    // the gradients (or the loss derivative) upstream — confirm against
    // the training loop.
    const size_t nBias = biases->shape[1];
    const size_t nWeight = weights->shape[0] * weights->shape[1];
    for (size_t idx = 0; idx < nBias; ++idx) {
        biases->data[idx] += learningRate * gradBiases.data[idx];
    }
    for (size_t idx = 0; idx < nWeight; ++idx) {
        weights->data[idx] += learningRate * gradWeights.data[idx];
    }
}