-
Notifications
You must be signed in to change notification settings - Fork 4
Expand file tree
/
Copy path: ex_blob.cpp
More file actions
100 lines (83 loc) · 2.69 KB
/
ex_blob.cpp
File metadata and controls
100 lines (83 loc) · 2.69 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
#include <ctime>
#include <iostream>
#include <memory>

#include "caffe/caffe.hpp"
#include "caffe/blob.hpp"
#include "caffe/common.hpp"
#include "caffe/filler.hpp"

using namespace caffe;
using namespace std;
using namespace cv;
// Element type used throughout this example (Caffe instantiates its
// templates for float and double).
typedef double Dtype;

// Timing bracket shared by the measurements in main():
//   tStart = clock(); <work>; tEnd = clock(); COMPTIME("label");
clock_t tStart, tEnd;

// Report the elapsed CPU time of the last tStart/tEnd bracket, in seconds.
// - std:: qualification: the macro no longer silently requires a
//   `using namespace std;` at every expansion site.
// - do { } while (0): the expansion is a single statement, so the macro
//   is safe inside an unbraced if/else.
#define COMPTIME(X) \
  do { \
    std::cout << "CompTime of " << (X) << ": " \
              << (double)(tEnd - tStart) / CLOCKS_PER_SEC << std::endl; \
  } while (0)
int main(int argc, char** argv) {
//// Initialization
Blob<Dtype>* const blob = new Blob<Dtype>(20, 30, 40, 50);
if(blob){
cout<<"Size of blob:";
cout<<" N="<<blob->num();
cout<<" K="<<blob->channels();
cout<<" H="<<blob->height();
cout<<" W="<<blob->width();
cout<<" C="<<blob->count();
cout<<endl;
}
// reshaping the size of blob
blob->Reshape(50, 40, 30, 20);
if(blob){
cout<<"Size of reshaped blob:";
cout<<" N="<<blob->num();
cout<<" K="<<blob->channels();
cout<<" H="<<blob->height();
cout<<" W="<<blob->width();
cout<<" C="<<blob->count();
cout<<endl;
}
// Random sampling from uniform distribution
FillerParameter filler_param;
filler_param.set_min(-3);
filler_param.set_max(3);
UniformFiller<Dtype> filler(filler_param);
filler.Fill(blob);
//// sum of squares
// access data on the host
Dtype expected_sumsq = 0;
const Dtype* data = blob->cpu_data();
for (int i = 0; i < blob->count(); ++i) {
expected_sumsq += data[i] * data[i];
}
cout<<endl;
cout<<"expected sumsq of blob: "<<expected_sumsq<<endl;
tStart = clock();
cout<<"sumsq of blob on cpu: "<<blob->sumsq_data()<<endl;
tEnd = clock();
COMPTIME("sumsq of blob on cpu");
// Do an access on the current device,
// so that the sumsq computation is done on that device.
tStart = clock();
blob->gpu_data(); // memcopy host to device (to_gpu() in syncedmem.cpp)
tEnd = clock();
COMPTIME("cpu->gpu time");
tStart = clock();
cout<<"sumsq of blob on gpu: "<<blob->sumsq_data()<<endl;
tEnd = clock();
COMPTIME("sumsq on gpu time");
//// Test of syncmem
cout<<endl;
tStart = clock();
blob->gpu_data(); // no data copy since both have up-to-date contents.
tEnd = clock();
COMPTIME("cpu->gpu time");
// gpu data manipulation
const Dtype kDataScaleFactor = 2;
blob->scale_data(kDataScaleFactor); // change data on gpu
tStart = clock();
blob->cpu_data(); // memcopy device to host (to_cpu() in syncedmem.cpp)
tEnd = clock();
COMPTIME("gpu->cpu time");
tStart = clock();
cout<<"sumsq of blob on gpu: "<<blob->sumsq_data()<<endl; // this is done on gpu
tEnd = clock();
COMPTIME("sumsq on gpu time");
delete blob;
return 0;
}