-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathbase.cc
More file actions
110 lines (97 loc) · 3.22 KB
/
base.cc
File metadata and controls
110 lines (97 loc) · 3.22 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
#include <cassert>
#include <fstream>
#include <iostream>
#include <vector>
#include "CLDeviceManager.h"
using namespace std;
#if 1
// Short aliases for the OpenCL device-type constants; main() below compares
// its selected device type against these while scanning the device list.
#define P_CPU CL_DEVICE_TYPE_CPU
#define P_GPU CL_DEVICE_TYPE_GPU
int main()
{
CLDeviceManager dm;
auto& d = dm.GetDevices();
cl_platform_id pid;
auto p = P_CPU;
for (auto& i: d) {
for (auto &j: i.second) {
if (dm.IsGPU(j)) {
cout << "GPU found" << endl;
if (p == P_GPU)
pid = i.first;
}
else if (dm.IsCPU(j)) {
cout << "CPU found" << endl;
if (p == P_CPU)
pid = i.first;
}
}
}
//auto c = dm.CreateContext(pid);
auto c1 = dm.CreateSpecificContext(p);
assert(c1.context);
CLProgram pro(c1, "test.c", true);
CLKernel kernel(pro, "x2");
const int M = 1024*8;
const int N = M*M;
unsigned int *data = new unsigned int[N];
unsigned int *odata = new unsigned int[N];
for (auto i = 0; i < N; i++)
data[i] = i;
size_t sz = sizeof(unsigned int) * N;
CLBuffer mem1(c1, sz, data, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR);
CLBuffer mem2(c1, sz, NULL, CL_MEM_WRITE_ONLY);
kernel.SetArg(0, sizeof(mem1.buff), &mem1.buff);
kernel.SetArg(1, sizeof(mem2.buff), &mem2.buff);
unsigned int width = M;
kernel.SetArg(2, sizeof(uint), &width);
cl_device_id dev;
clGetDeviceIDs(pid, p, 1, &dev, NULL);
auto q = CLQueue(c1, dev, CL_QUEUE_PROFILING_ENABLE);
//auto q = CLQueue(c1, dev);
const size_t gsz[] = {M, M};
const size_t lsz[] = {16, 16};
for (auto i = 0; i < N; i++)
odata[i] = 0;
CLEvent ev;
kernel.Run(q, 2, gsz, lsz, &ev);
//kernel.Run(q, 2, gsz, lsz);
///*
ev.Wait();
cl_uint start = ev.GetTime(CL_PROFILING_COMMAND_QUEUED);
cl_uint end = ev.GetTime(CL_PROFILING_COMMAND_END);
cout << "exec time: " << end - start << endl;
//*/
mem2.Read(q, true, 0, sz, odata);
for (auto i = 0; i < 32; i++) {
cout << odata[i] << " ";
}
cout << endl;
c1.Release();
return 0;
}
#else
int main()
{
CLDeviceManager dm;
auto& d = dm.GetDevices();
cl_platform_id pid;
for (auto& i: d) {
for (auto &j: i.second) {
if (dm.IsGPU(j)) {
cout << "GPU found" << endl;
pid = i.first;
}
else if (dm.IsCPU(j))
cout << "CPU found" << endl;
}
}
//auto c = dm.CreateContext(pid);
auto c1 = dm.CreateSpecificContext(CL_DEVICE_TYPE_GPU);
assert(c1.context);
auto c2 = dm.CreateSpecificContext(CL_DEVICE_TYPE_CPU);
assert(c2.context);
//c1.Release();
//c2.Release();
return 0;
}
#endif