// kernel.cl - OpenCL kernel for one CNN layer: convolution, ReLU, 2x2 max pooling.
/* Layer geometry. */
#define KERNEL 5      /* side length of the square convolution filter */
#define NUM 256       /* channel count; used for both input and output channels */
#define INIMROW 228   /* rows/columns of one input plane */
#define IMROW 224     /* rows/columns of one convolution result plane (INIMROW - KERNEL + 1) */
#define OUTIMROW 112  /* rows/columns of one pooled output plane (IMROW / 2) */

/*
 * Flat-array accessors. Each expands to an index into the identically named
 * pointer that must be in scope at the point of use:
 *   weight(i,j,p,q): output channel i, input channel j, filter row p, filter column q
 *   Cin(j,h,w):      input channel j, row h, column w
 *   Cout(i,h,w):     output channel i, row h, column w
 */
#define weight(i,j,p,q) weight[(i)*NUM*KERNEL*KERNEL + (j)*KERNEL*KERNEL + (p)*KERNEL + (q)]
#define Cin(j,h,w) Cin[(j)*INIMROW*INIMROW+(h)*INIMROW+(w)]
#define Cout(i,h,w) Cout[(i)*OUTIMROW*OUTIMROW+(h)*OUTIMROW+(w)]

/*
 * Larger of a and b; yields b when the comparison a > b is false.
 * The arguments and the whole expansion are parenthesized so the macro can be
 * used inside a larger expression (e.g. 2 * max(x, y)) without the surrounding
 * operators binding to the condition of the ?: instead of its result.
 * Each argument may be evaluated twice, so do not pass expressions with side
 * effects. NOTE: in OpenCL C this macro takes precedence over the built-in
 * max() for every call spelled max(...).
 */
#define max(a,b) (((a) > (b)) ? (a) : (b))
__kernel __attribute__ ((reqd_work_group_size(1, 1, 1)))
void cnn_kernel(
__global float* Cin,
__global float* weight,
__global float* bias,
__global float* Cout
){
static float C[NUM][IMROW][IMROW];
for(int i = 0; i < NUM; i++) {
for(int h = 0; h < IMROW; h++) {
for(int w = 0; w < IMROW; w++)
C[i][h][w] = bias[i];
}
}
// Convolution
for(int i = 0; i < NUM; i++) {
for(int j = 0; j < NUM; j++) {
for(int h = 0; h < IMROW; h++) {
for(int w = 0; w < IMROW; w++) {
for(int p = 0; p < KERNEL; p++) {
for(int q = 0; q < KERNEL; q++)
C[i][h][w] += weight(i,j,p,q) * Cin(j,h + p,w + q);
}
}
}
}
}
// ReLU
for (int i = 0; i < NUM; i++) {
for (int h = 0; h < IMROW; h++) {
for (int w = 0; w < IMROW; w++) {
C[i][h][w] = max(0, C[i][h][w]);
}
}
}
// Max pooling
for (int i = 0; i < NUM; i++) {
for (int h = 0; h < OUTIMROW; h++) {
for (int w = 0; w < OUTIMROW; w++) {
float local_max = C[i][2 * h][2 * w];
local_max = max(local_max, C[i][2 * h + 1][2 * w]);
local_max = max(local_max, C[i][2 * h + 1][2 * w + 1]);
local_max = max(local_max, C[i][2 * h][2 * w + 1]);
Cout(i,h,w) = local_max;
}
}
}
}