1
1
package cu
2
2
3
3
import (
4
+ "runtime"
4
5
"testing"
5
6
"unsafe"
6
7
)
7
8
8
9
func TestAttributes (t * testing.T ) {
9
- devices , _ := NumDevices ()
10
+ var dev Device
11
+ var ctx Context
12
+ var err error
10
13
11
- if devices == 0 {
12
- return
14
+ if dev , ctx , err = testSetup (); err != nil {
15
+ if err .Error () == "NoDevice" {
16
+ return
17
+ }
18
+ t .Fatal (err )
13
19
}
14
20
15
- d := Device (0 )
16
- mtpb , err := d .Attribute (MaxThreadsPerBlock )
17
- if err != nil {
21
+ var mtpb , maj , min int
22
+ if mtpb , err = dev .Attribute (MaxThreadsPerBlock ); err != nil {
18
23
t .Fatalf ("Failed while getting MaxThreadsPerBlock: %v" , err )
19
24
}
20
25
21
- maj , err := d .Attribute (ComputeCapabilityMajor )
22
- if err != nil {
26
+ if maj , err = dev .Attribute (ComputeCapabilityMajor ); err != nil {
23
27
t .Fatalf ("Failed while getting Compute Capability Major: %v" , err )
24
28
}
25
29
26
- min , err := d .Attribute (ComputeCapabilityMinor )
27
- if err != nil {
30
+ if min , err = dev .Attribute (ComputeCapabilityMinor ); err != nil {
28
31
t .Fatalf ("Failed while getting Compute Capability Minor: %v" , err )
29
32
}
30
33
31
- attrs , err := d . Attributes ( MaxThreadsPerBlock , ComputeCapabilityMajor , ComputeCapabilityMinor )
32
- if err != nil {
34
+ var attrs [] int
35
+ if attrs , err = dev . Attributes ( MaxThreadsPerBlock , ComputeCapabilityMajor , ComputeCapabilityMinor ); err != nil {
33
36
t .Error (err )
34
37
}
35
38
@@ -42,25 +45,31 @@ func TestAttributes(t *testing.T) {
42
45
if attrs [2 ] != min {
43
46
t .Errorf ("Expected ComputeCapabilityMinor to be %v. Got %v instead" , min , attrs [2 ])
44
47
}
48
+
49
+ DestroyContext (& ctx )
45
50
}
46
51
47
52
func TestLaunchAndSync (t * testing.T ) {
48
- devices , _ := NumDevices ()
49
-
50
- if devices == 0 {
51
- return
52
- }
53
-
54
53
var err error
55
54
var ctx Context
56
55
var mod Module
57
56
var fn Function
58
57
59
- d := Device (0 )
60
- if ctx , err = d .MakeContext (SchedAuto ); err != nil {
58
+ if _ , ctx , err = testSetup (); err != nil {
59
+ if err .Error () == "NoDevice" {
60
+ return
61
+ }
61
62
t .Fatal (err )
62
63
}
63
64
65
+ if mod , err = LoadData (add32PTX ); err != nil {
66
+ t .Fatalf ("Cannot load add32: %v" , err )
67
+ }
68
+
69
+ if fn , err = mod .Function ("add32" ); err != nil {
70
+ t .Fatalf ("Cannot get add32(): %v" , err )
71
+ }
72
+
64
73
a := make ([]float32 , 1000 )
65
74
b := make ([]float32 , 1000 )
66
75
for i := range b {
@@ -86,14 +95,6 @@ func TestLaunchAndSync(t *testing.T) {
86
95
t .Fatalf ("Failed to copy memory from b: %v" , err )
87
96
}
88
97
89
- if mod , err = LoadData (add32PTX ); err != nil {
90
- t .Fatalf ("Cannot load add32: %v" , err )
91
- }
92
-
93
- if fn , err = mod .Function ("add32" ); err != nil {
94
- t .Fatalf ("Cannot get add32(): %v" , err )
95
- }
96
-
97
98
args := []unsafe.Pointer {
98
99
unsafe .Pointer (& memA ),
99
100
unsafe .Pointer (& memB ),
@@ -119,24 +120,23 @@ func TestLaunchAndSync(t *testing.T) {
119
120
}
120
121
}
121
122
123
+ MemFree (memA )
124
+ MemFree (memB )
122
125
Unload (mod )
123
126
DestroyContext (& ctx )
124
127
}
125
128
126
129
func TestBatchContext (t * testing.T ) {
127
- devices , _ := NumDevices ()
128
-
129
- if devices == 0 {
130
- return
131
- }
132
-
133
130
var err error
131
+ var dev Device
134
132
var ctx Context
135
133
var mod Module
136
134
var fn Function
137
135
138
- d := Device (0 )
139
- if ctx , err = d .MakeContext (SchedAuto ); err != nil {
136
+ if dev , ctx , err = testSetup (); err != nil {
137
+ if err .Error () == "NoDevice" {
138
+ return
139
+ }
140
140
t .Fatal (err )
141
141
}
142
142
@@ -148,7 +148,7 @@ func TestBatchContext(t *testing.T) {
148
148
t .Fatalf ("Cannot get add32(): %v" , err )
149
149
}
150
150
151
- bctx := NewBatchedContext (ctx , d )
151
+ bctx := NewBatchedContext (ctx , dev )
152
152
153
153
a := make ([]float32 , 1000 )
154
154
b := make ([]float32 , 1000 )
@@ -188,6 +188,156 @@ func TestBatchContext(t *testing.T) {
188
188
}
189
189
}
190
190
191
+ MemFree (memA )
192
+ MemFree (memB )
193
+ Unload (mod )
194
+ DestroyContext (& ctx )
195
+ }
196
+
197
+ func BenchmarkNoBatching (bench * testing.B ) {
198
+ runtime .LockOSThread ()
199
+ defer runtime .UnlockOSThread ()
200
+
201
+ var err error
202
+ var ctx Context
203
+ var mod Module
204
+ var fn Function
205
+
206
+ if _ , ctx , err = testSetup (); err != nil {
207
+ if err .Error () == "NoDevice" {
208
+ return
209
+ }
210
+ bench .Fatal (err )
211
+ }
212
+
213
+ if mod , err = LoadData (add32PTX ); err != nil {
214
+ bench .Fatalf ("Cannot load add32: %v" , err )
215
+ }
216
+
217
+ if fn , err = mod .Function ("add32" ); err != nil {
218
+ bench .Fatalf ("Cannot get add32(): %v" , err )
219
+ }
220
+
221
+ a := make ([]float32 , 1000000 )
222
+ b := make ([]float32 , 1000000 )
223
+ for i := range b {
224
+ a [i ] = 1
225
+ b [i ] = 1
226
+ }
227
+
228
+ size := int64 (len (a ) * 4 )
229
+
230
+ var memA , memB DevicePtr
231
+ if memA , err = MemAlloc (size ); err != nil {
232
+ bench .Fatalf ("Failed to allocate for a: %v" , err )
233
+ }
234
+ if memB , err = MemAlloc (size ); err != nil {
235
+ bench .Fatalf ("Failed to allocate for b: %v" , err )
236
+ }
237
+
238
+ args := []unsafe.Pointer {
239
+ unsafe .Pointer (& memA ),
240
+ unsafe .Pointer (& memB ),
241
+ unsafe .Pointer (& size ),
242
+ }
243
+
244
+ // ACTUAL BENCHMARK STARTS HERE
245
+ for i := 0 ; i < bench .N ; i ++ {
246
+ for j := 0 ; j < 1000 ; j ++ {
247
+ if err = MemcpyHtoD (memA , unsafe .Pointer (& a [0 ]), size ); err != nil {
248
+ bench .Fatalf ("Failed to copy memory from a: %v" , err )
249
+ }
250
+
251
+ if err = MemcpyHtoD (memB , unsafe .Pointer (& b [0 ]), size ); err != nil {
252
+ bench .Fatalf ("Failed to copy memory from b: %v" , err )
253
+ }
254
+
255
+ if err = fn .LaunchAndSync (100 , 10 , 1 , 1000 , 1 , 1 , 1 , Stream (0 ), args ); err != nil {
256
+ bench .Error ("Launch and Sync Failed: %v" , err )
257
+ }
258
+
259
+ if err = MemcpyDtoH (unsafe .Pointer (& a [0 ]), memA , size ); err != nil {
260
+ bench .Fatalf ("Failed to copy memory to a: %v" , err )
261
+ }
262
+
263
+ if err = MemcpyDtoH (unsafe .Pointer (& b [0 ]), memB , size ); err != nil {
264
+ bench .Fatalf ("Failed to copy memory to b: %v" , err )
265
+ }
266
+ }
267
+ }
268
+ MemFree (memA )
269
+ MemFree (memB )
270
+ Unload (mod )
271
+ DestroyContext (& ctx )
272
+
273
+ }
274
+
275
+ func BenchmarkBatching (bench * testing.B ) {
276
+ runtime .LockOSThread ()
277
+ defer runtime .UnlockOSThread ()
278
+
279
+ var err error
280
+ var dev Device
281
+ var ctx Context
282
+ var mod Module
283
+ var fn Function
284
+
285
+ if dev , ctx , err = testSetup (); err != nil {
286
+ if err .Error () == "NoDevice" {
287
+ return
288
+ }
289
+ bench .Fatal (err )
290
+ }
291
+
292
+ if mod , err = LoadData (add32PTX ); err != nil {
293
+ bench .Fatalf ("Cannot load add32: %v" , err )
294
+ }
295
+
296
+ if fn , err = mod .Function ("add32" ); err != nil {
297
+ bench .Fatalf ("Cannot get add32(): %v" , err )
298
+ }
299
+
300
+ a := make ([]float32 , 1000000 )
301
+ b := make ([]float32 , 1000000 )
302
+ for i := range b {
303
+ a [i ] = 1
304
+ b [i ] = 1
305
+ }
306
+
307
+ size := int64 (len (a ) * 4 )
308
+
309
+ var memA , memB DevicePtr
310
+ if memA , err = MemAlloc (size ); err != nil {
311
+ bench .Fatalf ("Failed to allocate for a: %v" , err )
312
+ }
313
+ if memB , err = MemAlloc (size ); err != nil {
314
+ bench .Fatalf ("Failed to allocate for b: %v" , err )
315
+ }
316
+
317
+ bctx := NewBatchedContext (ctx , dev )
318
+
319
+ args := []unsafe.Pointer {
320
+ unsafe .Pointer (& memA ),
321
+ unsafe .Pointer (& memB ),
322
+ unsafe .Pointer (& size ),
323
+ }
324
+
325
+ // ACTUAL BENCHMARK STARTS HERE
326
+ for i := 0 ; i < bench .N ; i ++ {
327
+ for j := 0 ; j < 1000 ; j ++ {
328
+ bctx .MemcpyHtoD (memA , unsafe .Pointer (& a [0 ]), size )
329
+ bctx .MemcpyHtoD (memB , unsafe .Pointer (& b [0 ]), size )
330
+ bctx .LaunchKernel (fn , 100 , 10 , 1 , 1000 , 1 , 1 , 0 , Stream (0 ), args )
331
+ bctx .Synchronize ()
332
+ bctx .MemcpyDtoH (unsafe .Pointer (& a [0 ]), memA , size )
333
+ bctx .MemcpyDtoH (unsafe .Pointer (& b [0 ]), memB , size )
334
+ }
335
+ bctx .DoWork ()
336
+ }
337
+
338
+ MemFree (memA )
339
+ MemFree (memB )
191
340
Unload (mod )
192
341
DestroyContext (& ctx )
342
+
193
343
}
0 commit comments