Skip to content

Commit 08a857d

Browse files
committed
Added some benchmarks for batch vs nobatch
1 parent 7bc2796 commit 08a857d

File tree

2 files changed

+191
-41
lines changed

2 files changed

+191
-41
lines changed

batch.c

+4-4
Original file line numberDiff line numberDiff line change
@@ -53,7 +53,7 @@ CUresult processFn(fnargs_t* args){
5353
break;
5454
case fn_memcpyHtoD:
5555
ret = cuMemcpyHtoD(args->devPtr0, (void*)(args->ptr0), args->size);
56-
fprintf(stderr, "ret memcpyHtoD %d args->ptr0 %d\n", ret, args->ptr0);
56+
// fprintf(stderr, "ret memcpyHtoD %d args->ptr0 %d\n", ret, args->ptr0);
5757
break;
5858
case fn_memcpyDtoH:
5959
ret = cuMemcpyDtoH((void*)(args->ptr0), args->devPtr0, args->size);
@@ -87,12 +87,12 @@ CUresult processFn(fnargs_t* args){
8787
}
8888

8989
void process(fnargs_t* args, CUresult* retVal, int count){
90-
fprintf(stderr,"Processing: %d functions \n", count);
90+
// fprintf(stderr,"Processing: %d functions \n", count);
9191
for (int i = 0; i < count; ++i) {
92-
fprintf(stderr, "Processing function %d\n", i);
92+
// fprintf(stderr, "Processing function %d\n", i);
9393
CUresult ret;
9494
ret = processFn(&args[i]);
95-
fprintf(stderr, "ret %d\n",ret);
95+
// fprintf(stderr, "ret %d\n",ret);
9696

9797
retVal[i] = ret;
9898
}

batch_test.go

+187-37
Original file line numberDiff line numberDiff line change
@@ -1,35 +1,38 @@
11
package cu
22

33
import (
4+
"runtime"
45
"testing"
56
"unsafe"
67
)
78

89
func TestAttributes(t *testing.T) {
9-
devices, _ := NumDevices()
10+
var dev Device
11+
var ctx Context
12+
var err error
1013

11-
if devices == 0 {
12-
return
14+
if dev, ctx, err = testSetup(); err != nil {
15+
if err.Error() == "NoDevice" {
16+
return
17+
}
18+
t.Fatal(err)
1319
}
1420

15-
d := Device(0)
16-
mtpb, err := d.Attribute(MaxThreadsPerBlock)
17-
if err != nil {
21+
var mtpb, maj, min int
22+
if mtpb, err = dev.Attribute(MaxThreadsPerBlock); err != nil {
1823
t.Fatalf("Failed while getting MaxThreadsPerBlock: %v", err)
1924
}
2025

21-
maj, err := d.Attribute(ComputeCapabilityMajor)
22-
if err != nil {
26+
if maj, err = dev.Attribute(ComputeCapabilityMajor); err != nil {
2327
t.Fatalf("Failed while getting Compute Capability Major: %v", err)
2428
}
2529

26-
min, err := d.Attribute(ComputeCapabilityMinor)
27-
if err != nil {
30+
if min, err = dev.Attribute(ComputeCapabilityMinor); err != nil {
2831
t.Fatalf("Failed while getting Compute Capability Minor: %v", err)
2932
}
3033

31-
attrs, err := d.Attributes(MaxThreadsPerBlock, ComputeCapabilityMajor, ComputeCapabilityMinor)
32-
if err != nil {
34+
var attrs []int
35+
if attrs, err = dev.Attributes(MaxThreadsPerBlock, ComputeCapabilityMajor, ComputeCapabilityMinor); err != nil {
3336
t.Error(err)
3437
}
3538

@@ -42,25 +45,31 @@ func TestAttributes(t *testing.T) {
4245
if attrs[2] != min {
4346
t.Errorf("Expected ComputeCapabilityMinor to be %v. Got %v instead", min, attrs[2])
4447
}
48+
49+
DestroyContext(&ctx)
4550
}
4651

4752
func TestLaunchAndSync(t *testing.T) {
48-
devices, _ := NumDevices()
49-
50-
if devices == 0 {
51-
return
52-
}
53-
5453
var err error
5554
var ctx Context
5655
var mod Module
5756
var fn Function
5857

59-
d := Device(0)
60-
if ctx, err = d.MakeContext(SchedAuto); err != nil {
58+
if _, ctx, err = testSetup(); err != nil {
59+
if err.Error() == "NoDevice" {
60+
return
61+
}
6162
t.Fatal(err)
6263
}
6364

65+
if mod, err = LoadData(add32PTX); err != nil {
66+
t.Fatalf("Cannot load add32: %v", err)
67+
}
68+
69+
if fn, err = mod.Function("add32"); err != nil {
70+
t.Fatalf("Cannot get add32(): %v", err)
71+
}
72+
6473
a := make([]float32, 1000)
6574
b := make([]float32, 1000)
6675
for i := range b {
@@ -86,14 +95,6 @@ func TestLaunchAndSync(t *testing.T) {
8695
t.Fatalf("Failed to copy memory from b: %v", err)
8796
}
8897

89-
if mod, err = LoadData(add32PTX); err != nil {
90-
t.Fatalf("Cannot load add32: %v", err)
91-
}
92-
93-
if fn, err = mod.Function("add32"); err != nil {
94-
t.Fatalf("Cannot get add32(): %v", err)
95-
}
96-
9798
args := []unsafe.Pointer{
9899
unsafe.Pointer(&memA),
99100
unsafe.Pointer(&memB),
@@ -119,24 +120,23 @@ func TestLaunchAndSync(t *testing.T) {
119120
}
120121
}
121122

123+
MemFree(memA)
124+
MemFree(memB)
122125
Unload(mod)
123126
DestroyContext(&ctx)
124127
}
125128

126129
func TestBatchContext(t *testing.T) {
127-
devices, _ := NumDevices()
128-
129-
if devices == 0 {
130-
return
131-
}
132-
133130
var err error
131+
var dev Device
134132
var ctx Context
135133
var mod Module
136134
var fn Function
137135

138-
d := Device(0)
139-
if ctx, err = d.MakeContext(SchedAuto); err != nil {
136+
if dev, ctx, err = testSetup(); err != nil {
137+
if err.Error() == "NoDevice" {
138+
return
139+
}
140140
t.Fatal(err)
141141
}
142142

@@ -148,7 +148,7 @@ func TestBatchContext(t *testing.T) {
148148
t.Fatalf("Cannot get add32(): %v", err)
149149
}
150150

151-
bctx := NewBatchedContext(ctx, d)
151+
bctx := NewBatchedContext(ctx, dev)
152152

153153
a := make([]float32, 1000)
154154
b := make([]float32, 1000)
@@ -188,6 +188,156 @@ func TestBatchContext(t *testing.T) {
188188
}
189189
}
190190

191+
MemFree(memA)
192+
MemFree(memB)
193+
Unload(mod)
194+
DestroyContext(&ctx)
195+
}
196+
197+
func BenchmarkNoBatching(bench *testing.B) {
198+
runtime.LockOSThread()
199+
defer runtime.UnlockOSThread()
200+
201+
var err error
202+
var ctx Context
203+
var mod Module
204+
var fn Function
205+
206+
if _, ctx, err = testSetup(); err != nil {
207+
if err.Error() == "NoDevice" {
208+
return
209+
}
210+
bench.Fatal(err)
211+
}
212+
213+
if mod, err = LoadData(add32PTX); err != nil {
214+
bench.Fatalf("Cannot load add32: %v", err)
215+
}
216+
217+
if fn, err = mod.Function("add32"); err != nil {
218+
bench.Fatalf("Cannot get add32(): %v", err)
219+
}
220+
221+
a := make([]float32, 1000000)
222+
b := make([]float32, 1000000)
223+
for i := range b {
224+
a[i] = 1
225+
b[i] = 1
226+
}
227+
228+
size := int64(len(a) * 4)
229+
230+
var memA, memB DevicePtr
231+
if memA, err = MemAlloc(size); err != nil {
232+
bench.Fatalf("Failed to allocate for a: %v", err)
233+
}
234+
if memB, err = MemAlloc(size); err != nil {
235+
bench.Fatalf("Failed to allocate for b: %v", err)
236+
}
237+
238+
args := []unsafe.Pointer{
239+
unsafe.Pointer(&memA),
240+
unsafe.Pointer(&memB),
241+
unsafe.Pointer(&size),
242+
}
243+
244+
// ACTUAL BENCHMARK STARTS HERE
245+
for i := 0; i < bench.N; i++ {
246+
for j := 0; j < 1000; j++ {
247+
if err = MemcpyHtoD(memA, unsafe.Pointer(&a[0]), size); err != nil {
248+
bench.Fatalf("Failed to copy memory from a: %v", err)
249+
}
250+
251+
if err = MemcpyHtoD(memB, unsafe.Pointer(&b[0]), size); err != nil {
252+
bench.Fatalf("Failed to copy memory from b: %v", err)
253+
}
254+
255+
if err = fn.LaunchAndSync(100, 10, 1, 1000, 1, 1, 1, Stream(0), args); err != nil {
256+
bench.Error("Launch and Sync Failed: %v", err)
257+
}
258+
259+
if err = MemcpyDtoH(unsafe.Pointer(&a[0]), memA, size); err != nil {
260+
bench.Fatalf("Failed to copy memory to a: %v", err)
261+
}
262+
263+
if err = MemcpyDtoH(unsafe.Pointer(&b[0]), memB, size); err != nil {
264+
bench.Fatalf("Failed to copy memory to b: %v", err)
265+
}
266+
}
267+
}
268+
MemFree(memA)
269+
MemFree(memB)
270+
Unload(mod)
271+
DestroyContext(&ctx)
272+
273+
}
274+
275+
func BenchmarkBatching(bench *testing.B) {
276+
runtime.LockOSThread()
277+
defer runtime.UnlockOSThread()
278+
279+
var err error
280+
var dev Device
281+
var ctx Context
282+
var mod Module
283+
var fn Function
284+
285+
if dev, ctx, err = testSetup(); err != nil {
286+
if err.Error() == "NoDevice" {
287+
return
288+
}
289+
bench.Fatal(err)
290+
}
291+
292+
if mod, err = LoadData(add32PTX); err != nil {
293+
bench.Fatalf("Cannot load add32: %v", err)
294+
}
295+
296+
if fn, err = mod.Function("add32"); err != nil {
297+
bench.Fatalf("Cannot get add32(): %v", err)
298+
}
299+
300+
a := make([]float32, 1000000)
301+
b := make([]float32, 1000000)
302+
for i := range b {
303+
a[i] = 1
304+
b[i] = 1
305+
}
306+
307+
size := int64(len(a) * 4)
308+
309+
var memA, memB DevicePtr
310+
if memA, err = MemAlloc(size); err != nil {
311+
bench.Fatalf("Failed to allocate for a: %v", err)
312+
}
313+
if memB, err = MemAlloc(size); err != nil {
314+
bench.Fatalf("Failed to allocate for b: %v", err)
315+
}
316+
317+
bctx := NewBatchedContext(ctx, dev)
318+
319+
args := []unsafe.Pointer{
320+
unsafe.Pointer(&memA),
321+
unsafe.Pointer(&memB),
322+
unsafe.Pointer(&size),
323+
}
324+
325+
// ACTUAL BENCHMARK STARTS HERE
326+
for i := 0; i < bench.N; i++ {
327+
for j := 0; j < 1000; j++ {
328+
bctx.MemcpyHtoD(memA, unsafe.Pointer(&a[0]), size)
329+
bctx.MemcpyHtoD(memB, unsafe.Pointer(&b[0]), size)
330+
bctx.LaunchKernel(fn, 100, 10, 1, 1000, 1, 1, 0, Stream(0), args)
331+
bctx.Synchronize()
332+
bctx.MemcpyDtoH(unsafe.Pointer(&a[0]), memA, size)
333+
bctx.MemcpyDtoH(unsafe.Pointer(&b[0]), memB, size)
334+
}
335+
bctx.DoWork()
336+
}
337+
338+
MemFree(memA)
339+
MemFree(memB)
191340
Unload(mod)
192341
DestroyContext(&ctx)
342+
193343
}

0 commit comments

Comments
 (0)