-
Notifications
You must be signed in to change notification settings - Fork 95
/
gpuarray_kernel.c
104 lines (88 loc) · 2.61 KB
/
gpuarray_kernel.c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
#include "gpuarray/kernel.h"
#include "gpuarray/error.h"
#include "gpuarray/types.h"
#include <stdlib.h>
/*
 * Initialize the kernel wrapper `k`.
 *
 * Allocates the per-argument pointer table stored in k->args and then
 * forwards every other parameter to gpukernel_init(), storing its result
 * in k->k.
 *
 * Returns GA_NO_ERROR on success, GA_MEMORY_ERROR if the argument table
 * cannot be allocated, or whatever error code gpukernel_init() reports
 * through its `res` out-parameter.  On any failure `k` is left with both
 * k->k and k->args in a state that is safe to pass to GpuKernel_clear().
 */
int GpuKernel_init(GpuKernel *k, gpucontext *ctx, unsigned int count,
                   const char **strs, const size_t *lens, const char *name,
                   unsigned int argcount, const int *types, int flags,
                   char **err_str) {
  int res = GA_NO_ERROR;

  /* Put the handle in a known state first so that GpuKernel_clear() never
   * reads an uninitialized k->k, even if we bail out before creating it. */
  k->k = NULL;

  /* calloc(0, size) is allowed to return NULL, which would be misreported
   * as an out-of-memory condition for kernels that take no arguments.
   * Always request at least one slot. */
  k->args = calloc(argcount != 0 ? argcount : 1, sizeof(void *));
  if (k->args == NULL)
    return GA_MEMORY_ERROR;

  k->k = gpukernel_init(ctx, count, strs, lens, name, argcount, types,
                        flags, &res, err_str);
  if (res != GA_NO_ERROR)
    GpuKernel_clear(k); /* releases k->k (if any) and frees k->args */
  return res;
}
/*
 * Release everything owned by `k`: the underlying kernel handle (when one
 * is present) and the argument pointer table.  Both fields are reset to
 * NULL afterwards.
 */
void GpuKernel_clear(GpuKernel *k) {
  if (k->k != NULL) {
    gpukernel_release(k->k);
    k->k = NULL;
  }

  free(k->args);
  k->args = NULL;
}
/*
 * Return the result of gpukernel_context() for the handle wrapped by `k`.
 */
gpucontext *GpuKernel_context(GpuKernel *k) {
  gpucontext *ctx = gpukernel_context(k->k);

  return ctx;
}
/*
 * Choose a grid size (*gs) and local size (*ls) for a problem of `n`
 * elements.
 *
 * On entry, a value of 0 in *gs or *ls means "pick one for me"; a non-zero
 * value is kept as supplied by the caller.  The choice is driven by four
 * properties queried from the kernel: maximum local size, preferred local
 * size, number of processors and maximum grid size.
 *
 * Returns GA_NO_ERROR on success, or the error code of the first property
 * query that fails (in which case *gs and *ls are left untouched).
 */
int GpuKernel_sched(GpuKernel *k, size_t n, size_t *gs, size_t *ls) {
  size_t min_l;
  size_t max_l;
  size_t target_l;
  size_t max_g;
  size_t target_g;
  unsigned int numprocs;
  int err;
  int want_ls = 0;

  err = gpukernel_property(k->k, GA_KERNEL_PROP_MAXLSIZE, &max_l);
  if (err != GA_NO_ERROR)
    return err;
  err = gpukernel_property(k->k, GA_KERNEL_PROP_PREFLSIZE, &min_l);
  if (err != GA_NO_ERROR)
    return err;
  err = gpukernel_property(k->k, GA_CTX_PROP_NUMPROCS, &numprocs);
  if (err != GA_NO_ERROR)
    return err;
  err = gpukernel_property(k->k, GA_CTX_PROP_MAXGSIZE, &max_g);
  if (err != GA_NO_ERROR)
    return err;

  /* min_l is used as a divisor below; guard against a backend that
   * reports 0 for the preferred local size. */
  if (min_l == 0)
    min_l = 1;

  /* Do something about these hardcoded values */
  /* Widen before multiplying so the product is computed in size_t. */
  target_g = (size_t)numprocs * 32;
  if (target_g > max_g)
    target_g = max_g;
  /* A computed grid size is later used as a divisor; never let it be 0. */
  if (target_g == 0)
    target_g = 1;

  target_l = 512;
  if (target_l > max_l)
    target_l = max_l;

  if (*ls == 0) {
    want_ls = 1;
    *ls = min_l;
  }

  if (*gs == 0) {
    /* Ceiling division of n by *ls.  n == 0 is special-cased because
     * (n - 1) would wrap around to SIZE_MAX. */
    *gs = (n == 0) ? 1 : ((n - 1) / *ls) + 1;
    if (*gs > target_g)
      *gs = target_g;
  }

  /* If the chosen geometry does not cover n and we are free to pick the
   * local size, grow it (up to target_l). */
  if (want_ls && n > (*ls * *gs)) {
    /* The division and multiplication by min_l is to ensure we end up
     * with a multiple of min_l */
    *ls = ((n / min_l) / *gs) * min_l;
    if (*ls > target_l)
      *ls = target_l;
  }

  return GA_NO_ERROR;
}
/*
 * Forward argument `a` for slot `i` to gpukernel_setarg() on the wrapped
 * kernel handle and return its status code unchanged.
 */
int GpuKernel_setarg(GpuKernel *k, unsigned int i, void *a) {
  int status = gpukernel_setarg(k->k, i, a);

  return status;
}
/*
 * Forward a launch request to gpukernel_call() on the wrapped kernel
 * handle.  All parameters (`n`, `gs`, `ls`, `shared`, `args`) are passed
 * through untouched and the callee's status code is returned as-is.
 */
int GpuKernel_call(GpuKernel *k, unsigned int n,
                   const size_t *gs, const size_t *ls,
                   size_t shared, void **args) {
  int status = gpukernel_call(k->k, n, gs, ls, shared, args);

  return status;
}
/*
 * Forward to gpukernel_binary() on the wrapped kernel handle, passing the
 * `sz` and `bin` out-parameters through, and return its status code.
 */
int GpuKernel_binary(const GpuKernel *k, size_t *sz, void **bin) {
  int status = gpukernel_binary(k->k, sz, bin);

  return status;
}
const char *GpuKernel_error(const GpuKernel *k, int err) {
return gpucontext_error(gpukernel_context(k->k), err);
}