-
Notifications
You must be signed in to change notification settings - Fork 224
/
runtime.jl
102 lines (81 loc) · 2.71 KB
/
runtime.jl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
# CUDA-specific runtime libraries
import Base.Sys: WORD_SIZE
## GPU runtime library

# Reset the runtime cache from global scope, so that any change to this file
# triggers recompilation of the on-device runtime library.
GPUCompiler.reset_runtime()
# Load (or build and cache) the GPU runtime library ahead of time, for the
# compilation jobs most likely to be needed given the supported compute
# capabilities. This avoids paying the runtime-build cost at first kernel launch.
function precompile_runtime(caps=CUDA.llvm_compat(LLVM.version()).cap)
    source = FunctionSpec(()->return, Tuple{})
    params = CUDACompilerParams()
    JuliaContext() do ctx
        for capability in caps
            # NOTE: this often runs when we don't have a functioning set-up,
            #       so we don't use CUDACompilerTarget(...) which requires NVML
            ptx_target = PTXCompilerTarget(; cap=capability)
            compile_job = CompilerJob(ptx_target, source, params)
            GPUCompiler.load_runtime(compile_job; ctx)
        end
    end
    return
end
# Per-kernel state threaded through device code by GPUCompiler. Currently only
# carries the location of the exception flag that `signal_exception` writes to.
struct KernelState
    exception_flag::Ptr{Cvoid}  # NULL when no flag was set up (see signal_exception)
end
# Fetch the current kernel's `KernelState`; `@generated` so GPUCompiler can
# materialize the state value at compile time.
@inline @generated kernel_state() = GPUCompiler.kernel_state_value(KernelState)

# Pointer to the exception flag carried in the kernel state (may be C_NULL).
exception_flag() = kernel_state().exception_flag
# Signal to the host that an exception was thrown on the device, by storing 1
# into the exception flag and fencing so the store becomes externally visible.
# Falls back to printing a warning when no flag pointer is available.
function signal_exception()
    flag = exception_flag()
    if flag === C_NULL
        @cuprintf("""
            WARNING: could not signal exception status to the host, execution will continue.
            Please file a bug.
            """)
        return
    end

    unsafe_store!(convert(Ptr{Int}, flag), 1)
    # system-scope fence so the write is observable beyond the device
    threadfence_system()
    return
end
# Print a brief device-side error message for a thrown exception.
# `ex` is substituted into %s — presumably the exception's type name as a
# device string; confirm against the GPUCompiler runtime interface.
function report_exception(ex)
    @cuprintf("""
        ERROR: a %s was thrown during kernel execution.
        Run Julia on debug level 2 for device stack traces.
        """, ex)
    return
end
# Report a failed dynamic (device-side) allocation of `sz` bytes.
function report_oom(sz)
    @cuprintf("ERROR: Out of dynamic GPU memory (trying to allocate %i bytes)\n", sz)
    return
end
# Print the header of a detailed exception report; individual stack-trace
# entries are expected to follow via `report_exception_frame`.
function report_exception_name(ex)
    @cuprintf("""
        ERROR: a %s was thrown during kernel execution.
        Stacktrace:
        """, ex)
    return
end
# Print a single stack-trace entry: frame index, function, and file:line.
function report_exception_frame(idx, func, file, line)
    @cuprintf(" [%i] %s at %s:%i\n", idx, func, file, line)
    return
end
## CUDA device library

# Parse the libdevice bitcode image into a fresh LLVM module in context `ctx`.
# NOTE(review): `cap` is not used in this body — presumably a single,
# capability-independent libdevice image is shipped; the parameter is kept
# for interface compatibility. Confirm against callers.
function load_libdevice(cap; ctx)
    parse(LLVM.Module, read(libdevice); ctx)
end
# Link NVIDIA's libdevice into `mod` when it references any `__nv_` functions,
# and register the `nvvm-reflect-ftz` module metadata (value 1 selects the
# flush-denormals-to-zero variants of libdevice math — per the NVVM spec).
function link_libdevice!(mod::LLVM.Module, cap::VersionNumber, undefined_fns)
    ctx = LLVM.context(mod)

    # only link if there's undefined __nv_ functions
    if !any(fn->startswith(fn, "__nv_"), undefined_fns)
        return
    end
    lib::LLVM.Module = load_libdevice(cap; ctx)

    # override libdevice's triple and datalayout to avoid warnings
    # (must happen before linking, while `lib` is still a separate module)
    triple!(lib, triple(mod))
    datalayout!(lib, datalayout(mod))

    GPUCompiler.link_library!(mod, lib)

    # NOTE(review): no passes are added to this manager before `run!`;
    # presumably only the metadata push is load-bearing here — confirm
    # whether the empty pass-manager run is still required.
    @dispose pm=ModulePassManager() begin
        push!(metadata(mod)["nvvm-reflect-ftz"],
              MDNode([ConstantInt(Int32(1); ctx)]; ctx))
        run!(pm, mod)
    end
end