-
Notifications
You must be signed in to change notification settings - Fork 54
Add compile_all API
#484
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Add compile_all API
#484
Changes from all commits
4d82e84
a126b51
fc21ef0
dbc6c38
97712f5
79d2ab7
2cf6b6c
a318b5d
0c14dde
42cdccf
f778db9
edae868
48548e3
792e8d7
3813c0a
67ec2fa
77f0e85
e69affe
5408f2f
b1e51bf
bf4a5f2
afa8959
851f39b
90287fe
4b31dbc
4a4baf6
1a04d0b
9650d94
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -28,6 +28,7 @@ | |
| funcdesc, | ||
| ) | ||
| from numba.cuda.cudadrv import nvvm, nvrtc | ||
| from numba.cuda.cudadrv.linkable_code import LinkableCode | ||
| from numba.cuda.descriptor import cuda_target | ||
| from numba.cuda.flags import CUDAFlags | ||
| from numba.cuda.target import CUDACABICallConv | ||
|
|
@@ -977,6 +978,175 @@ def define_error_gv(postfix): | |
| return helper_func | ||
|
|
||
|
|
||
| def compile_all( | ||
| pyfunc, | ||
| sig, | ||
| debug=None, | ||
| lineinfo=False, | ||
| device=True, | ||
| fastmath=False, | ||
| cc=None, | ||
| opt=None, | ||
| abi="c", | ||
| abi_info=None, | ||
| output="ltoir", | ||
| forceinline=False, | ||
| launch_bounds=None, | ||
| ): | ||
| """Similar to ``compile()``, but returns a list of PTX codes/LTO-IRs for | ||
| the compiled function and the external functions it depends on. | ||
| If external functions are CUDA C++ source, they will be compiled with | ||
| NVRTC. Other kinds of external function code (e.g. cubins, fatbins, etc.) | ||
| will be added directly to the return list. The output code kind is | ||
| determined by the ``output`` parameter, and defaults to ``"ltoir"``. | ||
| """ | ||
|
|
||
| if output not in ("ptx", "ltoir"): | ||
| raise NotImplementedError(f"Unsupported output type: {output}") | ||
|
|
||
| if forceinline and output != "ltoir": | ||
| raise ValueError("Can only designate forced inlining in LTO-IR") | ||
|
|
||
| lto = output == "ltoir" | ||
|
|
||
| cc = _default_cc(cc) | ||
|
|
||
| lib, resty = _compile_pyfunc_with_fixup( | ||
| pyfunc, | ||
| sig, | ||
| debug=debug, | ||
| lineinfo=lineinfo, | ||
| device=device, | ||
| fastmath=fastmath, | ||
| cc=cc, | ||
| opt=opt, | ||
| abi=abi, | ||
| abi_info=abi_info, | ||
| forceinline=forceinline, | ||
| launch_bounds=launch_bounds, | ||
| lto=lto, | ||
| ) | ||
|
|
||
| if lto: | ||
| code = lib.get_ltoir(cc=cc) | ||
| else: | ||
| code = lib.get_asm_str(cc=cc) | ||
| codes = [code] | ||
|
|
||
| # linking_files | ||
| is_ltoir = output == "ltoir" | ||
| for path_or_obj in lib._linking_files: | ||
| obj = LinkableCode.from_path_or_obj(path_or_obj) | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. The variable is called
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. The objects in |
||
| if obj.kind == "cu": | ||
| code, log = nvrtc.compile( | ||
| obj.data, | ||
| obj.name, | ||
| cc, | ||
| ltoir=is_ltoir, | ||
| lineinfo=lineinfo, | ||
| debug=debug, | ||
| ) | ||
| codes.append(code) | ||
| else: | ||
| codes.append(obj) | ||
|
|
||
| return codes, resty | ||
|
|
||
|
|
||
| def _compile_pyfunc_with_fixup( | ||
| pyfunc, | ||
| sig, | ||
| debug=None, | ||
| lineinfo=False, | ||
| device=True, | ||
| fastmath=False, | ||
| cc=None, | ||
| opt=None, | ||
| abi="c", | ||
| abi_info=None, | ||
| forceinline=False, | ||
| launch_bounds=None, | ||
| lto=False, | ||
| ): | ||
| """Internal method to compile a python function and perform post-processing | ||
|
|
||
| - If pyfunc is a kernel, post-processing includes kernel fixup and setting | ||
| launch bounds. | ||
| - If pyfunc is a device function, post-processing includes ABI wrapper. | ||
|
|
||
| `lto` means that all internal pipeline options use LTO. | ||
|
|
||
| Returns the code library and return type. | ||
| """ | ||
| if abi not in ("numba", "c"): | ||
| raise NotImplementedError(f"Unsupported ABI: {abi}") | ||
|
|
||
| if abi == "c" and not device: | ||
| raise NotImplementedError("The C ABI is not supported for kernels") | ||
|
|
||
| if forceinline and not device: | ||
| raise ValueError("Cannot force-inline kernels") | ||
|
|
||
| debug = config.CUDA_DEBUGINFO_DEFAULT if debug is None else debug | ||
| opt = (config.OPT != 0) if opt is None else opt | ||
|
|
||
| if debug and opt: | ||
| msg = ( | ||
| "debug=True with opt=True " | ||
| "is not supported by CUDA. This may result in a crash" | ||
| " - set debug=False or opt=False." | ||
| ) | ||
| warn(NumbaInvalidConfigWarning(msg)) | ||
|
|
||
| abi_info = abi_info or dict() | ||
|
|
||
| nvvm_options = {"fastmath": fastmath, "opt": 3 if opt else 0} | ||
|
|
||
| if debug: | ||
| nvvm_options["g"] = None | ||
|
|
||
| if lto: | ||
| nvvm_options["gen-lto"] = None | ||
|
|
||
| args, return_type = sigutils.normalize_signature(sig) | ||
|
|
||
| cc = _default_cc(cc) | ||
|
|
||
| cres = compile_cuda( | ||
| pyfunc, | ||
| return_type, | ||
| args, | ||
| debug=debug, | ||
| lineinfo=lineinfo, | ||
| fastmath=fastmath, | ||
| nvvm_options=nvvm_options, | ||
| cc=cc, | ||
| forceinline=forceinline, | ||
| ) | ||
| resty = cres.signature.return_type | ||
|
|
||
| if resty and not device and resty != types.void: | ||
| raise TypeError("CUDA kernel must have void return type.") | ||
|
|
||
| tgt = cres.target_context | ||
|
|
||
| if device: | ||
| lib = cres.library | ||
| if abi == "c": | ||
| wrapper_name = abi_info.get("abi_name", pyfunc.__name__) | ||
| lib = cabi_wrap_function( | ||
| tgt, lib, cres.fndesc, wrapper_name, nvvm_options | ||
| ) | ||
| else: | ||
| lib = cres.library | ||
| kernel = lib.get_function(cres.fndesc.llvm_func_name) | ||
| lib._entry_name = cres.fndesc.llvm_func_name | ||
| kernel_fixup(kernel, debug) | ||
| nvvm.set_launch_bounds(kernel, launch_bounds) | ||
|
|
||
| return lib, resty | ||
|
|
||
|
|
||
| @global_compiler_lock | ||
| def compile( | ||
| pyfunc, | ||
|
|
@@ -1050,82 +1220,28 @@ def compile( | |
| :return: (code, resty): The compiled code and inferred return type | ||
| :rtype: tuple | ||
| """ | ||
| if abi not in ("numba", "c"): | ||
| raise NotImplementedError(f"Unsupported ABI: {abi}") | ||
|
|
||
| if abi == "c" and not device: | ||
| raise NotImplementedError("The C ABI is not supported for kernels") | ||
|
|
||
| if output not in ("ptx", "ltoir"): | ||
| raise NotImplementedError(f"Unsupported output type: {output}") | ||
|
|
||
| if forceinline and not device: | ||
| raise ValueError("Cannot force-inline kernels") | ||
|
|
||
| if forceinline and output != "ltoir": | ||
| raise ValueError("Can only designate forced inlining in LTO-IR") | ||
|
|
||
| debug = config.CUDA_DEBUGINFO_DEFAULT if debug is None else debug | ||
| opt = (config.OPT != 0) if opt is None else opt | ||
|
|
||
| if debug and opt: | ||
| msg = ( | ||
| "debug=True with opt=True " | ||
| "is not supported by CUDA. This may result in a crash" | ||
| " - set debug=False or opt=False." | ||
| ) | ||
| warn(NumbaInvalidConfigWarning(msg)) | ||
|
|
||
| lto = output == "ltoir" | ||
| abi_info = abi_info or dict() | ||
|
|
||
| nvvm_options = {"fastmath": fastmath, "opt": 3 if opt else 0} | ||
|
|
||
| if debug: | ||
| nvvm_options["g"] = None | ||
|
|
||
| if lto: | ||
| nvvm_options["gen-lto"] = None | ||
|
|
||
| args, return_type = sigutils.normalize_signature(sig) | ||
|
|
||
| # If the user has used the config variable to specify a non-default that is | ||
| # greater than the lowest non-deprecated one, then we should default to | ||
| # their specified CC instead of the lowest non-deprecated one. | ||
| MIN_CC = max(config.CUDA_DEFAULT_PTX_CC, nvrtc.get_lowest_supported_cc()) | ||
| cc = cc or MIN_CC | ||
|
|
||
| cres = compile_cuda( | ||
| lib, resty = _compile_pyfunc_with_fixup( | ||
| pyfunc, | ||
| return_type, | ||
| args, | ||
| sig, | ||
| debug=debug, | ||
| lineinfo=lineinfo, | ||
| device=device, | ||
| fastmath=fastmath, | ||
| nvvm_options=nvvm_options, | ||
| cc=cc, | ||
| opt=opt, | ||
| abi=abi, | ||
| abi_info=abi_info, | ||
| forceinline=forceinline, | ||
| launch_bounds=launch_bounds, | ||
| lto=lto, | ||
| ) | ||
| resty = cres.signature.return_type | ||
|
|
||
| if resty and not device and resty != types.void: | ||
| raise TypeError("CUDA kernel must have void return type.") | ||
|
|
||
| tgt = cres.target_context | ||
|
|
||
| if device: | ||
| lib = cres.library | ||
| if abi == "c": | ||
| wrapper_name = abi_info.get("abi_name", pyfunc.__name__) | ||
| lib = cabi_wrap_function( | ||
| tgt, lib, cres.fndesc, wrapper_name, nvvm_options | ||
| ) | ||
| else: | ||
| lib = cres.library | ||
| kernel = lib.get_function(cres.fndesc.llvm_func_name) | ||
| lib._entry_name = cres.fndesc.llvm_func_name | ||
| kernel_fixup(kernel, debug) | ||
| nvvm.set_launch_bounds(kernel, launch_bounds) | ||
|
|
||
| if lto: | ||
| code = lib.get_ltoir(cc=cc) | ||
|
|
@@ -1272,3 +1388,14 @@ class ExternFunction: | |
| def __init__(self, name, sig): | ||
| self.name = name | ||
| self.sig = sig | ||
|
|
||
|
|
||
| def _default_cc(cc): | ||
| """ | ||
| Return default compute capability based on config and nvrtc lowest supported cc. | ||
|
|
||
| If user specifies a cc, return that. | ||
| """ | ||
| if cc: | ||
| return cc | ||
| return max(config.CUDA_DEFAULT_PTX_CC, nvrtc.get_lowest_supported_cc()) | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Does this need to be a hard error? Is
`forceinline` a guarantee, or can the compiler still ignore it? There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I imagine this is because there's no way to designate the inline information when you specify output as PTX. The information can only persist via LTOIR.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
That's correct - you can't inline PTX. This looks like new code on the diff, but this PR is only really moving it from the original
`compile()` implementation.