diff --git a/ci/test_thirdparty_awkward.sh b/ci/test_thirdparty_awkward.sh index 6349ac1c2..b1ba442f3 100755 --- a/ci/test_thirdparty_awkward.sh +++ b/ci/test_thirdparty_awkward.sh @@ -74,6 +74,21 @@ index 39080a34..0eb3940f 100644 array = rng.integers(50, size=1000) EOF +patch -p1 <<'EOF' +diff --git a/pyproject.toml b/pyproject.toml +index 78ecfba9..80a25474 100644 +--- a/pyproject.toml ++++ b/pyproject.toml +@@ -136,6 +136,7 @@ filterwarnings = [ + "ignore:.*np\\.MachAr.*:DeprecationWarning", + "ignore:module 'sre_.*' is deprecated:DeprecationWarning", + "ignore:Jitify is performing a one-time only warm-up", ++ "ignore:Context.call_conv is deprecated.", + ] + log_cli_level = "INFO" + testpaths = ["tests", "tests-cuda", "tests-cuda-kernels", "tests-cuda-kernels-explicit"] +EOF + rapids-logger "Generate awkward tests" nox -s prepare -- --tests diff --git a/numba_cuda/numba/cuda/cgutils.py b/numba_cuda/numba/cuda/cgutils.py index fee2489bc..93e33642a 100644 --- a/numba_cuda/numba/cuda/cgutils.py +++ b/numba_cuda/numba/cuda/cgutils.py @@ -764,12 +764,12 @@ def _dbg(): with if_unlikely(builder, out_of_bounds_upper): if config.FULL_TRACEBACKS: _dbg() - context.call_conv.return_user_exc(builder, IndexError, (msg,)) + context.fndesc.call_conv.return_user_exc(builder, IndexError, (msg,)) out_of_bounds_lower = builder.icmp_signed("<", ind, ind.type(0)) with if_unlikely(builder, out_of_bounds_lower): if config.FULL_TRACEBACKS: _dbg() - context.call_conv.return_user_exc(builder, IndexError, (msg,)) + context.fndesc.call_conv.return_user_exc(builder, IndexError, (msg,)) def get_item_pointer2( @@ -936,7 +936,7 @@ def guard_null(context, builder, value, exc_tuple): with builder.if_then(is_scalar_zero(builder, value), likely=False): exc = exc_tuple[0] exc_args = exc_tuple[1:] or None - context.call_conv.return_user_exc(builder, exc, exc_args) + context.fndesc.call_conv.return_user_exc(builder, exc, exc_args) def guard_memory_error(context, builder, pointer, msg=None): @@ -946,7 +946,7 @@ def guard_memory_error(context, builder, pointer, msg=None): assert isinstance(pointer.type, ir.PointerType), pointer.type exc_args = (msg,) if msg else () with builder.if_then(is_null(builder, pointer), likely=False): - context.call_conv.return_user_exc(builder, MemoryError, exc_args) + context.fndesc.call_conv.return_user_exc(builder, MemoryError, exc_args) @contextmanager diff --git a/numba_cuda/numba/cuda/compiler.py b/numba_cuda/numba/cuda/compiler.py index 511d28eb4..cbe1dfac2 100644 --- a/numba_cuda/numba/cuda/compiler.py +++ b/numba_cuda/numba/cuda/compiler.py @@ -29,7 +29,7 @@ from numba.cuda.cudadrv.linkable_code import LinkableCode from numba.cuda.descriptor import cuda_target from numba.cuda.flags import CUDAFlags -from numba.cuda.target import CUDACABICallConv +from numba.cuda.core.callconv import CUDACABICallConv, CUDACallConv from numba.cuda.core.compiler import CompilerBase from numba.cuda.core.compiler_machinery import ( FunctionPass, @@ -699,6 +699,8 @@ def compile_cuda( cc=None, max_registers=None, lto=False, + abi="numba", + abi_info=None, ): if cc is None: raise ValueError("Compute Capability must be supplied") @@ -741,6 +743,12 @@ def compile_cuda( flags.max_registers = max_registers flags.lto = lto + if abi == "c": + flags.call_conv = CUDACABICallConv(targetctx) + + if abi_info is not None: + flags.abi_info = abi_info + with utils.numba_target_override(): cres = compile_extra( typingctx=typingctx, @@ -759,57 +767,6 @@ def compile_cuda( return cres -def cabi_wrap_function( - context, lib, fndesc, wrapper_function_name, nvvm_options -): - """ - Wrap a Numba ABI function in a C ABI wrapper at the NVVM IR level. - - The C ABI wrapper will have the same name as the source Python function. - """ - # The wrapper will be contained in a new library that links to the wrapped - # function's library - library = lib.codegen.create_library( - f"{lib.name}_function_", - entry_name=wrapper_function_name, - nvvm_options=nvvm_options, - ) - library.add_linking_library(lib) - - # Determine the caller (C ABI) and wrapper (Numba ABI) function types - argtypes = fndesc.argtypes - restype = fndesc.restype - c_call_conv = CUDACABICallConv(context) - wrapfnty = c_call_conv.get_function_type(restype, argtypes) - fnty = context.call_conv.get_function_type(fndesc.restype, argtypes) - - # Create a new module and declare the callee - wrapper_module = context.create_module("cuda.cabi.wrapper") - func = ir.Function(wrapper_module, fnty, fndesc.llvm_func_name) - - # Define the caller - populate it with a call to the callee and return - # its return value - - wrapfn = ir.Function(wrapper_module, wrapfnty, wrapper_function_name) - builder = ir.IRBuilder(wrapfn.append_basic_block("")) - - arginfo = context.get_arg_packer(argtypes) - callargs = arginfo.from_arguments(builder, wrapfn.args) - # We get (status, return_value), but we ignore the status since we - # can't propagate it through the C ABI anyway - _, return_value = context.call_conv.call_function( - builder, func, restype, argtypes, callargs - ) - builder.ret(return_value) - - if config.DUMP_LLVM: - utils.dump_llvm(fndesc, wrapper_module) - - library.add_ir_module(wrapper_module) - library.finalize() - return library - - def kernel_fixup(kernel, debug): if debug: exc_helper = add_exception_store_helper(kernel) @@ -934,7 +891,7 @@ def define_error_gv(postfix): # Implement status check / exception store logic status_code = helper_func.args[0] - call_conv = cuda_target.target_context.call_conv + call_conv = CUDACallConv(cuda_target.target_context) status = call_conv._get_return_status(builder, status_code) # Check error status @@ -1118,23 +1075,16 @@ def _compile_pyfunc_with_fixup( nvvm_options=nvvm_options, cc=cc, forceinline=forceinline, + abi=abi, + abi_info=abi_info, ) resty = cres.signature.return_type if resty and not device and resty != types.void: raise TypeError("CUDA kernel must have void return type.") - tgt = cres.target_context - - if device: - lib = cres.library - if abi == "c": - wrapper_name = abi_info.get("abi_name", pyfunc.__name__) - lib = cabi_wrap_function( - tgt, lib, cres.fndesc, wrapper_name, nvvm_options - ) - else: - lib = cres.library + lib = cres.library + if not device: kernel = lib.get_function(cres.fndesc.llvm_func_name) lib._entry_name = cres.fndesc.llvm_func_name kernel_fixup(kernel, debug) @@ -1355,7 +1305,9 @@ def compile_ptx_for_current_device( ) -def declare_device_function(name, restype, argtypes, link, use_cooperative): +def declare_device_function( + name, restype, argtypes, link, use_cooperative, abi +): from .descriptor import cuda_target typingctx = cuda_target.typing_context @@ -1376,9 +1328,18 @@ def declare_device_function(name, restype, argtypes, link, use_cooperative): lib.add_linking_file(file) lib.use_cooperative = use_cooperative + if abi == "numba": + call_conv = CUDACallConv(targetctx) + elif abi == "c": + call_conv = CUDACABICallConv(targetctx) + else: + raise NotImplementedError(f"Unsupported ABI: {abi}") + # ExternalFunctionDescriptor provides a lowering implementation for calling # external functions - fndesc = funcdesc.ExternalFunctionDescriptor(name, restype, argtypes) + fndesc = funcdesc.ExternalFunctionDescriptor( + name, restype, argtypes, call_conv + ) targetctx.insert_user_function(extfn, fndesc, libs=(lib,)) return device_function_template diff --git a/numba_cuda/numba/cuda/core/base.py b/numba_cuda/numba/cuda/core/base.py index f09e512f9..c518f9cb8 100644 --- a/numba_cuda/numba/cuda/core/base.py +++ b/numba_cuda/numba/cuda/core/base.py @@ -468,19 +468,6 @@ def get_external_function_type(self, fndesc): fnty = llvmir.FunctionType(restype, argtypes) return fnty - def declare_function(self, module, fndesc): - fnty = self.call_conv.get_function_type(fndesc.restype, fndesc.argtypes) - fn = cgutils.get_or_insert_function(module, fnty, fndesc.mangled_name) - self.call_conv.decorate_function( - fn, fndesc.args, fndesc.argtypes, noalias=fndesc.noalias - ) - if fndesc.inline: - fn.attributes.add("alwaysinline") - # alwaysinline overrides optnone - fn.attributes.discard("noinline") - fn.attributes.discard("optnone") - return fn - def declare_external_function(self, module, fndesc): fnty = self.get_external_function_type(fndesc) fn = cgutils.get_or_insert_function(module, fnty, fndesc.mangled_name) @@ -975,7 +962,7 @@ def call_internal(self, builder, fndesc, sig, args): builder, fndesc, sig, args ) with cgutils.if_unlikely(builder, status.is_error): - self.call_conv.return_status_propagate(builder, status) + fndesc.call_conv.return_status_propagate(builder, status) res = imputils.fix_returning_optional(self, builder, sig, status, res) return res @@ -986,8 +973,8 @@ def call_internal_no_propagate(self, builder, fndesc, sig, args): """ # Add call to the generated function llvm_mod = builder.module - fn = self.declare_function(llvm_mod, fndesc) - status, res = self.call_conv.call_function( + fn = fndesc.declare_function(llvm_mod) + status, res = fndesc.call_conv.call_function( builder, fn, sig.return_type, sig.args, args ) return status, res diff --git a/numba_cuda/numba/cuda/core/callconv.py b/numba_cuda/numba/cuda/core/callconv.py index c929c25b6..ad69ca21c 100644 --- a/numba_cuda/numba/cuda/core/callconv.py +++ b/numba_cuda/numba/cuda/core/callconv.py @@ -3,6 +3,7 @@ from numba.cuda import types from numba.cuda import cgutils +from numba.cuda import itanium_mangler from collections import namedtuple from llvmlite import ir @@ -161,6 +162,11 @@ def _get_arg_packer(self, argtypes): """ return self.context.get_arg_packer(argtypes) + def mangler(self, name, argtypes, *, abi_tags=(), uid=None): + return itanium_mangler.mangle( + name, argtypes, abi_tags=abi_tags, uid=uid + ) + class MinimalCallConv(BaseCallConv): """ @@ -341,6 +347,93 @@ def get_exception(self, exc_id): return exc, exc_args, locinfo +class CUDACallConv(MinimalCallConv): + def decorate_function(self, fn, args, fe_argtypes, noalias=False): + """ + Set names and attributes of function arguments. + """ + assert not noalias + arginfo = self._get_arg_packer(fe_argtypes) + # Do not prefix "arg." on argument name, so that nvvm compiler + # can track debug info of argument more accurately + arginfo.assign_names(self.get_arguments(fn), args) + fn.args[0].name = ".ret" + + +class CUDACABICallConv(BaseCallConv): + """ + Calling convention aimed at matching the CUDA C/C++ ABI. The implemented + function signature is: + + () + + Exceptions are unsupported in this convention. + """ + + def _make_call_helper(self, builder): + # Call helpers are used to help report exceptions back to Python, so + # none is required here. + return None + + def return_value(self, builder, retval): + return builder.ret(retval) + + def return_user_exc( + self, builder, exc, exc_args=None, loc=None, func_name=None + ): + msg = "Python exceptions are unsupported in the CUDA C/C++ ABI" + raise NotImplementedError(msg) + + def return_status_propagate(self, builder, status): + msg = "Return status is unsupported in the CUDA C/C++ ABI" + raise NotImplementedError(msg) + + def get_function_type(self, restype, argtypes): + """ + Get the LLVM IR Function type for *restype* and *argtypes*. + """ + arginfo = self._get_arg_packer(argtypes) + argtypes = list(arginfo.argument_types) + fnty = ir.FunctionType(self.get_return_type(restype), argtypes) + return fnty + + def decorate_function(self, fn, args, fe_argtypes, noalias=False): + """ + Set names and attributes of function arguments. + """ + assert not noalias + arginfo = self._get_arg_packer(fe_argtypes) + arginfo.assign_names(self.get_arguments(fn), ["arg." + a for a in args]) + + def get_arguments(self, func): + """ + Get the Python-level arguments of LLVM *func*. + """ + return func.args + + def call_function(self, builder, callee, resty, argtys, args): + """ + Call the Numba-compiled *callee*. + """ + arginfo = self._get_arg_packer(argtys) + realargs = arginfo.as_arguments(builder, args) + code = builder.call(callee, realargs) + # No status required as we don't support exceptions or a distinct None + # value in a C ABI. + status = None + out = self.context.get_returned_value(builder, resty, code) + return status, out + + def get_return_type(self, ty): + return self.context.data_model_manager[ty].get_return_type() + + def mangler(self, name, argtypes, *, abi_tags=None, uid=None): + if name.startswith(".NumbaEnv."): + func_name = name.split(".")[-1] + return f"_ZN08NumbaEnv{func_name}" + return name.split(".")[-1] + + class ErrorModel: def __init__(self, call_conv): self.call_conv = call_conv @@ -385,8 +478,8 @@ class NumpyErrorModel(ErrorModel): } -def create_error_model(model_name, context): +def create_error_model(model_name, call_conv): """ Create an error model instance for the given target context. """ - return error_models[model_name](context.call_conv) + return error_models[model_name](call_conv) diff --git a/numba_cuda/numba/cuda/core/compiler.py b/numba_cuda/numba/cuda/core/compiler.py index 19523a33d..c6a037e53 100644 --- a/numba_cuda/numba/cuda/core/compiler.py +++ b/numba_cuda/numba/cuda/core/compiler.py @@ -10,6 +10,8 @@ from numba.cuda.core.untyped_passes import ExtractByteCode, FixupArgs from numba.cuda.core.targetconfig import ConfigStack +from numba.cuda.core.callconv import CUDACallConv + class _CompileStatus: """ @@ -66,7 +68,11 @@ def _make_subtarget(targetctx, flags): subtargetoptions["enable_nrt"] = True if flags.fastmath: subtargetoptions["fastmath"] = flags.fastmath - error_model = callconv.create_error_model(flags.error_model, targetctx) + + # Only the CUDA Calling convention can raise exceptions, so we assume here + # that it is suitable for creating the error model. + call_conv = CUDACallConv(targetctx) + error_model = callconv.create_error_model(flags.error_model, call_conv) subtargetoptions["error_model"] = error_model return targetctx.subtarget(**subtargetoptions) diff --git a/numba_cuda/numba/cuda/core/funcdesc.py b/numba_cuda/numba/cuda/core/funcdesc.py index f6ed3638d..150272929 100644 --- a/numba_cuda/numba/cuda/core/funcdesc.py +++ b/numba_cuda/numba/cuda/core/funcdesc.py @@ -8,7 +8,7 @@ from collections import defaultdict import importlib -from numba.cuda import types +from numba.cuda import types, cgutils from numba.cuda import itanium_mangler from numba.cuda.utils import _dynamic_modname, _dynamic_module @@ -54,6 +54,8 @@ class FunctionDescriptor: "noalias", "abi_tags", "uid", + "call_conv", + "abi_info", ) def __init__( @@ -76,6 +78,8 @@ def __init__( global_dict=None, abi_tags=(), uid=None, + call_conv=None, + abi_info=None, ): self.native = native self.modname = modname @@ -103,12 +107,17 @@ def __init__( # be chosen at link time. qualprefix = qualifying_prefix(self.modname, self.qualname) self.uid = uid - self.mangled_name = mangler( - qualprefix, - self.argtypes, - abi_tags=abi_tags, - uid=uid, - ) + + if abi_info is not None and "abi_name" in abi_info: + self.mangled_name = abi_info["abi_name"] + else: + self.mangled_name = mangler( + qualprefix, + self.argtypes, + abi_tags=abi_tags, + uid=uid, + ) + if env_name is None: env_name = mangler( ".NumbaEnv.{}".format(qualprefix), @@ -120,6 +129,8 @@ def __init__( self.inline = inline self.noalias = noalias self.abi_tags = abi_tags + self.call_conv = call_conv + self.abi_info = abi_info def lookup_globals(self): """ @@ -219,6 +230,8 @@ def _from_python_function( inline=False, noalias=False, abi_tags=(), + call_conv=None, + abi_info=None, ): ( qualname, @@ -247,9 +260,24 @@ def _from_python_function( global_dict=global_dict, abi_tags=abi_tags, uid=func_ir.func_id.unique_id, + call_conv=call_conv, + abi_info=abi_info, ) return self + def declare_function(self, module): + fnty = self.call_conv.get_function_type(self.restype, self.argtypes) + fn = cgutils.get_or_insert_function(module, fnty, self.mangled_name) + self.call_conv.decorate_function( + fn, self.args, self.argtypes, noalias=self.noalias + ) + if self.inline: + fn.attributes.add("alwaysinline") + # alwaysinline overrides optnone + fn.attributes.discard("noinline") + fn.attributes.discard("optnone") + return fn + class PythonFunctionDescriptor(FunctionDescriptor): """ @@ -269,6 +297,8 @@ def from_specialized_function( inline, noalias, abi_tags, + call_conv, + abi_info, ): """ Build a FunctionDescriptor for a given specialization of a Python @@ -284,6 +314,8 @@ def from_specialized_function( inline=inline, noalias=noalias, abi_tags=abi_tags, + call_conv=call_conv, + abi_info=abi_info, ) @classmethod @@ -308,7 +340,7 @@ class ExternalFunctionDescriptor(FunctionDescriptor): __slots__ = () - def __init__(self, name, restype, argtypes): + def __init__(self, name, restype, argtypes, call_conv): args = ["arg%d" % i for i in range(len(argtypes))] def mangler(a, x, abi_tags, uid=None): @@ -327,4 +359,5 @@ def mangler(a, x, abi_tags, uid=None): kws=None, mangler=mangler, argtypes=argtypes, + call_conv=call_conv, ) diff --git a/numba_cuda/numba/cuda/core/generators.py b/numba_cuda/numba/cuda/core/generators.py index 63d68dbed..27d3422d0 100644 --- a/numba_cuda/numba/cuda/core/generators.py +++ b/numba_cuda/numba/cuda/core/generators.py @@ -76,7 +76,10 @@ def __init__(self, lower): self.geninfo = lower.generator_info self.gentype = self.get_generator_type() self.gendesc = GeneratorDescriptor.from_generator_fndesc( - lower.func_ir, self.fndesc, self.gentype, self.context.mangler + lower.func_ir, + self.fndesc, + self.gentype, + self.fndesc.call_conv.mangler, ) # Helps packing non-omitted arguments into a structure self.arg_packer = self.context.get_data_packer(self.fndesc.argtypes) diff --git a/numba_cuda/numba/cuda/core/imputils.py b/numba_cuda/numba/cuda/core/imputils.py index 8cb07adb9..751022675 100644 --- a/numba_cuda/numba/cuda/core/imputils.py +++ b/numba_cuda/numba/cuda/core/imputils.py @@ -212,15 +212,19 @@ def user_function(fndesc, libs): A wrapper inserting code calling Numba-compiled *fndesc*. """ - def imp(context, builder, sig, args): - func = context.declare_function(builder.module, fndesc) + def imp(context, builder, sig, args, fndesc=fndesc): + func = fndesc.declare_function(builder.module) # env=None assumes this is a nopython function - status, retval = context.call_conv.call_function( + status, retval = fndesc.call_conv.call_function( builder, func, fndesc.restype, fndesc.argtypes, args ) - with cgutils.if_unlikely(builder, status.is_error): - context.call_conv.return_status_propagate(builder, status) + + if status is not None: + with cgutils.if_unlikely(builder, status.is_error): + fndesc.call_conv.return_status_propagate(builder, status) + assert sig.return_type == fndesc.restype + # Reconstruct optional return type retval = fix_returning_optional(context, builder, sig, status, retval) # If the data representations don't match up @@ -240,10 +244,10 @@ def user_generator(gendesc, libs): A wrapper inserting code calling Numba-compiled *gendesc*. """ - def imp(context, builder, sig, args): - func = context.declare_function(builder.module, gendesc) + def imp(context, builder, sig, args, gendesc=gendesc): + func = gendesc.declare_function(builder.module) # env=None assumes this is a nopython function - status, retval = context.call_conv.call_function( + status, retval = gendesc.call_conv.call_function( builder, func, gendesc.restype, gendesc.argtypes, args ) # Return raw status for caller to process StopIteration diff --git a/numba_cuda/numba/cuda/core/optional.py b/numba_cuda/numba/cuda/core/optional.py index 8cad47935..e7880e7e0 100644 --- a/numba_cuda/numba/cuda/core/optional.py +++ b/numba_cuda/numba/cuda/core/optional.py @@ -124,6 +124,6 @@ def optional_to_any(context, builder, fromty, toty, val): validbit = cgutils.as_bool_bit(builder, optval.valid) with builder.if_then(builder.not_(validbit), likely=False): msg = "expected %s, got None" % (fromty.type,) - context.call_conv.return_user_exc(builder, TypeError, (msg,)) + context.fndesc.call_conv.return_user_exc(builder, TypeError, (msg,)) return context.cast(builder, optval.data, fromty.type, toty) diff --git a/numba_cuda/numba/cuda/core/pythonapi.py b/numba_cuda/numba/cuda/core/pythonapi.py index 749b3651d..cc8dfe9bd 100644 --- a/numba_cuda/numba/cuda/core/pythonapi.py +++ b/numba_cuda/numba/cuda/core/pythonapi.py @@ -230,7 +230,7 @@ def emit_environment_sentry( ) self.builder.ret(self.get_null_object()) else: - self.context.call_conv.return_user_exc( + self.context.fndesc.call_conv.return_user_exc( self.builder, RuntimeError, (f"missing Environment: {debug_msg}",), @@ -1771,7 +1771,7 @@ def call_jit_code(self, func, sig, args): with has_err: builder.store(status.is_error, is_error_ptr) # Set error state in the Python interpreter - self.context.call_conv.raise_error(builder, self, status) + self.context.fndesc.call_conv.raise_error(builder, self, status) with no_err: # Handle returned value res = imputils.fix_returning_optional( diff --git a/numba_cuda/numba/cuda/core/typed_passes.py b/numba_cuda/numba/cuda/core/typed_passes.py index fa5aefda7..8cd5f3d84 100644 --- a/numba_cuda/numba/cuda/core/typed_passes.py +++ b/numba_cuda/numba/cuda/core/typed_passes.py @@ -31,6 +31,7 @@ compute_cfg_from_blocks, is_operator_or_getitem, ) +from numba.cuda.core.callconv import CUDACallConv from numba.cuda.core import postproc, rewrites, funcdesc, config @@ -325,6 +326,12 @@ def run_pass(self, state): metadata = state.metadata pre_stats = passmanagers.dump_refprune_stats() + call_conv = flags.call_conv + if call_conv is None: + call_conv = CUDACallConv(state.targetctx) + + mangler = call_conv.mangler + msg = "Function %s failed at nopython mode lowering" % ( state.func_id.func_name, ) @@ -336,10 +343,12 @@ def run_pass(self, state): typemap, restype, calltypes, - mangler=targetctx.mangler, + mangler=mangler, inline=flags.forceinline, noalias=flags.noalias, abi_tags=[flags.get_mangle_string()], + call_conv=call_conv, + abi_info=flags.abi_info, ) ) diff --git a/numba_cuda/numba/cuda/cpython/builtins.py b/numba_cuda/numba/cuda/cpython/builtins.py index 8a5eeac09..71d833956 100644 --- a/numba_cuda/numba/cuda/cpython/builtins.py +++ b/numba_cuda/numba/cuda/cpython/builtins.py @@ -446,7 +446,7 @@ def next_impl(context, builder, sig, args): res = call_iternext(context, builder, iterty, iterval) with builder.if_then(builder.not_(res.is_valid()), likely=False): - context.call_conv.return_user_exc(builder, StopIteration, ()) + context.fndesc.call_conv.return_user_exc(builder, StopIteration, ()) return res.yielded_value() diff --git a/numba_cuda/numba/cuda/cpython/charseq.py b/numba_cuda/numba/cuda/cpython/charseq.py index d6738a6f1..b964aecc9 100644 --- a/numba_cuda/numba/cuda/cpython/charseq.py +++ b/numba_cuda/numba/cuda/cpython/charseq.py @@ -244,7 +244,7 @@ def unicode_to_bytes_cast(context, builder, fromty, toty, val): src_length = uni_str.length with builder.if_then(notkind1): - context.call_conv.return_user_exc( + context.fndesc.call_conv.return_user_exc( builder, ValueError, ("cannot cast higher than 8-bit unicode_type to bytes",), @@ -315,7 +315,7 @@ def unicode_to_unicode_charseq(context, builder, fromty, toty, val): in_val = builder.zext(builder.load(in_ptr), dstint_t) builder.store(in_val, builder.gep(dst, [loop.index])) else: - context.call_conv.return_user_exc( + context.fndesc.call_conv.return_user_exc( builder, ValueError, ( @@ -331,7 +331,7 @@ def unicode_to_unicode_charseq(context, builder, fromty, toty, val): in_val = builder.zext(builder.load(in_ptr), dstint_t) builder.store(in_val, builder.gep(dst, [loop.index])) else: - context.call_conv.return_user_exc( + context.fndesc.call_conv.return_user_exc( builder, ValueError, ( diff --git a/numba_cuda/numba/cuda/cpython/iterators.py b/numba_cuda/numba/cuda/cpython/iterators.py index df7a87b75..e3606070f 100644 --- a/numba_cuda/numba/cuda/cpython/iterators.py +++ b/numba_cuda/numba/cuda/cpython/iterators.py @@ -164,4 +164,4 @@ def iternext_zip(context, builder, sig, args, result): # noqa: F811 builder, builder.and_(status.is_error, builder.not_(status.is_stop_iteration)), ): - context.call_conv.return_status_propagate(builder, status) + context.fndesc.call_conv.return_status_propagate(builder, status) diff --git a/numba_cuda/numba/cuda/cpython/listobj.py b/numba_cuda/numba/cuda/cpython/listobj.py index 68be31663..9e2f1532f 100644 --- a/numba_cuda/numba/cuda/cpython/listobj.py +++ b/numba_cuda/numba/cuda/cpython/listobj.py @@ -117,7 +117,7 @@ def guard_index(self, idx, msg): Raise an error if the index is out of bounds. """ with self._builder.if_then(self.is_out_of_bounds(idx), likely=False): - self._context.call_conv.return_user_exc( + self._context.fndesc.call_conv.return_user_exc( self._builder, IndexError, (msg,) ) @@ -347,7 +347,7 @@ def allocate(cls, context, builder, list_type, nitems): """ ok, self = cls.allocate_ex(context, builder, list_type, nitems) with builder.if_then(builder.not_(ok), likely=False): - context.call_conv.return_user_exc( + context.fndesc.call_conv.return_user_exc( builder, MemoryError, ("cannot allocate list",) ) return self @@ -384,7 +384,7 @@ def _payload_realloc(new_allocated): ir.Constant(intp_t, payload_size), ) with builder.if_then(ovf, likely=False): - context.call_conv.return_user_exc( + context.fndesc.call_conv.return_user_exc( builder, MemoryError, ("cannot resize list",) ) @@ -654,7 +654,9 @@ def setitem_list(context, builder, sig, args): # noqa: F811 with otherwise: with builder.if_then(builder.icmp_signed("!=", size_delta, zero)): msg = "cannot resize extended list slice with step != 1" - context.call_conv.return_user_exc(builder, ValueError, (msg,)) + context.fndesc.call_conv.return_user_exc( + builder, ValueError, (msg,) + ) with cgutils.for_range_slice_generic( builder, slice.start, slice.stop, slice.step @@ -693,7 +695,9 @@ def delitem_list(context, builder, sig, args): builder.icmp_signed("!=", slice.step, one), likely=False ): msg = "unsupported del list[start:stop:step] with step != 1" - context.call_conv.return_user_exc(builder, NotImplementedError, (msg,)) + context.fndesc.call_conv.return_user_exc( + builder, NotImplementedError, (msg,) + ) # Compute the real stop, e.g. for dest[2:0] start = slice.start diff --git a/numba_cuda/numba/cuda/cpython/numbers.py b/numba_cuda/numba/cuda/cpython/numbers.py index d3a24b934..4ffe3ad86 100644 --- a/numba_cuda/numba/cuda/cpython/numbers.py +++ b/numba_cuda/numba/cuda/cpython/numbers.py @@ -671,7 +671,8 @@ def real_divmod(context, builder, x, y): floatty = x.type module = builder.module - fname = context.mangler(".numba.python.rem", [x.type]) + call_conv = context.fndesc.call_conv + fname = call_conv.mangler(".numba.python.rem", [x.type]) fnty = ir.FunctionType(floatty, (floatty, floatty, ir.PointerType(floatty))) fn = cgutils.get_or_insert_function(module, fnty, fname) diff --git a/numba_cuda/numba/cuda/cpython/rangeobj.py b/numba_cuda/numba/cuda/cpython/rangeobj.py index 00a75cba8..99b7903d1 100644 --- a/numba_cuda/numba/cuda/cpython/rangeobj.py +++ b/numba_cuda/numba/cuda/cpython/rangeobj.py @@ -158,7 +158,7 @@ def from_range_state(cls, context, builder, state): with cgutils.if_unlikely(builder, zero_step): # step shouldn't be zero - context.call_conv.return_user_exc( + context.fndesc.call_conv.return_user_exc( builder, ValueError, ("range() arg 3 must not be zero",) ) diff --git a/numba_cuda/numba/cuda/cpython/slicing.py b/numba_cuda/numba/cuda/cpython/slicing.py index 0899db994..ed9ae163f 100644 --- a/numba_cuda/numba/cuda/cpython/slicing.py +++ b/numba_cuda/numba/cuda/cpython/slicing.py @@ -240,13 +240,13 @@ def slice_indices(context, builder, sig, args): sli = context.make_helper(builder, sig.args[0], args[0]) with builder.if_then(cgutils.is_neg_int(builder, length), likely=False): - context.call_conv.return_user_exc( + context.fndesc.call_conv.return_user_exc( builder, ValueError, ("length should not be negative",) ) with builder.if_then( cgutils.is_scalar_zero(builder, sli.step), likely=False ): - context.call_conv.return_user_exc( + context.fndesc.call_conv.return_user_exc( builder, ValueError, ("slice step cannot be zero",) ) diff --git a/numba_cuda/numba/cuda/cpython/tupleobj.py b/numba_cuda/numba/cuda/cpython/tupleobj.py index 5292d412d..0483a3960 100644 --- a/numba_cuda/numba/cuda/cpython/tupleobj.py +++ b/numba_cuda/numba/cuda/cpython/tupleobj.py @@ -251,7 +251,9 @@ def getitem_typed(context, builder, sig, args): # Always branch and raise IndexError with builder.if_then(cgutils.true_bit): - context.call_conv.return_user_exc(builder, IndexError, errmsg_oob) + context.fndesc.call_conv.return_user_exc( + builder, IndexError, errmsg_oob + ) # This is unreachable in runtime, # but it exists to not terminate the current basicblock. res = context.get_constant_null(sig.return_type) @@ -264,7 +266,9 @@ def getitem_typed(context, builder, sig, args): switch = builder.switch(idx, bbelse) with builder.goto_block(bbelse): - context.call_conv.return_user_exc(builder, IndexError, errmsg_oob) + context.fndesc.call_conv.return_user_exc( + builder, IndexError, errmsg_oob + ) lrtty = context.get_value_type(sig.return_type) voidptrty = context.get_value_type(types.voidptr) @@ -345,7 +349,9 @@ def getitem_unituple(context, builder, sig, args): # Always branch and raise IndexError with builder.if_then(cgutils.true_bit): - context.call_conv.return_user_exc(builder, IndexError, errmsg_oob) + context.fndesc.call_conv.return_user_exc( + builder, IndexError, errmsg_oob + ) # This is unreachable in runtime, # but it exists to not terminate the current basicblock. res = context.get_constant_null(sig.return_type) @@ -357,7 +363,9 @@ def getitem_unituple(context, builder, sig, args): switch = builder.switch(idx, bbelse) with builder.goto_block(bbelse): - context.call_conv.return_user_exc(builder, IndexError, errmsg_oob) + context.fndesc.call_conv.return_user_exc( + builder, IndexError, errmsg_oob + ) lrtty = context.get_value_type(tupty.dtype) with builder.goto_block(bbend): diff --git a/numba_cuda/numba/cuda/decorators.py b/numba_cuda/numba/cuda/decorators.py index b2ff5932e..6a9f9301a 100644 --- a/numba_cuda/numba/cuda/decorators.py +++ b/numba_cuda/numba/cuda/decorators.py @@ -285,7 +285,7 @@ def autojitwrapper(func): return disp -def declare_device(name, sig, link=None, use_cooperative=False): +def declare_device(name, sig, link=None, use_cooperative=False, abi="numba"): """ Declare the signature of a foreign function. Returns a descriptor that can be used to call the function from a Python kernel. @@ -295,7 +295,11 @@ def declare_device(name, sig, link=None, use_cooperative=False): :param sig: The Numba signature of the function. :param link: External code to link when calling the function. :param use_cooperative: External code requires cooperative launch. + :param abi: The ABI to use for the function. "numba" for Numba ABI, "c" for C ABI. """ + if abi not in ("numba", "c"): + raise NotImplementedError(f"Unsupported ABI: {abi}") + if link is None: link = tuple() else: @@ -308,7 +312,7 @@ def declare_device(name, sig, link=None, use_cooperative=False): raise TypeError(msg) template = declare_device_function( - name, restype, argtypes, link, use_cooperative + name, restype, argtypes, link, use_cooperative, abi ) return template.key diff --git a/numba_cuda/numba/cuda/flags.py b/numba_cuda/numba/cuda/flags.py index 37ff995b6..51d9eb658 100644 --- a/numba_cuda/numba/cuda/flags.py +++ b/numba_cuda/numba/cuda/flags.py @@ -9,6 +9,8 @@ InlineOptions, ) +from numba.cuda.core.callconv import BaseCallConv + class Flags(TargetConfig): __slots__ = () @@ -161,6 +163,24 @@ def _optional_int_type(x): return x +def _call_conv_options_type(x): + if x is None: + return None + + else: + assert isinstance(x, BaseCallConv) + return x + + +def _abi_info_options_type(x): + if x is None: + return {} + + else: + assert isinstance(x, dict) + return x + + class CUDAFlags(Flags): nvvm_options = Option( type=_nvvm_options_type, @@ -176,3 +196,7 @@ class CUDAFlags(Flags): type=_optional_int_type, default=None, doc="Max registers" ) lto = Option(type=bool, default=False, doc="Enable Link-time Optimization") + + call_conv = Option(type=_call_conv_options_type, default=None, doc="") + + abi_info = Option(type=_abi_info_options_type, default=None, doc="ABI info") diff --git a/numba_cuda/numba/cuda/lowering.py b/numba_cuda/numba/cuda/lowering.py index c0b18b40a..6184aff06 100644 --- a/numba_cuda/numba/cuda/lowering.py +++ b/numba_cuda/numba/cuda/lowering.py @@ -99,7 +99,7 @@ def __init__(self, context, library, fndesc, func_ir, metadata=None): @property def call_conv(self): - return self.context.call_conv + return self.fndesc.call_conv def init(self): pass @@ -354,7 +354,7 @@ def create_cfunc_wrapper(self): def setup_function(self, fndesc): # Setup function - self.function = self.context.declare_function(self.module, fndesc) + self.function = fndesc.declare_function(self.module) if self.flags.dbg_optnone: attrset = self.function.attributes if "alwaysinline" not in attrset: @@ -1209,7 +1209,7 @@ def _lower_call_RecursiveCall(self, fnty, expr, signature): expr.kws, ) rec_ov = fnty.get_overloads(signature.args) - mangler = self.context.mangler or default_mangler + mangler = self.call_conv.mangler or default_mangler abi_tags = self.fndesc.abi_tags mangled_name = mangler( rec_ov.qualname, signature.args, abi_tags=abi_tags, uid=rec_ov.uid diff --git a/numba_cuda/numba/cuda/memory_management/nrt_context.py b/numba_cuda/numba/cuda/memory_management/nrt_context.py index 3e171779a..f3e6e5f73 100644 --- a/numba_cuda/numba/cuda/memory_management/nrt_context.py +++ b/numba_cuda/numba/cuda/memory_management/nrt_context.py @@ -415,7 +415,7 @@ def get_nrt_api(self, builder): def eh_check(self, builder): """Check if an exception is raised""" ctx = self._context - cc = ctx.call_conv + cc = ctx.fndesc.call_conv # Inspect the excinfo argument on the function trystatus = cc.check_try_status(builder) excinfo = trystatus.excinfo @@ -428,11 +428,11 @@ def eh_check(self, builder): def eh_try(self, builder): """Begin a try-block.""" ctx = self._context - cc = ctx.call_conv + cc = ctx.fndesc.call_conv cc.set_try_status(builder) def eh_end_try(self, builder): """End a try-block""" ctx = self._context - cc = ctx.call_conv + cc = ctx.fndesc.call_conv cc.unset_try_status(builder) diff --git a/numba_cuda/numba/cuda/misc/gdb_hook.py b/numba_cuda/numba/cuda/misc/gdb_hook.py index 2c8bd3c8d..805f952e7 100644 --- a/numba_cuda/numba/cuda/misc/gdb_hook.py +++ b/numba_cuda/numba/cuda/misc/gdb_hook.py @@ -162,14 +162,14 @@ def init_gdb_codegen( invalid_write = builder.icmp_signed(">", stat, int32_t(12)) with builder.if_then(invalid_write, likely=False): msg = "Internal error: `snprintf` buffer would have overflowed." - cgctx.call_conv.return_user_exc(builder, RuntimeError, (msg,)) + cgctx.fndesc.call_conv.return_user_exc(builder, RuntimeError, (msg,)) # fork, check pids etc child_pid = builder.call(fork, tuple()) fork_failed = builder.icmp_signed("==", child_pid, int32_t(-1)) with builder.if_then(fork_failed, likely=False): msg = "Internal error: `fork` failed." - cgctx.call_conv.return_user_exc(builder, RuntimeError, (msg,)) + cgctx.fndesc.call_conv.return_user_exc(builder, RuntimeError, (msg,)) is_child = builder.icmp_signed("==", child_pid, zero_i32t) with builder.if_else(is_child) as (then, orelse): diff --git a/numba_cuda/numba/cuda/np/arrayobj.py b/numba_cuda/numba/cuda/np/arrayobj.py index 115fb63f2..2fcaf4d8c 100644 --- a/numba_cuda/numba/cuda/np/arrayobj.py +++ b/numba_cuda/numba/cuda/np/arrayobj.py @@ -672,7 +672,7 @@ def array_item(context, builder, sig, args): builder.icmp_signed("!=", nitems, nitems.type(1)), likely=False ): msg = "item(): can only convert an array of size 1 to a Python scalar" - context.call_conv.return_user_exc(builder, ValueError, (msg,)) + context.fndesc.call_conv.return_user_exc(builder, ValueError, (msg,)) return load_item(context, builder, aryty, ary.data) @@ -691,7 +691,9 @@ def array_itemset(context, builder, sig, args): builder.icmp_signed("!=", nitems, nitems.type(1)), likely=False ): msg = "itemset(): can only write to an array of size 1" - context.call_conv.return_user_exc(builder, ValueError, (msg,)) + context.fndesc.call_conv.return_user_exc( + builder, ValueError, (msg,) + ) store_item(context, builder, aryty, val, ary.data) return context.get_dummy_value() @@ -1478,7 +1480,9 @@ def _bc_adjust_dimension(context, builder, shapes, strides, target_shape): # Check error with builder.if_then(builder.not_(accepted), likely=False): msg = "cannot broadcast source array for assignment" - context.call_conv.return_user_exc(builder, ValueError, (msg,)) + context.fndesc.call_conv.return_user_exc( + builder, ValueError, (msg,) + ) # Truncate extra shapes, strides shapes = shapes[nd_diff:] strides = strides[nd_diff:] @@ -2422,7 +2426,9 @@ def array_reshape(context, builder, sig, args): with builder.if_then(fail): msg = "incompatible shape for array" - context.call_conv.return_user_exc(builder, NotImplementedError, (msg,)) + context.fndesc.call_conv.return_user_exc( + builder, NotImplementedError, (msg,) + ) ret = make_array(retty)(context, builder) populate_array( @@ -3095,7 +3101,7 @@ def array_view(context, builder, sig, args): with builder.if_then(fail): msg = "new type not compatible with array" - context.call_conv.return_user_exc(builder, ValueError, (msg,)) + context.fndesc.call_conv.return_user_exc(builder, ValueError, (msg,)) res = ret._getvalue() return impl_ret_borrowed(context, builder, sig.return_type, res) @@ -4720,7 +4726,7 @@ def _empty_nd_impl(context, builder, arrtype, shapes): with builder.if_then(overflow, likely=False): # Raise same error as numpy, see: # https://github.com/numpy/numpy/blob/2a488fe76a0f732dc418d03b452caace161673da/numpy/core/src/multiarray/ctors.c#L1095-L1101 # noqa: E501 - context.call_conv.return_user_exc( + context.fndesc.call_conv.return_user_exc( builder, ValueError, ( @@ -4804,7 +4810,7 @@ def safecast_intp(context, builder, src_t, src): elif src_t.width >= intp_width: is_larger = builder.icmp_signed(">", src, maxval) with builder.if_then(is_larger, likely=False): - context.call_conv.return_user_exc( + context.fndesc.call_conv.return_user_exc( builder, ValueError, ("Cannot safely convert value to intp",), @@ -4829,7 +4835,7 @@ def safecast_intp(context, builder, src_t, src): for shape in shapes: is_neg = builder.icmp_signed("<", shape, zero) with cgutils.if_unlikely(builder, is_neg): - context.call_conv.return_user_exc( + context.fndesc.call_conv.return_user_exc( builder, ValueError, ("negative dimensions not allowed",) ) @@ -5759,7 +5765,9 @@ def codegen(context, builder, sig, args): is_incompatible = cgutils.is_not_null(builder, rem) with builder.if_then(is_incompatible, likely=False): msg = "buffer size must be a multiple of element size" - context.call_conv.return_user_exc(builder, ValueError, (msg,)) + context.fndesc.call_conv.return_user_exc( + builder, ValueError, (msg,) + ) shape = cgutils.pack_array(builder, [builder.sdiv(nbytes, ll_itemsize)]) strides = cgutils.pack_array(builder, [ll_itemsize]) @@ -6014,7 +6022,7 @@ def check_sequence_shape(context, builder, seqty, seq, shapes): """ def _fail(): - context.call_conv.return_user_exc( + context.fndesc.call_conv.return_user_exc( builder, ValueError, ("incompatible sequence shape",) ) @@ -6169,7 +6177,7 @@ def _normalize_axis(context, builder, func_name, ndim, axis): ) with builder.if_then(axis_out_of_bounds, likely=False): msg = "%s(): axis out of bounds" % func_name - context.call_conv.return_user_exc(builder, IndexError, (msg,)) + context.fndesc.call_conv.return_user_exc(builder, IndexError, (msg,)) return axis @@ -6489,7 +6497,7 @@ def _np_concatenate(context, builder, arrtys, arrs, retty, axis): is_ok, builder.icmp_signed("==", sh, ret_sh) ) with builder.if_then(builder.not_(is_ok), likely=False): - context.call_conv.return_user_exc( + context.fndesc.call_conv.return_user_exc( builder, ValueError, ( @@ -6538,7 +6546,7 @@ def _np_stack(context, builder, arrtys, arrs, retty, axis): ): is_ok = builder.and_(is_ok, builder.icmp_signed("==", sh, orig_sh)) with builder.if_then(builder.not_(is_ok), likely=False): - context.call_conv.return_user_exc( + context.fndesc.call_conv.return_user_exc( builder, ValueError, ("np.stack(): all input arrays must have the same shape",), diff --git a/numba_cuda/numba/cuda/np/math/numbers.py b/numba_cuda/numba/cuda/np/math/numbers.py index cf0875b9c..b4b20cd49 100644 --- a/numba_cuda/numba/cuda/np/math/numbers.py +++ b/numba_cuda/numba/cuda/np/math/numbers.py @@ -657,7 +657,8 @@ def real_divmod(context, builder, x, y): floatty = x.type module = builder.module - fname = context.mangler(".numba.python.rem", [x.type]) + call_conv = context.fndesc.call_conv + fname = call_conv.mangler(".numba.python.rem", [x.type]) fnty = ir.FunctionType(floatty, (floatty, floatty, ir.PointerType(floatty))) fn = cgutils.get_or_insert_function(module, fnty, fname) diff --git a/numba_cuda/numba/cuda/np/npyimpl.py b/numba_cuda/numba/cuda/np/npyimpl.py index a25779d9a..a43e011eb 100644 --- a/numba_cuda/numba/cuda/np/npyimpl.py +++ b/numba_cuda/numba/cuda/np/npyimpl.py @@ -507,7 +507,9 @@ def make_intp_const(val): if loc is not None: msg += '\nFile "%s", line %d, ' % (loc.filename, loc.line) - context.call_conv.return_user_exc(builder, ValueError, (msg,)) + context.fndesc.call_conv.return_user_exc( + builder, ValueError, (msg,) + ) real_array_ty = array_ty.as_array @@ -799,11 +801,11 @@ def generate(self, *args): for val, inty, outty in zip(args, osig.args, isig.args) ] if self.cres.objectmode: - func_type = self.context.call_conv.get_function_type( + func_type = self.context.fndesc.call_conv.get_function_type( types.pyobject, [types.pyobject] * len(isig.args) ) else: - func_type = self.context.call_conv.get_function_type( + func_type = self.context.fndesc.call_conv.get_function_type( isig.return_type, isig.args ) module = self.builder.block.function.module @@ -812,7 +814,7 @@ def generate(self, *args): ) entry_point.attributes.add("alwaysinline") - _, res = self.context.call_conv.call_function( + _, res = self.context.fndesc.call_conv.call_function( self.builder, entry_point, isig.return_type, isig.args, cast_args ) return self.cast(res, isig.return_type, osig.return_type) diff --git a/numba_cuda/numba/cuda/np/polynomial/polynomial_core.py b/numba_cuda/numba/cuda/np/polynomial/polynomial_core.py index d7b497abe..1e6dc266e 100644 --- a/numba_cuda/numba/cuda/np/polynomial/polynomial_core.py +++ b/numba_cuda/numba/cuda/np/polynomial/polynomial_core.py @@ -149,12 +149,12 @@ def to_double(coef): pred2 = builder.icmp_signed("!=", s2, two) with cgutils.if_unlikely(builder, pred1): - context.call_conv.return_user_exc( + context.fndesc.call_conv.return_user_exc( builder, ValueError, ("Domain has wrong number of elements.",) ) with cgutils.if_unlikely(builder, pred2): - context.call_conv.return_user_exc( + context.fndesc.call_conv.return_user_exc( builder, ValueError, ("Window has wrong number of elements.",) ) diff --git a/numba_cuda/numba/cuda/target.py b/numba_cuda/numba/cuda/target.py index 8e2c9a4fb..cbe39946a 100644 --- a/numba_cuda/numba/cuda/target.py +++ b/numba_cuda/numba/cuda/target.py @@ -29,7 +29,6 @@ from numba.cuda.debuginfo import CUDADIBuilder from numba.cuda.flags import CUDAFlags from numba.cuda.models import cuda_data_manager -from numba.cuda.core.callconv import BaseCallConv, MinimalCallConv from numba.cuda.core import config, targetconfig @@ -266,14 +265,14 @@ def nonconst_module_attrs(self): ) return nonconsts_with_mod - @cached_property + @property def call_conv(self): - return CUDACallConv(self) - - def mangler(self, name, argtypes, *, abi_tags=(), uid=None): - return itanium_mangler.mangle( - name, argtypes, abi_tags=abi_tags, uid=uid + warnings.warn( + "Context.call_conv is deprecated. " + "Use FunctionDescriptor.call_conv instead.", + DeprecationWarning, ) + return self.fndesc.call_conv def make_constant_array(self, builder, aryty, arr): """ @@ -422,84 +421,3 @@ def _compile_subroutine_no_cache( # Allow inlining the function inside callers self.active_code_library.add_linking_library(cres.library) return cres - - -class CUDACallConv(MinimalCallConv): - def decorate_function(self, fn, args, fe_argtypes, noalias=False): - """ - Set names and attributes of function arguments. - """ - assert not noalias - arginfo = self._get_arg_packer(fe_argtypes) - # Do not prefix "arg." on argument name, so that nvvm compiler - # can track debug info of argument more accurately - arginfo.assign_names(self.get_arguments(fn), args) - fn.args[0].name = ".ret" - - -class CUDACABICallConv(BaseCallConv): - """ - Calling convention aimed at matching the CUDA C/C++ ABI. The implemented - function signature is: - - () - - Exceptions are unsupported in this convention. - """ - - def _make_call_helper(self, builder): - # Call helpers are used to help report exceptions back to Python, so - # none is required here. - return None - - def return_value(self, builder, retval): - return builder.ret(retval) - - def return_user_exc( - self, builder, exc, exc_args=None, loc=None, func_name=None - ): - msg = "Python exceptions are unsupported in the CUDA C/C++ ABI" - raise NotImplementedError(msg) - - def return_status_propagate(self, builder, status): - msg = "Return status is unsupported in the CUDA C/C++ ABI" - raise NotImplementedError(msg) - - def get_function_type(self, restype, argtypes): - """ - Get the LLVM IR Function type for *restype* and *argtypes*. - """ - arginfo = self._get_arg_packer(argtypes) - argtypes = list(arginfo.argument_types) - fnty = ir.FunctionType(self.get_return_type(restype), argtypes) - return fnty - - def decorate_function(self, fn, args, fe_argtypes, noalias=False): - """ - Set names and attributes of function arguments. - """ - assert not noalias - arginfo = self._get_arg_packer(fe_argtypes) - arginfo.assign_names(self.get_arguments(fn), ["arg." + a for a in args]) - - def get_arguments(self, func): - """ - Get the Python-level arguments of LLVM *func*. - """ - return func.args - - def call_function(self, builder, callee, resty, argtys, args): - """ - Call the Numba-compiled *callee*. - """ - arginfo = self._get_arg_packer(argtys) - realargs = arginfo.as_arguments(builder, args) - code = builder.call(callee, realargs) - # No status required as we don't support exceptions or a distinct None - # value in a C ABI. - status = None - out = self.context.get_returned_value(builder, resty, code) - return status, out - - def get_return_type(self, ty): - return self.context.data_model_manager[ty].get_return_type() diff --git a/numba_cuda/numba/cuda/tests/cudapy/test_compiler.py b/numba_cuda/numba/cuda/tests/cudapy/test_compiler.py index ee3691ca1..c2b66bb67 100644 --- a/numba_cuda/numba/cuda/tests/cudapy/test_compiler.py +++ b/numba_cuda/numba/cuda/tests/cudapy/test_compiler.py @@ -179,7 +179,6 @@ def check_debug_info(self, ptx): # in that module anyway. So this test can only be expected to fail until we # have a proper way of generating device functions with the C ABI without # requiring the hack of generating a wrapper. - @unittest.expectedFailure def test_device_function_with_debug(self): # See Issue #6719 - this ensures that compilation with debug succeeds # with CUDA 11.2 / NVVM 7.0 onwards. Previously it failed because NVVM diff --git a/numba_cuda/numba/cuda/tests/cudapy/test_device_func.py b/numba_cuda/numba/cuda/tests/cudapy/test_device_func.py index 3fa62728b..6698e6337 100644 --- a/numba_cuda/numba/cuda/tests/cudapy/test_device_func.py +++ b/numba_cuda/numba/cuda/tests/cudapy/test_device_func.py @@ -496,5 +496,139 @@ def kernel(r, x): np.testing.assert_equal(r, x * 5) +times2_cabi_cu = cuda.CUSource(""" +extern "C" __device__ +int times2(int a) +{ + return a * 2; +} +""") + +# 0-arg, 2-arg, and void-return C-ABI device functions. +const42_cabi_cu = cuda.CUSource(""" +extern "C" __device__ +int const42() +{ + return 42; +} +""") + +add2_cabi_cu = cuda.CUSource(""" +extern "C" __device__ +int add2(int a, int b) +{ + return a + b; +} +""") + +consume_cabi_cu = cuda.CUSource(""" +extern "C" __device__ +void consume(int a) +{ + // Prevent trivial removal while still having no externally-visible effect. + asm volatile("" : : "r"(a) : "memory"); +} +""") + +consume_cabi_pointer_cu = cuda.CUSource(""" +extern "C" __device__ +void consume(int *a) +{ + *a = 42; +} +""") + + +@skip_on_cudasim("External functions unsupported in the simulator") +class TestDeclareDeviceCABI(CUDATestCase): + def test_declare_device_cabi_basic(self): + times2 = cuda.declare_device( + "times2", "int32(int32)", link=times2_cabi_cu, abi="c" + ) + + @cuda.jit + def kernel(r, x): + i = cuda.grid(1) + if i < len(r): + r[i] = times2(x[i]) + + x = np.arange(10, dtype=np.int32) + r = np.empty_like(x) + kernel[1, 32](r, x) + np.testing.assert_equal(r, x * 2) + + def test_declare_device_cabi_zero_args(self): + const42 = cuda.declare_device( + "const42", "int32()", link=const42_cabi_cu, abi="c" + ) + + @cuda.jit + def kernel(r, x): + i = cuda.grid(1) + if i < len(r): + r[i] = x[i] + const42() + + x = np.arange(10, dtype=np.int32) + r = np.empty_like(x) + kernel[1, 32](r, x) + np.testing.assert_equal(r, x + 42) + + def test_declare_device_cabi_two_args(self): + add2 = cuda.declare_device( + "add2", "int32(int32, int32)", link=add2_cabi_cu, abi="c" + ) + + @cuda.jit + def kernel(r, x): + i = cuda.grid(1) + if i < len(r): + r[i] = add2(x[i], i) + + x = np.arange(10, dtype=np.int32) + r = np.empty_like(x) + kernel[1, 32](r, x) + np.testing.assert_equal(r, x + np.arange(10, dtype=np.int32)) + + def test_declare_device_cabi_void_return(self): + consume = cuda.declare_device( + "consume", "void(int32)", link=consume_cabi_cu, abi="c" + ) + + @cuda.jit + def kernel(r, x): + i = cuda.grid(1) + if i < len(r): + # The call itself is what we want to validate; output must remain + # correct, proving compilation + linking + invocation succeeded. + consume(x[i]) + r[i] = x[i] * 2 + + x = np.arange(10, dtype=np.int32) + r = np.empty_like(x) + kernel[1, 32](r, x) + np.testing.assert_equal(r, x * 2) + + def test_declare_device_cabi_pointer_return(self): + ffi = cffi.FFI() + consume = cuda.declare_device( + "consume", + types.void(types.CPointer(types.int32)), + link=consume_cabi_pointer_cu, + abi="c", + ) + + @cuda.jit + def kernel(x): + i = cuda.grid(1) + if i < len(x): + ptr = ffi.from_buffer(x[i:]) + consume(ptr) + + x = np.zeros(10, dtype=np.int32) + + kernel[1, 32](x) + np.testing.assert_equal(x, 42) + + if __name__ == "__main__": unittest.main()