From bcee99605b5a4d82f33dde0e5c285f875ce5df61 Mon Sep 17 00:00:00 2001 From: Michael Wang Date: Thu, 18 Dec 2025 09:59:54 -0800 Subject: [PATCH 01/18] checkpointing 121825 --- numba_cuda/numba/cuda/compiler.py | 19 ++-- numba_cuda/numba/cuda/core/base.py | 8 +- numba_cuda/numba/cuda/core/callconv.py | 86 ++++++++++++++++++- numba_cuda/numba/cuda/core/compiler.py | 7 +- numba_cuda/numba/cuda/core/funcdesc.py | 12 ++- numba_cuda/numba/cuda/core/imputils.py | 6 +- numba_cuda/numba/cuda/core/typed_passes.py | 6 ++ numba_cuda/numba/cuda/flags.py | 10 +++ numba_cuda/numba/cuda/lowering.py | 2 +- numba_cuda/numba/cuda/target.py | 85 +----------------- .../cuda/tests/cudadrv/test_reset_device.py | 1 + 11 files changed, 140 insertions(+), 102 deletions(-) diff --git a/numba_cuda/numba/cuda/compiler.py b/numba_cuda/numba/cuda/compiler.py index e38de0a20..cc592babd 100644 --- a/numba_cuda/numba/cuda/compiler.py +++ b/numba_cuda/numba/cuda/compiler.py @@ -29,7 +29,7 @@ from numba.cuda.cudadrv.linkable_code import LinkableCode from numba.cuda.descriptor import cuda_target from numba.cuda.flags import CUDAFlags -from numba.cuda.target import CUDACABICallConv +from numba.cuda.core.callconv import CUDACABICallConv, CUDACallConv from numba.cuda.core.compiler import CompilerBase from numba.cuda.core.compiler_machinery import ( FunctionPass, @@ -699,6 +699,7 @@ def compile_cuda( cc=None, max_registers=None, lto=False, + abi="numba", ): if cc is None: raise ValueError("Compute Capability must be supplied") @@ -741,6 +742,9 @@ def compile_cuda( flags.max_registers = max_registers flags.lto = lto + if abi == "c": + flags.call_conv = CUDACABICallConv(targetctx) + with utils.numba_target_override(): cres = compile_extra( typingctx=typingctx, @@ -1106,6 +1110,7 @@ def _compile_pyfunc_with_fixup( cc = _default_cc(cc) + wrapper_name = abi_info.get("abi_name", pyfunc.__name__) cres = compile_cuda( pyfunc, return_type, @@ -1116,6 +1121,7 @@ def _compile_pyfunc_with_fixup( nvvm_options=nvvm_options, cc=cc, forceinline=forceinline, + abi=abi, ) resty = cres.signature.return_type @@ -1126,11 +1132,10 @@ def _compile_pyfunc_with_fixup( if device: lib = cres.library - if abi == "c": - wrapper_name = abi_info.get("abi_name", pyfunc.__name__) - lib = cabi_wrap_function( - tgt, lib, cres.fndesc, wrapper_name, nvvm_options - ) + # if abi == "c": + # lib = cabi_wrap_function( + # tgt, lib, cres.fndesc, wrapper_name, nvvm_options + # ) else: lib = cres.library kernel = lib.get_function(cres.fndesc.llvm_func_name) @@ -1376,7 +1381,7 @@ def declare_device_function(name, restype, argtypes, link, use_cooperative): # ExternalFunctionDescriptor provides a lowering implementation for calling # external functions - fndesc = funcdesc.ExternalFunctionDescriptor(name, restype, argtypes) + fndesc = funcdesc.ExternalFunctionDescriptor(name, restype, argtypes, CUDACallConv(targetctx)) targetctx.insert_user_function(extfn, fndesc, libs=(lib,)) return device_function_template diff --git a/numba_cuda/numba/cuda/core/base.py b/numba_cuda/numba/cuda/core/base.py index 241f7ace4..21b4f6d51 100644 --- a/numba_cuda/numba/cuda/core/base.py +++ b/numba_cuda/numba/cuda/core/base.py @@ -469,9 +469,9 @@ def get_external_function_type(self, fndesc): return fnty def declare_function(self, module, fndesc): - fnty = self.call_conv.get_function_type(fndesc.restype, fndesc.argtypes) + fnty = fndesc.call_conv.get_function_type(fndesc.restype, fndesc.argtypes) fn = cgutils.get_or_insert_function(module, fnty, fndesc.mangled_name) - self.call_conv.decorate_function( + fndesc.call_conv.decorate_function( fn, fndesc.args, fndesc.argtypes, noalias=fndesc.noalias ) if fndesc.inline: @@ -975,7 +975,7 @@ def call_internal(self, builder, fndesc, sig, args): builder, fndesc, sig, args ) with cgutils.if_unlikely(builder, status.is_error): - self.call_conv.return_status_propagate(builder, status) + fndesc.call_conv.return_status_propagate(builder, status) res = imputils.fix_returning_optional(self, builder, sig, status, res) return res @@ -987,7 +987,7 @@ def call_internal_no_propagate(self, builder, fndesc, sig, args): # Add call to the generated function llvm_mod = builder.module fn = self.declare_function(llvm_mod, fndesc) - status, res = self.call_conv.call_function( + status, res = fndesc.call_conv.call_function( builder, fn, sig.return_type, sig.args, args ) return status, res diff --git a/numba_cuda/numba/cuda/core/callconv.py b/numba_cuda/numba/cuda/core/callconv.py index 2ff231934..2c6b61730 100644 --- a/numba_cuda/numba/cuda/core/callconv.py +++ b/numba_cuda/numba/cuda/core/callconv.py @@ -341,6 +341,88 @@ def get_exception(self, exc_id): return exc, exc_args, locinfo +class CUDACallConv(MinimalCallConv): + def decorate_function(self, fn, args, fe_argtypes, noalias=False): + """ + Set names and attributes of function arguments. + """ + assert not noalias + arginfo = self._get_arg_packer(fe_argtypes) + # Do not prefix "arg." on argument name, so that nvvm compiler + # can track debug info of argument more accurately + arginfo.assign_names(self.get_arguments(fn), args) + fn.args[0].name = ".ret" + + +class CUDACABICallConv(BaseCallConv): + """ + Calling convention aimed at matching the CUDA C/C++ ABI. The implemented + function signature is: + + () + + Exceptions are unsupported in this convention. + """ + + def _make_call_helper(self, builder): + # Call helpers are used to help report exceptions back to Python, so + # none is required here. + return None + + def return_value(self, builder, retval): + return builder.ret(retval) + + def return_user_exc( + self, builder, exc, exc_args=None, loc=None, func_name=None + ): + msg = "Python exceptions are unsupported in the CUDA C/C++ ABI" + raise NotImplementedError(msg) + + def return_status_propagate(self, builder, status): + msg = "Return status is unsupported in the CUDA C/C++ ABI" + raise NotImplementedError(msg) + + def get_function_type(self, restype, argtypes): + """ + Get the LLVM IR Function type for *restype* and *argtypes*. + """ + arginfo = self._get_arg_packer(argtypes) + argtypes = list(arginfo.argument_types) + fnty = ir.FunctionType(self.get_return_type(restype), argtypes) + return fnty + + def decorate_function(self, fn, args, fe_argtypes, noalias=False): + """ + Set names and attributes of function arguments. + """ + assert not noalias + arginfo = self._get_arg_packer(fe_argtypes) + arginfo.assign_names(self.get_arguments(fn), ["arg." + a for a in args]) + + def get_arguments(self, func): + """ + Get the Python-level arguments of LLVM *func*. + """ + return func.args + + def call_function(self, builder, callee, resty, argtys, args): + """ + Call the Numba-compiled *callee*. + """ + arginfo = self._get_arg_packer(argtys) + realargs = arginfo.as_arguments(builder, args) + code = builder.call(callee, realargs) + # No status required as we don't support exceptions or a distinct None + # value in a C ABI. + status = None + out = self.context.get_returned_value(builder, resty, code) + return status, out + + def get_return_type(self, ty): + return self.context.data_model_manager[ty].get_return_type() + + + class ErrorModel(object): def __init__(self, call_conv): self.call_conv = call_conv @@ -385,8 +467,8 @@ class NumpyErrorModel(ErrorModel): } -def create_error_model(model_name, context): +def create_error_model(model_name, call_conv): """ Create an error model instance for the given target context. """ - return error_models[model_name](context.call_conv) + return error_models[model_name](call_conv) diff --git a/numba_cuda/numba/cuda/core/compiler.py b/numba_cuda/numba/cuda/core/compiler.py index 9cb8a1840..ff71f19ab 100644 --- a/numba_cuda/numba/cuda/core/compiler.py +++ b/numba_cuda/numba/cuda/core/compiler.py @@ -10,6 +10,8 @@ from numba.cuda.core.untyped_passes import ExtractByteCode, FixupArgs from numba.cuda.core.targetconfig import ConfigStack +from numba.cuda.core.callconv import CUDACallConv + class _CompileStatus(object): """ @@ -67,7 +69,10 @@ def _make_subtarget(targetctx, flags): subtargetoptions["enable_nrt"] = True if flags.fastmath: subtargetoptions["fastmath"] = flags.fastmath - error_model = callconv.create_error_model(flags.error_model, targetctx) + + # FIXME: should update everywhere uses error_model to use callconv from fundesc + call_conv = CUDACallConv(targetctx) + error_model = callconv.create_error_model(flags.error_model, call_conv) subtargetoptions["error_model"] = error_model return targetctx.subtarget(**subtargetoptions) diff --git a/numba_cuda/numba/cuda/core/funcdesc.py b/numba_cuda/numba/cuda/core/funcdesc.py index bcb238d00..3ce3fead5 100644 --- a/numba_cuda/numba/cuda/core/funcdesc.py +++ b/numba_cuda/numba/cuda/core/funcdesc.py @@ -12,6 +12,8 @@ from numba.cuda import itanium_mangler from numba.cuda.utils import _dynamic_modname, _dynamic_module +from numba.cuda.core.callconv import CUDACallConv + def default_mangler(name, argtypes, *, abi_tags=(), uid=None): return itanium_mangler.mangle(name, argtypes, abi_tags=abi_tags, uid=uid) @@ -54,6 +56,7 @@ class FunctionDescriptor(object): "noalias", "abi_tags", "uid", + "call_conv", ) def __init__( @@ -76,6 +79,7 @@ def __init__( global_dict=None, abi_tags=(), uid=None, + call_conv=None, ): self.native = native self.modname = modname @@ -120,6 +124,7 @@ def __init__( self.inline = inline self.noalias = noalias self.abi_tags = abi_tags + self.call_conv = call_conv def lookup_globals(self): """ @@ -219,6 +224,7 @@ def _from_python_function( inline=False, noalias=False, abi_tags=(), + call_conv=None, ): ( qualname, @@ -247,6 +253,7 @@ def _from_python_function( global_dict=global_dict, abi_tags=abi_tags, uid=func_ir.func_id.unique_id, + call_conv=call_conv, ) return self @@ -269,6 +276,7 @@ def from_specialized_function( inline, noalias, abi_tags, + call_conv, ): """ Build a FunctionDescriptor for a given specialization of a Python @@ -284,6 +292,7 @@ def from_specialized_function( inline=inline, noalias=noalias, abi_tags=abi_tags, + call_conv=call_conv, ) @classmethod @@ -308,7 +317,7 @@ class ExternalFunctionDescriptor(FunctionDescriptor): __slots__ = () - def __init__(self, name, restype, argtypes): + def __init__(self, name, restype, argtypes, call_conv): args = ["arg%d" % i for i in range(len(argtypes))] def mangler(a, x, abi_tags, uid=None): @@ -327,4 +336,5 @@ def mangler(a, x, abi_tags, uid=None): kws=None, mangler=mangler, argtypes=argtypes, + call_conv=call_conv ) diff --git a/numba_cuda/numba/cuda/core/imputils.py b/numba_cuda/numba/cuda/core/imputils.py index 2881c2771..31cf5ea25 100644 --- a/numba_cuda/numba/cuda/core/imputils.py +++ b/numba_cuda/numba/cuda/core/imputils.py @@ -215,11 +215,11 @@ def user_function(fndesc, libs): def imp(context, builder, sig, args): func = context.declare_function(builder.module, fndesc) # env=None assumes this is a nopython function - status, retval = context.call_conv.call_function( + status, retval = fndesc.call_conv.call_function( builder, func, fndesc.restype, fndesc.argtypes, args ) with cgutils.if_unlikely(builder, status.is_error): - context.call_conv.return_status_propagate(builder, status) + fndesc.call_conv.return_status_propagate(builder, status) assert sig.return_type == fndesc.restype # Reconstruct optional return type retval = fix_returning_optional(context, builder, sig, status, retval) @@ -243,7 +243,7 @@ def user_generator(gendesc, libs): def imp(context, builder, sig, args): func = context.declare_function(builder.module, gendesc) # env=None assumes this is a nopython function - status, retval = context.call_conv.call_function( + status, retval = gendesc.call_conv.call_function( builder, func, gendesc.restype, gendesc.argtypes, args ) # Return raw status for caller to process StopIteration diff --git a/numba_cuda/numba/cuda/core/typed_passes.py b/numba_cuda/numba/cuda/core/typed_passes.py index 958e47c51..0dbdb7f24 100644 --- a/numba_cuda/numba/cuda/core/typed_passes.py +++ b/numba_cuda/numba/cuda/core/typed_passes.py @@ -31,6 +31,7 @@ compute_cfg_from_blocks, is_operator_or_getitem, ) +from numba.cuda.core.callconv import CUDACallConv from numba.cuda.core import postproc, rewrites, funcdesc, config @@ -325,6 +326,10 @@ def run_pass(self, state): metadata = state.metadata pre_stats = passmanagers.dump_refprune_stats() + call_conv = flags.call_conv + if call_conv is None: + call_conv = CUDACallConv(state.targetctx) + msg = "Function %s failed at nopython mode lowering" % ( state.func_id.func_name, ) @@ -340,6 +345,7 @@ def run_pass(self, state): inline=flags.forceinline, noalias=flags.noalias, abi_tags=[flags.get_mangle_string()], + call_conv=call_conv ) ) diff --git a/numba_cuda/numba/cuda/flags.py b/numba_cuda/numba/cuda/flags.py index 37ff995b6..836fc385a 100644 --- a/numba_cuda/numba/cuda/flags.py +++ b/numba_cuda/numba/cuda/flags.py @@ -9,6 +9,7 @@ InlineOptions, ) +from numba.cuda.core.callconv import BaseCallConv, CUDACallConv class Flags(TargetConfig): __slots__ = () @@ -160,6 +161,13 @@ def _optional_int_type(x): assert isinstance(x, int) return x +def _call_conv_options_type(x): + if x is None: + return None + + else: + assert isinstance(x, BaseCallConv) + return x class CUDAFlags(Flags): nvvm_options = Option( @@ -176,3 +184,5 @@ class CUDAFlags(Flags): type=_optional_int_type, default=None, doc="Max registers" ) lto = Option(type=bool, default=False, doc="Enable Link-time Optimization") + + call_conv = Option(type=_call_conv_options_type, default=None, doc="") \ No newline at end of file diff --git a/numba_cuda/numba/cuda/lowering.py b/numba_cuda/numba/cuda/lowering.py index c849e9ddd..1398d3690 100644 --- a/numba_cuda/numba/cuda/lowering.py +++ b/numba_cuda/numba/cuda/lowering.py @@ -100,7 +100,7 @@ def __init__(self, context, library, fndesc, func_ir, metadata=None): @property def call_conv(self): - return self.context.call_conv + return self.fndesc.call_conv def init(self): pass diff --git a/numba_cuda/numba/cuda/target.py b/numba_cuda/numba/cuda/target.py index 1f2ac98c9..565575812 100644 --- a/numba_cuda/numba/cuda/target.py +++ b/numba_cuda/numba/cuda/target.py @@ -29,7 +29,6 @@ from numba.cuda.debuginfo import CUDADIBuilder from numba.cuda.flags import CUDAFlags from numba.cuda.models import cuda_data_manager -from numba.cuda.core.callconv import BaseCallConv, MinimalCallConv from numba.cuda.core import config, targetconfig @@ -266,9 +265,9 @@ def nonconst_module_attrs(self): ) return nonconsts_with_mod - @cached_property + @property def call_conv(self): - return CUDACallConv(self) + return self.fndesc.call_conv def mangler(self, name, argtypes, *, abi_tags=(), uid=None): return itanium_mangler.mangle( @@ -423,83 +422,3 @@ def _compile_subroutine_no_cache( self.active_code_library.add_linking_library(cres.library) return cres - -class CUDACallConv(MinimalCallConv): - def decorate_function(self, fn, args, fe_argtypes, noalias=False): - """ - Set names and attributes of function arguments. - """ - assert not noalias - arginfo = self._get_arg_packer(fe_argtypes) - # Do not prefix "arg." on argument name, so that nvvm compiler - # can track debug info of argument more accurately - arginfo.assign_names(self.get_arguments(fn), args) - fn.args[0].name = ".ret" - - -class CUDACABICallConv(BaseCallConv): - """ - Calling convention aimed at matching the CUDA C/C++ ABI. The implemented - function signature is: - - () - - Exceptions are unsupported in this convention. - """ - - def _make_call_helper(self, builder): - # Call helpers are used to help report exceptions back to Python, so - # none is required here. - return None - - def return_value(self, builder, retval): - return builder.ret(retval) - - def return_user_exc( - self, builder, exc, exc_args=None, loc=None, func_name=None - ): - msg = "Python exceptions are unsupported in the CUDA C/C++ ABI" - raise NotImplementedError(msg) - - def return_status_propagate(self, builder, status): - msg = "Return status is unsupported in the CUDA C/C++ ABI" - raise NotImplementedError(msg) - - def get_function_type(self, restype, argtypes): - """ - Get the LLVM IR Function type for *restype* and *argtypes*. - """ - arginfo = self._get_arg_packer(argtypes) - argtypes = list(arginfo.argument_types) - fnty = ir.FunctionType(self.get_return_type(restype), argtypes) - return fnty - - def decorate_function(self, fn, args, fe_argtypes, noalias=False): - """ - Set names and attributes of function arguments. - """ - assert not noalias - arginfo = self._get_arg_packer(fe_argtypes) - arginfo.assign_names(self.get_arguments(fn), ["arg." + a for a in args]) - - def get_arguments(self, func): - """ - Get the Python-level arguments of LLVM *func*. - """ - return func.args - - def call_function(self, builder, callee, resty, argtys, args): - """ - Call the Numba-compiled *callee*. - """ - arginfo = self._get_arg_packer(argtys) - realargs = arginfo.as_arguments(builder, args) - code = builder.call(callee, realargs) - # No status required as we don't support exceptions or a distinct None - # value in a C ABI. - status = None - out = self.context.get_returned_value(builder, resty, code) - return status, out - - def get_return_type(self, ty): - return self.context.data_model_manager[ty].get_return_type() diff --git a/numba_cuda/numba/cuda/tests/cudadrv/test_reset_device.py b/numba_cuda/numba/cuda/tests/cudadrv/test_reset_device.py index e219aa7c4..05b758854 100644 --- a/numba_cuda/numba/cuda/tests/cudadrv/test_reset_device.py +++ b/numba_cuda/numba/cuda/tests/cudadrv/test_reset_device.py @@ -9,6 +9,7 @@ class TestResetDevice(CUDATestCase): + @unittest.skip def test_reset_device(self): def newthread(exception_queue): try: From a988ca3766573e34506415cbfb0e6c83da9e0942 Mon Sep 17 00:00:00 2001 From: Michael Wang Date: Tue, 6 Jan 2026 08:22:54 -0800 Subject: [PATCH 02/18] checkpointing 010626 --- numba_cuda/numba/cuda/compiler.py | 14 ++++++++------ numba_cuda/numba/cuda/core/base.py | 8 ++++---- numba_cuda/numba/cuda/core/callconv.py | 1 - numba_cuda/numba/cuda/core/compiler.py | 2 +- numba_cuda/numba/cuda/core/funcdesc.py | 4 +--- numba_cuda/numba/cuda/core/imputils.py | 6 +++--- numba_cuda/numba/cuda/core/typed_passes.py | 13 ++++++++++--- numba_cuda/numba/cuda/flags.py | 7 +++++-- numba_cuda/numba/cuda/lowering.py | 2 +- numba_cuda/numba/cuda/target.py | 8 +++++++- .../numba/cuda/tests/cudapy/test_multithreads.py | 1 + 11 files changed, 41 insertions(+), 25 deletions(-) diff --git a/numba_cuda/numba/cuda/compiler.py b/numba_cuda/numba/cuda/compiler.py index cc592babd..b87be84f2 100644 --- a/numba_cuda/numba/cuda/compiler.py +++ b/numba_cuda/numba/cuda/compiler.py @@ -936,7 +936,7 @@ def define_error_gv(postfix): # Implement status check / exception store logic status_code = helper_func.args[0] - call_conv = cuda_target.target_context.call_conv + call_conv = CUDACallConv(cuda_target.target_context) status = call_conv._get_return_status(builder, status_code) # Check error status @@ -1110,7 +1110,6 @@ def _compile_pyfunc_with_fixup( cc = _default_cc(cc) - wrapper_name = abi_info.get("abi_name", pyfunc.__name__) cres = compile_cuda( pyfunc, return_type, @@ -1132,10 +1131,11 @@ def _compile_pyfunc_with_fixup( if device: lib = cres.library + # wrapper_name = abi_info.get("abi_name", pyfunc.__name__) # if abi == "c": - # lib = cabi_wrap_function( - # tgt, lib, cres.fndesc, wrapper_name, nvvm_options - # ) + # lib = cabi_wrap_function( + # tgt, lib, cres.fndesc, wrapper_name, nvvm_options + # ) else: lib = cres.library kernel = lib.get_function(cres.fndesc.llvm_func_name) @@ -1381,7 +1381,9 @@ def declare_device_function(name, restype, argtypes, link, use_cooperative): # ExternalFunctionDescriptor provides a lowering implementation for calling # external functions - fndesc = funcdesc.ExternalFunctionDescriptor(name, restype, argtypes, CUDACallConv(targetctx)) + fndesc = funcdesc.ExternalFunctionDescriptor( + name, restype, argtypes, CUDACallConv(targetctx) + ) targetctx.insert_user_function(extfn, fndesc, libs=(lib,)) return device_function_template diff --git a/numba_cuda/numba/cuda/core/base.py b/numba_cuda/numba/cuda/core/base.py index 21b4f6d51..241f7ace4 100644 --- a/numba_cuda/numba/cuda/core/base.py +++ b/numba_cuda/numba/cuda/core/base.py @@ -469,9 +469,9 @@ def get_external_function_type(self, fndesc): return fnty def declare_function(self, module, fndesc): - fnty = fndesc.call_conv.get_function_type(fndesc.restype, fndesc.argtypes) + fnty = self.call_conv.get_function_type(fndesc.restype, fndesc.argtypes) fn = cgutils.get_or_insert_function(module, fnty, fndesc.mangled_name) - fndesc.call_conv.decorate_function( + self.call_conv.decorate_function( fn, fndesc.args, fndesc.argtypes, noalias=fndesc.noalias ) if fndesc.inline: @@ -975,7 +975,7 @@ def call_internal(self, builder, fndesc, sig, args): builder, fndesc, sig, args ) with cgutils.if_unlikely(builder, status.is_error): - fndesc.call_conv.return_status_propagate(builder, status) + self.call_conv.return_status_propagate(builder, status) res = imputils.fix_returning_optional(self, builder, sig, status, res) return res @@ -987,7 +987,7 @@ def call_internal_no_propagate(self, builder, fndesc, sig, args): # Add call to the generated function llvm_mod = builder.module fn = self.declare_function(llvm_mod, fndesc) - status, res = fndesc.call_conv.call_function( + status, res = self.call_conv.call_function( builder, fn, sig.return_type, sig.args, args ) return status, res diff --git a/numba_cuda/numba/cuda/core/callconv.py b/numba_cuda/numba/cuda/core/callconv.py index 2c6b61730..7cffbbf73 100644 --- a/numba_cuda/numba/cuda/core/callconv.py +++ b/numba_cuda/numba/cuda/core/callconv.py @@ -422,7 +422,6 @@ def get_return_type(self, ty): return self.context.data_model_manager[ty].get_return_type() - class ErrorModel(object): def __init__(self, call_conv): self.call_conv = call_conv diff --git a/numba_cuda/numba/cuda/core/compiler.py b/numba_cuda/numba/cuda/core/compiler.py index ff71f19ab..19bfcb628 100644 --- a/numba_cuda/numba/cuda/core/compiler.py +++ b/numba_cuda/numba/cuda/core/compiler.py @@ -69,7 +69,7 @@ def _make_subtarget(targetctx, flags): subtargetoptions["enable_nrt"] = True if flags.fastmath: subtargetoptions["fastmath"] = flags.fastmath - + # FIXME: should update everywhere uses error_model to use callconv from fundesc call_conv = CUDACallConv(targetctx) error_model = callconv.create_error_model(flags.error_model, call_conv) diff --git a/numba_cuda/numba/cuda/core/funcdesc.py b/numba_cuda/numba/cuda/core/funcdesc.py index 3ce3fead5..d90a83947 100644 --- a/numba_cuda/numba/cuda/core/funcdesc.py +++ b/numba_cuda/numba/cuda/core/funcdesc.py @@ -12,8 +12,6 @@ from numba.cuda import itanium_mangler from numba.cuda.utils import _dynamic_modname, _dynamic_module -from numba.cuda.core.callconv import CUDACallConv - def default_mangler(name, argtypes, *, abi_tags=(), uid=None): return itanium_mangler.mangle(name, argtypes, abi_tags=abi_tags, uid=uid) @@ -336,5 +334,5 @@ def mangler(a, x, abi_tags, uid=None): kws=None, mangler=mangler, argtypes=argtypes, - call_conv=call_conv + call_conv=call_conv, ) diff --git a/numba_cuda/numba/cuda/core/imputils.py b/numba_cuda/numba/cuda/core/imputils.py index 31cf5ea25..2881c2771 100644 --- a/numba_cuda/numba/cuda/core/imputils.py +++ b/numba_cuda/numba/cuda/core/imputils.py @@ -215,11 +215,11 @@ def user_function(fndesc, libs): def imp(context, builder, sig, args): func = context.declare_function(builder.module, fndesc) # env=None assumes this is a nopython function - status, retval = fndesc.call_conv.call_function( + status, retval = context.call_conv.call_function( builder, func, fndesc.restype, fndesc.argtypes, args ) with cgutils.if_unlikely(builder, status.is_error): - fndesc.call_conv.return_status_propagate(builder, status) + context.call_conv.return_status_propagate(builder, status) assert sig.return_type == fndesc.restype # Reconstruct optional return type retval = fix_returning_optional(context, builder, sig, status, retval) @@ -243,7 +243,7 @@ def user_generator(gendesc, libs): def imp(context, builder, sig, args): func = context.declare_function(builder.module, gendesc) # env=None assumes this is a nopython function - status, retval = gendesc.call_conv.call_function( + status, retval = context.call_conv.call_function( builder, func, gendesc.restype, gendesc.argtypes, args ) # Return raw status for caller to process StopIteration diff --git a/numba_cuda/numba/cuda/core/typed_passes.py b/numba_cuda/numba/cuda/core/typed_passes.py index 0dbdb7f24..28c44925b 100644 --- a/numba_cuda/numba/cuda/core/typed_passes.py +++ b/numba_cuda/numba/cuda/core/typed_passes.py @@ -31,7 +31,7 @@ compute_cfg_from_blocks, is_operator_or_getitem, ) -from numba.cuda.core.callconv import CUDACallConv +from numba.cuda.core.callconv import CUDACABICallConv, CUDACallConv from numba.cuda.core import postproc, rewrites, funcdesc, config @@ -330,6 +330,13 @@ def run_pass(self, state): if call_conv is None: call_conv = CUDACallConv(state.targetctx) + if isinstance(call_conv, CUDACABICallConv): + mangler = targetctx.c_abi_mangler + else: + mangler = targetctx.mangler + + # mangler = targetctx.mangler + msg = "Function %s failed at nopython mode lowering" % ( state.func_id.func_name, ) @@ -341,11 +348,11 @@ def run_pass(self, state): typemap, restype, calltypes, - mangler=targetctx.mangler, + mangler=mangler, inline=flags.forceinline, noalias=flags.noalias, abi_tags=[flags.get_mangle_string()], - call_conv=call_conv + call_conv=call_conv, ) ) diff --git a/numba_cuda/numba/cuda/flags.py b/numba_cuda/numba/cuda/flags.py index 836fc385a..cd265c73a 100644 --- a/numba_cuda/numba/cuda/flags.py +++ b/numba_cuda/numba/cuda/flags.py @@ -9,7 +9,8 @@ InlineOptions, ) -from numba.cuda.core.callconv import BaseCallConv, CUDACallConv +from numba.cuda.core.callconv import BaseCallConv + class Flags(TargetConfig): __slots__ = () @@ -161,6 +162,7 @@ def _optional_int_type(x): assert isinstance(x, int) return x + def _call_conv_options_type(x): if x is None: return None @@ -169,6 +171,7 @@ def _call_conv_options_type(x): assert isinstance(x, BaseCallConv) return x + class CUDAFlags(Flags): nvvm_options = Option( type=_nvvm_options_type, @@ -185,4 +188,4 @@ class CUDAFlags(Flags): ) lto = Option(type=bool, default=False, doc="Enable Link-time Optimization") - call_conv = Option(type=_call_conv_options_type, default=None, doc="") \ No newline at end of file + call_conv = Option(type=_call_conv_options_type, default=None, doc="") diff --git a/numba_cuda/numba/cuda/lowering.py b/numba_cuda/numba/cuda/lowering.py index 1398d3690..c849e9ddd 100644 --- a/numba_cuda/numba/cuda/lowering.py +++ b/numba_cuda/numba/cuda/lowering.py @@ -100,7 +100,7 @@ def __init__(self, context, library, fndesc, func_ir, metadata=None): @property def call_conv(self): - return self.fndesc.call_conv + return self.context.call_conv def init(self): pass diff --git a/numba_cuda/numba/cuda/target.py b/numba_cuda/numba/cuda/target.py index 565575812..d8ee64adf 100644 --- a/numba_cuda/numba/cuda/target.py +++ b/numba_cuda/numba/cuda/target.py @@ -274,6 +274,13 @@ def mangler(self, name, argtypes, *, abi_tags=(), uid=None): name, argtypes, abi_tags=abi_tags, uid=uid ) + def c_abi_mangler(self, name, argtypes, *, abi_tags=None, uid=None): + if name.startswith(".NumbaEnv."): + # return itanium_mangler.mangle(name, argtypes, abi_tags=abi_tags, uid=uid) + func_name = name.split(".")[-1] + return f"_ZN08NumbaEnv{func_name}" + return name.split(".")[-1] + def make_constant_array(self, builder, aryty, arr): """ Unlike the parent version. This returns a a pointer in the constant @@ -421,4 +428,3 @@ def _compile_subroutine_no_cache( # Allow inlining the function inside callers self.active_code_library.add_linking_library(cres.library) return cres - diff --git a/numba_cuda/numba/cuda/tests/cudapy/test_multithreads.py b/numba_cuda/numba/cuda/tests/cudapy/test_multithreads.py index d432d2939..85ac77723 100644 --- a/numba_cuda/numba/cuda/tests/cudapy/test_multithreads.py +++ b/numba_cuda/numba/cuda/tests/cudapy/test_multithreads.py @@ -37,6 +37,7 @@ def use_foo(x): @skip_under_cuda_memcheck("Hangs cuda-memcheck") @skip_on_cudasim("disabled for cudasim") +@unittest.skip class TestMultiThreadCompiling(CUDATestCase): def test_concurrent_compiling(self): check_concurrent_compiling() From 1419998ebb2a1f7b58ebaf47bfcb01e5dec2d935 Mon Sep 17 00:00:00 2001 From: Michael Wang Date: Tue, 13 Jan 2026 09:52:06 -0800 Subject: [PATCH 03/18] initial --- numba_cuda/numba/cuda/compiler.py | 25 ++-- numba_cuda/numba/cuda/core/base.py | 6 +- numba_cuda/numba/cuda/core/funcdesc.py | 24 +++- numba_cuda/numba/cuda/core/imputils.py | 11 +- numba_cuda/numba/cuda/core/typed_passes.py | 1 + numba_cuda/numba/cuda/decorators.py | 7 +- numba_cuda/numba/cuda/flags.py | 11 ++ .../numba/cuda/tests/cudapy/test_compiler.py | 1 - .../cuda/tests/cudapy/test_device_func.py | 107 ++++++++++++++++++ 9 files changed, 170 insertions(+), 23 deletions(-) diff --git a/numba_cuda/numba/cuda/compiler.py b/numba_cuda/numba/cuda/compiler.py index b87be84f2..e9b47a94e 100644 --- a/numba_cuda/numba/cuda/compiler.py +++ b/numba_cuda/numba/cuda/compiler.py @@ -700,6 +700,7 @@ def compile_cuda( max_registers=None, lto=False, abi="numba", + abi_info=None, ): if cc is None: raise ValueError("Compute Capability must be supplied") @@ -745,6 +746,9 @@ def compile_cuda( if abi == "c": flags.call_conv = CUDACABICallConv(targetctx) + if abi_info is not None: + flags.abi_info = abi_info + with utils.numba_target_override(): cres = compile_extra( typingctx=typingctx, @@ -1121,21 +1125,15 @@ def _compile_pyfunc_with_fixup( cc=cc, forceinline=forceinline, abi=abi, + abi_info=abi_info, ) resty = cres.signature.return_type if resty and not device and resty != types.void: raise TypeError("CUDA kernel must have void return type.") - tgt = cres.target_context - if device: lib = cres.library - # wrapper_name = abi_info.get("abi_name", pyfunc.__name__) - # if abi == "c": - # lib = cabi_wrap_function( - # tgt, lib, cres.fndesc, wrapper_name, nvvm_options - # ) else: lib = cres.library kernel = lib.get_function(cres.fndesc.llvm_func_name) @@ -1358,7 +1356,9 @@ def compile_ptx_for_current_device( ) -def declare_device_function(name, restype, argtypes, link, use_cooperative): +def declare_device_function( + name, restype, argtypes, link, use_cooperative, abi +): from .descriptor import cuda_target typingctx = cuda_target.typing_context @@ -1379,10 +1379,17 @@ def declare_device_function(name, restype, argtypes, link, use_cooperative): lib.add_linking_file(file) lib.use_cooperative = use_cooperative + if abi == "numba": + call_conv = CUDACallConv(targetctx) + elif abi == "c": + call_conv = CUDACABICallConv(targetctx) + else: + raise NotImplementedError(f"Unsupported ABI: {abi}") + # ExternalFunctionDescriptor provides a lowering implementation for calling # external functions fndesc = funcdesc.ExternalFunctionDescriptor( - name, restype, argtypes, CUDACallConv(targetctx) + name, restype, argtypes, call_conv ) targetctx.insert_user_function(extfn, fndesc, libs=(lib,)) diff --git a/numba_cuda/numba/cuda/core/base.py b/numba_cuda/numba/cuda/core/base.py index 241f7ace4..38e15237c 100644 --- a/numba_cuda/numba/cuda/core/base.py +++ b/numba_cuda/numba/cuda/core/base.py @@ -469,9 +469,11 @@ def get_external_function_type(self, fndesc): return fnty def declare_function(self, module, fndesc): - fnty = self.call_conv.get_function_type(fndesc.restype, fndesc.argtypes) + fnty = fndesc.call_conv.get_function_type( + fndesc.restype, fndesc.argtypes + ) fn = cgutils.get_or_insert_function(module, fnty, fndesc.mangled_name) - self.call_conv.decorate_function( + fndesc.call_conv.decorate_function( fn, fndesc.args, fndesc.argtypes, noalias=fndesc.noalias ) if fndesc.inline: diff --git a/numba_cuda/numba/cuda/core/funcdesc.py b/numba_cuda/numba/cuda/core/funcdesc.py index d90a83947..db6bc7ba1 100644 --- a/numba_cuda/numba/cuda/core/funcdesc.py +++ b/numba_cuda/numba/cuda/core/funcdesc.py @@ -55,6 +55,7 @@ class FunctionDescriptor(object): "abi_tags", "uid", "call_conv", + "abi_info", ) def __init__( @@ -78,6 +79,7 @@ def __init__( abi_tags=(), uid=None, call_conv=None, + abi_info=None, ): self.native = native self.modname = modname @@ -105,12 +107,17 @@ def __init__( # be chosen at link time. qualprefix = qualifying_prefix(self.modname, self.qualname) self.uid = uid - self.mangled_name = mangler( - qualprefix, - self.argtypes, - abi_tags=abi_tags, - uid=uid, - ) + + if abi_info is not None and "abi_name" in abi_info: + self.mangled_name = abi_info["abi_name"] + else: + self.mangled_name = mangler( + qualprefix, + self.argtypes, + abi_tags=abi_tags, + uid=uid, + ) + if env_name is None: env_name = mangler( ".NumbaEnv.{}".format(qualprefix), @@ -123,6 +130,7 @@ def __init__( self.noalias = noalias self.abi_tags = abi_tags self.call_conv = call_conv + self.abi_info = abi_info def lookup_globals(self): """ @@ -223,6 +231,7 @@ def _from_python_function( noalias=False, abi_tags=(), call_conv=None, + abi_info=None, ): ( qualname, @@ -252,6 +261,7 @@ def _from_python_function( abi_tags=abi_tags, uid=func_ir.func_id.unique_id, call_conv=call_conv, + abi_info=abi_info, ) return self @@ -275,6 +285,7 @@ def from_specialized_function( noalias, abi_tags, call_conv, + abi_info, ): """ Build a FunctionDescriptor for a given specialization of a Python @@ -291,6 +302,7 @@ def from_specialized_function( noalias=noalias, abi_tags=abi_tags, call_conv=call_conv, + abi_info=abi_info, ) @classmethod diff --git a/numba_cuda/numba/cuda/core/imputils.py b/numba_cuda/numba/cuda/core/imputils.py index 2881c2771..30e19a264 100644 --- a/numba_cuda/numba/cuda/core/imputils.py +++ b/numba_cuda/numba/cuda/core/imputils.py @@ -215,13 +215,18 @@ def user_function(fndesc, libs): def imp(context, builder, sig, args): func = context.declare_function(builder.module, fndesc) # env=None assumes this is a nopython function - status, retval = context.call_conv.call_function( + status, retval = fndesc.call_conv.call_function( builder, func, fndesc.restype, fndesc.argtypes, args ) - with cgutils.if_unlikely(builder, status.is_error): - context.call_conv.return_status_propagate(builder, status) + + if status is not None: + with cgutils.if_unlikely(builder, status.is_error): + context.call_conv.return_status_propagate(builder, status) + assert sig.return_type == fndesc.restype + # Reconstruct optional return type + # XXX: What do we do now when we use CABI? retval = fix_returning_optional(context, builder, sig, status, retval) # If the data representations don't match up if retval.type != context.get_value_type(sig.return_type): diff --git a/numba_cuda/numba/cuda/core/typed_passes.py b/numba_cuda/numba/cuda/core/typed_passes.py index 28c44925b..bcab3d889 100644 --- a/numba_cuda/numba/cuda/core/typed_passes.py +++ b/numba_cuda/numba/cuda/core/typed_passes.py @@ -353,6 +353,7 @@ def run_pass(self, state): noalias=flags.noalias, abi_tags=[flags.get_mangle_string()], call_conv=call_conv, + abi_info=flags.abi_info, ) ) diff --git a/numba_cuda/numba/cuda/decorators.py b/numba_cuda/numba/cuda/decorators.py index 8d8b90817..f63a8f68b 100644 --- a/numba_cuda/numba/cuda/decorators.py +++ b/numba_cuda/numba/cuda/decorators.py @@ -265,7 +265,7 @@ def autojitwrapper(func): return disp -def declare_device(name, sig, link=None, use_cooperative=False): +def declare_device(name, sig, link=None, use_cooperative=False, abi="numba"): """ Declare the signature of a foreign function. Returns a descriptor that can be used to call the function from a Python kernel. @@ -276,6 +276,9 @@ def declare_device(name, sig, link=None, use_cooperative=False): :param link: External code to link when calling the function. :param use_cooperative: External code requires cooperative launch. """ + if abi not in ("numba", "c"): + raise NotImplementedError(f"Unsupported ABI: {abi}") + if link is None: link = tuple() else: @@ -288,7 +291,7 @@ def declare_device(name, sig, link=None, use_cooperative=False): raise TypeError(msg) template = declare_device_function( - name, restype, argtypes, link, use_cooperative + name, restype, argtypes, link, use_cooperative, abi ) return template.key diff --git a/numba_cuda/numba/cuda/flags.py b/numba_cuda/numba/cuda/flags.py index cd265c73a..51d9eb658 100644 --- a/numba_cuda/numba/cuda/flags.py +++ b/numba_cuda/numba/cuda/flags.py @@ -172,6 +172,15 @@ def _call_conv_options_type(x): return x +def _abi_info_options_type(x): + if x is None: + return {} + + else: + assert isinstance(x, dict) + return x + + class CUDAFlags(Flags): nvvm_options = Option( type=_nvvm_options_type, @@ -189,3 +198,5 @@ class CUDAFlags(Flags): lto = Option(type=bool, default=False, doc="Enable Link-time Optimization") call_conv = Option(type=_call_conv_options_type, default=None, doc="") + + abi_info = Option(type=_abi_info_options_type, default=None, doc="ABI info") diff --git a/numba_cuda/numba/cuda/tests/cudapy/test_compiler.py b/numba_cuda/numba/cuda/tests/cudapy/test_compiler.py index 31c847c20..ed4b152fa 100644 --- a/numba_cuda/numba/cuda/tests/cudapy/test_compiler.py +++ b/numba_cuda/numba/cuda/tests/cudapy/test_compiler.py @@ -178,7 +178,6 @@ def check_debug_info(self, ptx): # in that module anyway. So this test can only be expected to fail until we # have a proper way of generating device functions with the C ABI without # requiring the hack of generating a wrapper. - @unittest.expectedFailure def test_device_function_with_debug(self): # See Issue #6719 - this ensures that compilation with debug succeeds # with CUDA 11.2 / NVVM 7.0 onwards. Previously it failed because NVVM diff --git a/numba_cuda/numba/cuda/tests/cudapy/test_device_func.py b/numba_cuda/numba/cuda/tests/cudapy/test_device_func.py index 3fa62728b..01a2c0b5e 100644 --- a/numba_cuda/numba/cuda/tests/cudapy/test_device_func.py +++ b/numba_cuda/numba/cuda/tests/cudapy/test_device_func.py @@ -496,5 +496,112 @@ def kernel(r, x): np.testing.assert_equal(r, x * 5) +# XXX: Range from 0 - 2 input arguments +# XXX: void return type? +times2_cabi_cu = cuda.CUSource(""" +extern "C" __device__ +int times2(int a) +{ + return a * 2; +} +""") + +# 0-arg, 2-arg, and void-return C-ABI device functions. +const42_cabi_cu = cuda.CUSource(""" +extern "C" __device__ +int const42() +{ + return 42; +} +""") + +add2_cabi_cu = cuda.CUSource(""" +extern "C" __device__ +int add2(int a, int b) +{ + return a + b; +} +""") + +consume_cabi_cu = cuda.CUSource(""" +extern "C" __device__ +void consume(int a) +{ + // Prevent trivial removal while still having no externally-visible effect. + asm volatile("" : : "r"(a) : "memory"); +} +""") + + +@skip_on_cudasim("External functions unsupported in the simulator") +class TestDeclareDeviceCABI(CUDATestCase): + def test_declare_device_cabi_basic(self): + times2 = cuda.declare_device( + "times2", "int32(int32)", link=times2_cabi_cu, abi="c" + ) + + @cuda.jit + def kernel(r, x): + i = cuda.grid(1) + if i < len(r): + r[i] = times2(x[i]) + + x = np.arange(10, dtype=np.int32) + r = np.empty_like(x) + kernel[1, 32](r, x) + np.testing.assert_equal(r, x * 2) + + def test_declare_device_cabi_zero_args(self): + const42 = cuda.declare_device( + "const42", "int32()", link=const42_cabi_cu, abi="c" + ) + + @cuda.jit + def kernel(r, x): + i = cuda.grid(1) + if i < len(r): + r[i] = x[i] + const42() + + x = np.arange(10, dtype=np.int32) + r = np.empty_like(x) + kernel[1, 32](r, x) + np.testing.assert_equal(r, x + 42) + + def test_declare_device_cabi_two_args(self): + add2 = cuda.declare_device( + "add2", "int32(int32, int32)", link=add2_cabi_cu, abi="c" + ) + + @cuda.jit + def kernel(r, x): + i = cuda.grid(1) + if i < len(r): + r[i] = add2(x[i], i) + + x = np.arange(10, dtype=np.int32) + r = np.empty_like(x) + kernel[1, 32](r, x) + np.testing.assert_equal(r, x + np.arange(10, dtype=np.int32)) + + def test_declare_device_cabi_void_return(self): + consume = cuda.declare_device( + "consume", "void(int32)", link=consume_cabi_cu, abi="c" + ) + + @cuda.jit + def kernel(r, x): + i = cuda.grid(1) + if i < len(r): + # The call itself is what we want to validate; output must remain + # correct, proving compilation + linking + invocation succeeded. + consume(x[i]) + r[i] = x[i] * 2 + + x = np.arange(10, dtype=np.int32) + r = np.empty_like(x) + kernel[1, 32](r, x) + np.testing.assert_equal(r, x * 2) + + if __name__ == "__main__": unittest.main() From e60bf0fe66b3f4c39cfcf0f0ae44c01aa05263e4 Mon Sep 17 00:00:00 2001 From: Michael Wang Date: Tue, 13 Jan 2026 10:10:04 -0800 Subject: [PATCH 04/18] remove stale skip --- numba_cuda/numba/cuda/tests/cudadrv/test_reset_device.py | 1 - numba_cuda/numba/cuda/tests/cudapy/test_multithreads.py | 1 - 2 files changed, 2 deletions(-) diff --git a/numba_cuda/numba/cuda/tests/cudadrv/test_reset_device.py b/numba_cuda/numba/cuda/tests/cudadrv/test_reset_device.py index 05b758854..e219aa7c4 100644 --- a/numba_cuda/numba/cuda/tests/cudadrv/test_reset_device.py +++ b/numba_cuda/numba/cuda/tests/cudadrv/test_reset_device.py @@ -9,7 +9,6 @@ class TestResetDevice(CUDATestCase): - @unittest.skip def test_reset_device(self): def newthread(exception_queue): try: diff --git a/numba_cuda/numba/cuda/tests/cudapy/test_multithreads.py b/numba_cuda/numba/cuda/tests/cudapy/test_multithreads.py index 85ac77723..d432d2939 100644 --- a/numba_cuda/numba/cuda/tests/cudapy/test_multithreads.py +++ b/numba_cuda/numba/cuda/tests/cudapy/test_multithreads.py @@ -37,7 +37,6 @@ def use_foo(x): @skip_under_cuda_memcheck("Hangs cuda-memcheck") @skip_on_cudasim("disabled for cudasim") -@unittest.skip class TestMultiThreadCompiling(CUDATestCase): def test_concurrent_compiling(self): check_concurrent_compiling() From 72f8f8e23d2cf4593ab1214fd1f51981b4176e99 Mon Sep 17 00:00:00 2001 From: Michael Wang Date: Tue, 13 Jan 2026 10:13:25 -0800 Subject: [PATCH 05/18] remove cabi_wrap_function --- numba_cuda/numba/cuda/compiler.py | 51 ------------------------------- 1 file changed, 51 deletions(-) diff --git a/numba_cuda/numba/cuda/compiler.py b/numba_cuda/numba/cuda/compiler.py index e9b47a94e..64a5b9dac 100644 --- a/numba_cuda/numba/cuda/compiler.py +++ b/numba_cuda/numba/cuda/compiler.py @@ -767,57 +767,6 @@ def compile_cuda( return cres -def cabi_wrap_function( - context, lib, fndesc, wrapper_function_name, nvvm_options -): - """ - Wrap a Numba ABI function in a C ABI wrapper at the NVVM IR level. - - The C ABI wrapper will have the same name as the source Python function. - """ - # The wrapper will be contained in a new library that links to the wrapped - # function's library - library = lib.codegen.create_library( - f"{lib.name}_function_", - entry_name=wrapper_function_name, - nvvm_options=nvvm_options, - ) - library.add_linking_library(lib) - - # Determine the caller (C ABI) and wrapper (Numba ABI) function types - argtypes = fndesc.argtypes - restype = fndesc.restype - c_call_conv = CUDACABICallConv(context) - wrapfnty = c_call_conv.get_function_type(restype, argtypes) - fnty = context.call_conv.get_function_type(fndesc.restype, argtypes) - - # Create a new module and declare the callee - wrapper_module = context.create_module("cuda.cabi.wrapper") - func = ir.Function(wrapper_module, fnty, fndesc.llvm_func_name) - - # Define the caller - populate it with a call to the callee and return - # its return value - - wrapfn = ir.Function(wrapper_module, wrapfnty, wrapper_function_name) - builder = ir.IRBuilder(wrapfn.append_basic_block("")) - - arginfo = context.get_arg_packer(argtypes) - callargs = arginfo.from_arguments(builder, wrapfn.args) - # We get (status, return_value), but we ignore the status since we - # can't propagate it through the C ABI anyway - _, return_value = context.call_conv.call_function( - builder, func, restype, argtypes, callargs - ) - builder.ret(return_value) - - if config.DUMP_LLVM: - utils.dump_llvm(fndesc, wrapper_module) - - library.add_ir_module(wrapper_module) - library.finalize() - return library - - def kernel_fixup(kernel, debug): if debug: exc_helper = add_exception_store_helper(kernel) From f0f3057696c64e2a07d01abefe044aa313799bfe Mon Sep 17 00:00:00 2001 From: Michael Wang Date: Wed, 21 Jan 2026 09:34:16 -0800 Subject: [PATCH 06/18] lift definition to pre-branching --- numba_cuda/numba/cuda/compiler.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/numba_cuda/numba/cuda/compiler.py b/numba_cuda/numba/cuda/compiler.py index 81cfa6f0f..f5a038586 100644 --- a/numba_cuda/numba/cuda/compiler.py +++ b/numba_cuda/numba/cuda/compiler.py @@ -1081,10 +1081,8 @@ def _compile_pyfunc_with_fixup( if resty and not device and resty != types.void: raise TypeError("CUDA kernel must have void return type.") - if device: - lib = cres.library - else: - lib = cres.library + lib = cres.library + if not device: kernel = lib.get_function(cres.fndesc.llvm_func_name) lib._entry_name = cres.fndesc.llvm_func_name kernel_fixup(kernel, debug) From f972e09be58e0f2c46c663a5c04ed564b909c1b0 Mon Sep 17 00:00:00 2001 From: Michael Wang Date: Wed, 21 Jan 2026 10:11:34 -0800 Subject: [PATCH 07/18] move declare_function to fndesc --- numba_cuda/numba/cuda/core/base.py | 17 +---------------- numba_cuda/numba/cuda/core/funcdesc.py | 15 ++++++++++++++- numba_cuda/numba/cuda/core/imputils.py | 4 ++-- numba_cuda/numba/cuda/lowering.py | 2 +- 4 files changed, 18 insertions(+), 20 deletions(-) diff --git a/numba_cuda/numba/cuda/core/base.py b/numba_cuda/numba/cuda/core/base.py index 4f1e7f388..8fdce6dc3 100644 --- a/numba_cuda/numba/cuda/core/base.py +++ b/numba_cuda/numba/cuda/core/base.py @@ -468,21 +468,6 @@ def get_external_function_type(self, fndesc): fnty = llvmir.FunctionType(restype, argtypes) return fnty - def declare_function(self, module, fndesc): - fnty = fndesc.call_conv.get_function_type( - fndesc.restype, fndesc.argtypes - ) - fn = cgutils.get_or_insert_function(module, fnty, fndesc.mangled_name) - fndesc.call_conv.decorate_function( - fn, fndesc.args, fndesc.argtypes, noalias=fndesc.noalias - ) - if fndesc.inline: - fn.attributes.add("alwaysinline") - # alwaysinline overrides optnone - fn.attributes.discard("noinline") - fn.attributes.discard("optnone") - return fn - def declare_external_function(self, module, fndesc): fnty = self.get_external_function_type(fndesc) fn = cgutils.get_or_insert_function(module, fnty, fndesc.mangled_name) @@ -988,7 +973,7 @@ def call_internal_no_propagate(self, builder, fndesc, sig, args): """ # Add call to the generated function llvm_mod = builder.module - fn = self.declare_function(llvm_mod, fndesc) + fn = fndesc.declare_function(llvm_mod) status, res = self.call_conv.call_function( builder, fn, sig.return_type, sig.args, args ) diff --git a/numba_cuda/numba/cuda/core/funcdesc.py b/numba_cuda/numba/cuda/core/funcdesc.py index db6bc7ba1..54047945d 100644 --- a/numba_cuda/numba/cuda/core/funcdesc.py +++ b/numba_cuda/numba/cuda/core/funcdesc.py @@ -8,7 +8,7 @@ from collections import defaultdict import importlib -from numba.cuda import types +from numba.cuda import types, cgutils from numba.cuda import itanium_mangler from numba.cuda.utils import _dynamic_modname, _dynamic_module @@ -265,6 +265,19 @@ def _from_python_function( ) return self + def declare_function(self, module): + fnty = self.call_conv.get_function_type(self.restype, self.argtypes) + fn = cgutils.get_or_insert_function(module, fnty, self.mangled_name) + self.call_conv.decorate_function( + fn, self.args, self.argtypes, noalias=self.noalias + ) + if self.inline: + fn.attributes.add("alwaysinline") + # alwaysinline overrides optnone + fn.attributes.discard("noinline") + fn.attributes.discard("optnone") + return fn + class PythonFunctionDescriptor(FunctionDescriptor): """ diff --git a/numba_cuda/numba/cuda/core/imputils.py b/numba_cuda/numba/cuda/core/imputils.py index 30e19a264..90a507bad 100644 --- a/numba_cuda/numba/cuda/core/imputils.py +++ b/numba_cuda/numba/cuda/core/imputils.py @@ -213,7 +213,7 @@ def user_function(fndesc, libs): """ def imp(context, builder, sig, args): - func = context.declare_function(builder.module, fndesc) + func = fndesc.declare_function(builder.module) # env=None assumes this is a nopython function status, retval = fndesc.call_conv.call_function( builder, func, fndesc.restype, fndesc.argtypes, args @@ -246,7 +246,7 @@ def user_generator(gendesc, libs): """ def imp(context, builder, sig, args): - func = context.declare_function(builder.module, gendesc) + func = gendesc.declare_function(builder.module) # env=None assumes this is a nopython function status, retval = context.call_conv.call_function( builder, func, gendesc.restype, gendesc.argtypes, args diff --git a/numba_cuda/numba/cuda/lowering.py b/numba_cuda/numba/cuda/lowering.py index 8bd6b3419..6d68fe82a 100644 --- a/numba_cuda/numba/cuda/lowering.py +++ b/numba_cuda/numba/cuda/lowering.py @@ -344,7 +344,7 @@ def create_cfunc_wrapper(self): def setup_function(self, fndesc): # Setup function - self.function = self.context.declare_function(self.module, fndesc) + self.function = fndesc.declare_function(self.module) if self.flags.dbg_optnone: attrset = self.function.attributes if "alwaysinline" not in attrset: From d699f1c74bf0471d4b1b061a2bee5e877eac3f1e Mon Sep 17 00:00:00 2001 From: Michael Wang Date: Wed, 21 Jan 2026 10:12:45 -0800 Subject: [PATCH 08/18] remove CABI optional return comment - no-op --- numba_cuda/numba/cuda/core/imputils.py | 1 - 1 file changed, 1 deletion(-) diff --git a/numba_cuda/numba/cuda/core/imputils.py b/numba_cuda/numba/cuda/core/imputils.py index 90a507bad..2517bb17a 100644 --- a/numba_cuda/numba/cuda/core/imputils.py +++ b/numba_cuda/numba/cuda/core/imputils.py @@ -226,7 +226,6 @@ def imp(context, builder, sig, args): assert sig.return_type == fndesc.restype # Reconstruct optional return type - # XXX: What do we do now when we use CABI? retval = fix_returning_optional(context, builder, sig, status, retval) # If the data representations don't match up if retval.type != context.get_value_type(sig.return_type): From d434e371afcea3141718beb15af381c66528b257 Mon Sep 17 00:00:00 2001 From: Michael Wang Date: Wed, 21 Jan 2026 10:13:12 -0800 Subject: [PATCH 09/18] cleanups --- numba_cuda/numba/cuda/core/typed_passes.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/numba_cuda/numba/cuda/core/typed_passes.py b/numba_cuda/numba/cuda/core/typed_passes.py index 3dbacdc1b..071fa5fa0 100644 --- a/numba_cuda/numba/cuda/core/typed_passes.py +++ b/numba_cuda/numba/cuda/core/typed_passes.py @@ -335,8 +335,6 @@ def run_pass(self, state): else: mangler = targetctx.mangler - # mangler = targetctx.mangler - msg = "Function %s failed at nopython mode lowering" % ( state.func_id.func_name, ) From 45f90f691d7188662983a1f0fc949786ced1dd05 Mon Sep 17 00:00:00 2001 From: Michael Wang Date: Wed, 21 Jan 2026 10:43:16 -0800 Subject: [PATCH 10/18] move mangler to callconv --- numba_cuda/numba/cuda/core/callconv.py | 13 +++++++++++++ numba_cuda/numba/cuda/core/typed_passes.py | 7 ++----- numba_cuda/numba/cuda/target.py | 14 +++----------- 3 files changed, 18 insertions(+), 16 deletions(-) diff --git a/numba_cuda/numba/cuda/core/callconv.py b/numba_cuda/numba/cuda/core/callconv.py index 7cffbbf73..44f5e75b6 100644 --- a/numba_cuda/numba/cuda/core/callconv.py +++ b/numba_cuda/numba/cuda/core/callconv.py @@ -3,6 +3,7 @@ from numba.cuda import types from numba.cuda import cgutils +from numba.cuda import itanium_mangler from collections import namedtuple from llvmlite import ir @@ -161,6 +162,11 @@ def _get_arg_packer(self, argtypes): """ return self.context.get_arg_packer(argtypes) + def mangler(self, name, argtypes, *, abi_tags=(), uid=None): + return itanium_mangler.mangle( + name, argtypes, abi_tags=abi_tags, uid=uid + ) + class MinimalCallConv(BaseCallConv): """ @@ -421,6 +427,13 @@ def call_function(self, builder, callee, resty, argtys, args): def get_return_type(self, ty): return self.context.data_model_manager[ty].get_return_type() + def mangler(self, name, argtypes, *, abi_tags=None, uid=None): + if name.startswith(".NumbaEnv."): + # return itanium_mangler.mangle(name, argtypes, abi_tags=abi_tags, uid=uid) + func_name = name.split(".")[-1] + return f"_ZN08NumbaEnv{func_name}" + return name.split(".")[-1] + class ErrorModel(object): def __init__(self, call_conv): diff --git a/numba_cuda/numba/cuda/core/typed_passes.py b/numba_cuda/numba/cuda/core/typed_passes.py index 071fa5fa0..d6463ca13 100644 --- a/numba_cuda/numba/cuda/core/typed_passes.py +++ b/numba_cuda/numba/cuda/core/typed_passes.py @@ -31,7 +31,7 @@ compute_cfg_from_blocks, is_operator_or_getitem, ) -from numba.cuda.core.callconv import CUDACABICallConv, CUDACallConv +from numba.cuda.core.callconv import CUDACallConv from numba.cuda.core import postproc, rewrites, funcdesc, config @@ -330,10 +330,7 @@ def run_pass(self, state): if call_conv is None: call_conv = CUDACallConv(state.targetctx) - if isinstance(call_conv, CUDACABICallConv): - mangler = targetctx.c_abi_mangler - else: - mangler = targetctx.mangler + mangler = call_conv.mangler msg = "Function %s failed at nopython mode lowering" % ( state.func_id.func_name, diff --git a/numba_cuda/numba/cuda/target.py b/numba_cuda/numba/cuda/target.py index d8ee64adf..44a7d10b2 100644 --- a/numba_cuda/numba/cuda/target.py +++ b/numba_cuda/numba/cuda/target.py @@ -269,17 +269,9 @@ def nonconst_module_attrs(self): def call_conv(self): return self.fndesc.call_conv - def mangler(self, name, argtypes, *, abi_tags=(), uid=None): - return itanium_mangler.mangle( - name, argtypes, abi_tags=abi_tags, uid=uid - ) - - def c_abi_mangler(self, name, argtypes, *, abi_tags=None, uid=None): - if name.startswith(".NumbaEnv."): - # return itanium_mangler.mangle(name, argtypes, abi_tags=abi_tags, uid=uid) - func_name = name.split(".")[-1] - return f"_ZN08NumbaEnv{func_name}" - return name.split(".")[-1] + @property + def mangler(self): + return self.fndesc.call_conv.mangler def make_constant_array(self, builder, aryty, arr): """ From 3475e35b631f8f5e38655b9703cefb1319b22d27 Mon Sep 17 00:00:00 2001 From: Michael Wang Date: Wed, 21 Jan 2026 10:44:04 -0800 Subject: [PATCH 11/18] document abi arg --- numba_cuda/numba/cuda/decorators.py | 1 + 1 file changed, 1 insertion(+) diff --git a/numba_cuda/numba/cuda/decorators.py b/numba_cuda/numba/cuda/decorators.py index f63a8f68b..c414152eb 100644 --- a/numba_cuda/numba/cuda/decorators.py +++ b/numba_cuda/numba/cuda/decorators.py @@ -275,6 +275,7 @@ def declare_device(name, sig, link=None, use_cooperative=False, abi="numba"): :param sig: The Numba signature of the function. :param link: External code to link when calling the function. :param use_cooperative: External code requires cooperative launch. + :param abi: The ABI to use for the function. "numba" for Numba ABI, "c" for C ABI. """ if abi not in ("numba", "c"): raise NotImplementedError(f"Unsupported ABI: {abi}") From 1f39f7bf642b3cb2f476e739efd0d50ce2d307d6 Mon Sep 17 00:00:00 2001 From: Michael Wang Date: Wed, 21 Jan 2026 10:45:39 -0800 Subject: [PATCH 12/18] remove stale comments --- numba_cuda/numba/cuda/tests/cudapy/test_device_func.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/numba_cuda/numba/cuda/tests/cudapy/test_device_func.py b/numba_cuda/numba/cuda/tests/cudapy/test_device_func.py index 01a2c0b5e..f8fdad056 100644 --- a/numba_cuda/numba/cuda/tests/cudapy/test_device_func.py +++ b/numba_cuda/numba/cuda/tests/cudapy/test_device_func.py @@ -496,8 +496,6 @@ def kernel(r, x): np.testing.assert_equal(r, x * 5) -# XXX: Range from 0 - 2 input arguments -# XXX: void return type? times2_cabi_cu = cuda.CUSource(""" extern "C" __device__ int times2(int a) From b93f046d6cec0d0fd9ba206f5303e9774c6fe491 Mon Sep 17 00:00:00 2001 From: Michael Wang Date: Wed, 21 Jan 2026 11:05:10 -0800 Subject: [PATCH 13/18] add cpointer argument --- .../cuda/tests/cudapy/test_device_func.py | 29 +++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/numba_cuda/numba/cuda/tests/cudapy/test_device_func.py b/numba_cuda/numba/cuda/tests/cudapy/test_device_func.py index f8fdad056..6698e6337 100644 --- a/numba_cuda/numba/cuda/tests/cudapy/test_device_func.py +++ b/numba_cuda/numba/cuda/tests/cudapy/test_device_func.py @@ -530,6 +530,14 @@ def kernel(r, x): } """) +consume_cabi_pointer_cu = cuda.CUSource(""" +extern "C" __device__ +void consume(int *a) +{ + *a = 42; +} +""") + @skip_on_cudasim("External functions unsupported in the simulator") class TestDeclareDeviceCABI(CUDATestCase): @@ -600,6 +608,27 @@ def kernel(r, x): kernel[1, 32](r, x) np.testing.assert_equal(r, x * 2) + def test_declare_device_cabi_pointer_return(self): + ffi = cffi.FFI() + consume = cuda.declare_device( + "consume", + types.void(types.CPointer(types.int32)), + link=consume_cabi_pointer_cu, + abi="c", + ) + + @cuda.jit + def kernel(x): + i = cuda.grid(1) + if i < len(x): + ptr = ffi.from_buffer(x[i:]) + consume(ptr) + + x = np.zeros(10, dtype=np.int32) + + kernel[1, 32](x) + np.testing.assert_equal(x, 42) + if __name__ == "__main__": unittest.main() From 684ad4f82a46078626f2a8cd94c155f4b4ad64f5 Mon Sep 17 00:00:00 2001 From: Michael Wang Date: Thu, 22 Jan 2026 10:55:26 -0800 Subject: [PATCH 14/18] remove context.call_conv property --- numba_cuda/numba/cuda/cgutils.py | 8 ++--- numba_cuda/numba/cuda/core/base.py | 4 +-- numba_cuda/numba/cuda/core/compiler.py | 1 - numba_cuda/numba/cuda/core/imputils.py | 8 ++--- numba_cuda/numba/cuda/core/optional.py | 2 +- numba_cuda/numba/cuda/core/pythonapi.py | 4 +-- numba_cuda/numba/cuda/cpython/builtins.py | 2 +- numba_cuda/numba/cuda/cpython/charseq.py | 6 ++-- numba_cuda/numba/cuda/cpython/iterators.py | 2 +- numba_cuda/numba/cuda/cpython/listobj.py | 14 +++++--- numba_cuda/numba/cuda/cpython/rangeobj.py | 2 +- numba_cuda/numba/cuda/cpython/slicing.py | 4 +-- numba_cuda/numba/cuda/cpython/tupleobj.py | 16 ++++++--- numba_cuda/numba/cuda/lowering.py | 2 +- .../cuda/memory_management/nrt_context.py | 6 ++-- numba_cuda/numba/cuda/misc/gdb_hook.py | 4 +-- numba_cuda/numba/cuda/np/arrayobj.py | 34 ++++++++++++------- numba_cuda/numba/cuda/np/npyimpl.py | 10 +++--- .../cuda/np/polynomial/polynomial_core.py | 4 +-- numba_cuda/numba/cuda/target.py | 4 --- 20 files changed, 77 insertions(+), 60 deletions(-) diff --git a/numba_cuda/numba/cuda/cgutils.py b/numba_cuda/numba/cuda/cgutils.py index 9fbadfff8..aa79662a7 100644 --- a/numba_cuda/numba/cuda/cgutils.py +++ b/numba_cuda/numba/cuda/cgutils.py @@ -763,12 +763,12 @@ def _dbg(): with if_unlikely(builder, out_of_bounds_upper): if config.FULL_TRACEBACKS: _dbg() - context.call_conv.return_user_exc(builder, IndexError, (msg,)) + context.fndesc.call_conv.return_user_exc(builder, IndexError, (msg,)) out_of_bounds_lower = builder.icmp_signed("<", ind, ind.type(0)) with if_unlikely(builder, out_of_bounds_lower): if config.FULL_TRACEBACKS: _dbg() - context.call_conv.return_user_exc(builder, IndexError, (msg,)) + context.fndesc.call_conv.return_user_exc(builder, IndexError, (msg,)) def get_item_pointer2( @@ -935,7 +935,7 @@ def guard_null(context, builder, value, exc_tuple): with builder.if_then(is_scalar_zero(builder, value), likely=False): exc = exc_tuple[0] exc_args = exc_tuple[1:] or None - context.call_conv.return_user_exc(builder, exc, exc_args) + context.fndesc.call_conv.return_user_exc(builder, exc, exc_args) def guard_memory_error(context, builder, pointer, msg=None): @@ -945,7 +945,7 @@ def guard_memory_error(context, builder, pointer, msg=None): assert isinstance(pointer.type, ir.PointerType), pointer.type exc_args = (msg,) if msg else () with builder.if_then(is_null(builder, pointer), likely=False): - context.call_conv.return_user_exc(builder, MemoryError, exc_args) + context.fndesc.call_conv.return_user_exc(builder, MemoryError, exc_args) @contextmanager diff --git a/numba_cuda/numba/cuda/core/base.py b/numba_cuda/numba/cuda/core/base.py index 8fdce6dc3..0afb90bae 100644 --- a/numba_cuda/numba/cuda/core/base.py +++ b/numba_cuda/numba/cuda/core/base.py @@ -962,7 +962,7 @@ def call_internal(self, builder, fndesc, sig, args): builder, fndesc, sig, args ) with cgutils.if_unlikely(builder, status.is_error): - self.call_conv.return_status_propagate(builder, status) + fndesc.call_conv.return_status_propagate(builder, status) res = imputils.fix_returning_optional(self, builder, sig, status, res) return res @@ -974,7 +974,7 @@ def call_internal_no_propagate(self, builder, fndesc, sig, args): # Add call to the generated function llvm_mod = builder.module fn = fndesc.declare_function(llvm_mod) - status, res = self.call_conv.call_function( + status, res = fndesc.call_conv.call_function( builder, fn, sig.return_type, sig.args, args ) return status, res diff --git a/numba_cuda/numba/cuda/core/compiler.py b/numba_cuda/numba/cuda/core/compiler.py index 42e5f136c..0db2d4071 100644 --- a/numba_cuda/numba/cuda/core/compiler.py +++ b/numba_cuda/numba/cuda/core/compiler.py @@ -69,7 +69,6 @@ def _make_subtarget(targetctx, flags): if flags.fastmath: subtargetoptions["fastmath"] = flags.fastmath - # FIXME: should update everywhere uses error_model to use callconv from fundesc call_conv = CUDACallConv(targetctx) error_model = callconv.create_error_model(flags.error_model, call_conv) subtargetoptions["error_model"] = error_model diff --git a/numba_cuda/numba/cuda/core/imputils.py b/numba_cuda/numba/cuda/core/imputils.py index 2517bb17a..31c12a733 100644 --- a/numba_cuda/numba/cuda/core/imputils.py +++ b/numba_cuda/numba/cuda/core/imputils.py @@ -212,7 +212,7 @@ def user_function(fndesc, libs): A wrapper inserting code calling Numba-compiled *fndesc*. """ - def imp(context, builder, sig, args): + def imp(context, builder, sig, args, fndesc=fndesc): func = fndesc.declare_function(builder.module) # env=None assumes this is a nopython function status, retval = fndesc.call_conv.call_function( @@ -221,7 +221,7 @@ def imp(context, builder, sig, args): if status is not None: with cgutils.if_unlikely(builder, status.is_error): - context.call_conv.return_status_propagate(builder, status) + fndesc.call_conv.return_status_propagate(builder, status) assert sig.return_type == fndesc.restype @@ -244,10 +244,10 @@ def user_generator(gendesc, libs): A wrapper inserting code calling Numba-compiled *gendesc*. """ - def imp(context, builder, sig, args): + def imp(context, builder, sig, args, gendesc=gendesc): func = gendesc.declare_function(builder.module) # env=None assumes this is a nopython function - status, retval = context.call_conv.call_function( + status, retval = gendesc.call_conv.call_function( builder, func, gendesc.restype, gendesc.argtypes, args ) # Return raw status for caller to process StopIteration diff --git a/numba_cuda/numba/cuda/core/optional.py b/numba_cuda/numba/cuda/core/optional.py index 8cad47935..e7880e7e0 100644 --- a/numba_cuda/numba/cuda/core/optional.py +++ b/numba_cuda/numba/cuda/core/optional.py @@ -124,6 +124,6 @@ def optional_to_any(context, builder, fromty, toty, val): validbit = cgutils.as_bool_bit(builder, optval.valid) with builder.if_then(builder.not_(validbit), likely=False): msg = "expected %s, got None" % (fromty.type,) - context.call_conv.return_user_exc(builder, TypeError, (msg,)) + context.fndesc.call_conv.return_user_exc(builder, TypeError, (msg,)) return context.cast(builder, optval.data, fromty.type, toty) diff --git a/numba_cuda/numba/cuda/core/pythonapi.py b/numba_cuda/numba/cuda/core/pythonapi.py index 1e4fa5ed1..a97f45b7a 100644 --- a/numba_cuda/numba/cuda/core/pythonapi.py +++ b/numba_cuda/numba/cuda/core/pythonapi.py @@ -232,7 +232,7 @@ def emit_environment_sentry( ) self.builder.ret(self.get_null_object()) else: - self.context.call_conv.return_user_exc( + self.context.fndesc.call_conv.return_user_exc( self.builder, RuntimeError, (f"missing Environment: {debug_msg}",), @@ -1773,7 +1773,7 @@ def call_jit_code(self, func, sig, args): with has_err: builder.store(status.is_error, is_error_ptr) # Set error state in the Python interpreter - self.context.call_conv.raise_error(builder, self, status) + self.context.fndesc.call_conv.raise_error(builder, self, status) with no_err: # Handle returned value res = imputils.fix_returning_optional( diff --git a/numba_cuda/numba/cuda/cpython/builtins.py b/numba_cuda/numba/cuda/cpython/builtins.py index 8a5eeac09..71d833956 100644 --- a/numba_cuda/numba/cuda/cpython/builtins.py +++ b/numba_cuda/numba/cuda/cpython/builtins.py @@ -446,7 +446,7 @@ def next_impl(context, builder, sig, args): res = call_iternext(context, builder, iterty, iterval) with builder.if_then(builder.not_(res.is_valid()), likely=False): - context.call_conv.return_user_exc(builder, StopIteration, ()) + context.fndesc.call_conv.return_user_exc(builder, StopIteration, ()) return res.yielded_value() diff --git a/numba_cuda/numba/cuda/cpython/charseq.py b/numba_cuda/numba/cuda/cpython/charseq.py index d6738a6f1..b964aecc9 100644 --- a/numba_cuda/numba/cuda/cpython/charseq.py +++ b/numba_cuda/numba/cuda/cpython/charseq.py @@ -244,7 +244,7 @@ def unicode_to_bytes_cast(context, builder, fromty, toty, val): src_length = uni_str.length with builder.if_then(notkind1): - context.call_conv.return_user_exc( + context.fndesc.call_conv.return_user_exc( builder, ValueError, ("cannot cast higher than 8-bit unicode_type to bytes",), @@ -315,7 +315,7 @@ def unicode_to_unicode_charseq(context, builder, fromty, toty, val): in_val = builder.zext(builder.load(in_ptr), dstint_t) builder.store(in_val, builder.gep(dst, [loop.index])) else: - context.call_conv.return_user_exc( + context.fndesc.call_conv.return_user_exc( builder, ValueError, ( @@ -331,7 +331,7 @@ def unicode_to_unicode_charseq(context, builder, fromty, toty, val): in_val = builder.zext(builder.load(in_ptr), dstint_t) builder.store(in_val, builder.gep(dst, [loop.index])) else: - context.call_conv.return_user_exc( + context.fndesc.call_conv.return_user_exc( builder, ValueError, ( diff --git a/numba_cuda/numba/cuda/cpython/iterators.py b/numba_cuda/numba/cuda/cpython/iterators.py index df7a87b75..e3606070f 100644 --- a/numba_cuda/numba/cuda/cpython/iterators.py +++ b/numba_cuda/numba/cuda/cpython/iterators.py @@ -164,4 +164,4 @@ def iternext_zip(context, builder, sig, args, result): # noqa: F811 builder, builder.and_(status.is_error, builder.not_(status.is_stop_iteration)), ): - context.call_conv.return_status_propagate(builder, status) + context.fndesc.call_conv.return_status_propagate(builder, status) diff --git a/numba_cuda/numba/cuda/cpython/listobj.py b/numba_cuda/numba/cuda/cpython/listobj.py index ee059a8e7..b5055a840 100644 --- a/numba_cuda/numba/cuda/cpython/listobj.py +++ b/numba_cuda/numba/cuda/cpython/listobj.py @@ -117,7 +117,7 @@ def guard_index(self, idx, msg): Raise an error if the index is out of bounds. """ with self._builder.if_then(self.is_out_of_bounds(idx), likely=False): - self._context.call_conv.return_user_exc( + self._context.fndesc.call_conv.return_user_exc( self._builder, IndexError, (msg,) ) @@ -347,7 +347,7 @@ def allocate(cls, context, builder, list_type, nitems): """ ok, self = cls.allocate_ex(context, builder, list_type, nitems) with builder.if_then(builder.not_(ok), likely=False): - context.call_conv.return_user_exc( + context.fndesc.call_conv.return_user_exc( builder, MemoryError, ("cannot allocate list",) ) return self @@ -384,7 +384,7 @@ def _payload_realloc(new_allocated): ir.Constant(intp_t, payload_size), ) with builder.if_then(ovf, likely=False): - context.call_conv.return_user_exc( + context.fndesc.call_conv.return_user_exc( builder, MemoryError, ("cannot resize list",) ) @@ -654,7 +654,9 @@ def setitem_list(context, builder, sig, args): # noqa: F811 with otherwise: with builder.if_then(builder.icmp_signed("!=", size_delta, zero)): msg = "cannot resize extended list slice with step != 1" - context.call_conv.return_user_exc(builder, ValueError, (msg,)) + context.fndesc.call_conv.return_user_exc( + builder, ValueError, (msg,) + ) with cgutils.for_range_slice_generic( builder, slice.start, slice.stop, slice.step @@ -693,7 +695,9 @@ def delitem_list(context, builder, sig, args): builder.icmp_signed("!=", slice.step, one), likely=False ): msg = "unsupported del list[start:stop:step] with step != 1" - context.call_conv.return_user_exc(builder, NotImplementedError, (msg,)) + context.fndesc.call_conv.return_user_exc( + builder, NotImplementedError, (msg,) + ) # Compute the real stop, e.g. for dest[2:0] start = slice.start diff --git a/numba_cuda/numba/cuda/cpython/rangeobj.py b/numba_cuda/numba/cuda/cpython/rangeobj.py index 00a75cba8..99b7903d1 100644 --- a/numba_cuda/numba/cuda/cpython/rangeobj.py +++ b/numba_cuda/numba/cuda/cpython/rangeobj.py @@ -158,7 +158,7 @@ def from_range_state(cls, context, builder, state): with cgutils.if_unlikely(builder, zero_step): # step shouldn't be zero - context.call_conv.return_user_exc( + context.fndesc.call_conv.return_user_exc( builder, ValueError, ("range() arg 3 must not be zero",) ) diff --git a/numba_cuda/numba/cuda/cpython/slicing.py b/numba_cuda/numba/cuda/cpython/slicing.py index 0899db994..ed9ae163f 100644 --- a/numba_cuda/numba/cuda/cpython/slicing.py +++ b/numba_cuda/numba/cuda/cpython/slicing.py @@ -240,13 +240,13 @@ def slice_indices(context, builder, sig, args): sli = context.make_helper(builder, sig.args[0], args[0]) with builder.if_then(cgutils.is_neg_int(builder, length), likely=False): - context.call_conv.return_user_exc( + context.fndesc.call_conv.return_user_exc( builder, ValueError, ("length should not be negative",) ) with builder.if_then( cgutils.is_scalar_zero(builder, sli.step), likely=False ): - context.call_conv.return_user_exc( + context.fndesc.call_conv.return_user_exc( builder, ValueError, ("slice step cannot be zero",) ) diff --git a/numba_cuda/numba/cuda/cpython/tupleobj.py b/numba_cuda/numba/cuda/cpython/tupleobj.py index 5292d412d..0483a3960 100644 --- a/numba_cuda/numba/cuda/cpython/tupleobj.py +++ b/numba_cuda/numba/cuda/cpython/tupleobj.py @@ -251,7 +251,9 @@ def getitem_typed(context, builder, sig, args): # Always branch and raise IndexError with builder.if_then(cgutils.true_bit): - context.call_conv.return_user_exc(builder, IndexError, errmsg_oob) + context.fndesc.call_conv.return_user_exc( + builder, IndexError, errmsg_oob + ) # This is unreachable in runtime, # but it exists to not terminate the current basicblock. res = context.get_constant_null(sig.return_type) @@ -264,7 +266,9 @@ def getitem_typed(context, builder, sig, args): switch = builder.switch(idx, bbelse) with builder.goto_block(bbelse): - context.call_conv.return_user_exc(builder, IndexError, errmsg_oob) + context.fndesc.call_conv.return_user_exc( + builder, IndexError, errmsg_oob + ) lrtty = context.get_value_type(sig.return_type) voidptrty = context.get_value_type(types.voidptr) @@ -345,7 +349,9 @@ def getitem_unituple(context, builder, sig, args): # Always branch and raise IndexError with builder.if_then(cgutils.true_bit): - context.call_conv.return_user_exc(builder, IndexError, errmsg_oob) + context.fndesc.call_conv.return_user_exc( + builder, IndexError, errmsg_oob + ) # This is unreachable in runtime, # but it exists to not terminate the current basicblock. res = context.get_constant_null(sig.return_type) @@ -357,7 +363,9 @@ def getitem_unituple(context, builder, sig, args): switch = builder.switch(idx, bbelse) with builder.goto_block(bbelse): - context.call_conv.return_user_exc(builder, IndexError, errmsg_oob) + context.fndesc.call_conv.return_user_exc( + builder, IndexError, errmsg_oob + ) lrtty = context.get_value_type(tupty.dtype) with builder.goto_block(bbend): diff --git a/numba_cuda/numba/cuda/lowering.py b/numba_cuda/numba/cuda/lowering.py index 6d68fe82a..7b6afbf90 100644 --- a/numba_cuda/numba/cuda/lowering.py +++ b/numba_cuda/numba/cuda/lowering.py @@ -99,7 +99,7 @@ def __init__(self, context, library, fndesc, func_ir, metadata=None): @property def call_conv(self): - return self.context.call_conv + return self.fndesc.call_conv def init(self): pass diff --git a/numba_cuda/numba/cuda/memory_management/nrt_context.py b/numba_cuda/numba/cuda/memory_management/nrt_context.py index 208412d3b..99c74c42e 100644 --- a/numba_cuda/numba/cuda/memory_management/nrt_context.py +++ b/numba_cuda/numba/cuda/memory_management/nrt_context.py @@ -415,7 +415,7 @@ def get_nrt_api(self, builder): def eh_check(self, builder): """Check if an exception is raised""" ctx = self._context - cc = ctx.call_conv + cc = ctx.fndesc.call_conv # Inspect the excinfo argument on the function trystatus = cc.check_try_status(builder) excinfo = trystatus.excinfo @@ -428,11 +428,11 @@ def eh_check(self, builder): def eh_try(self, builder): """Begin a try-block.""" ctx = self._context - cc = ctx.call_conv + cc = ctx.fndesc.call_conv cc.set_try_status(builder) def eh_end_try(self, builder): """End a try-block""" ctx = self._context - cc = ctx.call_conv + cc = ctx.fndesc.call_conv cc.unset_try_status(builder) diff --git a/numba_cuda/numba/cuda/misc/gdb_hook.py b/numba_cuda/numba/cuda/misc/gdb_hook.py index 2c8bd3c8d..805f952e7 100644 --- a/numba_cuda/numba/cuda/misc/gdb_hook.py +++ b/numba_cuda/numba/cuda/misc/gdb_hook.py @@ -162,14 +162,14 @@ def init_gdb_codegen( invalid_write = builder.icmp_signed(">", stat, int32_t(12)) with builder.if_then(invalid_write, likely=False): msg = "Internal error: `snprintf` buffer would have overflowed." - cgctx.call_conv.return_user_exc(builder, RuntimeError, (msg,)) + cgctx.fndesc.call_conv.return_user_exc(builder, RuntimeError, (msg,)) # fork, check pids etc child_pid = builder.call(fork, tuple()) fork_failed = builder.icmp_signed("==", child_pid, int32_t(-1)) with builder.if_then(fork_failed, likely=False): msg = "Internal error: `fork` failed." - cgctx.call_conv.return_user_exc(builder, RuntimeError, (msg,)) + cgctx.fndesc.call_conv.return_user_exc(builder, RuntimeError, (msg,)) is_child = builder.icmp_signed("==", child_pid, zero_i32t) with builder.if_else(is_child) as (then, orelse): diff --git a/numba_cuda/numba/cuda/np/arrayobj.py b/numba_cuda/numba/cuda/np/arrayobj.py index 66c69920c..ca2af1b77 100644 --- a/numba_cuda/numba/cuda/np/arrayobj.py +++ b/numba_cuda/numba/cuda/np/arrayobj.py @@ -672,7 +672,7 @@ def array_item(context, builder, sig, args): builder.icmp_signed("!=", nitems, nitems.type(1)), likely=False ): msg = "item(): can only convert an array of size 1 to a Python scalar" - context.call_conv.return_user_exc(builder, ValueError, (msg,)) + context.fndesc.call_conv.return_user_exc(builder, ValueError, (msg,)) return load_item(context, builder, aryty, ary.data) @@ -691,7 +691,9 @@ def array_itemset(context, builder, sig, args): builder.icmp_signed("!=", nitems, nitems.type(1)), likely=False ): msg = "itemset(): can only write to an array of size 1" - context.call_conv.return_user_exc(builder, ValueError, (msg,)) + context.fndesc.call_conv.return_user_exc( + builder, ValueError, (msg,) + ) store_item(context, builder, aryty, val, ary.data) return context.get_dummy_value() @@ -1478,7 +1480,9 @@ def _bc_adjust_dimension(context, builder, shapes, strides, target_shape): # Check error with builder.if_then(builder.not_(accepted), likely=False): msg = "cannot broadcast source array for assignment" - context.call_conv.return_user_exc(builder, ValueError, (msg,)) + context.fndesc.call_conv.return_user_exc( + builder, ValueError, (msg,) + ) # Truncate extra shapes, strides shapes = shapes[nd_diff:] strides = strides[nd_diff:] @@ -2426,7 +2430,9 @@ def array_reshape(context, builder, sig, args): with builder.if_then(fail): msg = "incompatible shape for array" - context.call_conv.return_user_exc(builder, NotImplementedError, (msg,)) + context.fndesc.call_conv.return_user_exc( + builder, NotImplementedError, (msg,) + ) ret = make_array(retty)(context, builder) populate_array( @@ -3099,7 +3105,7 @@ def array_view(context, builder, sig, args): with builder.if_then(fail): msg = "new type not compatible with array" - context.call_conv.return_user_exc(builder, ValueError, (msg,)) + context.fndesc.call_conv.return_user_exc(builder, ValueError, (msg,)) res = ret._getvalue() return impl_ret_borrowed(context, builder, sig.return_type, res) @@ -4719,7 +4725,7 @@ def _empty_nd_impl(context, builder, arrtype, shapes): with builder.if_then(overflow, likely=False): # Raise same error as numpy, see: # https://github.com/numpy/numpy/blob/2a488fe76a0f732dc418d03b452caace161673da/numpy/core/src/multiarray/ctors.c#L1095-L1101 # noqa: E501 - context.call_conv.return_user_exc( + context.fndesc.call_conv.return_user_exc( builder, ValueError, ( @@ -4803,7 +4809,7 @@ def safecast_intp(context, builder, src_t, src): elif src_t.width >= intp_width: is_larger = builder.icmp_signed(">", src, maxval) with builder.if_then(is_larger, likely=False): - context.call_conv.return_user_exc( + context.fndesc.call_conv.return_user_exc( builder, ValueError, ("Cannot safely convert value to intp",), @@ -4828,7 +4834,7 @@ def safecast_intp(context, builder, src_t, src): for shape in shapes: is_neg = builder.icmp_signed("<", shape, zero) with cgutils.if_unlikely(builder, is_neg): - context.call_conv.return_user_exc( + context.fndesc.call_conv.return_user_exc( builder, ValueError, ("negative dimensions not allowed",) ) @@ -5758,7 +5764,9 @@ def codegen(context, builder, sig, args): is_incompatible = cgutils.is_not_null(builder, rem) with builder.if_then(is_incompatible, likely=False): msg = "buffer size must be a multiple of element size" - context.call_conv.return_user_exc(builder, ValueError, (msg,)) + context.fndesc.call_conv.return_user_exc( + builder, ValueError, (msg,) + ) shape = cgutils.pack_array(builder, [builder.sdiv(nbytes, ll_itemsize)]) strides = cgutils.pack_array(builder, [ll_itemsize]) @@ -6013,7 +6021,7 @@ def check_sequence_shape(context, builder, seqty, seq, shapes): """ def _fail(): - context.call_conv.return_user_exc( + context.fndesc.call_conv.return_user_exc( builder, ValueError, ("incompatible sequence shape",) ) @@ -6168,7 +6176,7 @@ def _normalize_axis(context, builder, func_name, ndim, axis): ) with builder.if_then(axis_out_of_bounds, likely=False): msg = "%s(): axis out of bounds" % func_name - context.call_conv.return_user_exc(builder, IndexError, (msg,)) + context.fndesc.call_conv.return_user_exc(builder, IndexError, (msg,)) return axis @@ -6488,7 +6496,7 @@ def _np_concatenate(context, builder, arrtys, arrs, retty, axis): is_ok, builder.icmp_signed("==", sh, ret_sh) ) with builder.if_then(builder.not_(is_ok), likely=False): - context.call_conv.return_user_exc( + context.fndesc.call_conv.return_user_exc( builder, ValueError, ( @@ -6537,7 +6545,7 @@ def _np_stack(context, builder, arrtys, arrs, retty, axis): ): is_ok = builder.and_(is_ok, builder.icmp_signed("==", sh, orig_sh)) with builder.if_then(builder.not_(is_ok), likely=False): - context.call_conv.return_user_exc( + context.fndesc.call_conv.return_user_exc( builder, ValueError, ("np.stack(): all input arrays must have the same shape",), diff --git a/numba_cuda/numba/cuda/np/npyimpl.py b/numba_cuda/numba/cuda/np/npyimpl.py index 472f55645..be7c0d007 100644 --- a/numba_cuda/numba/cuda/np/npyimpl.py +++ b/numba_cuda/numba/cuda/np/npyimpl.py @@ -507,7 +507,9 @@ def make_intp_const(val): if loc is not None: msg += '\nFile "%s", line %d, ' % (loc.filename, loc.line) - context.call_conv.return_user_exc(builder, ValueError, (msg,)) + context.fndesc.call_conv.return_user_exc( + builder, ValueError, (msg,) + ) real_array_ty = array_ty.as_array @@ -799,11 +801,11 @@ def generate(self, *args): for val, inty, outty in zip(args, osig.args, isig.args) ] if self.cres.objectmode: - func_type = self.context.call_conv.get_function_type( + func_type = self.context.fndesc.call_conv.get_function_type( types.pyobject, [types.pyobject] * len(isig.args) ) else: - func_type = self.context.call_conv.get_function_type( + func_type = self.context.fndesc.call_conv.get_function_type( isig.return_type, isig.args ) module = self.builder.block.function.module @@ -812,7 +814,7 @@ def generate(self, *args): ) entry_point.attributes.add("alwaysinline") - _, res = self.context.call_conv.call_function( + _, res = self.context.fndesc.call_conv.call_function( self.builder, entry_point, isig.return_type, isig.args, cast_args ) return self.cast(res, isig.return_type, osig.return_type) diff --git a/numba_cuda/numba/cuda/np/polynomial/polynomial_core.py b/numba_cuda/numba/cuda/np/polynomial/polynomial_core.py index a8a28f8f2..e10fef477 100644 --- a/numba_cuda/numba/cuda/np/polynomial/polynomial_core.py +++ b/numba_cuda/numba/cuda/np/polynomial/polynomial_core.py @@ -149,12 +149,12 @@ def to_double(coef): pred2 = builder.icmp_signed("!=", s2, two) with cgutils.if_unlikely(builder, pred1): - context.call_conv.return_user_exc( + context.fndesc.call_conv.return_user_exc( builder, ValueError, ("Domain has wrong number of elements.",) ) with cgutils.if_unlikely(builder, pred2): - context.call_conv.return_user_exc( + context.fndesc.call_conv.return_user_exc( builder, ValueError, ("Window has wrong number of elements.",) ) diff --git a/numba_cuda/numba/cuda/target.py b/numba_cuda/numba/cuda/target.py index 44a7d10b2..2622c1397 100644 --- a/numba_cuda/numba/cuda/target.py +++ b/numba_cuda/numba/cuda/target.py @@ -265,10 +265,6 @@ def nonconst_module_attrs(self): ) return nonconsts_with_mod - @property - def call_conv(self): - return self.fndesc.call_conv - @property def mangler(self): return self.fndesc.call_conv.mangler From 27199d2d24afc2ff55f68446867cd4152d40cc53 Mon Sep 17 00:00:00 2001 From: Michael Wang Date: Thu, 22 Jan 2026 13:16:34 -0800 Subject: [PATCH 15/18] use callconv mangler everywhere --- numba_cuda/numba/cuda/core/generators.py | 2 +- numba_cuda/numba/cuda/cpython/numbers.py | 3 ++- numba_cuda/numba/cuda/lowering.py | 2 +- numba_cuda/numba/cuda/np/math/numbers.py | 3 ++- numba_cuda/numba/cuda/target.py | 4 ---- 5 files changed, 6 insertions(+), 8 deletions(-) diff --git a/numba_cuda/numba/cuda/core/generators.py b/numba_cuda/numba/cuda/core/generators.py index f296d6b04..5a85b4f91 100644 --- a/numba_cuda/numba/cuda/core/generators.py +++ b/numba_cuda/numba/cuda/core/generators.py @@ -76,7 +76,7 @@ def __init__(self, lower): self.geninfo = lower.generator_info self.gentype = self.get_generator_type() self.gendesc = GeneratorDescriptor.from_generator_fndesc( - lower.func_ir, self.fndesc, self.gentype, self.context.mangler + lower.func_ir, self.fndesc, self.gentype, self.call_conv.mangler ) # Helps packing non-omitted arguments into a structure self.arg_packer = self.context.get_data_packer(self.fndesc.argtypes) diff --git a/numba_cuda/numba/cuda/cpython/numbers.py b/numba_cuda/numba/cuda/cpython/numbers.py index d3a24b934..4ffe3ad86 100644 --- a/numba_cuda/numba/cuda/cpython/numbers.py +++ b/numba_cuda/numba/cuda/cpython/numbers.py @@ -671,7 +671,8 @@ def real_divmod(context, builder, x, y): floatty = x.type module = builder.module - fname = context.mangler(".numba.python.rem", [x.type]) + call_conv = context.fndesc.call_conv + fname = call_conv.mangler(".numba.python.rem", [x.type]) fnty = ir.FunctionType(floatty, (floatty, floatty, ir.PointerType(floatty))) fn = cgutils.get_or_insert_function(module, fnty, fname) diff --git a/numba_cuda/numba/cuda/lowering.py b/numba_cuda/numba/cuda/lowering.py index 7b6afbf90..effa6d32c 100644 --- a/numba_cuda/numba/cuda/lowering.py +++ b/numba_cuda/numba/cuda/lowering.py @@ -1197,7 +1197,7 @@ def _lower_call_RecursiveCall(self, fnty, expr, signature): expr.kws, ) rec_ov = fnty.get_overloads(signature.args) - mangler = self.context.mangler or default_mangler + mangler = self.call_conv.mangler or default_mangler abi_tags = self.fndesc.abi_tags mangled_name = mangler( rec_ov.qualname, signature.args, abi_tags=abi_tags, uid=rec_ov.uid diff --git a/numba_cuda/numba/cuda/np/math/numbers.py b/numba_cuda/numba/cuda/np/math/numbers.py index cf0875b9c..b4b20cd49 100644 --- a/numba_cuda/numba/cuda/np/math/numbers.py +++ b/numba_cuda/numba/cuda/np/math/numbers.py @@ -657,7 +657,8 @@ def real_divmod(context, builder, x, y): floatty = x.type module = builder.module - fname = context.mangler(".numba.python.rem", [x.type]) + call_conv = context.fndesc.call_conv + fname = call_conv.mangler(".numba.python.rem", [x.type]) fnty = ir.FunctionType(floatty, (floatty, floatty, ir.PointerType(floatty))) fn = cgutils.get_or_insert_function(module, fnty, fname) diff --git a/numba_cuda/numba/cuda/target.py b/numba_cuda/numba/cuda/target.py index 2622c1397..4141ecf7f 100644 --- a/numba_cuda/numba/cuda/target.py +++ b/numba_cuda/numba/cuda/target.py @@ -265,10 +265,6 @@ def nonconst_module_attrs(self): ) return nonconsts_with_mod - @property - def mangler(self): - return self.fndesc.call_conv.mangler - def make_constant_array(self, builder, aryty, arr): """ Unlike the parent version. This returns a a pointer in the constant From 864a40cb31d49d804d2dc54f3f9c9a0b0046706c Mon Sep 17 00:00:00 2001 From: Michael Wang Date: Thu, 22 Jan 2026 13:18:57 -0800 Subject: [PATCH 16/18] cleanups --- numba_cuda/numba/cuda/core/callconv.py | 1 - 1 file changed, 1 deletion(-) diff --git a/numba_cuda/numba/cuda/core/callconv.py b/numba_cuda/numba/cuda/core/callconv.py index 44f5e75b6..a9489a892 100644 --- a/numba_cuda/numba/cuda/core/callconv.py +++ b/numba_cuda/numba/cuda/core/callconv.py @@ -429,7 +429,6 @@ def get_return_type(self, ty): def mangler(self, name, argtypes, *, abi_tags=None, uid=None): if name.startswith(".NumbaEnv."): - # return itanium_mangler.mangle(name, argtypes, abi_tags=abi_tags, uid=uid) func_name = name.split(".")[-1] return f"_ZN08NumbaEnv{func_name}" return name.split(".")[-1] From 15dcbe506ad1d2b0258534d676aa0beef98fdea5 Mon Sep 17 00:00:00 2001 From: Michael Wang Date: Mon, 26 Jan 2026 09:53:27 -0800 Subject: [PATCH 17/18] address review comments --- numba_cuda/numba/cuda/core/compiler.py | 2 ++ numba_cuda/numba/cuda/core/generators.py | 5 ++++- numba_cuda/numba/cuda/target.py | 9 +++++++++ 3 files changed, 15 insertions(+), 1 deletion(-) diff --git a/numba_cuda/numba/cuda/core/compiler.py b/numba_cuda/numba/cuda/core/compiler.py index 0db2d4071..af8c26ee3 100644 --- a/numba_cuda/numba/cuda/core/compiler.py +++ b/numba_cuda/numba/cuda/core/compiler.py @@ -69,6 +69,8 @@ def _make_subtarget(targetctx, flags): if flags.fastmath: subtargetoptions["fastmath"] = flags.fastmath + # Only the CUDA Calling convention can raise exceptions, so we assume here + # that it is suitable for creating the error model. call_conv = CUDACallConv(targetctx) error_model = callconv.create_error_model(flags.error_model, call_conv) subtargetoptions["error_model"] = error_model diff --git a/numba_cuda/numba/cuda/core/generators.py b/numba_cuda/numba/cuda/core/generators.py index 5a85b4f91..96f22ad7d 100644 --- a/numba_cuda/numba/cuda/core/generators.py +++ b/numba_cuda/numba/cuda/core/generators.py @@ -76,7 +76,10 @@ def __init__(self, lower): self.geninfo = lower.generator_info self.gentype = self.get_generator_type() self.gendesc = GeneratorDescriptor.from_generator_fndesc( - lower.func_ir, self.fndesc, self.gentype, self.call_conv.mangler + lower.func_ir, + self.fndesc, + self.gentype, + self.fndesc.call_conv.mangler, ) # Helps packing non-omitted arguments into a structure self.arg_packer = self.context.get_data_packer(self.fndesc.argtypes) diff --git a/numba_cuda/numba/cuda/target.py b/numba_cuda/numba/cuda/target.py index 4141ecf7f..3909ac003 100644 --- a/numba_cuda/numba/cuda/target.py +++ b/numba_cuda/numba/cuda/target.py @@ -265,6 +265,15 @@ def nonconst_module_attrs(self): ) return nonconsts_with_mod + @property + def call_conv(self): + warnings.warn( + "Context.call_conv is deprecated. " + "Use FunctionDescriptor.call_conv instead.", + DeprecationWarning, + ) + return self.fndesc.call_conv + def make_constant_array(self, builder, aryty, arr): """ Unlike the parent version. This returns a a pointer in the constant From b46435a7df5e05aad816de5df554d68e3cbad996 Mon Sep 17 00:00:00 2001 From: Michael Wang Date: Tue, 3 Feb 2026 11:37:20 -0800 Subject: [PATCH 18/18] ignore call_conv deprecation warning --- ci/test_thirdparty_awkward.sh | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/ci/test_thirdparty_awkward.sh b/ci/test_thirdparty_awkward.sh index 6349ac1c2..b1ba442f3 100755 --- a/ci/test_thirdparty_awkward.sh +++ b/ci/test_thirdparty_awkward.sh @@ -74,6 +74,21 @@ index 39080a34..0eb3940f 100644 array = rng.integers(50, size=1000) EOF +patch -p1 <<'EOF' +diff --git a/pyproject.toml b/pyproject.toml +index 78ecfba9..80a25474 100644 +--- a/pyproject.toml ++++ b/pyproject.toml +@@ -136,6 +136,7 @@ filterwarnings = [ + "ignore:.*np\\.MachAr.*:DeprecationWarning", + "ignore:module 'sre_.*' is deprecated:DeprecationWarning", + "ignore:Jitify is performing a one-time only warm-up", ++ "ignore:Context.call_conv is deprecated.", + ] + log_cli_level = "INFO" + testpaths = ["tests", "tests-cuda", "tests-cuda-kernels", "tests-cuda-kernels-explicit"] +EOF + rapids-logger "Generate awkward tests" nox -s prepare -- --tests